Diffstat (limited to 'drivers/infiniband/ulp')
-rw-r--r--  drivers/infiniband/ulp/Makefile                        |    1
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib.h                   |   50
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_cm.c                |  135
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_ethtool.c           |   67
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_fs.c                |   13
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_ib.c                |  375
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_main.c              |  576
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_multicast.c         |  130
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_netlink.c           |   23
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_verbs.c             |   64
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_vlan.c              |   36
-rw-r--r--  drivers/infiniband/ulp/iser/iscsi_iser.c               |   14
-rw-r--r--  drivers/infiniband/ulp/iser/iscsi_iser.h               |    2
-rw-r--r--  drivers/infiniband/ulp/iser/iser_initiator.c           |    8
-rw-r--r--  drivers/infiniband/ulp/iser/iser_verbs.c               |   20
-rw-r--r--  drivers/infiniband/ulp/isert/ib_isert.c                |   69
-rw-r--r--  drivers/infiniband/ulp/isert/ib_isert.h                |    3
-rw-r--r--  drivers/infiniband/ulp/opa_vnic/Kconfig                |    8
-rw-r--r--  drivers/infiniband/ulp/opa_vnic/Makefile               |    7
-rw-r--r--  drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c       |  475
-rw-r--r--  drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h       |  489
-rw-r--r--  drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c     |  187
-rw-r--r--  drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h    |  329
-rw-r--r--  drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c      |  387
-rw-r--r--  drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c        | 1056
-rw-r--r--  drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c  |  390
-rw-r--r--  drivers/infiniband/ulp/srp/ib_srp.c                    |  109
-rw-r--r--  drivers/infiniband/ulp/srp/ib_srp.h                    |    3
-rw-r--r--  drivers/infiniband/ulp/srpt/ib_srpt.c                  |  159
-rw-r--r--  drivers/infiniband/ulp/srpt/ib_srpt.h                  |   18
30 files changed, 4453 insertions, 750 deletions
diff --git a/drivers/infiniband/ulp/Makefile b/drivers/infiniband/ulp/Makefile
index f3c7dcf03098..c28af1823a2d 100644
--- a/drivers/infiniband/ulp/Makefile
+++ b/drivers/infiniband/ulp/Makefile
@@ -3,3 +3,4 @@ obj-$(CONFIG_INFINIBAND_SRP) += srp/
obj-$(CONFIG_INFINIBAND_SRPT) += srpt/
obj-$(CONFIG_INFINIBAND_ISER) += iser/
obj-$(CONFIG_INFINIBAND_ISERT) += isert/
+obj-$(CONFIG_INFINIBAND_OPA_VNIC) += opa_vnic/
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index da12717a3eb7..ff50a7bd66d8 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -52,7 +52,6 @@
#include <rdma/ib_pack.h>
#include <rdma/ib_sa.h>
#include <linux/sched.h>
-
/* constants */
enum ipoib_flush_level {
@@ -153,6 +152,13 @@ static inline void skb_add_pseudo_hdr(struct sk_buff *skb)
skb_pull(skb, IPOIB_HARD_LEN);
}
+static inline struct ipoib_dev_priv *ipoib_priv(const struct net_device *dev)
+{
+ struct rdma_netdev *rn = netdev_priv(dev);
+
+ return rn->clnt_priv;
+}
+
/* Used for all multicast joins (broadcast, IPv4 mcast and IPv6 mcast) */
struct ipoib_mcast {
struct ib_sa_mcmember_rec mcmember;
@@ -404,6 +410,7 @@ struct ipoib_dev_priv {
struct timer_list poll_timer;
unsigned max_send_sge;
bool sm_fullmember_sendonly_support;
+ const struct net_device_ops *rn_ops;
};
struct ipoib_ah {
@@ -416,7 +423,7 @@ struct ipoib_ah {
struct ipoib_path {
struct net_device *dev;
- struct ib_sa_path_rec pathrec;
+ struct sa_path_rec pathrec;
struct ipoib_ah *ah;
struct sk_buff_head queue;
@@ -472,7 +479,7 @@ void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr);
void ipoib_send_comp_handler(struct ib_cq *cq, void *dev_ptr);
struct ipoib_ah *ipoib_create_ah(struct net_device *dev,
- struct ib_pd *pd, struct ib_ah_attr *attr);
+ struct ib_pd *pd, struct rdma_ah_attr *attr);
void ipoib_free_ah(struct kref *kref);
static inline void ipoib_put_ah(struct ipoib_ah *ah)
{
@@ -482,27 +489,28 @@ int ipoib_open(struct net_device *dev);
int ipoib_add_pkey_attr(struct net_device *dev);
int ipoib_add_umcast_attr(struct net_device *dev);
-void ipoib_send(struct net_device *dev, struct sk_buff *skb,
- struct ipoib_ah *address, u32 qpn);
+int ipoib_send(struct net_device *dev, struct sk_buff *skb,
+ struct ib_ah *address, u32 dqpn);
void ipoib_reap_ah(struct work_struct *work);
struct ipoib_path *__path_find(struct net_device *dev, void *gid);
void ipoib_mark_paths_invalid(struct net_device *dev);
void ipoib_flush_paths(struct net_device *dev);
-int ipoib_check_sm_sendonly_fullmember_support(struct ipoib_dev_priv *priv);
-struct ipoib_dev_priv *ipoib_intf_alloc(const char *format);
-
-int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
+struct ipoib_dev_priv *ipoib_intf_alloc(struct ib_device *hca, u8 port,
+ const char *format);
+void ipoib_ib_tx_timer_func(unsigned long ctx);
void ipoib_ib_dev_flush_light(struct work_struct *work);
void ipoib_ib_dev_flush_normal(struct work_struct *work);
void ipoib_ib_dev_flush_heavy(struct work_struct *work);
void ipoib_pkey_event(struct work_struct *work);
void ipoib_ib_dev_cleanup(struct net_device *dev);
+int ipoib_ib_dev_open_default(struct net_device *dev);
int ipoib_ib_dev_open(struct net_device *dev);
-int ipoib_ib_dev_up(struct net_device *dev);
-int ipoib_ib_dev_down(struct net_device *dev);
int ipoib_ib_dev_stop(struct net_device *dev);
+void ipoib_ib_dev_up(struct net_device *dev);
+void ipoib_ib_dev_down(struct net_device *dev);
+int ipoib_ib_dev_stop_default(struct net_device *dev);
void ipoib_pkey_dev_check_presence(struct net_device *dev);
int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
@@ -513,7 +521,7 @@ void ipoib_mcast_carrier_on_task(struct work_struct *work);
void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb);
void ipoib_mcast_restart_task(struct work_struct *work);
-int ipoib_mcast_start_thread(struct net_device *dev);
+void ipoib_mcast_start_thread(struct net_device *dev);
int ipoib_mcast_stop_thread(struct net_device *dev);
void ipoib_mcast_dev_down(struct net_device *dev);
@@ -562,8 +570,10 @@ void ipoib_path_iter_read(struct ipoib_path_iter *iter,
struct ipoib_path *path);
#endif
-int ipoib_mcast_attach(struct net_device *dev, u16 mlid,
- union ib_gid *mgid, int set_qkey);
+int ipoib_mcast_attach(struct net_device *dev, struct ib_device *hca,
+ union ib_gid *mgid, u16 mlid, int set_qkey, u32 qkey);
+int ipoib_mcast_detach(struct net_device *dev, struct ib_device *hca,
+ union ib_gid *mgid, u16 mlid);
void ipoib_mcast_remove_list(struct list_head *remove_list);
void ipoib_check_and_add_mcast_sendonly(struct ipoib_dev_priv *priv, u8 *mgid,
struct list_head *remove_list);
@@ -587,13 +597,13 @@ void __exit ipoib_netlink_fini(void);
void ipoib_set_umcast(struct net_device *ndev, int umcast_val);
int ipoib_set_mode(struct net_device *dev, const char *buf);
-void ipoib_setup(struct net_device *dev);
+void ipoib_setup_common(struct net_device *dev);
void ipoib_pkey_open(struct ipoib_dev_priv *priv);
void ipoib_drain_cq(struct net_device *dev);
void ipoib_set_ethtool_ops(struct net_device *dev);
-int ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca);
+void ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca);
#define IPOIB_FLAGS_RC 0x80
#define IPOIB_FLAGS_UC 0x40
@@ -607,14 +617,14 @@ extern int ipoib_max_conn_qp;
static inline int ipoib_cm_admin_enabled(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
return IPOIB_CM_SUPPORTED(dev->dev_addr) &&
test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
}
static inline int ipoib_cm_enabled(struct net_device *dev, u8 *hwaddr)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
return IPOIB_CM_SUPPORTED(hwaddr) &&
test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
}
@@ -637,13 +647,13 @@ static inline void ipoib_cm_set(struct ipoib_neigh *neigh, struct ipoib_cm_tx *t
static inline int ipoib_cm_has_srq(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
return !!priv->cm.srq;
}
static inline unsigned int ipoib_cm_max_mtu(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
return priv->cm.max_cm_mtu;
}
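
A note on the conversions above: after this change netdev_priv() no longer returns the IPoIB private data directly. It returns the rdma_netdev wrapper, whose clnt_priv field points at struct ipoib_dev_priv, which is why every netdev_priv() call in the inline helpers is rewritten as ipoib_priv(). A minimal sketch of the indirection (the demo function name is illustrative, not from the patch):

static void ipoib_priv_demo(struct net_device *dev)
{
	/* netdev_priv() yields the rdma_netdev wrapper ... */
	struct rdma_netdev *rn = netdev_priv(dev);

	/* ... and the client-private IPoIB state hangs off it */
	struct ipoib_dev_priv *priv = rn->clnt_priv;

	/* equivalent, via the helper added in this hunk */
	WARN_ON(priv != ipoib_priv(dev));
}
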
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 096c4f6fbd65..f87d104837dc 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -38,6 +38,8 @@
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/moduleparam.h>
+#include <linux/sched/signal.h>
+#include <linux/sched/mm.h>
#include "ipoib.h"
@@ -91,7 +93,7 @@ static void ipoib_cm_dma_unmap_rx(struct ipoib_dev_priv *priv, int frags,
static int ipoib_cm_post_receive_srq(struct net_device *dev, int id)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ib_recv_wr *bad_wr;
int i, ret;
@@ -117,7 +119,7 @@ static int ipoib_cm_post_receive_nonsrq(struct net_device *dev,
struct ib_recv_wr *wr,
struct ib_sge *sge, int id)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ib_recv_wr *bad_wr;
int i, ret;
@@ -144,7 +146,7 @@ static struct sk_buff *ipoib_cm_alloc_rx_skb(struct net_device *dev,
u64 mapping[IPOIB_CM_RX_SG],
gfp_t gfp)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct sk_buff *skb;
int i;
@@ -195,7 +197,7 @@ partial_error:
static void ipoib_cm_free_rx_ring(struct net_device *dev,
struct ipoib_cm_rx_buf *rx_ring)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
int i;
for (i = 0; i < ipoib_recvq_size; ++i)
@@ -234,7 +236,7 @@ static void ipoib_cm_start_rx_drain(struct ipoib_dev_priv *priv)
static void ipoib_cm_rx_event_handler(struct ib_event *event, void *ctx)
{
struct ipoib_cm_rx *p = ctx;
- struct ipoib_dev_priv *priv = netdev_priv(p->dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(p->dev);
unsigned long flags;
if (event->event != IB_EVENT_QP_LAST_WQE_REACHED)
@@ -250,7 +252,7 @@ static void ipoib_cm_rx_event_handler(struct ib_event *event, void *ctx)
static struct ib_qp *ipoib_cm_create_rx_qp(struct net_device *dev,
struct ipoib_cm_rx *p)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ib_qp_init_attr attr = {
.event_handler = ipoib_cm_rx_event_handler,
.send_cq = priv->recv_cq, /* For drain WR */
@@ -275,7 +277,7 @@ static int ipoib_cm_modify_rx_qp(struct net_device *dev,
struct ib_cm_id *cm_id, struct ib_qp *qp,
unsigned psn)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ib_qp_attr qp_attr;
int qp_attr_mask, ret;
@@ -330,7 +332,7 @@ static void ipoib_cm_init_rx_wr(struct net_device *dev,
struct ib_recv_wr *wr,
struct ib_sge *sge)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
int i;
for (i = 0; i < priv->cm.num_frags; ++i)
@@ -348,7 +350,7 @@ static void ipoib_cm_init_rx_wr(struct net_device *dev,
static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_id,
struct ipoib_cm_rx *rx)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct {
struct ib_recv_wr wr;
struct ib_sge sge[IPOIB_CM_RX_SG];
@@ -363,7 +365,7 @@ static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_i
t = kmalloc(sizeof *t, GFP_KERNEL);
if (!t) {
ret = -ENOMEM;
- goto err_free;
+ goto err_free_1;
}
ipoib_cm_init_rx_wr(dev, &t->wr, t->sge);
@@ -410,6 +412,8 @@ err_count:
err_free:
kfree(t);
+
+err_free_1:
ipoib_cm_free_rx_ring(dev, rx->rx_ring);
return ret;
@@ -419,7 +423,7 @@ static int ipoib_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id,
struct ib_qp *qp, struct ib_cm_req_event_param *req,
unsigned psn)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_cm_data data = {};
struct ib_cm_rep_param rep = {};
@@ -439,7 +443,7 @@ static int ipoib_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id,
static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
{
struct net_device *dev = cm_id->context;
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_cm_rx *p;
unsigned psn;
int ret;
@@ -512,7 +516,7 @@ static int ipoib_cm_rx_handler(struct ib_cm_id *cm_id,
/* Fall through */
case IB_CM_REJ_RECEIVED:
p = cm_id->context;
- priv = netdev_priv(p->dev);
+ priv = ipoib_priv(p->dev);
if (ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE))
ipoib_warn(priv, "unable to move qp to error state\n");
/* Fall through */
@@ -556,7 +560,7 @@ static void skb_put_frags(struct sk_buff *skb, unsigned int hdr_space,
void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_cm_rx_buf *rx_ring;
unsigned int wr_id = wc->wr_id & ~(IPOIB_OP_CM | IPOIB_OP_RECV);
struct sk_buff *skb, *newskb;
@@ -705,7 +709,7 @@ static inline int post_send(struct ipoib_dev_priv *priv,
void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_tx_buf *tx_req;
int rc;
unsigned usable_sge = tx->max_send_sge - !!skb_headlen(skb);
@@ -783,7 +787,7 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_
void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_cm_tx *tx = wc->qp->qp_context;
unsigned int wr_id = wc->wr_id & ~IPOIB_OP_CM;
struct ipoib_tx_buf *tx_req;
@@ -820,9 +824,12 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
wc->status != IB_WC_WR_FLUSH_ERR) {
struct ipoib_neigh *neigh;
- ipoib_dbg(priv, "failed cm send event "
- "(status=%d, wrid=%d vend_err %x)\n",
- wc->status, wr_id, wc->vendor_err);
+ if (wc->status != IB_WC_RNR_RETRY_EXC_ERR)
+ ipoib_warn(priv, "failed cm send event (status=%d, wrid=%d vend_err %x)\n",
+ wc->status, wr_id, wc->vendor_err);
+ else
+ ipoib_dbg(priv, "failed cm send event (status=%d, wrid=%d vend_err %x)\n",
+ wc->status, wr_id, wc->vendor_err);
spin_lock_irqsave(&priv->lock, flags);
neigh = tx->neigh;
@@ -849,7 +856,7 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
int ipoib_cm_dev_open(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
int ret;
if (!IPOIB_CM_SUPPORTED(dev->dev_addr))
@@ -881,7 +888,7 @@ err_cm:
static void ipoib_cm_free_rx_reap_list(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_cm_rx *rx, *n;
LIST_HEAD(list);
@@ -904,7 +911,7 @@ static void ipoib_cm_free_rx_reap_list(struct net_device *dev)
void ipoib_cm_dev_stop(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_cm_rx *p;
unsigned long begin;
int ret;
@@ -948,7 +955,7 @@ void ipoib_cm_dev_stop(struct net_device *dev)
break;
}
spin_unlock_irq(&priv->lock);
- msleep(1);
+ usleep_range(1000, 2000);
ipoib_drain_cq(dev);
spin_lock_irq(&priv->lock);
}
@@ -963,7 +970,7 @@ void ipoib_cm_dev_stop(struct net_device *dev)
static int ipoib_cm_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
{
struct ipoib_cm_tx *p = cm_id->context;
- struct ipoib_dev_priv *priv = netdev_priv(p->dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(p->dev);
struct ipoib_cm_data *data = event->private_data;
struct sk_buff_head skqueue;
struct ib_qp_attr qp_attr;
@@ -1015,9 +1022,10 @@ static int ipoib_cm_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even
while ((skb = __skb_dequeue(&skqueue))) {
skb->dev = p->dev;
- if (dev_queue_xmit(skb))
- ipoib_warn(priv, "dev_queue_xmit failed "
- "to requeue packet\n");
+ ret = dev_queue_xmit(skb);
+ if (ret)
+ ipoib_warn(priv, "%s:dev_queue_xmit failed to re-queue packet, ret:%d\n",
+ __func__, ret);
}
ret = ib_send_cm_rtu(cm_id, NULL, 0);
@@ -1030,7 +1038,7 @@ static int ipoib_cm_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even
static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_cm_tx *tx)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ib_qp_init_attr attr = {
.send_cq = priv->recv_cq,
.recv_cq = priv->recv_cq,
@@ -1040,9 +1048,8 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_
.sq_sig_type = IB_SIGNAL_ALL_WR,
.qp_type = IB_QPT_RC,
.qp_context = tx,
- .create_flags = IB_QP_CREATE_USE_GFP_NOIO
+ .create_flags = 0
};
-
struct ib_qp *tx_qp;
if (dev->features & NETIF_F_SG)
@@ -1050,10 +1057,6 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_
min_t(u32, priv->ca->attrs.max_sge, MAX_SKB_FRAGS + 1);
tx_qp = ib_create_qp(priv->pd, &attr);
- if (PTR_ERR(tx_qp) == -EINVAL) {
- attr.create_flags &= ~IB_QP_CREATE_USE_GFP_NOIO;
- tx_qp = ib_create_qp(priv->pd, &attr);
- }
tx->max_send_sge = attr.cap.max_send_sge;
return tx_qp;
}
@@ -1061,9 +1064,9 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_
static int ipoib_cm_send_req(struct net_device *dev,
struct ib_cm_id *id, struct ib_qp *qp,
u32 qpn,
- struct ib_sa_path_rec *pathrec)
+ struct sa_path_rec *pathrec)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_cm_data data = {};
struct ib_cm_req_param req = {};
@@ -1098,7 +1101,7 @@ static int ipoib_cm_send_req(struct net_device *dev,
static int ipoib_cm_modify_tx_init(struct net_device *dev,
struct ib_cm_id *cm_id, struct ib_qp *qp)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ib_qp_attr qp_attr;
int qp_attr_mask, ret;
ret = ib_find_pkey(priv->ca, priv->port, priv->pkey, &qp_attr.pkey_index);
@@ -1121,13 +1124,14 @@ static int ipoib_cm_modify_tx_init(struct net_device *dev,
}
static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn,
- struct ib_sa_path_rec *pathrec)
+ struct sa_path_rec *pathrec)
{
- struct ipoib_dev_priv *priv = netdev_priv(p->dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(p->dev);
+ unsigned int noio_flag;
int ret;
- p->tx_ring = __vmalloc(ipoib_sendq_size * sizeof *p->tx_ring,
- GFP_NOIO, PAGE_KERNEL);
+ noio_flag = memalloc_noio_save();
+ p->tx_ring = vzalloc(ipoib_sendq_size * sizeof(*p->tx_ring));
if (!p->tx_ring) {
ret = -ENOMEM;
goto err_tx;
@@ -1135,9 +1139,10 @@ static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn,
memset(p->tx_ring, 0, ipoib_sendq_size * sizeof *p->tx_ring);
p->qp = ipoib_cm_create_tx_qp(p->dev, p);
+ memalloc_noio_restore(noio_flag);
if (IS_ERR(p->qp)) {
ret = PTR_ERR(p->qp);
- ipoib_warn(priv, "failed to allocate tx qp: %d\n", ret);
+ ipoib_warn(priv, "failed to create tx qp: %d\n", ret);
goto err_qp;
}
@@ -1151,13 +1156,13 @@ static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn,
ret = ipoib_cm_modify_tx_init(p->dev, p->id, p->qp);
if (ret) {
ipoib_warn(priv, "failed to modify tx qp to rtr: %d\n", ret);
- goto err_modify;
+ goto err_modify_send;
}
ret = ipoib_cm_send_req(p->dev, p->id, p->qp, qpn, pathrec);
if (ret) {
ipoib_warn(priv, "failed to send cm req: %d\n", ret);
- goto err_send_cm;
+ goto err_modify_send;
}
ipoib_dbg(priv, "Request connection 0x%x for gid %pI6 qpn 0x%x\n",
@@ -1165,8 +1170,7 @@ static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn,
return 0;
-err_send_cm:
-err_modify:
+err_modify_send:
ib_destroy_cm_id(p->id);
err_id:
p->id = NULL;
@@ -1180,7 +1184,7 @@ err_tx:
static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p)
{
- struct ipoib_dev_priv *priv = netdev_priv(p->dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(p->dev);
struct ipoib_tx_buf *tx_req;
unsigned long begin;
@@ -1200,7 +1204,7 @@ static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p)
goto timeout;
}
- msleep(1);
+ usleep_range(1000, 2000);
}
}
@@ -1230,7 +1234,7 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
struct ib_cm_event *event)
{
struct ipoib_cm_tx *tx = cm_id->context;
- struct ipoib_dev_priv *priv = netdev_priv(tx->dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(tx->dev);
struct net_device *dev = priv->dev;
struct ipoib_neigh *neigh;
unsigned long flags;
@@ -1281,7 +1285,7 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path *path,
struct ipoib_neigh *neigh)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_cm_tx *tx;
tx = kzalloc(sizeof *tx, GFP_ATOMIC);
@@ -1300,7 +1304,7 @@ struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path
void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx)
{
- struct ipoib_dev_priv *priv = netdev_priv(tx->dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(tx->dev);
unsigned long flags;
if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
spin_lock_irqsave(&priv->lock, flags);
@@ -1326,7 +1330,7 @@ static void ipoib_cm_tx_start(struct work_struct *work)
struct ipoib_path *path;
int ret;
- struct ib_sa_path_rec pathrec;
+ struct sa_path_rec pathrec;
u32 qpn;
netif_tx_lock_bh(dev);
@@ -1388,7 +1392,7 @@ static void ipoib_cm_tx_reap(struct work_struct *work)
while (!list_empty(&priv->cm.reap_list)) {
p = list_entry(priv->cm.reap_list.next, typeof(*p), list);
- list_del(&p->list);
+ list_del_init(&p->list);
spin_unlock_irqrestore(&priv->lock, flags);
netif_tx_unlock_bh(dev);
ipoib_cm_tx_destroy(p);
@@ -1435,7 +1439,7 @@ static void ipoib_cm_skb_reap(struct work_struct *work)
void ipoib_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb,
unsigned int mtu)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
int e = skb_queue_empty(&priv->cm.skb_queue);
if (skb_dst(skb))
@@ -1484,7 +1488,8 @@ static void ipoib_cm_stale_task(struct work_struct *work)
static ssize_t show_mode(struct device *d, struct device_attribute *attr,
char *buf)
{
- struct ipoib_dev_priv *priv = netdev_priv(to_net_dev(d));
+ struct net_device *dev = to_net_dev(d);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
if (test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags))
return sprintf(buf, "connected\n");
@@ -1497,7 +1502,7 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr,
{
struct net_device *dev = to_net_dev(d);
int ret;
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
if (test_bit(IPOIB_FLAG_GOING_DOWN, &priv->flags))
return -EPERM;
@@ -1507,12 +1512,14 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr,
ret = ipoib_set_mode(dev, buf);
- rtnl_unlock();
-
- if (!ret)
- return count;
+	/* ipoib_set_mode() normally returns with the rtnl lock still
+	 * held; only when it returns -EBUSY did it fail to retake the
+	 * lock, and then there is nothing to unlock
+ */
+ if (ret != -EBUSY)
+ rtnl_unlock();
- return ret;
+ return (!ret || ret == -EBUSY) ? count : ret;
}
static DEVICE_ATTR(mode, S_IWUSR | S_IRUGO, show_mode, set_mode);
@@ -1524,7 +1531,7 @@ int ipoib_cm_add_mode_attr(struct net_device *dev)
static void ipoib_cm_create_srq(struct net_device *dev, int max_sge)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ib_srq_init_attr srq_init_attr = {
.srq_type = IB_SRQT_BASIC,
.attr = {
@@ -1553,7 +1560,7 @@ static void ipoib_cm_create_srq(struct net_device *dev, int max_sge)
int ipoib_cm_dev_init(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
int max_srq_sge, i;
INIT_LIST_HEAD(&priv->cm.passive_ids);
@@ -1614,7 +1621,7 @@ int ipoib_cm_dev_init(struct net_device *dev)
void ipoib_cm_dev_cleanup(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
int ret;
if (!priv->cm.srq)
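
The ipoib_cm_tx_init() hunk above drops the per-call GFP_NOIO flags (and the IB_QP_CREATE_USE_GFP_NOIO create flag with its -EINVAL retry) in favor of the scoped memalloc_noio_save()/memalloc_noio_restore() API from <linux/sched/mm.h>: every allocation between save and restore behaves as GFP_NOIO, including ones made deep inside callees such as ib_create_qp(). A minimal sketch of the pattern (function name and size are illustrative):

#include <linux/sched/mm.h>
#include <linux/vmalloc.h>

static void *alloc_ring_noio(size_t bytes)
{
	unsigned int noio_flag;
	void *ring;

	/* forbid I/O-triggering reclaim for the whole region,
	 * not only for allocations we tag explicitly */
	noio_flag = memalloc_noio_save();
	ring = vzalloc(bytes);		/* implicitly GFP_NOIO */
	memalloc_noio_restore(noio_flag);

	return ring;
}
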
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
index 7b6d40ff1acf..7871379342f4 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
@@ -60,12 +60,12 @@ static const struct ipoib_stats ipoib_gstrings_stats[] = {
static void ipoib_get_drvinfo(struct net_device *netdev,
struct ethtool_drvinfo *drvinfo)
{
- struct ipoib_dev_priv *priv = netdev_priv(netdev);
+ struct ipoib_dev_priv *priv = ipoib_priv(netdev);
ib_get_device_fw_str(priv->ca, drvinfo->fw_version,
sizeof(drvinfo->fw_version));
- strlcpy(drvinfo->bus_info, dev_name(priv->ca->dma_device),
+ strlcpy(drvinfo->bus_info, dev_name(priv->ca->dev.parent),
sizeof(drvinfo->bus_info));
strlcpy(drvinfo->version, ipoib_driver_version,
@@ -77,7 +77,7 @@ static void ipoib_get_drvinfo(struct net_device *netdev,
static int ipoib_get_coalesce(struct net_device *dev,
struct ethtool_coalesce *coal)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
coal->rx_coalesce_usecs = priv->ethtool.coalesce_usecs;
coal->rx_max_coalesced_frames = priv->ethtool.max_coalesced_frames;
@@ -88,7 +88,7 @@ static int ipoib_get_coalesce(struct net_device *dev,
static int ipoib_set_coalesce(struct net_device *dev,
struct ethtool_coalesce *coal)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
int ret;
/*
@@ -155,7 +155,66 @@ static int ipoib_get_sset_count(struct net_device __always_unused *dev,
return -EOPNOTSUPP;
}
+/* Return lane speed in units of 1e6 bit/sec */
+static inline int ib_speed_enum_to_int(int speed)
+{
+ switch (speed) {
+ case IB_SPEED_SDR:
+ return SPEED_2500;
+ case IB_SPEED_DDR:
+ return SPEED_5000;
+ case IB_SPEED_QDR:
+ case IB_SPEED_FDR10:
+ return SPEED_10000;
+ case IB_SPEED_FDR:
+ return SPEED_14000;
+ case IB_SPEED_EDR:
+ return SPEED_25000;
+ }
+
+ return SPEED_UNKNOWN;
+}
+
+static int ipoib_get_link_ksettings(struct net_device *netdev,
+ struct ethtool_link_ksettings *cmd)
+{
+ struct ipoib_dev_priv *priv = ipoib_priv(netdev);
+ struct ib_port_attr attr;
+ int ret, speed, width;
+
+ if (!netif_carrier_ok(netdev)) {
+ cmd->base.speed = SPEED_UNKNOWN;
+ cmd->base.duplex = DUPLEX_UNKNOWN;
+ return 0;
+ }
+
+ ret = ib_query_port(priv->ca, priv->port, &attr);
+ if (ret < 0)
+ return -EINVAL;
+
+ speed = ib_speed_enum_to_int(attr.active_speed);
+ width = ib_width_enum_to_int(attr.active_width);
+
+ if (speed < 0 || width < 0)
+ return -EINVAL;
+
+	/* Apart from the fields set below, all other members of
+	 * struct ethtool_link_settings were already zeroed by
+	 * __ethtool_get_link_ksettings.
+ */
+ cmd->base.speed = speed * width;
+ cmd->base.duplex = DUPLEX_FULL;
+
+ cmd->base.phy_address = 0xFF;
+
+ cmd->base.autoneg = AUTONEG_ENABLE;
+ cmd->base.port = PORT_OTHER;
+
+ return 0;
+}
+
static const struct ethtool_ops ipoib_ethtool_ops = {
+ .get_link_ksettings = ipoib_get_link_ksettings,
.get_drvinfo = ipoib_get_drvinfo,
.get_coalesce = ipoib_get_coalesce,
.set_coalesce = ipoib_set_coalesce,
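
The speed reported by the new ipoib_get_link_ksettings() is the per-lane speed from ib_speed_enum_to_int() multiplied by the lane count from ib_width_enum_to_int(), both in units of 1e6 bit/sec. A worked example, assuming the usual enum mappings from ib_verbs.h (wrapper name is illustrative):

static void report_4x_edr(struct ethtool_link_ksettings *cmd)
{
	/* a 4x EDR link: 25 Gb/s per lane, four lanes */
	int speed = ib_speed_enum_to_int(IB_SPEED_EDR);	/* 25000 */
	int width = ib_width_enum_to_int(IB_WIDTH_4X);	/* 4 */

	cmd->base.speed = speed * width;	/* 100000 => 100 Gb/s */
}
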
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_fs.c b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
index 6bd5740e2691..11f74cbe6660 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_fs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
@@ -210,16 +210,16 @@ static int ipoib_path_seq_show(struct seq_file *file, void *iter_ptr)
seq_printf(file,
"GID: %s\n"
" complete: %6s\n",
- gid_buf, path.pathrec.dlid ? "yes" : "no");
+ gid_buf, sa_path_get_dlid(&path.pathrec) ? "yes" : "no");
- if (path.pathrec.dlid) {
+ if (sa_path_get_dlid(&path.pathrec)) {
rate = ib_rate_to_mbps(path.pathrec.rate);
seq_printf(file,
" DLID: 0x%04x\n"
" SL: %12d\n"
" rate: %8d.%d Gb/sec\n",
- be16_to_cpu(path.pathrec.dlid),
+ be32_to_cpu(sa_path_get_dlid(&path.pathrec)),
path.pathrec.sl,
rate / 1000, rate % 1000);
}
@@ -261,7 +261,7 @@ static const struct file_operations ipoib_path_fops = {
void ipoib_create_debug_files(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
char name[IFNAMSIZ + sizeof "_path"];
snprintf(name, sizeof name, "%s_mcg", dev->name);
@@ -279,10 +279,13 @@ void ipoib_create_debug_files(struct net_device *dev)
void ipoib_delete_debug_files(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ WARN_ONCE(!priv->mcg_dentry, "null mcg debug file\n");
+ WARN_ONCE(!priv->path_dentry, "null path debug file\n");
debugfs_remove(priv->mcg_dentry);
debugfs_remove(priv->path_dentry);
+ priv->mcg_dentry = priv->path_dentry = NULL;
}
int ipoib_register_debugfs(void)
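
The be16_to_cpu() to be32_to_cpu() switch above follows from the generic sa_path_rec type: with OPA support a DLID may exceed 16 bits, so it is read through the sa_path_get_dlid() accessor, which returns a 32-bit big-endian value for both IB and OPA records. Usage sketch (record setup elided, assuming a resolved path):

__be32 dlid = sa_path_get_dlid(&path->pathrec);

if (dlid)	/* non-zero once the path is resolved */
	pr_debug("DLID: 0x%08x\n", be32_to_cpu(dlid));
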
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 5038f9d2d753..57a9655e844d 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -52,7 +52,7 @@ MODULE_PARM_DESC(data_debug_level,
#endif
struct ipoib_ah *ipoib_create_ah(struct net_device *dev,
- struct ib_pd *pd, struct ib_ah_attr *attr)
+ struct ib_pd *pd, struct rdma_ah_attr *attr)
{
struct ipoib_ah *ah;
struct ib_ah *vah;
@@ -65,13 +65,13 @@ struct ipoib_ah *ipoib_create_ah(struct net_device *dev,
ah->last_send = 0;
kref_init(&ah->ref);
- vah = ib_create_ah(pd, attr);
+ vah = rdma_create_ah(pd, attr);
if (IS_ERR(vah)) {
kfree(ah);
ah = (struct ipoib_ah *)vah;
} else {
ah->ah = vah;
- ipoib_dbg(netdev_priv(dev), "Created ah %p\n", ah->ah);
+ ipoib_dbg(ipoib_priv(dev), "Created ah %p\n", ah->ah);
}
return ah;
@@ -80,7 +80,7 @@ struct ipoib_ah *ipoib_create_ah(struct net_device *dev,
void ipoib_free_ah(struct kref *kref)
{
struct ipoib_ah *ah = container_of(kref, struct ipoib_ah, ref);
- struct ipoib_dev_priv *priv = netdev_priv(ah->dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(ah->dev);
unsigned long flags;
@@ -99,7 +99,7 @@ static void ipoib_ud_dma_unmap_rx(struct ipoib_dev_priv *priv,
static int ipoib_ib_post_receive(struct net_device *dev, int id)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ib_recv_wr *bad_wr;
int ret;
@@ -121,7 +121,7 @@ static int ipoib_ib_post_receive(struct net_device *dev, int id)
static struct sk_buff *ipoib_alloc_rx_skb(struct net_device *dev, int id)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct sk_buff *skb;
int buf_size;
u64 *mapping;
@@ -153,7 +153,7 @@ error:
static int ipoib_ib_post_receives(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
int i;
for (i = 0; i < ipoib_recvq_size; ++i) {
@@ -172,7 +172,7 @@ static int ipoib_ib_post_receives(struct net_device *dev)
static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
unsigned int wr_id = wc->wr_id & ~IPOIB_OP_RECV;
struct sk_buff *skb;
u64 mapping[IPOIB_UD_RX_SG];
@@ -381,7 +381,7 @@ free_res:
static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
unsigned int wr_id = wc->wr_id;
struct ipoib_tx_buf *tx_req;
@@ -485,14 +485,14 @@ poll_more:
void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr)
{
struct net_device *dev = dev_ptr;
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
napi_schedule(&priv->napi);
}
static void drain_tx_cq(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
netif_tx_lock(dev);
while (poll_tx(priv))
@@ -506,14 +506,14 @@ static void drain_tx_cq(struct net_device *dev)
void ipoib_send_comp_handler(struct ib_cq *cq, void *dev_ptr)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev_ptr);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev_ptr);
mod_timer(&priv->poll_timer, jiffies);
}
static inline int post_send(struct ipoib_dev_priv *priv,
unsigned int wr_id,
- struct ib_ah *address, u32 qpn,
+ struct ib_ah *address, u32 dqpn,
struct ipoib_tx_buf *tx_req,
void *head, int hlen)
{
@@ -523,7 +523,7 @@ static inline int post_send(struct ipoib_dev_priv *priv,
ipoib_build_sge(priv, tx_req);
priv->tx_wr.wr.wr_id = wr_id;
- priv->tx_wr.remote_qpn = qpn;
+ priv->tx_wr.remote_qpn = dqpn;
priv->tx_wr.ah = address;
if (head) {
@@ -537,10 +537,10 @@ static inline int post_send(struct ipoib_dev_priv *priv,
return ib_post_send(priv->qp, &priv->tx_wr.wr, &bad_wr);
}
-void ipoib_send(struct net_device *dev, struct sk_buff *skb,
- struct ipoib_ah *address, u32 qpn)
+int ipoib_send(struct net_device *dev, struct sk_buff *skb,
+ struct ib_ah *address, u32 dqpn)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_tx_buf *tx_req;
int hlen, rc;
void *phead;
@@ -554,7 +554,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
++dev->stats.tx_dropped;
++dev->stats.tx_errors;
dev_kfree_skb_any(skb);
- return;
+ return -1;
}
} else {
if (unlikely(skb->len > priv->mcast_mtu + IPOIB_ENCAP_LEN)) {
@@ -563,7 +563,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
++dev->stats.tx_dropped;
++dev->stats.tx_errors;
ipoib_cm_skb_too_long(dev, skb, priv->mcast_mtu);
- return;
+ return -1;
}
phead = NULL;
hlen = 0;
@@ -574,7 +574,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
++dev->stats.tx_dropped;
++dev->stats.tx_errors;
dev_kfree_skb_any(skb);
- return;
+ return -1;
}
/* Does skb_linearize return ok without reducing nr_frags? */
if (skb_shinfo(skb)->nr_frags > usable_sge) {
@@ -582,12 +582,13 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
++dev->stats.tx_dropped;
++dev->stats.tx_errors;
dev_kfree_skb_any(skb);
- return;
+ return -1;
}
}
- ipoib_dbg_data(priv, "sending packet, length=%d address=%p qpn=0x%06x\n",
- skb->len, address, qpn);
+ ipoib_dbg_data(priv,
+ "sending packet, length=%d address=%p dqpn=0x%06x\n",
+ skb->len, address, dqpn);
/*
* We put the skb into the tx_ring _before_ we call post_send()
@@ -601,7 +602,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
if (unlikely(ipoib_dma_map_tx(priv->ca, tx_req))) {
++dev->stats.tx_errors;
dev_kfree_skb_any(skb);
- return;
+ return -1;
}
if (skb->ip_summed == CHECKSUM_PARTIAL)
@@ -620,7 +621,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
skb_dst_drop(skb);
rc = post_send(priv, priv->tx_head & (ipoib_sendq_size - 1),
- address->ah, qpn, tx_req, phead, hlen);
+ address, dqpn, tx_req, phead, hlen);
if (unlikely(rc)) {
ipoib_warn(priv, "post_send failed, error %d\n", rc);
++dev->stats.tx_errors;
@@ -629,21 +630,24 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
dev_kfree_skb_any(skb);
if (netif_queue_stopped(dev))
netif_wake_queue(dev);
+ rc = 0;
} else {
netif_trans_update(dev);
- address->last_send = priv->tx_head;
+ rc = priv->tx_head;
++priv->tx_head;
}
if (unlikely(priv->tx_outstanding > MAX_SEND_CQE))
while (poll_tx(priv))
; /* nothing */
+
+ return rc;
}
static void __ipoib_reap_ah(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_ah *ah, *tah;
LIST_HEAD(remove_list);
unsigned long flags;
@@ -654,7 +658,7 @@ static void __ipoib_reap_ah(struct net_device *dev)
list_for_each_entry_safe(ah, tah, &priv->dead_ahs, list)
if ((int) priv->tx_tail - (int) ah->last_send >= 0) {
list_del(&ah->list);
- ib_destroy_ah(ah->ah);
+ rdma_destroy_ah(ah->ah);
kfree(ah);
}
@@ -677,7 +681,7 @@ void ipoib_reap_ah(struct work_struct *work)
static void ipoib_flush_ah(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
cancel_delayed_work(&priv->ah_reap_task);
flush_workqueue(priv->wq);
@@ -686,30 +690,124 @@ static void ipoib_flush_ah(struct net_device *dev)
static void ipoib_stop_ah(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
set_bit(IPOIB_STOP_REAPER, &priv->flags);
ipoib_flush_ah(dev);
}
-static void ipoib_ib_tx_timer_func(unsigned long ctx)
+static int recvs_pending(struct net_device *dev)
{
- drain_tx_cq((struct net_device *)ctx);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ int pending = 0;
+ int i;
+
+ for (i = 0; i < ipoib_recvq_size; ++i)
+ if (priv->rx_ring[i].skb)
+ ++pending;
+
+ return pending;
}
-int ipoib_ib_dev_open(struct net_device *dev)
+int ipoib_ib_dev_stop_default(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
- int ret;
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ struct ib_qp_attr qp_attr;
+ unsigned long begin;
+ struct ipoib_tx_buf *tx_req;
+ int i;
- ipoib_pkey_dev_check_presence(dev);
+ if (test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
+ napi_disable(&priv->napi);
- if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) {
- ipoib_warn(priv, "P_Key 0x%04x is %s\n", priv->pkey,
- (!(priv->pkey & 0x7fff) ? "Invalid" : "not found"));
- return -1;
+ ipoib_cm_dev_stop(dev);
+
+ /*
+	 * Move our QP to the error state and then reinitialize it once
+	 * all work requests have completed or have been flushed.
+ */
+ qp_attr.qp_state = IB_QPS_ERR;
+ if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
+ ipoib_warn(priv, "Failed to modify QP to ERROR state\n");
+
+ /* Wait for all sends and receives to complete */
+ begin = jiffies;
+
+ while (priv->tx_head != priv->tx_tail || recvs_pending(dev)) {
+ if (time_after(jiffies, begin + 5 * HZ)) {
+ ipoib_warn(priv,
+ "timing out; %d sends %d receives not completed\n",
+ priv->tx_head - priv->tx_tail,
+ recvs_pending(dev));
+
+ /*
+ * assume the HW is wedged and just free up
+ * all our pending work requests.
+ */
+ while ((int)priv->tx_tail - (int)priv->tx_head < 0) {
+ tx_req = &priv->tx_ring[priv->tx_tail &
+ (ipoib_sendq_size - 1)];
+ ipoib_dma_unmap_tx(priv, tx_req);
+ dev_kfree_skb_any(tx_req->skb);
+ ++priv->tx_tail;
+ --priv->tx_outstanding;
+ }
+
+ for (i = 0; i < ipoib_recvq_size; ++i) {
+ struct ipoib_rx_buf *rx_req;
+
+ rx_req = &priv->rx_ring[i];
+ if (!rx_req->skb)
+ continue;
+ ipoib_ud_dma_unmap_rx(priv,
+ priv->rx_ring[i].mapping);
+ dev_kfree_skb_any(rx_req->skb);
+ rx_req->skb = NULL;
+ }
+
+ goto timeout;
+ }
+
+ ipoib_drain_cq(dev);
+
+ usleep_range(1000, 2000);
}
+ ipoib_dbg(priv, "All sends and receives done.\n");
+
+timeout:
+ del_timer_sync(&priv->poll_timer);
+ qp_attr.qp_state = IB_QPS_RESET;
+ if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
+ ipoib_warn(priv, "Failed to modify QP to RESET state\n");
+
+ ib_req_notify_cq(priv->recv_cq, IB_CQ_NEXT_COMP);
+
+ return 0;
+}
+
+int ipoib_ib_dev_stop(struct net_device *dev)
+{
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+
+ priv->rn_ops->ndo_stop(dev);
+
+ clear_bit(IPOIB_FLAG_INITIALIZED, &priv->flags);
+ ipoib_flush_ah(dev);
+
+ return 0;
+}
+
+void ipoib_ib_tx_timer_func(unsigned long ctx)
+{
+ drain_tx_cq((struct net_device *)ctx);
+}
+
+int ipoib_ib_dev_open_default(struct net_device *dev)
+{
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ int ret;
+
ret = ipoib_init_qp(dev);
if (ret) {
ipoib_warn(priv, "ipoib_init_qp returned %d\n", ret);
@@ -719,33 +817,59 @@ int ipoib_ib_dev_open(struct net_device *dev)
ret = ipoib_ib_post_receives(dev);
if (ret) {
ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret);
- goto dev_stop;
+ goto out;
}
ret = ipoib_cm_dev_open(dev);
if (ret) {
ipoib_warn(priv, "ipoib_cm_dev_open returned %d\n", ret);
- goto dev_stop;
+ goto out;
+ }
+
+ if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
+ napi_enable(&priv->napi);
+
+ return 0;
+out:
+ return -1;
+}
+
+int ipoib_ib_dev_open(struct net_device *dev)
+{
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+
+ ipoib_pkey_dev_check_presence(dev);
+
+ if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) {
+ ipoib_warn(priv, "P_Key 0x%04x is %s\n", priv->pkey,
+ (!(priv->pkey & 0x7fff) ? "Invalid" : "not found"));
+ return -1;
}
clear_bit(IPOIB_STOP_REAPER, &priv->flags);
queue_delayed_work(priv->wq, &priv->ah_reap_task,
round_jiffies_relative(HZ));
- if (!test_and_set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
- napi_enable(&priv->napi);
+ if (priv->rn_ops->ndo_open(dev)) {
+ pr_warn("%s: Failed to open dev\n", dev->name);
+ goto dev_stop;
+ }
+
+ set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags);
return 0;
+
dev_stop:
- if (!test_and_set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
- napi_enable(&priv->napi);
+ set_bit(IPOIB_STOP_REAPER, &priv->flags);
+ cancel_delayed_work(&priv->ah_reap_task);
+ set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags);
ipoib_ib_dev_stop(dev);
return -1;
}
void ipoib_pkey_dev_check_presence(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
if (!(priv->pkey & 0x7fff) ||
ib_find_pkey(priv->ca, priv->port, priv->pkey,
@@ -755,25 +879,25 @@ void ipoib_pkey_dev_check_presence(struct net_device *dev)
set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
}
-int ipoib_ib_dev_up(struct net_device *dev)
+void ipoib_ib_dev_up(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
ipoib_pkey_dev_check_presence(dev);
if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) {
ipoib_dbg(priv, "PKEY is not assigned.\n");
- return 0;
+ return;
}
set_bit(IPOIB_FLAG_OPER_UP, &priv->flags);
- return ipoib_mcast_start_thread(dev);
+ ipoib_mcast_start_thread(dev);
}
-int ipoib_ib_dev_down(struct net_device *dev)
+void ipoib_ib_dev_down(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
ipoib_dbg(priv, "downing ib_dev\n");
@@ -784,26 +908,11 @@ int ipoib_ib_dev_down(struct net_device *dev)
ipoib_mcast_dev_flush(dev);
ipoib_flush_paths(dev);
-
- return 0;
-}
-
-static int recvs_pending(struct net_device *dev)
-{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
- int pending = 0;
- int i;
-
- for (i = 0; i < ipoib_recvq_size; ++i)
- if (priv->rx_ring[i].skb)
- ++pending;
-
- return pending;
}
void ipoib_drain_cq(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
int i, n;
/*
@@ -840,109 +949,6 @@ void ipoib_drain_cq(struct net_device *dev)
local_bh_enable();
}
-int ipoib_ib_dev_stop(struct net_device *dev)
-{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
- struct ib_qp_attr qp_attr;
- unsigned long begin;
- struct ipoib_tx_buf *tx_req;
- int i;
-
- if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
- napi_disable(&priv->napi);
-
- ipoib_cm_dev_stop(dev);
-
- /*
- * Move our QP to the error state and then reinitialize in
- * when all work requests have completed or have been flushed.
- */
- qp_attr.qp_state = IB_QPS_ERR;
- if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
- ipoib_warn(priv, "Failed to modify QP to ERROR state\n");
-
- /* Wait for all sends and receives to complete */
- begin = jiffies;
-
- while (priv->tx_head != priv->tx_tail || recvs_pending(dev)) {
- if (time_after(jiffies, begin + 5 * HZ)) {
- ipoib_warn(priv, "timing out; %d sends %d receives not completed\n",
- priv->tx_head - priv->tx_tail, recvs_pending(dev));
-
- /*
- * assume the HW is wedged and just free up
- * all our pending work requests.
- */
- while ((int) priv->tx_tail - (int) priv->tx_head < 0) {
- tx_req = &priv->tx_ring[priv->tx_tail &
- (ipoib_sendq_size - 1)];
- ipoib_dma_unmap_tx(priv, tx_req);
- dev_kfree_skb_any(tx_req->skb);
- ++priv->tx_tail;
- --priv->tx_outstanding;
- }
-
- for (i = 0; i < ipoib_recvq_size; ++i) {
- struct ipoib_rx_buf *rx_req;
-
- rx_req = &priv->rx_ring[i];
- if (!rx_req->skb)
- continue;
- ipoib_ud_dma_unmap_rx(priv,
- priv->rx_ring[i].mapping);
- dev_kfree_skb_any(rx_req->skb);
- rx_req->skb = NULL;
- }
-
- goto timeout;
- }
-
- ipoib_drain_cq(dev);
-
- msleep(1);
- }
-
- ipoib_dbg(priv, "All sends and receives done.\n");
-
-timeout:
- del_timer_sync(&priv->poll_timer);
- qp_attr.qp_state = IB_QPS_RESET;
- if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
- ipoib_warn(priv, "Failed to modify QP to RESET state\n");
-
- ipoib_flush_ah(dev);
-
- ib_req_notify_cq(priv->recv_cq, IB_CQ_NEXT_COMP);
-
- return 0;
-}
-
-int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
-{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
-
- priv->ca = ca;
- priv->port = port;
- priv->qp = NULL;
-
- if (ipoib_transport_dev_init(dev, ca)) {
- printk(KERN_WARNING "%s: ipoib_transport_dev_init failed\n", ca->name);
- return -ENODEV;
- }
-
- setup_timer(&priv->poll_timer, ipoib_ib_tx_timer_func,
- (unsigned long) dev);
-
- if (dev->flags & IFF_UP) {
- if (ipoib_ib_dev_open(dev)) {
- ipoib_transport_dev_cleanup(dev);
- return -ENODEV;
- }
- }
-
- return 0;
-}
-
/*
* Takes whatever value which is in pkey index 0 and updates priv->pkey
* returns 0 if the pkey value was changed.
@@ -971,6 +977,19 @@ static inline int update_parent_pkey(struct ipoib_dev_priv *priv)
*/
priv->dev->broadcast[8] = priv->pkey >> 8;
priv->dev->broadcast[9] = priv->pkey & 0xff;
+
+ /*
+	 * Update the broadcast address in the priv->broadcast object,
+	 * if it already exists; nothing else will refresh it.
+ */
+ if (priv->broadcast) {
+ spin_lock_irq(&priv->lock);
+ memcpy(priv->broadcast->mcmember.mgid.raw,
+ priv->dev->broadcast + 4,
+ sizeof(union ib_gid));
+ spin_unlock_irq(&priv->lock);
+ }
+
return 0;
}
@@ -1220,7 +1239,7 @@ void ipoib_ib_dev_flush_heavy(struct work_struct *work)
void ipoib_ib_dev_cleanup(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
ipoib_dbg(priv, "cleaning up ib_dev\n");
/*
@@ -1240,7 +1259,13 @@ void ipoib_ib_dev_cleanup(struct net_device *dev)
*/
ipoib_stop_ah(dev);
- ipoib_transport_dev_cleanup(dev);
-}
+ clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
+ priv->rn_ops->ndo_uninit(dev);
+
+ if (priv->pd) {
+ ib_dealloc_pd(priv->pd);
+ priv->pd = NULL;
+ }
+}
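
With this change the generic open/stop paths dispatch through the net_device_ops saved in priv->rn_ops rather than calling the software datapath directly: ipoib_ib_dev_open() invokes priv->rn_ops->ndo_open() and ipoib_ib_dev_stop() invokes ndo_stop, for which the *_default() functions above are the software implementations; a hardware-offload provider can install its own rdma_netdev ops instead. A minimal sketch of the dispatch (wrapper name is illustrative):

static int ipoib_stop_via_rn_ops(struct net_device *dev)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);

	/* ndo_stop is ipoib_ib_dev_stop_default() for the software
	 * datapath, or the provider's own callback when offloaded */
	return priv->rn_ops->ndo_stop(dev);
}
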
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 3ce0765a05ab..4ce315c92b48 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -108,9 +108,36 @@ static struct ib_client ipoib_client = {
.get_net_dev_by_params = ipoib_get_net_dev_by_params,
};
+#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
+static int ipoib_netdev_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct netdev_notifier_info *ni = ptr;
+ struct net_device *dev = ni->dev;
+
+ if (dev->netdev_ops->ndo_open != ipoib_open)
+ return NOTIFY_DONE;
+
+ switch (event) {
+ case NETDEV_REGISTER:
+ ipoib_create_debug_files(dev);
+ break;
+ case NETDEV_CHANGENAME:
+ ipoib_delete_debug_files(dev);
+ ipoib_create_debug_files(dev);
+ break;
+ case NETDEV_UNREGISTER:
+ ipoib_delete_debug_files(dev);
+ break;
+ }
+
+ return NOTIFY_DONE;
+}
+#endif
+
int ipoib_open(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
ipoib_dbg(priv, "bringing up interface\n");
@@ -126,8 +153,7 @@ int ipoib_open(struct net_device *dev)
goto err_disable;
}
- if (ipoib_ib_dev_up(dev))
- goto err_stop;
+ ipoib_ib_dev_up(dev);
if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
struct ipoib_dev_priv *cpriv;
@@ -150,9 +176,6 @@ int ipoib_open(struct net_device *dev)
return 0;
-err_stop:
- ipoib_ib_dev_stop(dev);
-
err_disable:
clear_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);
@@ -161,7 +184,7 @@ err_disable:
static int ipoib_stop(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
ipoib_dbg(priv, "stopping interface\n");
@@ -199,7 +222,7 @@ static void ipoib_uninit(struct net_device *dev)
static netdev_features_t ipoib_fix_features(struct net_device *dev, netdev_features_t features)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
if (test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags))
features &= ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
@@ -209,7 +232,8 @@ static netdev_features_t ipoib_fix_features(struct net_device *dev, netdev_featu
static int ipoib_change_mtu(struct net_device *dev, int new_mtu)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ int ret = 0;
/* dev->mtu > 2K ==> connected mode */
if (ipoib_cm_admin_enabled(dev)) {
@@ -229,9 +253,38 @@ static int ipoib_change_mtu(struct net_device *dev, int new_mtu)
priv->admin_mtu = new_mtu;
- dev->mtu = min(priv->mcast_mtu, priv->admin_mtu);
+ if (priv->mcast_mtu < priv->admin_mtu)
+ ipoib_dbg(priv, "MTU must be smaller than the underlying "
+ "link layer MTU - 4 (%u)\n", priv->mcast_mtu);
- return 0;
+ new_mtu = min(priv->mcast_mtu, priv->admin_mtu);
+
+ if (priv->rn_ops->ndo_change_mtu) {
+ bool carrier_status = netif_carrier_ok(dev);
+
+ netif_carrier_off(dev);
+
+		/* notify the lower level of the real MTU */
+ ret = priv->rn_ops->ndo_change_mtu(dev, new_mtu);
+
+ if (carrier_status)
+ netif_carrier_on(dev);
+ } else {
+ dev->mtu = new_mtu;
+ }
+
+ return ret;
+}
+
+static void ipoib_get_stats(struct net_device *dev,
+ struct rtnl_link_stats64 *stats)
+{
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+
+ if (priv->rn_ops->ndo_get_stats64)
+ priv->rn_ops->ndo_get_stats64(dev, stats);
+ else
+ netdev_stats_to_stats64(stats, &dev->stats);
}
/* Called with an RCU read lock taken */
@@ -468,7 +521,14 @@ static struct net_device *ipoib_get_net_dev_by_params(
int ipoib_set_mode(struct net_device *dev, const char *buf)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+
+ if ((test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags) &&
+ !strcmp(buf, "connected\n")) ||
+ (!test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags) &&
+ !strcmp(buf, "datagram\n"))) {
+ return 0;
+ }
/* flush paths if we switch modes so that connections are restarted */
if (IPOIB_CM_SUPPORTED(dev->dev_addr) && !strcmp(buf, "connected\n")) {
@@ -481,8 +541,7 @@ int ipoib_set_mode(struct net_device *dev, const char *buf)
priv->tx_wr.wr.send_flags &= ~IB_SEND_IP_CSUM;
ipoib_flush_paths(dev);
- rtnl_lock();
- return 0;
+ return (!rtnl_trylock()) ? -EBUSY : 0;
}
if (!strcmp(buf, "datagram\n")) {
@@ -491,8 +550,7 @@ int ipoib_set_mode(struct net_device *dev, const char *buf)
dev_set_mtu(dev, min(priv->mcast_mtu, dev->mtu));
rtnl_unlock();
ipoib_flush_paths(dev);
- rtnl_lock();
- return 0;
+ return (!rtnl_trylock()) ? -EBUSY : 0;
}
return -EINVAL;
@@ -500,7 +558,7 @@ int ipoib_set_mode(struct net_device *dev, const char *buf)
struct ipoib_path *__path_find(struct net_device *dev, void *gid)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct rb_node *n = priv->path_tree.rb_node;
struct ipoib_path *path;
int ret;
@@ -524,7 +582,7 @@ struct ipoib_path *__path_find(struct net_device *dev, void *gid)
static int __path_add(struct net_device *dev, struct ipoib_path *path)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct rb_node **n = &priv->path_tree.rb_node;
struct rb_node *pn = NULL;
struct ipoib_path *tpath;
@@ -559,7 +617,7 @@ static void path_free(struct net_device *dev, struct ipoib_path *path)
while ((skb = __skb_dequeue(&path->queue)))
dev_kfree_skb_irq(skb);
- ipoib_dbg(netdev_priv(dev), "path_free\n");
+ ipoib_dbg(ipoib_priv(dev), "path_free\n");
/* remove all neigh connected to this path */
ipoib_del_neighs_by_gid(dev, path->pathrec.dgid.raw);
@@ -593,7 +651,7 @@ struct ipoib_path_iter *ipoib_path_iter_init(struct net_device *dev)
int ipoib_path_iter_next(struct ipoib_path_iter *iter)
{
- struct ipoib_dev_priv *priv = netdev_priv(iter->dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(iter->dev);
struct rb_node *n;
struct ipoib_path *path;
int ret = 1;
@@ -630,95 +688,32 @@ void ipoib_path_iter_read(struct ipoib_path_iter *iter,
void ipoib_mark_paths_invalid(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_path *path, *tp;
spin_lock_irq(&priv->lock);
list_for_each_entry_safe(path, tp, &priv->path_list, list) {
- ipoib_dbg(priv, "mark path LID 0x%04x GID %pI6 invalid\n",
- be16_to_cpu(path->pathrec.dlid),
- path->pathrec.dgid.raw);
+ ipoib_dbg(priv, "mark path LID 0x%08x GID %pI6 invalid\n",
+ be32_to_cpu(sa_path_get_dlid(&path->pathrec)),
+ path->pathrec.dgid.raw);
path->valid = 0;
}
spin_unlock_irq(&priv->lock);
}
-struct classport_info_context {
- struct ipoib_dev_priv *priv;
- struct completion done;
- struct ib_sa_query *sa_query;
-};
-
-static void classport_info_query_cb(int status, struct ib_class_port_info *rec,
- void *context)
+static void push_pseudo_header(struct sk_buff *skb, const char *daddr)
{
- struct classport_info_context *cb_ctx = context;
- struct ipoib_dev_priv *priv;
-
- WARN_ON(!context);
-
- priv = cb_ctx->priv;
-
- if (status || !rec) {
- pr_debug("device: %s failed query classport_info status: %d\n",
- priv->dev->name, status);
- /* keeps the default, will try next mcast_restart */
- priv->sm_fullmember_sendonly_support = false;
- goto out;
- }
-
- if (ib_get_cpi_capmask2(rec) &
- IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT) {
- pr_debug("device: %s enabled fullmember-sendonly for sendonly MCG\n",
- priv->dev->name);
- priv->sm_fullmember_sendonly_support = true;
- } else {
- pr_debug("device: %s disabled fullmember-sendonly for sendonly MCG\n",
- priv->dev->name);
- priv->sm_fullmember_sendonly_support = false;
- }
-
-out:
- complete(&cb_ctx->done);
-}
-
-int ipoib_check_sm_sendonly_fullmember_support(struct ipoib_dev_priv *priv)
-{
- struct classport_info_context *callback_context;
- int ret;
-
- callback_context = kmalloc(sizeof(*callback_context), GFP_KERNEL);
- if (!callback_context)
- return -ENOMEM;
-
- callback_context->priv = priv;
- init_completion(&callback_context->done);
-
- ret = ib_sa_classport_info_rec_query(&ipoib_sa_client,
- priv->ca, priv->port, 3000,
- GFP_KERNEL,
- classport_info_query_cb,
- callback_context,
- &callback_context->sa_query);
- if (ret < 0) {
- pr_info("%s failed to send ib_sa_classport_info query, ret: %d\n",
- priv->dev->name, ret);
- kfree(callback_context);
- return ret;
- }
-
- /* waiting for the callback to finish before returnning */
- wait_for_completion(&callback_context->done);
- kfree(callback_context);
+ struct ipoib_pseudo_header *phdr;
- return ret;
+ phdr = skb_push(skb, sizeof(*phdr));
+ memcpy(phdr->hwaddr, daddr, INFINIBAND_ALEN);
}
void ipoib_flush_paths(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_path *path, *tp;
LIST_HEAD(remove_list);
unsigned long flags;
@@ -747,12 +742,12 @@ void ipoib_flush_paths(struct net_device *dev)
}
static void path_rec_completion(int status,
- struct ib_sa_path_rec *pathrec,
+ struct sa_path_rec *pathrec,
void *path_ptr)
{
struct ipoib_path *path = path_ptr;
struct net_device *dev = path->dev;
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_ah *ah = NULL;
struct ipoib_ah *old_ah = NULL;
struct ipoib_neigh *neigh, *tn;
@@ -762,7 +757,8 @@ static void path_rec_completion(int status,
if (!status)
ipoib_dbg(priv, "PathRec LID 0x%04x for GID %pI6\n",
- be16_to_cpu(pathrec->dlid), pathrec->dgid.raw);
+ be32_to_cpu(sa_path_get_dlid(pathrec)),
+ pathrec->dgid.raw);
else
ipoib_dbg(priv, "PathRec status %d for GID %pI6\n",
status, path->pathrec.dgid.raw);
@@ -770,7 +766,7 @@ static void path_rec_completion(int status,
skb_queue_head_init(&skqueue);
if (!status) {
- struct ib_ah_attr av;
+ struct rdma_ah_attr av;
if (!ib_init_ah_from_path(priv->ca, priv->port, pathrec, &av))
ah = ipoib_create_ah(dev, priv->pd, &av);
@@ -785,7 +781,8 @@ static void path_rec_completion(int status,
path->ah = ah;
ipoib_dbg(priv, "created address handle %p for LID 0x%04x, SL %d\n",
- ah, be16_to_cpu(pathrec->dlid), pathrec->sl);
+ ah, be32_to_cpu(sa_path_get_dlid(pathrec)),
+ pathrec->sl);
while ((skb = __skb_dequeue(&path->queue)))
__skb_queue_tail(&skqueue, skb);
@@ -834,16 +831,18 @@ static void path_rec_completion(int status,
ipoib_put_ah(old_ah);
while ((skb = __skb_dequeue(&skqueue))) {
+ int ret;
skb->dev = dev;
- if (dev_queue_xmit(skb))
- ipoib_warn(priv, "dev_queue_xmit failed "
- "to requeue packet\n");
+ ret = dev_queue_xmit(skb);
+ if (ret)
+ ipoib_warn(priv, "%s: dev_queue_xmit failed to re-queue packet, ret:%d\n",
+ __func__, ret);
}
}
static struct ipoib_path *path_rec_create(struct net_device *dev, void *gid)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_path *path;
if (!priv->broadcast)
@@ -859,6 +858,10 @@ static struct ipoib_path *path_rec_create(struct net_device *dev, void *gid)
INIT_LIST_HEAD(&path->neigh_list);
+ if (rdma_cap_opa_ah(priv->ca, priv->port))
+ path->pathrec.rec_type = SA_PATH_REC_TYPE_OPA;
+ else
+ path->pathrec.rec_type = SA_PATH_REC_TYPE_IB;
memcpy(path->pathrec.dgid.raw, gid, sizeof (union ib_gid));
path->pathrec.sgid = priv->local_gid;
path->pathrec.pkey = cpu_to_be16(priv->pkey);
@@ -871,7 +874,7 @@ static struct ipoib_path *path_rec_create(struct net_device *dev, void *gid)
static int path_rec_start(struct net_device *dev,
struct ipoib_path *path)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
ipoib_dbg(priv, "Start path record lookup for %pI6\n",
path->pathrec.dgid.raw);
@@ -902,7 +905,8 @@ static int path_rec_start(struct net_device *dev,
static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ struct rdma_netdev *rn = netdev_priv(dev);
struct ipoib_path *path;
struct ipoib_neigh *neigh;
unsigned long flags;
@@ -940,8 +944,7 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
}
if (skb_queue_len(&neigh->queue) <
IPOIB_MAX_PATH_REC_QUEUE) {
- /* put pseudoheader back on for next time */
- skb_push(skb, IPOIB_PSEUDO_LEN);
+ push_pseudo_header(skb, neigh->daddr);
__skb_queue_tail(&neigh->queue, skb);
} else {
ipoib_warn(priv, "queue length limit %d. Packet drop.\n",
@@ -950,7 +953,8 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
}
} else {
spin_unlock_irqrestore(&priv->lock, flags);
- ipoib_send(dev, skb, path->ah, IPOIB_QPN(daddr));
+ path->ah->last_send = rn->send(dev, skb, path->ah->ah,
+ IPOIB_QPN(daddr));
ipoib_neigh_put(neigh);
return;
}
@@ -959,10 +963,12 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
if (!path->query && path_rec_start(dev, path))
goto err_path;
- if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE)
+ if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
+ push_pseudo_header(skb, neigh->daddr);
__skb_queue_tail(&neigh->queue, skb);
- else
+ } else {
goto err_drop;
+ }
}
spin_unlock_irqrestore(&priv->lock, flags);
@@ -982,7 +988,8 @@ err_drop:
static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
struct ipoib_pseudo_header *phdr)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ struct rdma_netdev *rn = netdev_priv(dev);
struct ipoib_path *path;
unsigned long flags;
@@ -998,8 +1005,7 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
}
if (path) {
if (skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
- /* put pseudoheader back on for next time */
- skb_push(skb, IPOIB_PSEUDO_LEN);
+ push_pseudo_header(skb, phdr->hwaddr);
__skb_queue_tail(&path->queue, skb);
} else {
++dev->stats.tx_dropped;
@@ -1023,16 +1029,16 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
}
if (path->ah) {
- ipoib_dbg(priv, "Send unicast ARP to %04x\n",
- be16_to_cpu(path->pathrec.dlid));
+ ipoib_dbg(priv, "Send unicast ARP to %08x\n",
+ be32_to_cpu(sa_path_get_dlid(&path->pathrec)));
spin_unlock_irqrestore(&priv->lock, flags);
- ipoib_send(dev, skb, path->ah, IPOIB_QPN(phdr->hwaddr));
+ path->ah->last_send = rn->send(dev, skb, path->ah->ah,
+ IPOIB_QPN(phdr->hwaddr));
return;
} else if ((path->query || !path_rec_start(dev, path)) &&
skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
- /* put pseudoheader back on for next time */
- skb_push(skb, IPOIB_PSEUDO_LEN);
+ push_pseudo_header(skb, phdr->hwaddr);
__skb_queue_tail(&path->queue, skb);
} else {
++dev->stats.tx_dropped;
@@ -1044,7 +1050,8 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ struct rdma_netdev *rn = netdev_priv(dev);
struct ipoib_neigh *neigh;
struct ipoib_pseudo_header *phdr;
struct ipoib_header *header;
@@ -1108,13 +1115,13 @@ send_using_neigh:
goto unref;
}
} else if (neigh->ah) {
- ipoib_send(dev, skb, neigh->ah, IPOIB_QPN(phdr->hwaddr));
+ neigh->ah->last_send = rn->send(dev, skb, neigh->ah->ah,
+ IPOIB_QPN(phdr->hwaddr));
goto unref;
}
if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
- /* put pseudoheader back on for next time */
- skb_push(skb, sizeof(*phdr));
+ push_pseudo_header(skb, phdr->hwaddr);
spin_lock_irqsave(&priv->lock, flags);
__skb_queue_tail(&neigh->queue, skb);
spin_unlock_irqrestore(&priv->lock, flags);
@@ -1131,7 +1138,7 @@ unref:
static void ipoib_timeout(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
ipoib_warn(priv, "transmit timeout: latency %d msecs\n",
jiffies_to_msecs(jiffies - dev_trans_start(dev)));
@@ -1146,10 +1153,9 @@ static int ipoib_hard_header(struct sk_buff *skb,
unsigned short type,
const void *daddr, const void *saddr, unsigned len)
{
- struct ipoib_pseudo_header *phdr;
struct ipoib_header *header;
- header = (struct ipoib_header *) skb_push(skb, sizeof *header);
+ header = skb_push(skb, sizeof *header);
header->proto = htons(type);
header->reserved = 0;
@@ -1159,15 +1165,14 @@ static int ipoib_hard_header(struct sk_buff *skb,
* destination address into skb hard header so we can figure out where
* to send the packet later.
*/
- phdr = (struct ipoib_pseudo_header *) skb_push(skb, sizeof(*phdr));
- memcpy(phdr->hwaddr, daddr, INFINIBAND_ALEN);
+ push_pseudo_header(skb, daddr);
return IPOIB_HARD_LEN;
}
static void ipoib_set_mcast_list(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) {
ipoib_dbg(priv, "IPOIB_FLAG_OPER_UP not set");
@@ -1179,7 +1184,7 @@ static void ipoib_set_mcast_list(struct net_device *dev)
static int ipoib_get_iflink(const struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
/* parent interface */
if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags))
@@ -1207,7 +1212,7 @@ static u32 ipoib_addr_hash(struct ipoib_neigh_hash *htbl, u8 *daddr)
struct ipoib_neigh *ipoib_neigh_get(struct net_device *dev, u8 *daddr)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_neigh_table *ntbl = &priv->ntbl;
struct ipoib_neigh_hash *htbl;
struct ipoib_neigh *neigh = NULL;
@@ -1286,7 +1291,7 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv)
rcu_dereference_protected(neigh->hnext,
lockdep_is_held(&priv->lock)));
/* remove from path/mc list */
- list_del(&neigh->list);
+ list_del_init(&neigh->list);
call_rcu(&neigh->rcu, ipoib_neigh_reclaim);
} else {
np = &neigh->hnext;
@@ -1336,7 +1341,7 @@ static struct ipoib_neigh *ipoib_neigh_ctor(u8 *daddr,
struct ipoib_neigh *ipoib_neigh_alloc(u8 *daddr,
struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_neigh_table *ntbl = &priv->ntbl;
struct ipoib_neigh_hash *htbl;
struct ipoib_neigh *neigh;
@@ -1393,7 +1398,7 @@ void ipoib_neigh_dtor(struct ipoib_neigh *neigh)
{
	/* neigh reference count was dropped to zero */
struct net_device *dev = neigh->dev;
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct sk_buff *skb;
if (neigh->ah)
ipoib_put_ah(neigh->ah);
@@ -1403,7 +1408,7 @@ void ipoib_neigh_dtor(struct ipoib_neigh *neigh)
}
if (ipoib_cm_get(neigh))
ipoib_cm_destroy_tx(ipoib_cm_get(neigh));
- ipoib_dbg(netdev_priv(dev),
+ ipoib_dbg(ipoib_priv(dev),
"neigh free for %06x %pI6\n",
IPOIB_QPN(neigh->daddr),
neigh->daddr + 4);
@@ -1425,7 +1430,7 @@ static void ipoib_neigh_reclaim(struct rcu_head *rp)
void ipoib_neigh_free(struct ipoib_neigh *neigh)
{
struct net_device *dev = neigh->dev;
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_neigh_table *ntbl = &priv->ntbl;
struct ipoib_neigh_hash *htbl;
struct ipoib_neigh __rcu **np;
@@ -1450,7 +1455,7 @@ void ipoib_neigh_free(struct ipoib_neigh *neigh)
rcu_dereference_protected(neigh->hnext,
lockdep_is_held(&priv->lock)));
/* remove from parent list */
- list_del(&neigh->list);
+ list_del_init(&neigh->list);
call_rcu(&neigh->rcu, ipoib_neigh_reclaim);
return;
} else {
@@ -1508,7 +1513,7 @@ static void neigh_hash_free_rcu(struct rcu_head *head)
void ipoib_del_neighs_by_gid(struct net_device *dev, u8 *gid)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_neigh_table *ntbl = &priv->ntbl;
struct ipoib_neigh_hash *htbl;
unsigned long flags;
@@ -1535,7 +1540,7 @@ void ipoib_del_neighs_by_gid(struct net_device *dev, u8 *gid)
rcu_dereference_protected(neigh->hnext,
lockdep_is_held(&priv->lock)));
/* remove from parent list */
- list_del(&neigh->list);
+ list_del_init(&neigh->list);
call_rcu(&neigh->rcu, ipoib_neigh_reclaim);
} else {
np = &neigh->hnext;
@@ -1577,7 +1582,7 @@ static void ipoib_flush_neighs(struct ipoib_dev_priv *priv)
rcu_dereference_protected(neigh->hnext,
lockdep_is_held(&priv->lock)));
/* remove from path/mc list */
- list_del(&neigh->list);
+ list_del_init(&neigh->list);
call_rcu(&neigh->rcu, ipoib_neigh_reclaim);
}
}
@@ -1594,7 +1599,7 @@ out_unlock:
static void ipoib_neigh_hash_uninit(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
int stopped;
ipoib_dbg(priv, "ipoib_neigh_hash_uninit\n");
@@ -1611,10 +1616,28 @@ static void ipoib_neigh_hash_uninit(struct net_device *dev)
wait_for_completion(&priv->ntbl.deleted);
}
+static void ipoib_dev_uninit_default(struct net_device *dev)
+{
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+
+ ipoib_transport_dev_cleanup(dev);
-int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
+ netif_napi_del(&priv->napi);
+
+ ipoib_cm_dev_cleanup(dev);
+
+ kfree(priv->rx_ring);
+ vfree(priv->tx_ring);
+
+ priv->rx_ring = NULL;
+ priv->tx_ring = NULL;
+}
+
+static int ipoib_dev_init_default(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+
+ netif_napi_add(dev, &priv->napi, ipoib_poll, NAPI_POLL_WEIGHT);
/* Allocate RX/TX "rings" to hold queued skbs */
priv->rx_ring = kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring,
@@ -1625,46 +1648,112 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
priv->tx_ring = vzalloc(ipoib_sendq_size * sizeof *priv->tx_ring);
if (!priv->tx_ring) {
printk(KERN_WARNING "%s: failed to allocate TX ring (%d entries)\n",
- ca->name, ipoib_sendq_size);
+ priv->ca->name, ipoib_sendq_size);
goto out_rx_ring_cleanup;
}
/* priv->tx_head, tx_tail & tx_outstanding are already 0 */
- if (ipoib_ib_dev_init(dev, ca, port))
+ if (ipoib_transport_dev_init(dev, priv->ca)) {
+ pr_warn("%s: ipoib_transport_dev_init failed\n",
+ priv->ca->name);
goto out_tx_ring_cleanup;
+ }
+
+	/* after the QP is created, set the device address */
+ priv->dev->dev_addr[1] = (priv->qp->qp_num >> 16) & 0xff;
+ priv->dev->dev_addr[2] = (priv->qp->qp_num >> 8) & 0xff;
+ priv->dev->dev_addr[3] = (priv->qp->qp_num) & 0xff;
+
+ setup_timer(&priv->poll_timer, ipoib_ib_tx_timer_func,
+ (unsigned long)dev);
+
+ return 0;
+
+out_tx_ring_cleanup:
+ vfree(priv->tx_ring);
+
+out_rx_ring_cleanup:
+ kfree(priv->rx_ring);
+
+out:
+ netif_napi_del(&priv->napi);
+ return -ENOMEM;
+}
+
+int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
+{
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ int ret = -ENOMEM;
+
+ priv->ca = ca;
+ priv->port = port;
+ priv->qp = NULL;
/*
- * Must be after ipoib_ib_dev_init so we can allocate a per
- * device wq there and use it here
+ * the various IPoIB tasks assume they will never race against
+ * themselves, so always use a single thread workqueue
*/
- if (ipoib_neigh_hash_init(priv) < 0)
+ priv->wq = alloc_ordered_workqueue("ipoib_wq", WQ_MEM_RECLAIM);
+ if (!priv->wq) {
+ pr_warn("%s: failed to allocate device WQ\n", dev->name);
+ goto out;
+ }
+
+	/* create the PD, which is used for both control and datapath */
+ priv->pd = ib_alloc_pd(priv->ca, 0);
+ if (IS_ERR(priv->pd)) {
+ pr_warn("%s: failed to allocate PD\n", ca->name);
+ goto clean_wq;
+ }
+
+ ret = priv->rn_ops->ndo_init(dev);
+ if (ret) {
+ pr_warn("%s failed to init HW resource\n", dev->name);
+ goto out_free_pd;
+ }
+
+ if (ipoib_neigh_hash_init(priv) < 0) {
+ pr_warn("%s failed to init neigh hash\n", dev->name);
goto out_dev_uninit;
+ }
+
+ if (dev->flags & IFF_UP) {
+ if (ipoib_ib_dev_open(dev)) {
+ pr_warn("%s failed to open device\n", dev->name);
+ ret = -ENODEV;
+ goto out_dev_uninit;
+ }
+ }
return 0;
out_dev_uninit:
ipoib_ib_dev_cleanup(dev);
-out_tx_ring_cleanup:
- vfree(priv->tx_ring);
+out_free_pd:
+ if (priv->pd) {
+ ib_dealloc_pd(priv->pd);
+ priv->pd = NULL;
+ }
-out_rx_ring_cleanup:
- kfree(priv->rx_ring);
+clean_wq:
+ if (priv->wq) {
+ destroy_workqueue(priv->wq);
+ priv->wq = NULL;
+ }
out:
- return -ENOMEM;
+ return ret;
}
void ipoib_dev_cleanup(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev), *cpriv, *tcpriv;
+ struct ipoib_dev_priv *priv = ipoib_priv(dev), *cpriv, *tcpriv;
LIST_HEAD(head);
ASSERT_RTNL();
- ipoib_delete_debug_files(dev);
-
/* Delete any child interfaces first */
list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) {
/* Stop GC on child */
@@ -1674,24 +1763,21 @@ void ipoib_dev_cleanup(struct net_device *dev)
}
unregister_netdevice_many(&head);
- /*
- * Must be before ipoib_ib_dev_cleanup or we delete an in use
- * work queue
- */
ipoib_neigh_hash_uninit(dev);
ipoib_ib_dev_cleanup(dev);
- kfree(priv->rx_ring);
- vfree(priv->tx_ring);
-
- priv->rx_ring = NULL;
- priv->tx_ring = NULL;
+	/* no more work items on priv->wq */
+ if (priv->wq) {
+ flush_workqueue(priv->wq);
+ destroy_workqueue(priv->wq);
+ priv->wq = NULL;
+ }
}
static int ipoib_set_vf_link_state(struct net_device *dev, int vf, int link_state)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
return ib_set_vf_link_state(priv->ca, vf, priv->port, link_state);
}
@@ -1699,7 +1785,7 @@ static int ipoib_set_vf_link_state(struct net_device *dev, int vf, int link_stat
static int ipoib_get_vf_config(struct net_device *dev, int vf,
struct ifla_vf_info *ivf)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
int err;
err = ib_get_vf_config(priv->ca, vf, priv->port, ivf);
@@ -1713,7 +1799,7 @@ static int ipoib_get_vf_config(struct net_device *dev, int vf,
static int ipoib_set_vf_guid(struct net_device *dev, int vf, u64 guid, int type)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
if (type != IFLA_VF_IB_NODE_GUID && type != IFLA_VF_IB_PORT_GUID)
return -EINVAL;
@@ -1724,7 +1810,7 @@ static int ipoib_set_vf_guid(struct net_device *dev, int vf, u64 guid, int type)
static int ipoib_get_vf_stats(struct net_device *dev, int vf,
struct ifla_vf_stats *vf_stats)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
return ib_get_vf_stats(priv->ca, vf, priv->port, vf_stats);
}
@@ -1748,6 +1834,7 @@ static const struct net_device_ops ipoib_netdev_ops_pf = {
.ndo_get_vf_stats = ipoib_get_vf_stats,
.ndo_set_vf_guid = ipoib_set_vf_guid,
.ndo_set_mac_address = ipoib_set_mac,
+ .ndo_get_stats64 = ipoib_get_stats,
};
static const struct net_device_ops ipoib_netdev_ops_vf = {
@@ -1762,21 +1849,12 @@ static const struct net_device_ops ipoib_netdev_ops_vf = {
.ndo_get_iflink = ipoib_get_iflink,
};
-void ipoib_setup(struct net_device *dev)
+void ipoib_setup_common(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
-
- if (priv->hca_caps & IB_DEVICE_VIRTUAL_FUNCTION)
- dev->netdev_ops = &ipoib_netdev_ops_vf;
- else
- dev->netdev_ops = &ipoib_netdev_ops_pf;
-
dev->header_ops = &ipoib_header_ops;
ipoib_set_ethtool_ops(dev);
- netif_napi_add(dev, &priv->napi, ipoib_poll, NAPI_POLL_WEIGHT);
-
dev->watchdog_timeo = HZ;
dev->flags |= IFF_BROADCAST | IFF_MULTICAST;
@@ -1790,11 +1868,14 @@ void ipoib_setup(struct net_device *dev)
netif_keep_dst(dev);
memcpy(dev->broadcast, ipv4_bcast_addr, INFINIBAND_ALEN);
+}
- priv->dev = dev;
+static void ipoib_build_priv(struct net_device *dev)
+{
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ priv->dev = dev;
spin_lock_init(&priv->lock);
-
init_rwsem(&priv->vlan_rwsem);
INIT_LIST_HEAD(&priv->path_list);
@@ -1812,22 +1893,100 @@ void ipoib_setup(struct net_device *dev)
INIT_DELAYED_WORK(&priv->neigh_reap_task, ipoib_reap_neigh);
}
-struct ipoib_dev_priv *ipoib_intf_alloc(const char *name)
+static const struct net_device_ops ipoib_netdev_default_pf = {
+ .ndo_init = ipoib_dev_init_default,
+ .ndo_uninit = ipoib_dev_uninit_default,
+ .ndo_open = ipoib_ib_dev_open_default,
+ .ndo_stop = ipoib_ib_dev_stop_default,
+};
+
+static struct net_device
+*ipoib_create_netdev_default(struct ib_device *hca,
+ const char *name,
+ unsigned char name_assign_type,
+ void (*setup)(struct net_device *))
{
struct net_device *dev;
+ struct rdma_netdev *rn;
- dev = alloc_netdev((int)sizeof(struct ipoib_dev_priv), name,
- NET_NAME_UNKNOWN, ipoib_setup);
+ dev = alloc_netdev((int)sizeof(struct rdma_netdev),
+ name,
+ name_assign_type, setup);
if (!dev)
return NULL;
- return netdev_priv(dev);
+ rn = netdev_priv(dev);
+
+ rn->send = ipoib_send;
+ rn->attach_mcast = ipoib_mcast_attach;
+ rn->detach_mcast = ipoib_mcast_detach;
+ rn->free_rdma_netdev = free_netdev;
+ rn->hca = hca;
+
+ dev->netdev_ops = &ipoib_netdev_default_pf;
+
+ return dev;
+}
+
+static struct net_device *ipoib_get_netdev(struct ib_device *hca, u8 port,
+ const char *name)
+{
+ struct net_device *dev;
+
+ if (hca->alloc_rdma_netdev) {
+ dev = hca->alloc_rdma_netdev(hca, port,
+ RDMA_NETDEV_IPOIB, name,
+ NET_NAME_UNKNOWN,
+ ipoib_setup_common);
+ if (IS_ERR_OR_NULL(dev) && PTR_ERR(dev) != -EOPNOTSUPP)
+ return NULL;
+ }
+
+ if (!hca->alloc_rdma_netdev || PTR_ERR(dev) == -EOPNOTSUPP)
+ dev = ipoib_create_netdev_default(hca, name, NET_NAME_UNKNOWN,
+ ipoib_setup_common);
+
+ return dev;
+}
+
+struct ipoib_dev_priv *ipoib_intf_alloc(struct ib_device *hca, u8 port,
+ const char *name)
+{
+ struct net_device *dev;
+ struct ipoib_dev_priv *priv;
+ struct rdma_netdev *rn;
+
+ priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return NULL;
+
+ dev = ipoib_get_netdev(hca, port, name);
+ if (!dev)
+ goto free_priv;
+
+ priv->rn_ops = dev->netdev_ops;
+
+	/* FIXME: should be after the query_cap */
+ if (priv->hca_caps & IB_DEVICE_VIRTUAL_FUNCTION)
+ dev->netdev_ops = &ipoib_netdev_ops_vf;
+ else
+ dev->netdev_ops = &ipoib_netdev_ops_pf;
+
+ rn = netdev_priv(dev);
+ rn->clnt_priv = priv;
+ ipoib_build_priv(dev);
+
+ return priv;
+free_priv:
+ kfree(priv);
+ return NULL;
}
static ssize_t show_pkey(struct device *dev,
struct device_attribute *attr, char *buf)
{
- struct ipoib_dev_priv *priv = netdev_priv(to_net_dev(dev));
+ struct net_device *ndev = to_net_dev(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(ndev);
return sprintf(buf, "0x%04x\n", priv->pkey);
}
@@ -1836,14 +1995,15 @@ static DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL);
static ssize_t show_umcast(struct device *dev,
struct device_attribute *attr, char *buf)
{
- struct ipoib_dev_priv *priv = netdev_priv(to_net_dev(dev));
+ struct net_device *ndev = to_net_dev(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(ndev);
return sprintf(buf, "%d\n", test_bit(IPOIB_FLAG_UMCAST, &priv->flags));
}
void ipoib_set_umcast(struct net_device *ndev, int umcast_val)
{
- struct ipoib_dev_priv *priv = netdev_priv(ndev);
+ struct ipoib_dev_priv *priv = ipoib_priv(ndev);
if (umcast_val > 0) {
set_bit(IPOIB_FLAG_UMCAST, &priv->flags);
@@ -1916,7 +2076,7 @@ static int ipoib_check_lladdr(struct net_device *dev,
static int ipoib_set_mac(struct net_device *dev, void *addr)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct sockaddr_storage *ss = addr;
int ret;
@@ -1984,20 +2144,18 @@ int ipoib_add_pkey_attr(struct net_device *dev)
return device_create_file(&dev->dev, &dev_attr_pkey);
}
-int ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca)
+void ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca)
{
priv->hca_caps = hca->attrs.device_cap_flags;
if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM) {
- priv->dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_RXCSUM;
+ priv->dev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_RXCSUM;
if (priv->hca_caps & IB_DEVICE_UD_TSO)
priv->dev->hw_features |= NETIF_F_TSO;
priv->dev->features |= priv->dev->hw_features;
}
-
- return 0;
}
static struct net_device *ipoib_add_port(const char *format,
@@ -2007,11 +2165,11 @@ static struct net_device *ipoib_add_port(const char *format,
struct ib_port_attr attr;
int result = -ENOMEM;
- priv = ipoib_intf_alloc(format);
+ priv = ipoib_intf_alloc(hca, port, format);
if (!priv)
goto alloc_mem_failed;
- SET_NETDEV_DEV(priv->dev, hca->dma_device);
+ SET_NETDEV_DEV(priv->dev, hca->dev.parent);
priv->dev->dev_id = port - 1;
result = ib_query_port(hca, port, &attr);
@@ -2037,9 +2195,7 @@ static struct net_device *ipoib_add_port(const char *format,
goto device_init_failed;
}
- result = ipoib_set_dev_features(priv, hca);
- if (result)
- goto device_init_failed;
+ ipoib_set_dev_features(priv, hca);
/*
* Set the full membership bit, so that we join the right
@@ -2083,8 +2239,7 @@ static struct net_device *ipoib_add_port(const char *format,
goto register_failed;
}
- ipoib_create_debug_files(priv->dev);
-
+ result = -ENOMEM;
if (ipoib_cm_add_mode_attr(priv->dev))
goto sysfs_failed;
if (ipoib_add_pkey_attr(priv->dev))
@@ -2099,7 +2254,6 @@ static struct net_device *ipoib_add_port(const char *format,
return priv->dev;
sysfs_failed:
- ipoib_delete_debug_files(priv->dev);
unregister_netdev(priv->dev);
register_failed:
@@ -2115,6 +2269,7 @@ event_failed:
device_init_failed:
free_netdev(priv->dev);
+ kfree(priv);
alloc_mem_failed:
return ERR_PTR(result);
@@ -2139,7 +2294,7 @@ static void ipoib_add_one(struct ib_device *device)
continue;
dev = ipoib_add_port("ib%d", device, p);
if (!IS_ERR(dev)) {
- priv = netdev_priv(dev);
+ priv = ipoib_priv(dev);
list_add_tail(&priv->list, dev_list);
count++;
}
@@ -2155,13 +2310,15 @@ static void ipoib_add_one(struct ib_device *device)
static void ipoib_remove_one(struct ib_device *device, void *client_data)
{
- struct ipoib_dev_priv *priv, *tmp;
+ struct ipoib_dev_priv *priv, *tmp, *cpriv, *tcpriv;
struct list_head *dev_list = client_data;
if (!dev_list)
return;
list_for_each_entry_safe(priv, tmp, dev_list, list) {
+ struct rdma_netdev *rn = netdev_priv(priv->dev);
+
ib_unregister_event_handler(&priv->event_handler);
flush_workqueue(ipoib_workqueue);
@@ -2178,12 +2335,23 @@ static void ipoib_remove_one(struct ib_device *device, void *client_data)
flush_workqueue(priv->wq);
unregister_netdev(priv->dev);
- free_netdev(priv->dev);
+ rn->free_rdma_netdev(priv->dev);
+
+ list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list)
+ kfree(cpriv);
+
+ kfree(priv);
}
kfree(dev_list);
}
+#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
+static struct notifier_block ipoib_netdev_notifier = {
+ .notifier_call = ipoib_netdev_event,
+};
+#endif
+
static int __init ipoib_init_module(void)
{
int ret;
@@ -2236,6 +2404,9 @@ static int __init ipoib_init_module(void)
if (ret)
goto err_client;
+#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
+ register_netdevice_notifier(&ipoib_netdev_notifier);
+#endif
return 0;
err_client:
@@ -2253,6 +2424,9 @@ err_fs:
static void __exit ipoib_cleanup_module(void)
{
+#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
+ unregister_netdevice_notifier(&ipoib_netdev_notifier);
+#endif
ipoib_netlink_fini();
ib_unregister_client(&ipoib_client);
ib_sa_unregister_client(&ipoib_sa_client);
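
The ipoib_main.c changes above convert IPoIB into a client of the rdma_netdev abstraction: the netdev private area now holds a struct rdma_netdev whose send/attach_mcast/detach_mcast callbacks carry the data path, a hardware driver can supply an accelerated implementation through ib_device->alloc_rdma_netdev, and ipoib_create_netdev_default() is the software fallback. A minimal sketch of a provider-side allocator; the my_hw_* callbacks are hypothetical stand-ins, not part of this patch:

/* Hedged sketch of a hardware alloc_rdma_netdev provider. */
static struct net_device *my_alloc_rdma_netdev(struct ib_device *hca, u8 port,
					       enum rdma_netdev_t type,
					       const char *name,
					       unsigned char name_assign_type,
					       void (*setup)(struct net_device *))
{
	struct net_device *dev;
	struct rdma_netdev *rn;

	if (type != RDMA_NETDEV_IPOIB)
		return ERR_PTR(-EOPNOTSUPP); /* caller falls back to the default netdev */

	dev = alloc_netdev(sizeof(struct rdma_netdev), name,
			   name_assign_type, setup);
	if (!dev)
		return ERR_PTR(-ENOMEM);

	rn = netdev_priv(dev);
	rn->send = my_hw_send;			/* hypothetical accelerated TX */
	rn->attach_mcast = my_hw_attach_mcast;	/* hypothetical */
	rn->detach_mcast = my_hw_detach_mcast;	/* hypothetical */
	rn->free_rdma_netdev = free_netdev;
	rn->hca = hca;
	return dev;
}
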
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index fddff403d5d2..057f58e6afca 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -114,7 +114,7 @@ static void ipoib_mcast_free(struct ipoib_mcast *mcast)
struct net_device *dev = mcast->dev;
int tx_dropped = 0;
- ipoib_dbg_mcast(netdev_priv(dev), "deleting multicast group %pI6\n",
+ ipoib_dbg_mcast(ipoib_priv(dev), "deleting multicast group %pI6\n",
mcast->mcmember.mgid.raw);
/* remove all neigh connected to this mcast */
@@ -158,7 +158,7 @@ static struct ipoib_mcast *ipoib_mcast_alloc(struct net_device *dev,
static struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, void *mgid)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct rb_node *n = priv->multicast_tree.rb_node;
while (n) {
@@ -182,7 +182,7 @@ static struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, void *mgid
static int __ipoib_mcast_add(struct net_device *dev, struct ipoib_mcast *mcast)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct rb_node **n = &priv->multicast_tree.rb_node, *pn = NULL;
while (*n) {
@@ -212,8 +212,10 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
struct ib_sa_mcmember_rec *mcmember)
{
struct net_device *dev = mcast->dev;
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ struct rdma_netdev *rn = netdev_priv(dev);
struct ipoib_ah *ah;
+ struct rdma_ah_attr av;
int ret;
int set_qkey = 0;
@@ -260,8 +262,9 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
return 0;
}
- ret = ipoib_mcast_attach(dev, be16_to_cpu(mcast->mcmember.mlid),
- &mcast->mcmember.mgid, set_qkey);
+ ret = rn->attach_mcast(dev, priv->ca, &mcast->mcmember.mgid,
+ be16_to_cpu(mcast->mcmember.mlid),
+ set_qkey, priv->qkey);
if (ret < 0) {
ipoib_warn(priv, "couldn't attach QP to multicast group %pI6\n",
mcast->mcmember.mgid.raw);
@@ -271,40 +274,34 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
}
}
- {
- struct ib_ah_attr av = {
- .dlid = be16_to_cpu(mcast->mcmember.mlid),
- .port_num = priv->port,
- .sl = mcast->mcmember.sl,
- .ah_flags = IB_AH_GRH,
- .static_rate = mcast->mcmember.rate,
- .grh = {
- .flow_label = be32_to_cpu(mcast->mcmember.flow_label),
- .hop_limit = mcast->mcmember.hop_limit,
- .sgid_index = 0,
- .traffic_class = mcast->mcmember.traffic_class
- }
- };
- av.grh.dgid = mcast->mcmember.mgid;
-
- ah = ipoib_create_ah(dev, priv->pd, &av);
- if (IS_ERR(ah)) {
- ipoib_warn(priv, "ib_address_create failed %ld\n",
- -PTR_ERR(ah));
- /* use original error */
- return PTR_ERR(ah);
- } else {
- spin_lock_irq(&priv->lock);
- mcast->ah = ah;
- spin_unlock_irq(&priv->lock);
-
- ipoib_dbg_mcast(priv, "MGID %pI6 AV %p, LID 0x%04x, SL %d\n",
- mcast->mcmember.mgid.raw,
- mcast->ah->ah,
- be16_to_cpu(mcast->mcmember.mlid),
- mcast->mcmember.sl);
- }
+ memset(&av, 0, sizeof(av));
+ av.type = rdma_ah_find_type(priv->ca, priv->port);
+	rdma_ah_set_dlid(&av, be16_to_cpu(mcast->mcmember.mlid));
+ rdma_ah_set_port_num(&av, priv->port);
+ rdma_ah_set_sl(&av, mcast->mcmember.sl);
+ rdma_ah_set_static_rate(&av, mcast->mcmember.rate);
+
+ rdma_ah_set_grh(&av, &mcast->mcmember.mgid,
+ be32_to_cpu(mcast->mcmember.flow_label),
+ 0, mcast->mcmember.hop_limit,
+ mcast->mcmember.traffic_class);
+
+ ah = ipoib_create_ah(dev, priv->pd, &av);
+ if (IS_ERR(ah)) {
+ ipoib_warn(priv, "ib_address_create failed %ld\n",
+ -PTR_ERR(ah));
+ /* use original error */
+ return PTR_ERR(ah);
}
+ spin_lock_irq(&priv->lock);
+ mcast->ah = ah;
+ spin_unlock_irq(&priv->lock);
+
+ ipoib_dbg_mcast(priv, "MGID %pI6 AV %p, LID 0x%04x, SL %d\n",
+ mcast->mcmember.mgid.raw,
+ mcast->ah->ah,
+ be16_to_cpu(mcast->mcmember.mlid),
+ mcast->mcmember.sl);
/* actually send any queued packets */
netif_tx_lock_bh(dev);
@@ -314,9 +311,11 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
netif_tx_unlock_bh(dev);
skb->dev = dev;
- if (dev_queue_xmit(skb))
- ipoib_warn(priv, "dev_queue_xmit failed to requeue packet\n");
+ ret = dev_queue_xmit(skb);
+ if (ret)
+ ipoib_warn(priv, "%s:dev_queue_xmit failed to re-queue packet, ret:%d\n",
+ __func__, ret);
netif_tx_lock_bh(dev);
}
netif_tx_unlock_bh(dev);
@@ -329,7 +328,6 @@ void ipoib_mcast_carrier_on_task(struct work_struct *work)
struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
carrier_on_task);
struct ib_port_attr attr;
- int ret;
if (ib_query_port(priv->ca, priv->port, &attr) ||
attr.state != IB_PORT_ACTIVE) {
@@ -342,11 +340,9 @@ void ipoib_mcast_carrier_on_task(struct work_struct *work)
* because the broadcast group must always be joined first and is always
* re-joined if the SM changes substantially.
*/
- ret = ipoib_check_sm_sendonly_fullmember_support(priv);
- if (ret < 0)
- pr_debug("%s failed query sm support for sendonly-fullmember (ret: %d)\n",
- priv->dev->name, ret);
-
+ priv->sm_fullmember_sendonly_support =
+ ib_sa_sendonly_fullmem_support(&ipoib_sa_client,
+ priv->ca, priv->port);
/*
* Take rtnl_lock to avoid racing with ipoib_stop() and
* turning the carrier back on while a device is being
@@ -373,7 +369,7 @@ static int ipoib_mcast_join_complete(int status,
{
struct ipoib_mcast *mcast = multicast->context;
struct net_device *dev = mcast->dev;
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
ipoib_dbg_mcast(priv, "%sjoin completion for %pI6 (status %d)\n",
test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ?
@@ -475,7 +471,7 @@ out_locked:
*/
static int ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ib_sa_multicast *multicast;
struct ib_sa_mcmember_rec rec = {
.join_state = 1
@@ -487,6 +483,9 @@ static int ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast)
!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags))
return -EINVAL;
+ init_completion(&mcast->done);
+ set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
+
ipoib_dbg_mcast(priv, "joining MGID %pI6\n", mcast->mcmember.mgid.raw);
rec.mgid = mcast->mcmember.mgid;
@@ -645,8 +644,6 @@ void ipoib_mcast_join_task(struct work_struct *work)
if (mcast->backoff == 1 ||
time_after_eq(jiffies, mcast->delay_until)) {
/* Found the next unjoined group */
- init_completion(&mcast->done);
- set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
if (ipoib_mcast_join(dev, mcast)) {
spin_unlock_irq(&priv->lock);
return;
@@ -666,17 +663,15 @@ out:
queue_delayed_work(priv->wq, &priv->mcast_task,
delay_until - jiffies);
}
- if (mcast) {
- init_completion(&mcast->done);
- set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
+ if (mcast)
ipoib_mcast_join(dev, mcast);
- }
+
spin_unlock_irq(&priv->lock);
}
-int ipoib_mcast_start_thread(struct net_device *dev)
+void ipoib_mcast_start_thread(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
unsigned long flags;
ipoib_dbg_mcast(priv, "starting multicast thread\n");
@@ -684,13 +679,11 @@ int ipoib_mcast_start_thread(struct net_device *dev)
spin_lock_irqsave(&priv->lock, flags);
__ipoib_mcast_schedule_join_thread(priv, NULL, 0);
spin_unlock_irqrestore(&priv->lock, flags);
-
- return 0;
}
int ipoib_mcast_stop_thread(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
unsigned long flags;
ipoib_dbg_mcast(priv, "stopping multicast thread\n");
@@ -706,7 +699,8 @@ int ipoib_mcast_stop_thread(struct net_device *dev)
static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ struct rdma_netdev *rn = netdev_priv(dev);
int ret = 0;
if (test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
@@ -720,8 +714,8 @@ static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
mcast->mcmember.mgid.raw);
/* Remove ourselves from the multicast group */
- ret = ib_detach_mcast(priv->qp, &mcast->mcmember.mgid,
- be16_to_cpu(mcast->mcmember.mlid));
+ ret = rn->detach_mcast(dev, priv->ca, &mcast->mcmember.mgid,
+ be16_to_cpu(mcast->mcmember.mlid));
if (ret)
ipoib_warn(priv, "ib_detach_mcast failed (result = %d)\n", ret);
} else if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
@@ -762,7 +756,8 @@ void ipoib_mcast_remove_list(struct list_head *remove_list)
void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ struct rdma_netdev *rn = netdev_priv(dev);
struct ipoib_mcast *mcast;
unsigned long flags;
void *mgid = daddr + 4;
@@ -825,7 +820,8 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb)
}
}
spin_unlock_irqrestore(&priv->lock, flags);
- ipoib_send(dev, skb, mcast->ah, IB_MULTICAST_QPN);
+ mcast->ah->last_send = rn->send(dev, skb, mcast->ah->ah,
+ IB_MULTICAST_QPN);
if (neigh)
ipoib_neigh_put(neigh);
return;
@@ -837,7 +833,7 @@ unlock:
void ipoib_mcast_dev_flush(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
LIST_HEAD(remove_list);
struct ipoib_mcast *mcast, *tmcast;
unsigned long flags;
@@ -1029,7 +1025,7 @@ struct ipoib_mcast_iter *ipoib_mcast_iter_init(struct net_device *dev)
int ipoib_mcast_iter_next(struct ipoib_mcast_iter *iter)
{
- struct ipoib_dev_priv *priv = netdev_priv(iter->dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(iter->dev);
struct rb_node *n;
struct ipoib_mcast *mcast;
int ret = 1;
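
ipoib_mcast_join_finish() above trades the open-coded struct ib_ah_attr initializer for the struct rdma_ah_attr accessors, where the type field (from rdma_ah_find_type()) selects the IB or OPA attribute layout before any other field is set. The same pattern for an arbitrary destination, as a sketch with placeholder parameter values:

/* Sketch: building an address-handle attribute with the new accessors.
 * All of the dlid/sl/rate/GRH inputs here are caller-supplied placeholders. */
static void example_fill_av(struct rdma_ah_attr *av, struct ib_device *ibdev,
			    u8 port, u32 dlid, u8 sl, u8 rate,
			    union ib_gid *dgid, u32 flow_label)
{
	memset(av, 0, sizeof(*av));
	av->type = rdma_ah_find_type(ibdev, port); /* IB vs. OPA layout first */
	rdma_ah_set_dlid(av, dlid);
	rdma_ah_set_port_num(av, port);
	rdma_ah_set_sl(av, sl);
	rdma_ah_set_static_rate(av, rate);
	rdma_ah_set_grh(av, dgid, flow_label, 0 /* sgid_index */,
			64 /* hop_limit */, 0 /* traffic_class */);
}
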
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
index cdc7df4fdb8a..3e44087935ae 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
@@ -44,7 +44,7 @@ static const struct nla_policy ipoib_policy[IFLA_IPOIB_MAX + 1] = {
static int ipoib_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
u16 val;
if (nla_put_u16(skb, IFLA_IPOIB_PKEY, priv->pkey))
@@ -64,8 +64,9 @@ nla_put_failure:
return -EMSGSIZE;
}
-static int ipoib_changelink(struct net_device *dev,
- struct nlattr *tb[], struct nlattr *data[])
+static int ipoib_changelink(struct net_device *dev, struct nlattr *tb[],
+ struct nlattr *data[],
+ struct netlink_ext_ack *extack)
{
u16 mode, umcast;
int ret = 0;
@@ -93,7 +94,8 @@ out_err:
}
static int ipoib_new_child_link(struct net *src_net, struct net_device *dev,
- struct nlattr *tb[], struct nlattr *data[])
+ struct nlattr *tb[], struct nlattr *data[],
+ struct netlink_ext_ack *extack)
{
struct net_device *pdev;
struct ipoib_dev_priv *ppriv;
@@ -107,7 +109,7 @@ static int ipoib_new_child_link(struct net *src_net, struct net_device *dev,
if (!pdev || pdev->type != ARPHRD_INFINIBAND)
return -ENODEV;
- ppriv = netdev_priv(pdev);
+ ppriv = ipoib_priv(pdev);
if (test_bit(IPOIB_FLAG_SUBINTERFACE, &ppriv->flags)) {
ipoib_warn(ppriv, "child creation disallowed for child devices\n");
@@ -129,10 +131,11 @@ static int ipoib_new_child_link(struct net *src_net, struct net_device *dev,
*/
child_pkey |= 0x8000;
- err = __ipoib_vlan_add(ppriv, netdev_priv(dev), child_pkey, IPOIB_RTNL_CHILD);
+ err = __ipoib_vlan_add(ppriv, ipoib_priv(dev),
+ child_pkey, IPOIB_RTNL_CHILD);
if (!err && data)
- err = ipoib_changelink(dev, tb, data);
+ err = ipoib_changelink(dev, tb, data, extack);
return err;
}
@@ -140,8 +143,8 @@ static void ipoib_unregister_child_dev(struct net_device *dev, struct list_head
{
struct ipoib_dev_priv *priv, *ppriv;
- priv = netdev_priv(dev);
- ppriv = netdev_priv(priv->parent);
+ priv = ipoib_priv(dev);
+ ppriv = ipoib_priv(priv->parent);
down_write(&ppriv->vlan_rwsem);
unregister_netdevice_queue(dev, head);
@@ -161,7 +164,7 @@ static struct rtnl_link_ops ipoib_link_ops __read_mostly = {
.maxtype = IFLA_IPOIB_MAX,
.policy = ipoib_policy,
.priv_size = sizeof(struct ipoib_dev_priv),
- .setup = ipoib_setup,
+ .setup = ipoib_setup_common,
.newlink = ipoib_new_child_link,
.changelink = ipoib_changelink,
.dellink = ipoib_unregister_child_dev,
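
The changelink/newlink signature change above threads struct netlink_ext_ack through the rtnl_link_ops callbacks, which lets a handler attach a human-readable error string to the netlink reply instead of returning a bare errno. A sketch of that usage, assuming the generic NL_SET_ERR_MSG() helper (this patch itself only forwards extack):

static int example_changelink(struct net_device *dev, struct nlattr *tb[],
			      struct nlattr *data[],
			      struct netlink_ext_ack *extack)
{
	if (!data || !data[IFLA_IPOIB_MODE]) {
		/* the string travels back to userspace with the ack */
		NL_SET_ERR_MSG(extack, "IPoIB mode attribute is required");
		return -EINVAL;
	}
	return 0;
}
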
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index 189dcd1709d2..bb64baf25309 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -35,9 +35,10 @@
#include "ipoib.h"
-int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid, int set_qkey)
+int ipoib_mcast_attach(struct net_device *dev, struct ib_device *hca,
+ union ib_gid *mgid, u16 mlid, int set_qkey, u32 qkey)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ib_qp_attr *qp_attr = NULL;
int ret;
u16 pkey_index;
@@ -56,7 +57,7 @@ int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid, int
goto out;
/* set correct QKey for QP */
- qp_attr->qkey = priv->qkey;
+ qp_attr->qkey = qkey;
ret = ib_modify_qp(priv->qp, qp_attr, IB_QP_QKEY);
if (ret) {
ipoib_warn(priv, "failed to modify QP, ret = %d\n", ret);
@@ -74,9 +75,20 @@ out:
return ret;
}
+int ipoib_mcast_detach(struct net_device *dev, struct ib_device *hca,
+ union ib_gid *mgid, u16 mlid)
+{
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ int ret;
+
+ ret = ib_detach_mcast(priv->qp, mgid, mlid);
+
+ return ret;
+}
+
int ipoib_init_qp(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
int ret;
struct ib_qp_attr qp_attr;
int attr_mask;
@@ -130,7 +142,7 @@ out_fail:
int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ib_qp_init_attr init_attr = {
.cap = {
.max_send_wr = ipoib_sendq_size,
@@ -147,22 +159,6 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
int ret, size;
int i;
- priv->pd = ib_alloc_pd(priv->ca, 0);
- if (IS_ERR(priv->pd)) {
- printk(KERN_WARNING "%s: failed to allocate PD\n", ca->name);
- return -ENODEV;
- }
-
- /*
- * the various IPoIB tasks assume they will never race against
- * themselves, so always use a single thread workqueue
- */
- priv->wq = alloc_ordered_workqueue("ipoib_wq", WQ_MEM_RECLAIM);
- if (!priv->wq) {
- printk(KERN_WARNING "ipoib: failed to allocate device WQ\n");
- goto out_free_pd;
- }
-
size = ipoib_recvq_size + 1;
ret = ipoib_cm_dev_init(dev);
if (!ret) {
@@ -173,7 +169,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
size += ipoib_recvq_size * ipoib_max_conn_qp;
} else
if (ret != -ENOSYS)
- goto out_free_wq;
+ return -ENODEV;
cq_attr.cqe = size;
priv->recv_cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL,
@@ -212,10 +208,6 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
goto out_free_send_cq;
}
- priv->dev->dev_addr[1] = (priv->qp->qp_num >> 16) & 0xff;
- priv->dev->dev_addr[2] = (priv->qp->qp_num >> 8) & 0xff;
- priv->dev->dev_addr[3] = (priv->qp->qp_num ) & 0xff;
-
for (i = 0; i < MAX_SKB_FRAGS + 1; ++i)
priv->tx_sge[i].lkey = priv->pd->local_dma_lkey;
@@ -247,26 +239,18 @@ out_free_recv_cq:
out_cm_dev_cleanup:
ipoib_cm_dev_cleanup(dev);
-out_free_wq:
- destroy_workqueue(priv->wq);
- priv->wq = NULL;
-
-out_free_pd:
- ib_dealloc_pd(priv->pd);
-
return -ENODEV;
}
void ipoib_transport_dev_cleanup(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
if (priv->qp) {
if (ib_destroy_qp(priv->qp))
ipoib_warn(priv, "ib_qp_destroy failed\n");
priv->qp = NULL;
- clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
}
if (ib_destroy_cq(priv->send_cq))
@@ -274,16 +258,6 @@ void ipoib_transport_dev_cleanup(struct net_device *dev)
if (ib_destroy_cq(priv->recv_cq))
ipoib_warn(priv, "ib_cq_destroy (recv) failed\n");
-
- ipoib_cm_dev_cleanup(dev);
-
- if (priv->wq) {
- flush_workqueue(priv->wq);
- destroy_workqueue(priv->wq);
- priv->wq = NULL;
- }
-
- ib_dealloc_pd(priv->pd);
}
void ipoib_event(struct ib_event_handler *handler,
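
Taken together, the dev_addr bytes written after QP creation in ipoib_dev_init_default() and the IPOIB_QPN()/daddr + 4 accesses elsewhere in this patch imply the 20-byte (INFINIBAND_ALEN) IPoIB hardware-address layout: what looks like a flags byte (masked off by IPOIB_QPN()), the 24-bit QP number, then the 16-byte port GID. A hedged decoding sketch under that reading:

/* Sketch: split a 20-byte IPoIB hardware address into QPN and GID.
 * Byte 0 is treated as flags and skipped, matching IPOIB_QPN(). */
static void example_decode_hwaddr(const u8 *daddr, u32 *qpn, union ib_gid *gid)
{
	*qpn = ((u32)daddr[1] << 16) | ((u32)daddr[2] << 8) | daddr[3];
	memcpy(gid->raw, daddr + 4, sizeof(gid->raw));
}
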
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
index fd811115af49..081b33deff1b 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
@@ -31,6 +31,7 @@
*/
#include <linux/module.h>
+#include <linux/sched/signal.h>
#include <linux/init.h>
#include <linux/seq_file.h>
@@ -43,7 +44,7 @@ static ssize_t show_parent(struct device *d, struct device_attribute *attr,
char *buf)
{
struct net_device *dev = to_net_dev(d);
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
return sprintf(buf, "%s\n", priv->parent->name);
}
@@ -61,9 +62,7 @@ int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv,
priv->parent = ppriv->dev;
set_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags);
- result = ipoib_set_dev_features(priv, ppriv->ca);
- if (result)
- goto err;
+ ipoib_set_dev_features(priv, ppriv->ca);
priv->pkey = pkey;
@@ -87,8 +86,6 @@ int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv,
goto register_failed;
}
- ipoib_create_debug_files(priv->dev);
-
	/* RTNL children don't need proprietary sysfs entries */
if (type == IPOIB_LEGACY_CHILD) {
if (ipoib_cm_add_mode_attr(priv->dev))
@@ -109,7 +106,6 @@ int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv,
sysfs_failed:
result = -ENOMEM;
- ipoib_delete_debug_files(priv->dev);
unregister_netdevice(priv->dev);
register_failed:
@@ -129,20 +125,21 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
if (!capable(CAP_NET_ADMIN))
return -EPERM;
- ppriv = netdev_priv(pdev);
+ ppriv = ipoib_priv(pdev);
if (test_bit(IPOIB_FLAG_GOING_DOWN, &ppriv->flags))
return -EPERM;
snprintf(intf_name, sizeof intf_name, "%s.%04x",
ppriv->dev->name, pkey);
- priv = ipoib_intf_alloc(intf_name);
- if (!priv)
- return -ENOMEM;
if (!rtnl_trylock())
return restart_syscall();
+ priv = ipoib_intf_alloc(ppriv->ca, ppriv->port, intf_name);
+	if (!priv) {
+		rtnl_unlock();
+		return -ENOMEM;
+	}
+
down_write(&ppriv->vlan_rwsem);
/*
@@ -168,11 +165,13 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
out:
up_write(&ppriv->vlan_rwsem);
- if (result)
- free_netdev(priv->dev);
-
rtnl_unlock();
+ if (result) {
+ free_netdev(priv->dev);
+ kfree(priv);
+ }
+
return result;
}
@@ -184,7 +183,7 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey)
if (!capable(CAP_NET_ADMIN))
return -EPERM;
- ppriv = netdev_priv(pdev);
+ ppriv = ipoib_priv(pdev);
if (test_bit(IPOIB_FLAG_GOING_DOWN, &ppriv->flags))
return -EPERM;
@@ -196,7 +195,6 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey)
list_for_each_entry_safe(priv, tpriv, &ppriv->child_intfs, list) {
if (priv->pkey == pkey &&
priv->child_type == IPOIB_LEGACY_CHILD) {
- unregister_netdevice(priv->dev);
list_del(&priv->list);
dev = priv->dev;
break;
@@ -204,10 +202,16 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey)
}
up_write(&ppriv->vlan_rwsem);
+ if (dev) {
+ ipoib_dbg(ppriv, "delete child vlan %s\n", dev->name);
+ unregister_netdevice(dev);
+ }
+
rtnl_unlock();
if (dev) {
free_netdev(dev);
+ kfree(priv);
return 0;
}
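
ipoib_vlan_delete() above now unregisters the child while RTNL is still held and defers the frees until after rtnl_unlock(); and because ipoib_dev_priv is allocated separately from the netdev elsewhere in this patch, free_netdev() and kfree(priv) become distinct steps. The ordering, as a sketch:

/* Sketch of the child-teardown ordering used above. */
static void example_child_teardown(struct net_device *dev,
				   struct ipoib_dev_priv *priv)
{
	rtnl_lock();
	unregister_netdevice(dev);	/* requires RTNL */
	rtnl_unlock();

	free_netdev(dev);		/* releases the netdev + rdma_netdev area */
	kfree(priv);			/* priv now lives outside the netdev */
}
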
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index e71af717e71b..37b33d708c2d 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -83,6 +83,7 @@ static struct scsi_host_template iscsi_iser_sht;
static struct iscsi_transport iscsi_iser_transport;
static struct scsi_transport_template *iscsi_iser_scsi_transport;
static struct workqueue_struct *release_wq;
+static DEFINE_MUTEX(unbind_iser_conn_mutex);
struct iser_global ig;
int iser_debug_level = 0;
@@ -550,12 +551,14 @@ iscsi_iser_conn_stop(struct iscsi_cls_conn *cls_conn, int flag)
*/
if (iser_conn) {
mutex_lock(&iser_conn->state_mutex);
+ mutex_lock(&unbind_iser_conn_mutex);
iser_conn_terminate(iser_conn);
iscsi_conn_stop(cls_conn, flag);
/* unbind */
iser_conn->iscsi_conn = NULL;
conn->dd_data = NULL;
+ mutex_unlock(&unbind_iser_conn_mutex);
complete(&iser_conn->stop_completion);
mutex_unlock(&iser_conn->state_mutex);
@@ -652,7 +655,7 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
}
if (iscsi_host_add(shost,
- ib_conn->device->ib_device->dma_device)) {
+ ib_conn->device->ib_device->dev.parent)) {
mutex_unlock(&iser_conn->state_mutex);
goto free_host;
}
@@ -977,13 +980,21 @@ static int iscsi_iser_slave_alloc(struct scsi_device *sdev)
struct iser_conn *iser_conn;
struct ib_device *ib_dev;
+ mutex_lock(&unbind_iser_conn_mutex);
+
session = starget_to_session(scsi_target(sdev))->dd_data;
iser_conn = session->leadconn->dd_data;
+ if (!iser_conn) {
+ mutex_unlock(&unbind_iser_conn_mutex);
+ return -ENOTCONN;
+ }
ib_dev = iser_conn->ib_conn.device->ib_device;
if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG))
blk_queue_virt_boundary(sdev->request_queue, ~MASK_4K);
+ mutex_unlock(&unbind_iser_conn_mutex);
+
return 0;
}
@@ -994,6 +1005,7 @@ static struct scsi_host_template iscsi_iser_sht = {
.change_queue_depth = scsi_change_queue_depth,
.sg_tablesize = ISCSI_ISER_DEF_SG_TABLESIZE,
.cmd_per_lun = ISER_DEF_CMD_PER_LUN,
+ .eh_timed_out = iscsi_eh_cmd_timed_out,
.eh_abort_handler = iscsi_eh_abort,
.eh_device_reset_handler= iscsi_eh_device_reset,
.eh_target_reset_handler = iscsi_eh_recover_target,
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index 9d0b22ad58c1..c1ae4aeae2f9 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -430,6 +430,7 @@ struct iser_fr_desc {
struct list_head list;
struct iser_reg_resources rsc;
struct iser_pi_context *pi_ctx;
+ struct list_head all_list;
};
/**
@@ -443,6 +444,7 @@ struct iser_fr_pool {
struct list_head list;
spinlock_t lock;
int size;
+ struct list_head all_list;
};
/**
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
index 81ae2e30dd12..2a07692007bd 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -137,8 +137,10 @@ iser_prepare_write_cmd(struct iscsi_task *task,
if (unsol_sz < edtl) {
hdr->flags |= ISER_WSV;
- hdr->write_stag = cpu_to_be32(mem_reg->rkey);
- hdr->write_va = cpu_to_be64(mem_reg->sge.addr + unsol_sz);
+ if (buf_out->data_len > imm_sz) {
+ hdr->write_stag = cpu_to_be32(mem_reg->rkey);
+ hdr->write_va = cpu_to_be64(mem_reg->sge.addr + unsol_sz);
+ }
iser_dbg("Cmd itt:%d, WRITE tags, RKEY:%#.4X "
"VA:%#llX + unsol:%d\n",
@@ -612,7 +614,7 @@ iser_check_remote_inv(struct iser_conn *iser_conn,
iser_conn, rkey);
if (unlikely(!iser_conn->snd_w_inv)) {
- iser_err("conn %p: unexepected remote invalidation, "
+ iser_err("conn %p: unexpected remote invalidation, "
"terminating connection\n", iser_conn);
return -EPROTO;
}
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index 6a9d1cb548ee..26a004e97ae0 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -362,6 +362,7 @@ int iser_alloc_fastreg_pool(struct ib_conn *ib_conn,
int i, ret;
INIT_LIST_HEAD(&fr_pool->list);
+ INIT_LIST_HEAD(&fr_pool->all_list);
spin_lock_init(&fr_pool->lock);
fr_pool->size = 0;
for (i = 0; i < cmds_max; i++) {
@@ -373,6 +374,7 @@ int iser_alloc_fastreg_pool(struct ib_conn *ib_conn,
}
list_add_tail(&desc->list, &fr_pool->list);
+ list_add_tail(&desc->all_list, &fr_pool->all_list);
fr_pool->size++;
}
@@ -392,13 +394,13 @@ void iser_free_fastreg_pool(struct ib_conn *ib_conn)
struct iser_fr_desc *desc, *tmp;
int i = 0;
- if (list_empty(&fr_pool->list))
+ if (list_empty(&fr_pool->all_list))
return;
iser_info("freeing conn %p fr pool\n", ib_conn);
- list_for_each_entry_safe(desc, tmp, &fr_pool->list, list) {
- list_del(&desc->list);
+ list_for_each_entry_safe(desc, tmp, &fr_pool->all_list, all_list) {
+ list_del(&desc->all_list);
iser_free_reg_res(&desc->rsc);
if (desc->pi_ctx)
iser_free_pi_ctx(desc->pi_ctx);
@@ -597,7 +599,9 @@ static void iser_free_ib_conn_res(struct iser_conn *iser_conn,
iser_conn, ib_conn->cma_id, ib_conn->qp);
if (ib_conn->qp != NULL) {
+ mutex_lock(&ig.connlist_mutex);
ib_conn->comp->active_qps--;
+ mutex_unlock(&ig.connlist_mutex);
rdma_destroy_qp(ib_conn->cma_id);
ib_conn->qp = NULL;
}
@@ -704,8 +708,14 @@ iser_calc_scsi_params(struct iser_conn *iser_conn,
unsigned short sg_tablesize, sup_sg_tablesize;
sg_tablesize = DIV_ROUND_UP(max_sectors * 512, SIZE_4K);
- sup_sg_tablesize = min_t(unsigned, ISCSI_ISER_MAX_SG_TABLESIZE,
- device->ib_device->attrs.max_fast_reg_page_list_len);
+ if (device->ib_device->attrs.device_cap_flags &
+ IB_DEVICE_MEM_MGT_EXTENSIONS)
+		sup_sg_tablesize =
+			min_t(uint, ISCSI_ISER_MAX_SG_TABLESIZE,
+			      device->ib_device->attrs.max_fast_reg_page_list_len);
+ else
+ sup_sg_tablesize = ISCSI_ISER_MAX_SG_TABLESIZE;
iser_conn->scsi_sg_tablesize = min(sg_tablesize, sup_sg_tablesize);
}
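
The all_list added to the fast-registration pool fixes descriptor leaks on teardown: descriptors come off fr_pool->list while a command holds them, so freeing by walking the free list alone would miss in-flight entries, whereas all_list keeps every descriptor linked for its whole lifetime. A generic sketch of the two-list pattern:

/* Sketch: a pool with a free list (entries come and go) and an
 * ownership list (entries stay put until the pool is destroyed). */
struct example_desc {
	struct list_head list;		/* on the free list when idle */
	struct list_head all_list;	/* always on the ownership list */
};

struct example_pool {
	struct list_head free_list;	/* corresponds to fr_pool->list */
	struct list_head all_list;	/* corresponds to fr_pool->all_list */
	spinlock_t lock;
};

static struct example_desc *example_pool_get(struct example_pool *p)
{
	struct example_desc *d = NULL;

	spin_lock(&p->lock);
	if (!list_empty(&p->free_list)) {
		d = list_first_entry(&p->free_list, struct example_desc, list);
		list_del(&d->list);	/* still linked on p->all_list */
	}
	spin_unlock(&p->lock);
	return d;
}
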
diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index 314e95516068..0e662656ef42 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -728,7 +728,7 @@ isert_disconnected_handler(struct rdma_cm_id *cma_id,
iscsit_cause_connection_reinstatement(isert_conn->conn, 0);
break;
default:
- isert_warn("conn %p teminating in state %d\n",
+ isert_warn("conn %p terminating in state %d\n",
isert_conn, isert_conn->state);
}
mutex_unlock(&isert_conn->mutex);
@@ -817,6 +817,7 @@ isert_post_recvm(struct isert_conn *isert_conn, u32 count)
rx_wr->sg_list = &rx_desc->rx_sg;
rx_wr->num_sge = 1;
rx_wr->next = rx_wr + 1;
+ rx_desc->in_use = false;
}
rx_wr--;
rx_wr->next = NULL; /* mark end of work requests list */
@@ -835,6 +836,15 @@ isert_post_recv(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc)
struct ib_recv_wr *rx_wr_failed, rx_wr;
int ret;
+ if (!rx_desc->in_use) {
+ /*
+ * if the descriptor is not in-use we already reposted it
+ * for recv, so just silently return
+ */
+ return 0;
+ }
+
+ rx_desc->in_use = false;
rx_wr.wr_cqe = &rx_desc->rx_cqe;
rx_wr.sg_list = &rx_desc->rx_sg;
rx_wr.num_sge = 1;
@@ -1397,6 +1407,8 @@ isert_recv_done(struct ib_cq *cq, struct ib_wc *wc)
return;
}
+ rx_desc->in_use = true;
+
ib_dma_sync_single_for_cpu(ib_dev, rx_desc->dma_addr,
ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
@@ -1440,7 +1452,7 @@ static void
isert_login_recv_done(struct ib_cq *cq, struct ib_wc *wc)
{
struct isert_conn *isert_conn = wc->qp->qp_context;
- struct ib_device *ib_dev = isert_conn->cm_id->device;
+ struct ib_device *ib_dev = isert_conn->device->ib_device;
if (unlikely(wc->status != IB_WC_SUCCESS)) {
isert_print_wc(wc, "login recv");
@@ -1659,10 +1671,23 @@ isert_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
ret = isert_check_pi_status(cmd, isert_cmd->rw.sig->sig_mr);
isert_rdma_rw_ctx_destroy(isert_cmd, isert_conn);
- if (ret)
- transport_send_check_condition_and_sense(cmd, cmd->pi_err, 0);
- else
- isert_put_response(isert_conn->conn, isert_cmd->iscsi_cmd);
+ if (ret) {
+ /*
+	 * transport_generic_request_failure() expects two extra
+	 * references for handling queue-full, so re-add one here as
+	 * target-core will have already dropped it after the first
+	 * isert_put_datain() callback.
+ */
+ kref_get(&cmd->cmd_kref);
+ transport_generic_request_failure(cmd, cmd->pi_err);
+ } else {
+ /*
+ * XXX: isert_put_response() failure is not retried.
+ */
+ ret = isert_put_response(isert_conn->conn, isert_cmd->iscsi_cmd);
+ if (ret)
+ pr_warn_ratelimited("isert_put_response() ret: %d\n", ret);
+ }
}
static void
@@ -1699,13 +1724,15 @@ isert_rdma_read_done(struct ib_cq *cq, struct ib_wc *wc)
cmd->i_state = ISTATE_RECEIVED_LAST_DATAOUT;
spin_unlock_bh(&cmd->istate_lock);
- if (ret) {
- target_put_sess_cmd(se_cmd);
- transport_send_check_condition_and_sense(se_cmd,
- se_cmd->pi_err, 0);
- } else {
+ /*
+ * transport_generic_request_failure() will drop the extra
+ * se_cmd->cmd_kref reference after T10-PI error, and handle
+ * any non-zero ->queue_status() callback error retries.
+ */
+ if (ret)
+ transport_generic_request_failure(se_cmd, se_cmd->pi_err);
+ else
target_execute_cmd(se_cmd);
- }
}
static void
@@ -2171,26 +2198,28 @@ isert_put_datain(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
chain_wr = &isert_cmd->tx_desc.send_wr;
}
- isert_rdma_rw_ctx_post(isert_cmd, isert_conn, cqe, chain_wr);
- isert_dbg("Cmd: %p posted RDMA_WRITE for iSER Data READ\n", isert_cmd);
- return 1;
+ rc = isert_rdma_rw_ctx_post(isert_cmd, isert_conn, cqe, chain_wr);
+ isert_dbg("Cmd: %p posted RDMA_WRITE for iSER Data READ rc: %d\n",
+ isert_cmd, rc);
+ return rc;
}
static int
isert_get_dataout(struct iscsi_conn *conn, struct iscsi_cmd *cmd, bool recovery)
{
struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
+ int ret;
isert_dbg("Cmd: %p RDMA_READ data_length: %u write_data_done: %u\n",
isert_cmd, cmd->se_cmd.data_length, cmd->write_data_done);
isert_cmd->tx_desc.tx_cqe.done = isert_rdma_read_done;
- isert_rdma_rw_ctx_post(isert_cmd, conn->context,
- &isert_cmd->tx_desc.tx_cqe, NULL);
+ ret = isert_rdma_rw_ctx_post(isert_cmd, conn->context,
+ &isert_cmd->tx_desc.tx_cqe, NULL);
- isert_dbg("Cmd: %p posted RDMA_READ memory for ISER Data WRITE\n",
- isert_cmd);
- return 0;
+ isert_dbg("Cmd: %p posted RDMA_READ memory for ISER Data WRITE rc: %d\n",
+ isert_cmd, ret);
+ return ret;
}
static int
diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h
index c02ada57d7f5..87d994de8c91 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.h
+++ b/drivers/infiniband/ulp/isert/ib_isert.h
@@ -60,7 +60,7 @@
#define ISER_RX_PAD_SIZE (ISCSI_DEF_MAX_RECV_SEG_LEN + 4096 - \
(ISER_RX_PAYLOAD_SIZE + sizeof(u64) + sizeof(struct ib_sge) + \
- sizeof(struct ib_cqe)))
+ sizeof(struct ib_cqe) + sizeof(bool)))
#define ISCSI_ISER_SG_TABLESIZE 256
@@ -85,6 +85,7 @@ struct iser_rx_desc {
u64 dma_addr;
struct ib_sge rx_sg;
struct ib_cqe rx_cqe;
+ bool in_use;
char pad[ISER_RX_PAD_SIZE];
} __packed;
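
Adding in_use to struct iser_rx_desc is balanced by subtracting sizeof(bool) inside ISER_RX_PAD_SIZE, which, going by the macro, keeps the packed descriptor at a constant ISCSI_DEF_MAX_RECV_SEG_LEN + 4096 bytes. A compile-time check one could add under that assumption (invoked from any init path so the assertion is evaluated):

/* Assumes ISER_RX_PAD_SIZE is meant to round the packed descriptor
 * up to ISCSI_DEF_MAX_RECV_SEG_LEN + 4096 bytes in total. */
static inline void iser_rx_desc_size_check(void)
{
	BUILD_BUG_ON(sizeof(struct iser_rx_desc) !=
		     ISCSI_DEF_MAX_RECV_SEG_LEN + 4096);
}
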
diff --git a/drivers/infiniband/ulp/opa_vnic/Kconfig b/drivers/infiniband/ulp/opa_vnic/Kconfig
new file mode 100644
index 000000000000..48132ab5e6b9
--- /dev/null
+++ b/drivers/infiniband/ulp/opa_vnic/Kconfig
@@ -0,0 +1,8 @@
+config INFINIBAND_OPA_VNIC
+ tristate "Intel OPA VNIC support"
+ depends on X86_64 && INFINIBAND
+ ---help---
+	This is the Omni-Path (OPA) Virtual Network Interface Controller (VNIC)
+	driver for the Ethernet-over-Omni-Path feature. It implements the
+	HW-independent VNIC functionality, interfacing with the Linux stack
+	for the data path and with IB MAD for the control path.
diff --git a/drivers/infiniband/ulp/opa_vnic/Makefile b/drivers/infiniband/ulp/opa_vnic/Makefile
new file mode 100644
index 000000000000..8061b287cfe4
--- /dev/null
+++ b/drivers/infiniband/ulp/opa_vnic/Makefile
@@ -0,0 +1,7 @@
+# Makefile - Intel Omni-Path Virtual Network Controller driver
+# Copyright(c) 2017, Intel Corporation.
+#
+obj-$(CONFIG_INFINIBAND_OPA_VNIC) += opa_vnic.o
+
+opa_vnic-y := opa_vnic_netdev.o opa_vnic_encap.o opa_vnic_ethtool.o \
+ opa_vnic_vema.o opa_vnic_vema_iface.o
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c
new file mode 100644
index 000000000000..afa938bd26d6
--- /dev/null
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c
@@ -0,0 +1,475 @@
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This file contains the OPA VNIC encapsulation/decapsulation functions.
+ */
+
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+
+#include "opa_vnic_internal.h"
+
+/* OPA 16B Header fields */
+#define OPA_16B_LID_MASK 0xFFFFFull
+#define OPA_16B_SLID_HIGH_SHFT 8
+#define OPA_16B_SLID_MASK 0xF00ull
+#define OPA_16B_DLID_MASK 0xF000ull
+#define OPA_16B_DLID_HIGH_SHFT 12
+#define OPA_16B_LEN_SHFT 20
+#define OPA_16B_SC_SHFT 20
+#define OPA_16B_RC_SHFT 25
+#define OPA_16B_PKEY_SHFT 16
+
+#define OPA_VNIC_L4_HDR_SHFT 16
+
+/* L2+L4 hdr len is 20 bytes (5 32-bit words) */
+#define OPA_VNIC_HDR_QW_LEN 5
+
+static inline void opa_vnic_make_header(u8 *hdr, u32 slid, u32 dlid, u16 len,
+ u16 pkey, u16 entropy, u8 sc, u8 rc,
+ u8 l4_type, u16 l4_hdr)
+{
+ /* h[1]: LT=1, 16B L2=10 */
+ u32 h[OPA_VNIC_HDR_QW_LEN] = {0, 0xc0000000, 0, 0, 0};
+
+ h[2] = l4_type;
+ h[3] = entropy;
+ h[4] = l4_hdr << OPA_VNIC_L4_HDR_SHFT;
+
+ /* Extract and set 4 upper bits and 20 lower bits of the lids */
+ h[0] |= (slid & OPA_16B_LID_MASK);
+ h[2] |= ((slid >> (20 - OPA_16B_SLID_HIGH_SHFT)) & OPA_16B_SLID_MASK);
+
+ h[1] |= (dlid & OPA_16B_LID_MASK);
+ h[2] |= ((dlid >> (20 - OPA_16B_DLID_HIGH_SHFT)) & OPA_16B_DLID_MASK);
+
+ h[0] |= (len << OPA_16B_LEN_SHFT);
+ h[1] |= (rc << OPA_16B_RC_SHFT);
+ h[1] |= (sc << OPA_16B_SC_SHFT);
+ h[2] |= ((u32)pkey << OPA_16B_PKEY_SHFT);
+
+ memcpy(hdr, h, OPA_VNIC_HDR_LEN);
+}
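A worked example of the LID split above, with an illustrative 24-bit slid of 0x123456: the low 20 bits land in h[0] and the top nibble lands in bits 8-11 of h[2].

	h[0] |= 0x123456 & OPA_16B_LID_MASK;                    /* 0x23456 */
	h[2] |= (0x123456 >> (20 - OPA_16B_SLID_HIGH_SHFT)) &
		OPA_16B_SLID_MASK;                              /* 0x100 */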
+
+/*
+ * The mac table is implemented as a simple hash table, keyed by the last
+ * octet of the mac address.
+ */
+static void opa_vnic_free_mac_tbl(struct hlist_head *mactbl)
+{
+ struct opa_vnic_mac_tbl_node *node;
+ struct hlist_node *tmp;
+ int bkt;
+
+ if (!mactbl)
+ return;
+
+ vnic_hash_for_each_safe(mactbl, bkt, tmp, node, hlist) {
+ hash_del(&node->hlist);
+ kfree(node);
+ }
+ kfree(mactbl);
+}
+
+static struct hlist_head *opa_vnic_alloc_mac_tbl(void)
+{
+ u32 size = sizeof(struct hlist_head) * OPA_VNIC_MAC_TBL_SIZE;
+ struct hlist_head *mactbl;
+
+ mactbl = kzalloc(size, GFP_KERNEL);
+ if (!mactbl)
+ return ERR_PTR(-ENOMEM);
+
+ vnic_hash_init(mactbl);
+ return mactbl;
+}
+
+/* opa_vnic_release_mac_tbl - empty and free the mac table */
+void opa_vnic_release_mac_tbl(struct opa_vnic_adapter *adapter)
+{
+ struct hlist_head *mactbl;
+
+ mutex_lock(&adapter->mactbl_lock);
+ mactbl = rcu_access_pointer(adapter->mactbl);
+ rcu_assign_pointer(adapter->mactbl, NULL);
+ synchronize_rcu();
+ opa_vnic_free_mac_tbl(mactbl);
+ mutex_unlock(&adapter->mactbl_lock);
+}
+
+/*
+ * opa_vnic_query_mac_tbl - query the mac table for a section
+ *
+ * This function implements the query of a specific section of the mac table.
+ * It expects the requested range to be valid.
+ */
+void opa_vnic_query_mac_tbl(struct opa_vnic_adapter *adapter,
+ struct opa_veswport_mactable *tbl)
+{
+ struct opa_vnic_mac_tbl_node *node;
+ struct hlist_head *mactbl;
+ int bkt;
+ u16 loffset, lnum_entries;
+
+ rcu_read_lock();
+ mactbl = rcu_dereference(adapter->mactbl);
+ if (!mactbl)
+ goto get_mac_done;
+
+ loffset = be16_to_cpu(tbl->offset);
+ lnum_entries = be16_to_cpu(tbl->num_entries);
+
+ vnic_hash_for_each(mactbl, bkt, node, hlist) {
+ struct __opa_vnic_mactable_entry *nentry = &node->entry;
+ struct opa_veswport_mactable_entry *entry;
+
+ if ((node->index < loffset) ||
+ (node->index >= (loffset + lnum_entries)))
+ continue;
+
+ /* populate entry in the tbl corresponding to the index */
+ entry = &tbl->tbl_entries[node->index - loffset];
+ memcpy(entry->mac_addr, nentry->mac_addr,
+ ARRAY_SIZE(entry->mac_addr));
+ memcpy(entry->mac_addr_mask, nentry->mac_addr_mask,
+ ARRAY_SIZE(entry->mac_addr_mask));
+ entry->dlid_sd = cpu_to_be32(nentry->dlid_sd);
+ }
+ tbl->mac_tbl_digest = cpu_to_be32(adapter->info.vport.mac_tbl_digest);
+get_mac_done:
+ rcu_read_unlock();
+}
+
+/*
+ * opa_vnic_update_mac_tbl - update mac table section
+ *
+ * This function updates the specified section of the mac table.
+ * The procedure includes following steps.
+ * - Allocate a new mac (hash) table.
+ * - Add the specified entries to the new table.
+ * (except the ones that are requested to be deleted).
+ * - Add all the other entries from the old mac table.
+ * - If there is a failure, free the new table and return.
+ * - Switch to the new table.
+ * - Free the old table and return.
+ *
+ * The function also expects the requested range to be valid.
+ */
+int opa_vnic_update_mac_tbl(struct opa_vnic_adapter *adapter,
+ struct opa_veswport_mactable *tbl)
+{
+ struct opa_vnic_mac_tbl_node *node, *new_node;
+ struct hlist_head *new_mactbl, *old_mactbl;
+ int i, bkt, rc = 0;
+ u8 key;
+ u16 loffset, lnum_entries;
+
+ mutex_lock(&adapter->mactbl_lock);
+ /* allocate new mac table */
+ new_mactbl = opa_vnic_alloc_mac_tbl();
+ if (IS_ERR(new_mactbl)) {
+ mutex_unlock(&adapter->mactbl_lock);
+ return PTR_ERR(new_mactbl);
+ }
+
+ loffset = be16_to_cpu(tbl->offset);
+ lnum_entries = be16_to_cpu(tbl->num_entries);
+
+ /* add updated entries to the new mac table */
+ for (i = 0; i < lnum_entries; i++) {
+ struct __opa_vnic_mactable_entry *nentry;
+ struct opa_veswport_mactable_entry *entry =
+ &tbl->tbl_entries[i];
+ u8 *mac_addr = entry->mac_addr;
+ u8 empty_mac[ETH_ALEN] = { 0 };
+
+ v_dbg("new mac entry %4d: %02x:%02x:%02x:%02x:%02x:%02x %x\n",
+ loffset + i, mac_addr[0], mac_addr[1], mac_addr[2],
+ mac_addr[3], mac_addr[4], mac_addr[5],
+ entry->dlid_sd);
+
+ /* if the entry is being removed, do not add it */
+ if (!memcmp(mac_addr, empty_mac, ARRAY_SIZE(empty_mac)))
+ continue;
+
+ node = kzalloc(sizeof(*node), GFP_KERNEL);
+ if (!node) {
+ rc = -ENOMEM;
+ goto updt_done;
+ }
+
+ node->index = loffset + i;
+ nentry = &node->entry;
+ memcpy(nentry->mac_addr, entry->mac_addr,
+ ARRAY_SIZE(nentry->mac_addr));
+ memcpy(nentry->mac_addr_mask, entry->mac_addr_mask,
+ ARRAY_SIZE(nentry->mac_addr_mask));
+ nentry->dlid_sd = be32_to_cpu(entry->dlid_sd);
+ key = node->entry.mac_addr[OPA_VNIC_MAC_HASH_IDX];
+ vnic_hash_add(new_mactbl, &node->hlist, key);
+ }
+
+ /* add other entries from current mac table to new mac table */
+ old_mactbl = rcu_access_pointer(adapter->mactbl);
+ if (!old_mactbl)
+ goto switch_tbl;
+
+ vnic_hash_for_each(old_mactbl, bkt, node, hlist) {
+ if ((node->index >= loffset) &&
+ (node->index < (loffset + lnum_entries)))
+ continue;
+
+ new_node = kzalloc(sizeof(*new_node), GFP_KERNEL);
+ if (!new_node) {
+ rc = -ENOMEM;
+ goto updt_done;
+ }
+
+ new_node->index = node->index;
+ memcpy(&new_node->entry, &node->entry, sizeof(node->entry));
+ key = new_node->entry.mac_addr[OPA_VNIC_MAC_HASH_IDX];
+ vnic_hash_add(new_mactbl, &new_node->hlist, key);
+ }
+
+switch_tbl:
+ /* switch to new table */
+ rcu_assign_pointer(adapter->mactbl, new_mactbl);
+ synchronize_rcu();
+
+ adapter->info.vport.mac_tbl_digest = be32_to_cpu(tbl->mac_tbl_digest);
+updt_done:
+ /* upon failure, free the new table; otherwise, free the old table */
+ if (rc)
+ opa_vnic_free_mac_tbl(new_mactbl);
+ else
+ opa_vnic_free_mac_tbl(old_mactbl);
+
+ mutex_unlock(&adapter->mactbl_lock);
+ return rc;
+}
+
+/* opa_vnic_chk_mac_tbl - check mac table for dlid */
+static uint32_t opa_vnic_chk_mac_tbl(struct opa_vnic_adapter *adapter,
+ struct ethhdr *mac_hdr)
+{
+ struct opa_vnic_mac_tbl_node *node;
+ struct hlist_head *mactbl;
+ u32 dlid = 0;
+ u8 key;
+
+ rcu_read_lock();
+ mactbl = rcu_dereference(adapter->mactbl);
+ if (unlikely(!mactbl))
+ goto chk_done;
+
+ key = mac_hdr->h_dest[OPA_VNIC_MAC_HASH_IDX];
+ vnic_hash_for_each_possible(mactbl, node, hlist, key) {
+ struct __opa_vnic_mactable_entry *entry = &node->entry;
+
+ /* if related to source mac, skip */
+ if (unlikely(OPA_VNIC_DLID_SD_IS_SRC_MAC(entry->dlid_sd)))
+ continue;
+
+ if (!memcmp(node->entry.mac_addr, mac_hdr->h_dest,
+ ARRAY_SIZE(node->entry.mac_addr))) {
+ /* mac address found */
+ dlid = OPA_VNIC_DLID_SD_GET_DLID(node->entry.dlid_sd);
+ break;
+ }
+ }
+
+chk_done:
+ rcu_read_unlock();
+ return dlid;
+}
+
+/* opa_vnic_get_dlid - find and return the DLID */
+static uint32_t opa_vnic_get_dlid(struct opa_vnic_adapter *adapter,
+ struct sk_buff *skb, u8 def_port)
+{
+ struct __opa_veswport_info *info = &adapter->info;
+ struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb);
+ u32 dlid;
+
+ dlid = opa_vnic_chk_mac_tbl(adapter, mac_hdr);
+ if (dlid)
+ return dlid;
+
+ if (is_multicast_ether_addr(mac_hdr->h_dest)) {
+ dlid = info->vesw.u_mcast_dlid;
+ } else {
+ if (is_local_ether_addr(mac_hdr->h_dest)) {
+ dlid = ((uint32_t)mac_hdr->h_dest[5] << 16) |
+ ((uint32_t)mac_hdr->h_dest[4] << 8) |
+ mac_hdr->h_dest[3];
+ if (unlikely(!dlid))
+ v_warn("Null dlid in MAC address\n");
+ } else if (def_port != OPA_VNIC_INVALID_PORT) {
+ dlid = info->vesw.u_ucast_dlid[def_port];
+ }
+ }
+
+ return dlid;
+}
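For a locally administered destination MAC, the DLID is recovered from the last three octets, least significant octet first. A worked example with an illustrative address:

	/* h_dest = 02:00:00:cd:ab:01 (locally administered example)
	 * dlid = (0x01 << 16) | (0xab << 8) | 0xcd = 0x01abcd
	 */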
+
+/* opa_vnic_get_sc - return the service class */
+static u8 opa_vnic_get_sc(struct __opa_veswport_info *info,
+ struct sk_buff *skb)
+{
+ struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb);
+ u16 vlan_tci;
+ u8 sc;
+
+ if (!__vlan_get_tag(skb, &vlan_tci)) {
+ u8 pcp = OPA_VNIC_VLAN_PCP(vlan_tci);
+
+ if (is_multicast_ether_addr(mac_hdr->h_dest))
+ sc = info->vport.pcp_to_sc_mc[pcp];
+ else
+ sc = info->vport.pcp_to_sc_uc[pcp];
+ } else {
+ if (is_multicast_ether_addr(mac_hdr->h_dest))
+ sc = info->vport.non_vlan_sc_mc;
+ else
+ sc = info->vport.non_vlan_sc_uc;
+ }
+
+ return sc;
+}
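OPA_VNIC_VLAN_PCP (defined in opa_vnic_internal.h) extracts the 3-bit priority field from the VLAN TCI. For example, a TCI of 0xa123 yields PCP 5:

	/* pcp = (0xa123 & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT
	 *     = (0xa123 & 0xe000) >> 13 = 5
	 */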
+
+u8 opa_vnic_get_vl(struct opa_vnic_adapter *adapter, struct sk_buff *skb)
+{
+ struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb);
+ struct __opa_veswport_info *info = &adapter->info;
+ u8 vl;
+
+ if (skb_vlan_tag_present(skb)) {
+ u8 pcp = skb_vlan_tag_get(skb) >> VLAN_PRIO_SHIFT;
+
+ if (is_multicast_ether_addr(mac_hdr->h_dest))
+ vl = info->vport.pcp_to_vl_mc[pcp];
+ else
+ vl = info->vport.pcp_to_vl_uc[pcp];
+ } else {
+ if (is_multicast_ether_addr(mac_hdr->h_dest))
+ vl = info->vport.non_vlan_vl_mc;
+ else
+ vl = info->vport.non_vlan_vl_uc;
+ }
+
+ return vl;
+}
+
+/* opa_vnic_calc_entropy - calculate the packet entropy */
+u8 opa_vnic_calc_entropy(struct opa_vnic_adapter *adapter, struct sk_buff *skb)
+{
+ u16 hash16;
+
+ /*
+ * Get flow based 16-bit hash and then XOR the upper and lower bytes
+ * to get the entropy.
+ * __skb_tx_hash limits qcount to 16 bits. Hence, get 15-bit hash.
+ */
+ hash16 = __skb_tx_hash(adapter->netdev, skb, BIT(15));
+ return (u8)((hash16 >> 8) ^ (hash16 & 0xff));
+}
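For instance, a tx hash of 0x1234 folds to an entropy byte of 0x26:

	/* hash16 = 0x1234 -> entropy = 0x12 ^ 0x34 = 0x26 */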
+
+/* opa_vnic_get_def_port - get default port based on entropy */
+static inline u8 opa_vnic_get_def_port(struct opa_vnic_adapter *adapter,
+ u8 entropy)
+{
+ u8 flow_id;
+
+ /* Add the upper and lower 4-bits of entropy to get the flow id */
+ flow_id = ((entropy & 0xf) + (entropy >> 4));
+ return adapter->flow_tbl[flow_id & (OPA_VNIC_FLOW_TBL_SIZE - 1)];
+}
+
+/* Calculate packet wire length in 8-byte units, including OPA header,
+ * ICRC and padding
+ */
+static inline int opa_vnic_wire_length(struct sk_buff *skb)
+{
+ u32 pad_len;
+
+ /* padding for 8 bytes size alignment */
+ pad_len = -(skb->len + OPA_VNIC_ICRC_TAIL_LEN) & 0x7;
+ pad_len += OPA_VNIC_ICRC_TAIL_LEN;
+
+ return (skb->len + pad_len) >> 3;
+}
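A worked example, assuming OPA_VNIC_ICRC_TAIL_LEN is 5 bytes (a 4-byte ICRC plus a 1-byte tail):

	/* skb->len = 64:
	 *   pad_len  = -(64 + 5) & 0x7 = 3   (align 69 up to 72)
	 *   pad_len += 5              -> 8
	 *   return (64 + 8) >> 3      =  9   (8-byte units on the wire)
	 */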
+
+/* opa_vnic_encap_skb - encapsulate skb packet with OPA header and meta data */
+void opa_vnic_encap_skb(struct opa_vnic_adapter *adapter, struct sk_buff *skb)
+{
+ struct __opa_veswport_info *info = &adapter->info;
+ struct opa_vnic_skb_mdata *mdata;
+ u8 def_port, sc, entropy, *hdr;
+ u16 len, l4_hdr;
+ u32 dlid;
+
+ hdr = skb_push(skb, OPA_VNIC_HDR_LEN);
+
+ entropy = opa_vnic_calc_entropy(adapter, skb);
+ def_port = opa_vnic_get_def_port(adapter, entropy);
+ len = opa_vnic_wire_length(skb);
+ dlid = opa_vnic_get_dlid(adapter, skb, def_port);
+ sc = opa_vnic_get_sc(info, skb);
+ l4_hdr = info->vesw.vesw_id;
+
+ mdata = skb_push(skb, sizeof(*mdata));
+ mdata->vl = opa_vnic_get_vl(adapter, skb);
+ mdata->entropy = entropy;
+ mdata->flags = 0;
+ if (unlikely(!dlid)) {
+ mdata->flags = OPA_VNIC_SKB_MDATA_ENCAP_ERR;
+ return;
+ }
+
+ opa_vnic_make_header(hdr, info->vport.encap_slid, dlid, len,
+ info->vesw.pkey, entropy, sc, 0,
+ OPA_VNIC_L4_ETHR, l4_hdr);
+}
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h
new file mode 100644
index 000000000000..4c434b9dd84c
--- /dev/null
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h
@@ -0,0 +1,489 @@
+#ifndef _OPA_VNIC_ENCAP_H
+#define _OPA_VNIC_ENCAP_H
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This file contains all OPA VNIC declarations required for encapsulation
+ * and decapsulation of Ethernet packets
+ */
+
+#include <linux/types.h>
+#include <rdma/ib_mad.h>
+
+/* EMA class version */
+#define OPA_EMA_CLASS_VERSION 0x80
+
+/*
+ * Define the Intel vendor management class for OPA
+ * ETHERNET MANAGEMENT
+ */
+#define OPA_MGMT_CLASS_INTEL_EMA 0x34
+
+/* EM attribute IDs */
+#define OPA_EM_ATTR_CLASS_PORT_INFO 0x0001
+#define OPA_EM_ATTR_VESWPORT_INFO 0x0011
+#define OPA_EM_ATTR_VESWPORT_MAC_ENTRIES 0x0012
+#define OPA_EM_ATTR_IFACE_UCAST_MACS 0x0013
+#define OPA_EM_ATTR_IFACE_MCAST_MACS 0x0014
+#define OPA_EM_ATTR_DELETE_VESW 0x0015
+#define OPA_EM_ATTR_VESWPORT_SUMMARY_COUNTERS 0x0020
+#define OPA_EM_ATTR_VESWPORT_ERROR_COUNTERS 0x0022
+
+/* VNIC configured and operational state values */
+#define OPA_VNIC_STATE_DROP_ALL 0x1
+#define OPA_VNIC_STATE_FORWARDING 0x3
+
+#define OPA_VESW_MAX_NUM_DEF_PORT 16
+#define OPA_VNIC_MAX_NUM_PCP 8
+
+#define OPA_VNIC_EMA_DATA (OPA_MGMT_MAD_SIZE - IB_MGMT_VENDOR_HDR)
+
+/* Defines for vendor specific notice(trap) attributes */
+#define OPA_INTEL_EMA_NOTICE_TYPE_INFO 0x04
+
+/* INTEL OUI */
+#define INTEL_OUI_1 0x00
+#define INTEL_OUI_2 0x06
+#define INTEL_OUI_3 0x6a
+
+/* Trap opcodes sent from VNIC */
+#define OPA_VESWPORT_TRAP_IFACE_UCAST_MAC_CHANGE 0x1
+#define OPA_VESWPORT_TRAP_IFACE_MCAST_MAC_CHANGE 0x2
+#define OPA_VESWPORT_TRAP_ETH_LINK_STATUS_CHANGE 0x3
+
+#define OPA_VNIC_DLID_SD_IS_SRC_MAC(dlid_sd) (!!((dlid_sd) & 0x20))
+#define OPA_VNIC_DLID_SD_GET_DLID(dlid_sd) ((dlid_sd) >> 8)
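Decoding a sample dlid_sd word with these macros (illustrative value only):

	/* dlid_sd = 0x00abcd20:
	 *   OPA_VNIC_DLID_SD_IS_SRC_MAC(0x00abcd20) -> true (bit 0x20 set)
	 *   OPA_VNIC_DLID_SD_GET_DLID(0x00abcd20)   -> 0xabcd
	 */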
+
+/* VNIC Ethernet link status */
+#define OPA_VNIC_ETH_LINK_UP 1
+#define OPA_VNIC_ETH_LINK_DOWN 2
+
+/**
+ * struct opa_vesw_info - OPA vnic switch information
+ * @fabric_id: 10-bit fabric id
+ * @vesw_id: 12-bit virtual ethernet switch id
+ * @def_port_mask: bitmask of default ports
+ * @pkey: partition key
+ * @u_mcast_dlid: unknown multicast dlid
+ * @u_ucast_dlid: array of unknown unicast dlids
+ * @eth_mtu: MTUs for each vlan PCP
+ * @eth_mtu_non_vlan: MTU for non vlan packets
+ */
+struct opa_vesw_info {
+ __be16 fabric_id;
+ __be16 vesw_id;
+
+ u8 rsvd0[6];
+ __be16 def_port_mask;
+
+ u8 rsvd1[2];
+ __be16 pkey;
+
+ u8 rsvd2[4];
+ __be32 u_mcast_dlid;
+ __be32 u_ucast_dlid[OPA_VESW_MAX_NUM_DEF_PORT];
+
+ u8 rsvd3[44];
+ __be16 eth_mtu[OPA_VNIC_MAX_NUM_PCP];
+ __be16 eth_mtu_non_vlan;
+ u8 rsvd4[2];
+} __packed;
+
+/**
+ * struct opa_per_veswport_info - OPA vnic per port information
+ * @port_num: port number
+ * @eth_link_status: current ethernet link state
+ * @base_mac_addr: base mac address
+ * @config_state: configured port state
+ * @oper_state: operational port state
+ * @max_mac_tbl_ent: max number of mac table entries
+ * @max_smac_ent: max smac entries in mac table
+ * @mac_tbl_digest: mac table digest
+ * @encap_slid: base slid for the port
+ * @pcp_to_sc_uc: sc by pcp index for unicast ethernet packets
+ * @pcp_to_vl_uc: vl by pcp index for unicast ethernet packets
+ * @pcp_to_sc_mc: sc by pcp index for multicast ethernet packets
+ * @pcp_to_vl_mc: vl by pcp index for multicast ethernet packets
+ * @non_vlan_sc_uc: sc for non-vlan unicast ethernet packets
+ * @non_vlan_vl_uc: vl for non-vlan unicast ethernet packets
+ * @non_vlan_sc_mc: sc for non-vlan multicast ethernet packets
+ * @non_vlan_vl_mc: vl for non-vlan multicast ethernet packets
+ * @uc_macs_gen_count: generation count for unicast macs list
+ * @mc_macs_gen_count: generation count for multicast macs list
+ */
+struct opa_per_veswport_info {
+ __be32 port_num;
+
+ u8 eth_link_status;
+ u8 rsvd0[3];
+
+ u8 base_mac_addr[ETH_ALEN];
+ u8 config_state;
+ u8 oper_state;
+
+ __be16 max_mac_tbl_ent;
+ __be16 max_smac_ent;
+ __be32 mac_tbl_digest;
+ u8 rsvd1[4];
+
+ __be32 encap_slid;
+
+ u8 pcp_to_sc_uc[OPA_VNIC_MAX_NUM_PCP];
+ u8 pcp_to_vl_uc[OPA_VNIC_MAX_NUM_PCP];
+ u8 pcp_to_sc_mc[OPA_VNIC_MAX_NUM_PCP];
+ u8 pcp_to_vl_mc[OPA_VNIC_MAX_NUM_PCP];
+
+ u8 non_vlan_sc_uc;
+ u8 non_vlan_vl_uc;
+ u8 non_vlan_sc_mc;
+ u8 non_vlan_vl_mc;
+
+ u8 rsvd2[48];
+
+ __be16 uc_macs_gen_count;
+ __be16 mc_macs_gen_count;
+
+ u8 rsvd3[8];
+} __packed;
+
+/**
+ * struct opa_veswport_info - OPA vnic port information
+ * @vesw: OPA vnic switch information
+ * @vport: OPA vnic per port information
+ *
+ * On the host, each of the virtual ethernet ports belongs
+ * to a different virtual ethernet switch.
+ */
+struct opa_veswport_info {
+ struct opa_vesw_info vesw;
+ struct opa_per_veswport_info vport;
+};
+
+/**
+ * struct opa_veswport_mactable_entry - single entry in the forwarding table
+ * @mac_addr: MAC address
+ * @mac_addr_mask: MAC address bit mask
+ * @dlid_sd: Matching DLID and side data
+ *
+ * On the host, each virtual ethernet port has a forwarding
+ * table. These tables are used to map a MAC to a LID and
+ * other data. For more details see struct opa_veswport_mactable.
+ * This is the structure of a single mactable entry.
+ */
+struct opa_veswport_mactable_entry {
+ u8 mac_addr[ETH_ALEN];
+ u8 mac_addr_mask[ETH_ALEN];
+ __be32 dlid_sd;
+} __packed;
+
+/**
+ * struct opa_veswport_mactable - Forwarding table array
+ * @offset: mac table starting offset
+ * @num_entries: Number of entries to get or set
+ * @mac_tbl_digest: mac table digest
+ * @tbl_entries[]: Array of table entries
+ *
+ * The EM sends down this structure in a MAD indicating the
+ * starting offset in the forwarding table that this entry is
+ * to be loaded into and the number of entries that this MAD
+ * instance contains.
+ * The mac_tbl_digest has been added to this MAD structure. It is set by
+ * the EM and is used by the EM to check for any discrepancies between
+ * this value and the value it maintains, e.g. in the case of a VNIC port
+ * being deleted or unloaded. A new instantiation of a VNIC always has a
+ * value of zero. This value is stored as part of the vnic adapter
+ * structure and is accessed by the GET and SET routines for both the
+ * mactable entries and the veswport info.
+ */
+struct opa_veswport_mactable {
+ __be16 offset;
+ __be16 num_entries;
+ __be32 mac_tbl_digest;
+ struct opa_veswport_mactable_entry tbl_entries[0];
+} __packed;
+
+/**
+ * struct opa_veswport_summary_counters - summary counters
+ * @vp_instance: vport instance on the OPA port
+ * @vesw_id: virtual ethernet switch id
+ * @veswport_num: virtual ethernet switch port number
+ * @tx_errors: transmit errors
+ * @rx_errors: receive errors
+ * @tx_packets: transmit packets
+ * @rx_packets: receive packets
+ * @tx_bytes: transmit bytes
+ * @rx_bytes: receive bytes
+ * @tx_unicast: unicast packets transmitted
+ * @tx_mcastbcast: multicast/broadcast packets transmitted
+ * @tx_untagged: non-vlan packets transmitted
+ * @tx_vlan: vlan packets transmitted
+ * @tx_64_size: transmit packet length is 64 bytes
+ * @tx_65_127: transmit packet length is 65 to 127 bytes
+ * @tx_128_255: transmit packet length is 128 to 255 bytes
+ * @tx_256_511: transmit packet length is 256 to 511 bytes
+ * @tx_512_1023: transmit packet length is 512 to 1023 bytes
+ * @tx_1024_1518: transmit packet length is 1024 to 1518 bytes
+ * @tx_1519_max: transmit packet length >= 1519 bytes
+ * @rx_unicast: unicast packets received
+ * @rx_mcastbcast: multicast/broadcast packets received
+ * @rx_untagged: non-vlan packets received
+ * @rx_vlan: vlan packets received
+ * @rx_64_size: received packet length is 64 bytes
+ * @rx_65_127: received packet length is 65 to 127 bytes
+ * @rx_128_255: received packet length is 128 to 255 bytes
+ * @rx_256_511: received packet length is 256 to 511 bytes
+ * @rx_512_1023: received packet length is 512 to 1023 bytes
+ * @rx_1024_1518: received packet length is 1024 to 1518 bytes
+ * @rx_1519_max: received packet length >= 1519 bytes
+ *
+ * All the above are counters of corresponding conditions.
+ */
+struct opa_veswport_summary_counters {
+ __be16 vp_instance;
+ __be16 vesw_id;
+ __be32 veswport_num;
+
+ __be64 tx_errors;
+ __be64 rx_errors;
+ __be64 tx_packets;
+ __be64 rx_packets;
+ __be64 tx_bytes;
+ __be64 rx_bytes;
+
+ __be64 tx_unicast;
+ __be64 tx_mcastbcast;
+
+ __be64 tx_untagged;
+ __be64 tx_vlan;
+
+ __be64 tx_64_size;
+ __be64 tx_65_127;
+ __be64 tx_128_255;
+ __be64 tx_256_511;
+ __be64 tx_512_1023;
+ __be64 tx_1024_1518;
+ __be64 tx_1519_max;
+
+ __be64 rx_unicast;
+ __be64 rx_mcastbcast;
+
+ __be64 rx_untagged;
+ __be64 rx_vlan;
+
+ __be64 rx_64_size;
+ __be64 rx_65_127;
+ __be64 rx_128_255;
+ __be64 rx_256_511;
+ __be64 rx_512_1023;
+ __be64 rx_1024_1518;
+ __be64 rx_1519_max;
+
+ __be64 reserved[16];
+} __packed;
+
+/**
+ * struct opa_veswport_error_counters - error counters
+ * @vp_instance: vport instance on the OPA port
+ * @vesw_id: virtual ethernet switch id
+ * @veswport_num: virtual ethernet switch port number
+ * @tx_errors: transmit errors
+ * @rx_errors: receive errors
+ * @tx_smac_filt: smac filter errors
+ * @tx_dlid_zero: transmit packets with invalid dlid
+ * @tx_logic: other transmit errors
+ * @tx_drop_state: packets transmitted in non-forwarding port state
+ * @rx_bad_veswid: received packet with invalid vesw id
+ * @rx_runt: received ethernet packet with length < 64 bytes
+ * @rx_oversize: received ethernet packet with length > MTU size
+ * @rx_eth_down: received packets when interface is down
+ * @rx_drop_state: received packets in non-forwarding port state
+ * @rx_logic: other receive errors
+ *
+ * All the above are counters of corresponding error conditions.
+ */
+struct opa_veswport_error_counters {
+ __be16 vp_instance;
+ __be16 vesw_id;
+ __be32 veswport_num;
+
+ __be64 tx_errors;
+ __be64 rx_errors;
+
+ __be64 rsvd0;
+ __be64 tx_smac_filt;
+ __be64 rsvd1;
+ __be64 rsvd2;
+ __be64 rsvd3;
+ __be64 tx_dlid_zero;
+ __be64 rsvd4;
+ __be64 tx_logic;
+ __be64 rsvd5;
+ __be64 tx_drop_state;
+
+ __be64 rx_bad_veswid;
+ __be64 rsvd6;
+ __be64 rx_runt;
+ __be64 rx_oversize;
+ __be64 rsvd7;
+ __be64 rx_eth_down;
+ __be64 rx_drop_state;
+ __be64 rx_logic;
+ __be64 rsvd8;
+
+ __be64 rsvd9[16];
+} __packed;
+
+/**
+ * struct opa_veswport_trap - Trap message sent to EM by VNIC
+ * @fabric_id: 10-bit fabric id
+ * @veswid: 12-bit virtual ethernet switch id
+ * @veswportnum: logical port number on the Virtual switch
+ * @opaportnum: physical port num (redundant on host)
+ * @veswportindex: 0-based switch port index on the OPA port
+ * @opcode: operation
+ * @reserved: 32 bit for alignment
+ *
+ * The VNIC will send trap messages to the Ethernet manager to
+ * inform it about changes to the VNIC config, behaviour etc.
+ * This is the format of the trap payload.
+ */
+struct opa_veswport_trap {
+ __be16 fabric_id;
+ __be16 veswid;
+ __be32 veswportnum;
+ __be16 opaportnum;
+ u8 veswportindex;
+ u8 opcode;
+ __be32 reserved;
+} __packed;
+
+/**
+ * struct opa_vnic_iface_mac_entry - single entry in the mac list
+ * @mac_addr: MAC address
+ */
+struct opa_vnic_iface_mac_entry {
+ u8 mac_addr[ETH_ALEN];
+};
+
+/**
+ * struct opa_veswport_iface_macs - Msg to set globally administered MAC
+ * @start_idx: position of first entry (0 based)
+ * @num_macs_in_msg: number of MACs in this message
+ * @tot_macs_in_lst: The total number of MACs the agent has
+ * @gen_count: gen_count to indicate change
+ * @entry: The mac list entry
+ *
+ * The same attribute IDs and attribute modifiers as for locally
+ * administered addresses are used to set globally administered addresses.
+ */
+struct opa_veswport_iface_macs {
+ __be16 start_idx;
+ __be16 num_macs_in_msg;
+ __be16 tot_macs_in_lst;
+ __be16 gen_count;
+ struct opa_vnic_iface_mac_entry entry[0];
+} __packed;
+
+/**
+ * struct opa_vnic_vema_mad - Generic VEMA MAD
+ * @mad_hdr: Generic MAD header
+ * @rmpp_hdr: RMPP header for vendor specific MADs
+ * @oui: Unique org identifier
+ * @data: MAD data
+ */
+struct opa_vnic_vema_mad {
+ struct ib_mad_hdr mad_hdr;
+ struct ib_rmpp_hdr rmpp_hdr;
+ u8 reserved;
+ u8 oui[3];
+ u8 data[OPA_VNIC_EMA_DATA];
+};
+
+/**
+ * struct opa_vnic_notice_attr - Generic Notice MAD
+ * @gen_type: Generic/Specific bit and type of notice
+ * @oui_1: Vendor ID byte 1
+ * @oui_2: Vendor ID byte 2
+ * @oui_3: Vendor ID byte 3
+ * @trap_num: Trap number
+ * @toggle_count: Notice toggle bit and count value
+ * @issuer_lid: Trap issuer's lid
+ * @issuer_gid: Issuer GID (only if Report method)
+ * @raw_data: Trap message body
+ */
+struct opa_vnic_notice_attr {
+ u8 gen_type;
+ u8 oui_1;
+ u8 oui_2;
+ u8 oui_3;
+ __be16 trap_num;
+ __be16 toggle_count;
+ __be32 issuer_lid;
+ __be32 reserved;
+ u8 issuer_gid[16];
+ u8 raw_data[64];
+} __packed;
+
+/**
+ * struct opa_vnic_vema_mad_trap - Generic VEMA MAD Trap
+ * @mad_hdr: Generic MAD header
+ * @rmpp_hdr: RMPP header for vendor specific MADs
+ * @oui: Unique org identifier
+ * @notice: Notice structure
+ */
+struct opa_vnic_vema_mad_trap {
+ struct ib_mad_hdr mad_hdr;
+ struct ib_rmpp_hdr rmpp_hdr;
+ u8 reserved;
+ u8 oui[3];
+ struct opa_vnic_notice_attr notice;
+};
+
+#endif /* _OPA_VNIC_ENCAP_H */
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c
new file mode 100644
index 000000000000..62390e9e0023
--- /dev/null
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c
@@ -0,0 +1,187 @@
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This file contains OPA VNIC ethtool functions
+ */
+
+#include <linux/ethtool.h>
+
+#include "opa_vnic_internal.h"
+
+enum {NETDEV_STATS, VNIC_STATS};
+
+struct vnic_stats {
+ char stat_string[ETH_GSTRING_LEN];
+ struct {
+ int sizeof_stat;
+ int stat_offset;
+ };
+};
+
+#define VNIC_STAT(m) { FIELD_SIZEOF(struct opa_vnic_stats, m), \
+ offsetof(struct opa_vnic_stats, m) }
+
+static struct vnic_stats vnic_gstrings_stats[] = {
+ /* NETDEV stats */
+ {"rx_packets", VNIC_STAT(netstats.rx_packets)},
+ {"tx_packets", VNIC_STAT(netstats.tx_packets)},
+ {"rx_bytes", VNIC_STAT(netstats.rx_bytes)},
+ {"tx_bytes", VNIC_STAT(netstats.tx_bytes)},
+ {"rx_errors", VNIC_STAT(netstats.rx_errors)},
+ {"tx_errors", VNIC_STAT(netstats.tx_errors)},
+ {"rx_dropped", VNIC_STAT(netstats.rx_dropped)},
+ {"tx_dropped", VNIC_STAT(netstats.tx_dropped)},
+
+ /* SUMMARY counters */
+ {"tx_unicast", VNIC_STAT(tx_grp.unicast)},
+ {"tx_mcastbcast", VNIC_STAT(tx_grp.mcastbcast)},
+ {"tx_untagged", VNIC_STAT(tx_grp.untagged)},
+ {"tx_vlan", VNIC_STAT(tx_grp.vlan)},
+
+ {"tx_64_size", VNIC_STAT(tx_grp.s_64)},
+ {"tx_65_127", VNIC_STAT(tx_grp.s_65_127)},
+ {"tx_128_255", VNIC_STAT(tx_grp.s_128_255)},
+ {"tx_256_511", VNIC_STAT(tx_grp.s_256_511)},
+ {"tx_512_1023", VNIC_STAT(tx_grp.s_512_1023)},
+ {"tx_1024_1518", VNIC_STAT(tx_grp.s_1024_1518)},
+ {"tx_1519_max", VNIC_STAT(tx_grp.s_1519_max)},
+
+ {"rx_unicast", VNIC_STAT(rx_grp.unicast)},
+ {"rx_mcastbcast", VNIC_STAT(rx_grp.mcastbcast)},
+ {"rx_untagged", VNIC_STAT(rx_grp.untagged)},
+ {"rx_vlan", VNIC_STAT(rx_grp.vlan)},
+
+ {"rx_64_size", VNIC_STAT(rx_grp.s_64)},
+ {"rx_65_127", VNIC_STAT(rx_grp.s_65_127)},
+ {"rx_128_255", VNIC_STAT(rx_grp.s_128_255)},
+ {"rx_256_511", VNIC_STAT(rx_grp.s_256_511)},
+ {"rx_512_1023", VNIC_STAT(rx_grp.s_512_1023)},
+ {"rx_1024_1518", VNIC_STAT(rx_grp.s_1024_1518)},
+ {"rx_1519_max", VNIC_STAT(rx_grp.s_1519_max)},
+
+ /* ERROR counters */
+ {"rx_fifo_errors", VNIC_STAT(netstats.rx_fifo_errors)},
+ {"rx_length_errors", VNIC_STAT(netstats.rx_length_errors)},
+
+ {"tx_fifo_errors", VNIC_STAT(netstats.tx_fifo_errors)},
+ {"tx_carrier_errors", VNIC_STAT(netstats.tx_carrier_errors)},
+
+ {"tx_dlid_zero", VNIC_STAT(tx_dlid_zero)},
+ {"tx_drop_state", VNIC_STAT(tx_drop_state)},
+ {"rx_drop_state", VNIC_STAT(rx_drop_state)},
+ {"rx_oversize", VNIC_STAT(rx_oversize)},
+ {"rx_runt", VNIC_STAT(rx_runt)},
+};
+
+#define VNIC_STATS_LEN ARRAY_SIZE(vnic_gstrings_stats)
+
+/* vnic_get_drvinfo - get driver info */
+static void vnic_get_drvinfo(struct net_device *netdev,
+ struct ethtool_drvinfo *drvinfo)
+{
+ strlcpy(drvinfo->driver, opa_vnic_driver_name, sizeof(drvinfo->driver));
+ strlcpy(drvinfo->version, opa_vnic_driver_version,
+ sizeof(drvinfo->version));
+ strlcpy(drvinfo->bus_info, dev_name(netdev->dev.parent),
+ sizeof(drvinfo->bus_info));
+}
+
+/* vnic_get_sset_count - get string set count */
+static int vnic_get_sset_count(struct net_device *netdev, int sset)
+{
+ return (sset == ETH_SS_STATS) ? VNIC_STATS_LEN : -EOPNOTSUPP;
+}
+
+/* vnic_get_ethtool_stats - get statistics */
+static void vnic_get_ethtool_stats(struct net_device *netdev,
+ struct ethtool_stats *stats, u64 *data)
+{
+ struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev);
+ struct opa_vnic_stats vstats;
+ int i;
+
+ memset(&vstats, 0, sizeof(vstats));
+ spin_lock(&adapter->stats_lock);
+ adapter->rn_ops->ndo_get_stats64(netdev, &vstats.netstats);
+ spin_unlock(&adapter->stats_lock);
+ for (i = 0; i < VNIC_STATS_LEN; i++) {
+ char *p = (char *)&vstats + vnic_gstrings_stats[i].stat_offset;
+
+ data[i] = (vnic_gstrings_stats[i].sizeof_stat ==
+ sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
+ }
+}
+
+/* vnic_get_strings - get strings */
+static void vnic_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
+{
+ int i;
+
+ if (stringset != ETH_SS_STATS)
+ return;
+
+ for (i = 0; i < VNIC_STATS_LEN; i++)
+ memcpy(data + i * ETH_GSTRING_LEN,
+ vnic_gstrings_stats[i].stat_string,
+ ETH_GSTRING_LEN);
+}
+
+/* ethtool ops */
+static const struct ethtool_ops opa_vnic_ethtool_ops = {
+ .get_drvinfo = vnic_get_drvinfo,
+ .get_link = ethtool_op_get_link,
+ .get_strings = vnic_get_strings,
+ .get_sset_count = vnic_get_sset_count,
+ .get_ethtool_stats = vnic_get_ethtool_stats,
+};
+
+/* opa_vnic_set_ethtool_ops - set ethtool ops */
+void opa_vnic_set_ethtool_ops(struct net_device *netdev)
+{
+ netdev->ethtool_ops = &opa_vnic_ethtool_ops;
+}
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h b/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h
new file mode 100644
index 000000000000..ca29e6d5aedc
--- /dev/null
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h
@@ -0,0 +1,329 @@
+#ifndef _OPA_VNIC_INTERNAL_H
+#define _OPA_VNIC_INTERNAL_H
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This file contains OPA VNIC driver internal declarations
+ */
+
+#include <linux/bitops.h>
+#include <linux/etherdevice.h>
+#include <linux/hashtable.h>
+#include <linux/sizes.h>
+#include <rdma/opa_vnic.h>
+
+#include "opa_vnic_encap.h"
+
+#define OPA_VNIC_VLAN_PCP(vlan_tci) \
+ (((vlan_tci) & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT)
+
+/* Flow to default port redirection table size */
+#define OPA_VNIC_FLOW_TBL_SIZE 32
+
+/* Invalid port number */
+#define OPA_VNIC_INVALID_PORT 0xff
+
+struct opa_vnic_adapter;
+
+/**
+ * struct __opa_vesw_info - OPA vnic virtual switch info
+ *
+ * Same as opa_vesw_info without bitwise attribute.
+ */
+struct __opa_vesw_info {
+ u16 fabric_id;
+ u16 vesw_id;
+
+ u8 rsvd0[6];
+ u16 def_port_mask;
+
+ u8 rsvd1[2];
+ u16 pkey;
+
+ u8 rsvd2[4];
+ u32 u_mcast_dlid;
+ u32 u_ucast_dlid[OPA_VESW_MAX_NUM_DEF_PORT];
+
+ u8 rsvd3[44];
+ u16 eth_mtu[OPA_VNIC_MAX_NUM_PCP];
+ u16 eth_mtu_non_vlan;
+ u8 rsvd4[2];
+} __packed;
+
+/**
+ * struct __opa_per_veswport_info - OPA vnic per port info
+ *
+ * Same as opa_per_veswport_info without bitwise attribute.
+ */
+struct __opa_per_veswport_info {
+ u32 port_num;
+
+ u8 eth_link_status;
+ u8 rsvd0[3];
+
+ u8 base_mac_addr[ETH_ALEN];
+ u8 config_state;
+ u8 oper_state;
+
+ u16 max_mac_tbl_ent;
+ u16 max_smac_ent;
+ u32 mac_tbl_digest;
+ u8 rsvd1[4];
+
+ u32 encap_slid;
+
+ u8 pcp_to_sc_uc[OPA_VNIC_MAX_NUM_PCP];
+ u8 pcp_to_vl_uc[OPA_VNIC_MAX_NUM_PCP];
+ u8 pcp_to_sc_mc[OPA_VNIC_MAX_NUM_PCP];
+ u8 pcp_to_vl_mc[OPA_VNIC_MAX_NUM_PCP];
+
+ u8 non_vlan_sc_uc;
+ u8 non_vlan_vl_uc;
+ u8 non_vlan_sc_mc;
+ u8 non_vlan_vl_mc;
+
+ u8 rsvd2[48];
+
+ u16 uc_macs_gen_count;
+ u16 mc_macs_gen_count;
+
+ u8 rsvd3[8];
+} __packed;
+
+/**
+ * struct __opa_veswport_info - OPA vnic port info
+ *
+ * Same as opa_veswport_info without bitwise attribute.
+ */
+struct __opa_veswport_info {
+ struct __opa_vesw_info vesw;
+ struct __opa_per_veswport_info vport;
+};
+
+/**
+ * struct __opa_veswport_trap - OPA vnic trap info
+ *
+ * Same as opa_veswport_trap without bitwise attribute.
+ */
+struct __opa_veswport_trap {
+ u16 fabric_id;
+ u16 veswid;
+ u32 veswportnum;
+ u16 opaportnum;
+ u8 veswportindex;
+ u8 opcode;
+ u32 reserved;
+} __packed;
+
+/**
+ * struct opa_vnic_ctrl_port - OPA virtual NIC control port
+ * @ibdev: pointer to ib device
+ * @ops: opa vnic control operations
+ * @num_ports: number of opa ports
+ */
+struct opa_vnic_ctrl_port {
+ struct ib_device *ibdev;
+ struct opa_vnic_ctrl_ops *ops;
+ u8 num_ports;
+};
+
+/**
+ * struct opa_vnic_adapter - OPA VNIC netdev private data structure
+ * @netdev: pointer to associated netdev
+ * @ibdev: ib device
+ * @cport: pointer to opa vnic control port
+ * @rn_ops: rdma netdev's net_device_ops
+ * @port_num: OPA port number
+ * @vport_num: vesw port number
+ * @lock: adapter lock
+ * @info: virtual ethernet switch port information
+ * @vema_mac_addr: mac address configured by vema
+ * @umac_hash: unicast maclist hash
+ * @mmac_hash: multicast maclist hash
+ * @mactbl: hash table of MAC entries
+ * @mactbl_lock: mac table lock
+ * @stats_lock: statistics lock
+ * @flow_tbl: flow to default port redirection table
+ * @trap_timeout: trap timeout
+ * @trap_count: no. of traps allowed within timeout period
+ */
+struct opa_vnic_adapter {
+ struct net_device *netdev;
+ struct ib_device *ibdev;
+ struct opa_vnic_ctrl_port *cport;
+ const struct net_device_ops *rn_ops;
+
+ u8 port_num;
+ u8 vport_num;
+
+ /* Lock used around concurrent updates to netdev */
+ struct mutex lock;
+
+ struct __opa_veswport_info info;
+ u8 vema_mac_addr[ETH_ALEN];
+ u32 umac_hash;
+ u32 mmac_hash;
+ struct hlist_head __rcu *mactbl;
+
+ /* Lock used to protect updates to mac table */
+ struct mutex mactbl_lock;
+
+ /* Lock used to protect access to vnic counters */
+ spinlock_t stats_lock;
+
+ u8 flow_tbl[OPA_VNIC_FLOW_TBL_SIZE];
+
+ unsigned long trap_timeout;
+ u8 trap_count;
+};
+
+/* Same as opa_veswport_mactable_entry, but without bitwise attribute */
+struct __opa_vnic_mactable_entry {
+ u8 mac_addr[ETH_ALEN];
+ u8 mac_addr_mask[ETH_ALEN];
+ u32 dlid_sd;
+} __packed;
+
+/**
+ * struct opa_vnic_mac_tbl_node - OPA VNIC mac table node
+ * @hlist: hash list handle
+ * @index: index of entry in the mac table
+ * @entry: entry in the table
+ */
+struct opa_vnic_mac_tbl_node {
+ struct hlist_node hlist;
+ u16 index;
+ struct __opa_vnic_mactable_entry entry;
+};
+
+#define v_dbg(format, arg...) \
+ netdev_dbg(adapter->netdev, format, ## arg)
+#define v_err(format, arg...) \
+ netdev_err(adapter->netdev, format, ## arg)
+#define v_info(format, arg...) \
+ netdev_info(adapter->netdev, format, ## arg)
+#define v_warn(format, arg...) \
+ netdev_warn(adapter->netdev, format, ## arg)
+
+#define c_err(format, arg...) \
+ dev_err(&cport->ibdev->dev, format, ## arg)
+#define c_info(format, arg...) \
+ dev_info(&cport->ibdev->dev, format, ## arg)
+#define c_dbg(format, arg...) \
+ dev_dbg(&cport->ibdev->dev, format, ## arg)
+
+/* The maximum allowed entries in the mac table */
+#define OPA_VNIC_MAC_TBL_MAX_ENTRIES 2048
+/* Limit of smac entries in mac table */
+#define OPA_VNIC_MAX_SMAC_LIMIT 256
+
+/* The last octet of the MAC address is used as the key to the hash table */
+#define OPA_VNIC_MAC_HASH_IDX 5
+
+/* The VNIC MAC hash table is of size 2^8 */
+#define OPA_VNIC_MAC_TBL_HASH_BITS 8
+#define OPA_VNIC_MAC_TBL_SIZE BIT(OPA_VNIC_MAC_TBL_HASH_BITS)
+
+/* VNIC HASH MACROS */
+#define vnic_hash_init(hashtable) __hash_init(hashtable, OPA_VNIC_MAC_TBL_SIZE)
+
+#define vnic_hash_add(hashtable, node, key) \
+ hlist_add_head(node, \
+ &hashtable[hash_min(key, ilog2(OPA_VNIC_MAC_TBL_SIZE))])
+
+#define vnic_hash_for_each_safe(name, bkt, tmp, obj, member) \
+ for ((bkt) = 0, obj = NULL; \
+ !obj && (bkt) < OPA_VNIC_MAC_TBL_SIZE; (bkt)++) \
+ hlist_for_each_entry_safe(obj, tmp, &name[bkt], member)
+
+#define vnic_hash_for_each_possible(name, obj, member, key) \
+ hlist_for_each_entry(obj, \
+ &name[hash_min(key, ilog2(OPA_VNIC_MAC_TBL_SIZE))], member)
+
+#define vnic_hash_for_each(name, bkt, obj, member) \
+ for ((bkt) = 0, obj = NULL; \
+ !obj && (bkt) < OPA_VNIC_MAC_TBL_SIZE; (bkt)++) \
+ hlist_for_each_entry(obj, &name[bkt], member)
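A minimal usage sketch of these macros; vnic_mac_tbl_find() is a hypothetical helper, mirroring how opa_vnic_encap.c keys the table on the last octet of the MAC address:

	static struct opa_vnic_mac_tbl_node *
	vnic_mac_tbl_find(struct hlist_head *mactbl, const u8 *mac)
	{
		struct opa_vnic_mac_tbl_node *node;
		u8 key = mac[OPA_VNIC_MAC_HASH_IDX];

		vnic_hash_for_each_possible(mactbl, node, hlist, key)
			if (ether_addr_equal(node->entry.mac_addr, mac))
				return node;
		return NULL;
	}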
+
+extern char opa_vnic_driver_name[];
+extern const char opa_vnic_driver_version[];
+
+struct opa_vnic_adapter *opa_vnic_add_netdev(struct ib_device *ibdev,
+ u8 port_num, u8 vport_num);
+void opa_vnic_rem_netdev(struct opa_vnic_adapter *adapter);
+void opa_vnic_encap_skb(struct opa_vnic_adapter *adapter, struct sk_buff *skb);
+u8 opa_vnic_get_vl(struct opa_vnic_adapter *adapter, struct sk_buff *skb);
+u8 opa_vnic_calc_entropy(struct opa_vnic_adapter *adapter, struct sk_buff *skb);
+void opa_vnic_process_vema_config(struct opa_vnic_adapter *adapter);
+void opa_vnic_release_mac_tbl(struct opa_vnic_adapter *adapter);
+void opa_vnic_query_mac_tbl(struct opa_vnic_adapter *adapter,
+ struct opa_veswport_mactable *tbl);
+int opa_vnic_update_mac_tbl(struct opa_vnic_adapter *adapter,
+ struct opa_veswport_mactable *tbl);
+void opa_vnic_query_ucast_macs(struct opa_vnic_adapter *adapter,
+ struct opa_veswport_iface_macs *macs);
+void opa_vnic_query_mcast_macs(struct opa_vnic_adapter *adapter,
+ struct opa_veswport_iface_macs *macs);
+void opa_vnic_get_summary_counters(struct opa_vnic_adapter *adapter,
+ struct opa_veswport_summary_counters *cntrs);
+void opa_vnic_get_error_counters(struct opa_vnic_adapter *adapter,
+ struct opa_veswport_error_counters *cntrs);
+void opa_vnic_get_vesw_info(struct opa_vnic_adapter *adapter,
+ struct opa_vesw_info *info);
+void opa_vnic_set_vesw_info(struct opa_vnic_adapter *adapter,
+ struct opa_vesw_info *info);
+void opa_vnic_get_per_veswport_info(struct opa_vnic_adapter *adapter,
+ struct opa_per_veswport_info *info);
+void opa_vnic_set_per_veswport_info(struct opa_vnic_adapter *adapter,
+ struct opa_per_veswport_info *info);
+void opa_vnic_vema_report_event(struct opa_vnic_adapter *adapter, u8 event);
+void opa_vnic_set_ethtool_ops(struct net_device *netdev);
+void opa_vnic_vema_send_trap(struct opa_vnic_adapter *adapter,
+ struct __opa_veswport_trap *data, u32 lid);
+
+#endif /* _OPA_VNIC_INTERNAL_H */
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c
new file mode 100644
index 000000000000..1a3c25364b64
--- /dev/null
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c
@@ -0,0 +1,387 @@
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This file contains OPA Virtual Network Interface Controller (VNIC) driver
+ * netdev functionality.
+ */
+
+#include <linux/module.h>
+#include <linux/if_vlan.h>
+#include <linux/crc32.h>
+
+#include "opa_vnic_internal.h"
+
+#define OPA_TX_TIMEOUT_MS 1000
+
+#define OPA_VNIC_SKB_HEADROOM \
+ ALIGN((OPA_VNIC_HDR_LEN + OPA_VNIC_SKB_MDATA_LEN), 8)
+
+/* This function overrides the rdma netdev's ndo_get_stats64 with an
+ * opa_vnic specific implementation.
+ */
+static void opa_vnic_get_stats64(struct net_device *netdev,
+ struct rtnl_link_stats64 *stats)
+{
+ struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev);
+ struct opa_vnic_stats vstats;
+
+ memset(&vstats, 0, sizeof(vstats));
+ spin_lock(&adapter->stats_lock);
+ adapter->rn_ops->ndo_get_stats64(netdev, &vstats.netstats);
+ spin_unlock(&adapter->stats_lock);
+ memcpy(stats, &vstats.netstats, sizeof(*stats));
+}
+
+/* opa_netdev_start_xmit - transmit function */
+static netdev_tx_t opa_netdev_start_xmit(struct sk_buff *skb,
+ struct net_device *netdev)
+{
+ struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev);
+
+ v_dbg("xmit: queue %d skb len %d\n", skb->queue_mapping, skb->len);
+	/* pad to ensure minimum ethernet packet length */
+ if (unlikely(skb->len < ETH_ZLEN)) {
+ if (skb_padto(skb, ETH_ZLEN))
+ return NETDEV_TX_OK;
+
+ skb_put(skb, ETH_ZLEN - skb->len);
+ }
+
+ opa_vnic_encap_skb(adapter, skb);
+ return adapter->rn_ops->ndo_start_xmit(skb, netdev);
+}
+
+static u16 opa_vnic_select_queue(struct net_device *netdev, struct sk_buff *skb,
+ void *accel_priv,
+ select_queue_fallback_t fallback)
+{
+ struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev);
+ struct opa_vnic_skb_mdata *mdata;
+ int rc;
+
+ /* pass entropy and vl as metadata in skb */
+ mdata = skb_push(skb, sizeof(*mdata));
+ mdata->entropy = opa_vnic_calc_entropy(adapter, skb);
+ mdata->vl = opa_vnic_get_vl(adapter, skb);
+ rc = adapter->rn_ops->ndo_select_queue(netdev, skb,
+ accel_priv, fallback);
+ skb_pull(skb, sizeof(*mdata));
+ return rc;
+}
+
+/* opa_vnic_process_vema_config - process vema configuration updates */
+void opa_vnic_process_vema_config(struct opa_vnic_adapter *adapter)
+{
+ struct __opa_veswport_info *info = &adapter->info;
+ struct rdma_netdev *rn = netdev_priv(adapter->netdev);
+ u8 port_num[OPA_VESW_MAX_NUM_DEF_PORT] = { 0 };
+ struct net_device *netdev = adapter->netdev;
+ u8 i, port_count = 0;
+ u16 port_mask;
+
+ /* If the base_mac_addr is changed, update the interface mac address */
+ if (memcmp(info->vport.base_mac_addr, adapter->vema_mac_addr,
+ ARRAY_SIZE(info->vport.base_mac_addr))) {
+ struct sockaddr saddr;
+
+ memcpy(saddr.sa_data, info->vport.base_mac_addr,
+ ARRAY_SIZE(info->vport.base_mac_addr));
+ mutex_lock(&adapter->lock);
+ eth_mac_addr(netdev, &saddr);
+ memcpy(adapter->vema_mac_addr,
+ info->vport.base_mac_addr, ETH_ALEN);
+ mutex_unlock(&adapter->lock);
+ }
+
+ rn->set_id(netdev, info->vesw.vesw_id);
+
+ /* Handle MTU limit change */
+ rtnl_lock();
+ netdev->max_mtu = max_t(unsigned int, info->vesw.eth_mtu_non_vlan,
+ netdev->min_mtu);
+ if (netdev->mtu > netdev->max_mtu)
+ dev_set_mtu(netdev, netdev->max_mtu);
+ rtnl_unlock();
+
+ /* Update flow to default port redirection table */
+ port_mask = info->vesw.def_port_mask;
+ for (i = 0; i < OPA_VESW_MAX_NUM_DEF_PORT; i++) {
+ if (port_mask & 1)
+ port_num[port_count++] = i;
+ port_mask >>= 1;
+ }
+
+	/*
+	 * Build the flow table. The flow table is required when the
+	 * destination LID is not available. Up to OPA_VNIC_FLOW_TBL_SIZE
+	 * flows are supported. Each flow needs a default port number to
+	 * look up its dlid in the u_ucast_dlid array.
+	 */
+ for (i = 0; i < OPA_VNIC_FLOW_TBL_SIZE; i++)
+ adapter->flow_tbl[i] = port_count ? port_num[i % port_count] :
+ OPA_VNIC_INVALID_PORT;
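For example, with a sample def_port_mask of 0x0005, default ports 0 and 2 are enabled and the flow table alternates between them:

	/* port_num[] = {0, 2}, port_count = 2
	 * flow_tbl[] = {0, 2, 0, 2, ...}   (port_num[i % port_count])
	 */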
+
+ /* Operational state can only be DROP_ALL or FORWARDING */
+ if (info->vport.config_state == OPA_VNIC_STATE_FORWARDING) {
+ info->vport.oper_state = OPA_VNIC_STATE_FORWARDING;
+ netif_dormant_off(netdev);
+ } else {
+ info->vport.oper_state = OPA_VNIC_STATE_DROP_ALL;
+ netif_dormant_on(netdev);
+ }
+}
+
+/*
+ * Set the power-on default values in the adapter's vema interface structure.
+ */
+static inline void opa_vnic_set_pod_values(struct opa_vnic_adapter *adapter)
+{
+ adapter->info.vport.max_mac_tbl_ent = OPA_VNIC_MAC_TBL_MAX_ENTRIES;
+ adapter->info.vport.max_smac_ent = OPA_VNIC_MAX_SMAC_LIMIT;
+ adapter->info.vport.config_state = OPA_VNIC_STATE_DROP_ALL;
+ adapter->info.vport.eth_link_status = OPA_VNIC_ETH_LINK_DOWN;
+}
+
+/* opa_vnic_set_mac_addr - change mac address */
+static int opa_vnic_set_mac_addr(struct net_device *netdev, void *addr)
+{
+ struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev);
+ struct sockaddr *sa = addr;
+ int rc;
+
+ if (!memcmp(netdev->dev_addr, sa->sa_data, ETH_ALEN))
+ return 0;
+
+ mutex_lock(&adapter->lock);
+ rc = eth_mac_addr(netdev, addr);
+ mutex_unlock(&adapter->lock);
+ if (rc)
+ return rc;
+
+ adapter->info.vport.uc_macs_gen_count++;
+ opa_vnic_vema_report_event(adapter,
+ OPA_VESWPORT_TRAP_IFACE_UCAST_MAC_CHANGE);
+ return 0;
+}
+
+/*
+ * opa_vnic_mac_send_event - post an event on a possible mac list change
+ * Send a trap when the digest of the uc/mc mac list differs from the
+ * previous run. The digest is computed in the same way cksum computes one.
+ */
+static void opa_vnic_mac_send_event(struct net_device *netdev, u8 event)
+{
+ struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev);
+ struct netdev_hw_addr *ha;
+ struct netdev_hw_addr_list *hw_list;
+ u32 *ref_crc;
+ u32 l, crc = 0;
+
+ switch (event) {
+ case OPA_VESWPORT_TRAP_IFACE_UCAST_MAC_CHANGE:
+ hw_list = &netdev->uc;
+ adapter->info.vport.uc_macs_gen_count++;
+ ref_crc = &adapter->umac_hash;
+ break;
+ case OPA_VESWPORT_TRAP_IFACE_MCAST_MAC_CHANGE:
+ hw_list = &netdev->mc;
+ adapter->info.vport.mc_macs_gen_count++;
+ ref_crc = &adapter->mmac_hash;
+ break;
+ default:
+ return;
+ }
+ netdev_hw_addr_list_for_each(ha, hw_list) {
+ crc = crc32_le(crc, ha->addr, ETH_ALEN);
+ }
+ l = netdev_hw_addr_list_count(hw_list) * ETH_ALEN;
+ crc = ~crc32_le(crc, (void *)&l, sizeof(l));
+
+ if (crc != *ref_crc) {
+ *ref_crc = crc;
+ opa_vnic_vema_report_event(adapter, event);
+ }
+}
+
+/* opa_vnic_set_rx_mode - handle uc/mc mac list change */
+static void opa_vnic_set_rx_mode(struct net_device *netdev)
+{
+ opa_vnic_mac_send_event(netdev,
+ OPA_VESWPORT_TRAP_IFACE_UCAST_MAC_CHANGE);
+
+ opa_vnic_mac_send_event(netdev,
+ OPA_VESWPORT_TRAP_IFACE_MCAST_MAC_CHANGE);
+}
+
+/* opa_netdev_open - activate network interface */
+static int opa_netdev_open(struct net_device *netdev)
+{
+ struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev);
+ int rc;
+
+ rc = adapter->rn_ops->ndo_open(adapter->netdev);
+ if (rc) {
+ v_dbg("open failed %d\n", rc);
+ return rc;
+ }
+
+ /* Update eth link status and send trap */
+ adapter->info.vport.eth_link_status = OPA_VNIC_ETH_LINK_UP;
+ opa_vnic_vema_report_event(adapter,
+ OPA_VESWPORT_TRAP_ETH_LINK_STATUS_CHANGE);
+ return 0;
+}
+
+/* opa_netdev_close - disable network interface */
+static int opa_netdev_close(struct net_device *netdev)
+{
+ struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev);
+ int rc;
+
+ rc = adapter->rn_ops->ndo_stop(adapter->netdev);
+ if (rc) {
+ v_dbg("close failed %d\n", rc);
+ return rc;
+ }
+
+ /* Update eth link status and send trap */
+ adapter->info.vport.eth_link_status = OPA_VNIC_ETH_LINK_DOWN;
+ opa_vnic_vema_report_event(adapter,
+ OPA_VESWPORT_TRAP_ETH_LINK_STATUS_CHANGE);
+ return 0;
+}
+
+/* netdev ops */
+static const struct net_device_ops opa_netdev_ops = {
+ .ndo_open = opa_netdev_open,
+ .ndo_stop = opa_netdev_close,
+ .ndo_start_xmit = opa_netdev_start_xmit,
+ .ndo_get_stats64 = opa_vnic_get_stats64,
+ .ndo_set_rx_mode = opa_vnic_set_rx_mode,
+ .ndo_select_queue = opa_vnic_select_queue,
+ .ndo_set_mac_address = opa_vnic_set_mac_addr,
+};
+
+/* opa_vnic_add_netdev - create vnic netdev interface */
+struct opa_vnic_adapter *opa_vnic_add_netdev(struct ib_device *ibdev,
+ u8 port_num, u8 vport_num)
+{
+ struct opa_vnic_adapter *adapter;
+ struct net_device *netdev;
+ struct rdma_netdev *rn;
+ int rc;
+
+ netdev = ibdev->alloc_rdma_netdev(ibdev, port_num,
+ RDMA_NETDEV_OPA_VNIC,
+ "veth%d", NET_NAME_UNKNOWN,
+ ether_setup);
+ if (!netdev)
+ return ERR_PTR(-ENOMEM);
+ else if (IS_ERR(netdev))
+ return ERR_CAST(netdev);
+
+ rn = netdev_priv(netdev);
+ adapter = kzalloc(sizeof(*adapter), GFP_KERNEL);
+ if (!adapter) {
+ rc = -ENOMEM;
+ goto adapter_err;
+ }
+
+ rn->clnt_priv = adapter;
+ rn->hca = ibdev;
+ rn->port_num = port_num;
+ adapter->netdev = netdev;
+ adapter->ibdev = ibdev;
+ adapter->port_num = port_num;
+ adapter->vport_num = vport_num;
+ adapter->rn_ops = netdev->netdev_ops;
+
+ netdev->netdev_ops = &opa_netdev_ops;
+ netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+ netdev->hard_header_len += OPA_VNIC_SKB_HEADROOM;
+ mutex_init(&adapter->lock);
+ mutex_init(&adapter->mactbl_lock);
+ spin_lock_init(&adapter->stats_lock);
+
+ SET_NETDEV_DEV(netdev, ibdev->dev.parent);
+
+ opa_vnic_set_ethtool_ops(netdev);
+
+ opa_vnic_set_pod_values(adapter);
+
+ rc = register_netdev(netdev);
+ if (rc)
+ goto netdev_err;
+
+ netif_carrier_off(netdev);
+ netif_dormant_on(netdev);
+ v_info("initialized\n");
+
+ return adapter;
+netdev_err:
+ mutex_destroy(&adapter->lock);
+ mutex_destroy(&adapter->mactbl_lock);
+ kfree(adapter);
+adapter_err:
+ rn->free_rdma_netdev(netdev);
+
+ return ERR_PTR(rc);
+}
+
+/* opa_vnic_rem_netdev - remove vnic netdev interface */
+void opa_vnic_rem_netdev(struct opa_vnic_adapter *adapter)
+{
+ struct net_device *netdev = adapter->netdev;
+ struct rdma_netdev *rn = netdev_priv(netdev);
+
+ v_info("removing\n");
+ unregister_netdev(netdev);
+ opa_vnic_release_mac_tbl(adapter);
+ mutex_destroy(&adapter->lock);
+ mutex_destroy(&adapter->mactbl_lock);
+ kfree(adapter);
+ rn->free_rdma_netdev(netdev);
+}
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c
new file mode 100644
index 000000000000..cf768dd78d1b
--- /dev/null
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c
@@ -0,0 +1,1056 @@
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This file contains OPA Virtual Network Interface Controller (VNIC)
+ * Ethernet Management Agent (EMA) driver
+ */
+
+#include <linux/module.h>
+#include <rdma/ib_addr.h>
+#include <rdma/ib_smi.h>
+
+#include "opa_vnic_internal.h"
+
+#define DRV_VERSION "1.0"
+char opa_vnic_driver_name[] = "opa_vnic";
+const char opa_vnic_driver_version[] = DRV_VERSION;
+
+/*
+ * The trap service level is kept in bits 3 to 7 in the trap_sl_rsvd
+ * field in the class port info MAD.
+ */
+#define GET_TRAP_SL_FROM_CLASS_PORT_INFO(x) (((x) >> 3) & 0x1f)
+
+/* Cap trap bursts to a reasonable limit good for normal cases */
+#define OPA_VNIC_TRAP_BURST_LIMIT 4
+
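+/*
+ * Rate limiting sketch: the first trap after an idle window arms an
+ * OPA_VNIC_TRAP_TIMEOUT window; roughly OPA_VNIC_TRAP_BURST_LIMIT more
+ * traps may be sent inside that window before further traps are
+ * dropped (see opa_vnic_vema_send_trap()).
+ */
+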
+/*
+ * VNIC trap limit timeout.
+ * Inverse of the cap_mask2 response timeout (1.0737 secs),
+ * approximately 0.9 secs; see IB spec 13.4.6.2.1 PortInfoSubnetTimeout
+ * and 13.4.9 Traps.
+ */
+#define OPA_VNIC_TRAP_TIMEOUT ((4096 * (1UL << 18)) / 1000)
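+
+/*
+ * Worked value for the timeout above: 4096 * 2^18 / 1000
+ * = 4096 * 262144 / 1000 = 1073741 usecs, i.e. ~1.0737 secs when
+ * passed to usecs_to_jiffies() (see opa_vnic_vema_send_trap()).
+ */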
+
+#define OPA_VNIC_UNSUP_ATTR \
+ cpu_to_be16(IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD_ATTRIB)
+
+#define OPA_VNIC_INVAL_ATTR \
+ cpu_to_be16(IB_MGMT_MAD_STATUS_INVALID_ATTRIB_VALUE)
+
+#define OPA_VNIC_CLASS_CAP_TRAP 0x1
+
+/* Maximum number of VNIC ports supported */
+#define OPA_VNIC_MAX_NUM_VPORT 255
+
+/**
+ * struct opa_vnic_vema_port -- VNIC VEMA port details
+ * @cport: pointer to control port
+ * @mad_agent: pointer to mad agent for port
+ * @class_port_info: cached class port info
+ * @tid: Transaction id
+ * @port_num: OPA port number
+ * @vport_idr: vnic ports idr
+ * @event_handler: ib event handler
+ * @lock: adapter interface lock
+ */
+struct opa_vnic_vema_port {
+ struct opa_vnic_ctrl_port *cport;
+ struct ib_mad_agent *mad_agent;
+ struct opa_class_port_info class_port_info;
+ u64 tid;
+ u8 port_num;
+ struct idr vport_idr;
+ struct ib_event_handler event_handler;
+
+ /* Lock to query/update network adapter */
+ struct mutex lock;
+};
+
+static void opa_vnic_vema_add_one(struct ib_device *device);
+static void opa_vnic_vema_rem_one(struct ib_device *device,
+ void *client_data);
+
+static struct ib_client opa_vnic_client = {
+ .name = opa_vnic_driver_name,
+ .add = opa_vnic_vema_add_one,
+ .remove = opa_vnic_vema_rem_one,
+};
+
+/**
+ * vema_get_vport_num -- Get the vnic port number from the mad
+ * @recvd_mad: Received mad
+ *
+ * Return: the vnic port number
+ */
+static inline u8 vema_get_vport_num(struct opa_vnic_vema_mad *recvd_mad)
+{
+ return be32_to_cpu(recvd_mad->mad_hdr.attr_mod) & 0xff;
+}
+
+/**
+ * vema_get_vport_adapter -- Get vnic port adapter from recvd mad
+ * @recvd_mad: received mad
+ * @port: ptr to port struct on which MAD was recvd
+ *
+ * Return: vnic adapter
+ */
+static inline struct opa_vnic_adapter *
+vema_get_vport_adapter(struct opa_vnic_vema_mad *recvd_mad,
+ struct opa_vnic_vema_port *port)
+{
+ u8 vport_num = vema_get_vport_num(recvd_mad);
+
+ return idr_find(&port->vport_idr, vport_num);
+}
+
+/**
+ * vema_mac_tbl_req_ok -- Check if mac request has correct values
+ * @mac_tbl: mac table
+ *
+ * This function checks for the validity of the offset and number of
+ * entries required.
+ *
+ * Return: true if offset and num_entries are valid
+ */
+static inline bool vema_mac_tbl_req_ok(struct opa_veswport_mactable *mac_tbl)
+{
+ u16 offset, num_entries;
+ u16 req_entries = ((OPA_VNIC_EMA_DATA - sizeof(*mac_tbl)) /
+ sizeof(mac_tbl->tbl_entries[0]));
+
+ offset = be16_to_cpu(mac_tbl->offset);
+ num_entries = be16_to_cpu(mac_tbl->num_entries);
+
+ return ((num_entries <= req_entries) &&
+ (offset + num_entries <= OPA_VNIC_MAC_TBL_MAX_ENTRIES));
+}
+
+/*
+ * Return the power on default values in the port info structure
+ * in big endian format as required by MAD.
+ */
+static inline void vema_get_pod_values(struct opa_veswport_info *port_info)
+{
+ memset(port_info, 0, sizeof(*port_info));
+ port_info->vport.max_mac_tbl_ent =
+ cpu_to_be16(OPA_VNIC_MAC_TBL_MAX_ENTRIES);
+ port_info->vport.max_smac_ent =
+ cpu_to_be16(OPA_VNIC_MAX_SMAC_LIMIT);
+ port_info->vport.oper_state = OPA_VNIC_STATE_DROP_ALL;
+ port_info->vport.config_state = OPA_VNIC_STATE_DROP_ALL;
+}
+
+/**
+ * vema_add_vport -- Add a new vnic port
+ * @port: ptr to opa_vnic_vema_port struct
+ * @vport_num: vnic port number (to be added)
+ *
+ * Return: a pointer to the vnic adapter structure
+ */
+static struct opa_vnic_adapter *vema_add_vport(struct opa_vnic_vema_port *port,
+ u8 vport_num)
+{
+ struct opa_vnic_ctrl_port *cport = port->cport;
+ struct opa_vnic_adapter *adapter;
+
+ adapter = opa_vnic_add_netdev(cport->ibdev, port->port_num, vport_num);
+ if (!IS_ERR(adapter)) {
+ int rc;
+
+ adapter->cport = cport;
+ rc = idr_alloc(&port->vport_idr, adapter, vport_num,
+ vport_num + 1, GFP_NOWAIT);
+ if (rc < 0) {
+ opa_vnic_rem_netdev(adapter);
+ adapter = ERR_PTR(rc);
+ }
+ }
+
+ return adapter;
+}
+
+/**
+ * vema_get_class_port_info -- Get class info for port
+ * @port: Port on which MAD was received
+ * @recvd_mad: pointer to the received mad
+ * @rsp_mad: pointer to response mad
+ *
+ * This function copies the latest class port info value set for the
+ * port and stores it for generating traps
+ */
+static void vema_get_class_port_info(struct opa_vnic_vema_port *port,
+ struct opa_vnic_vema_mad *recvd_mad,
+ struct opa_vnic_vema_mad *rsp_mad)
+{
+ struct opa_class_port_info *port_info;
+
+ port_info = (struct opa_class_port_info *)rsp_mad->data;
+ memcpy(port_info, &port->class_port_info, sizeof(*port_info));
+	port_info->base_version = OPA_MGMT_BASE_VERSION;
+ port_info->class_version = OPA_EMA_CLASS_VERSION;
+
+ /*
+ * Set capability mask bit indicating agent generates traps,
+ * and set the maximum number of VNIC ports supported.
+ */
+ port_info->cap_mask = cpu_to_be16((OPA_VNIC_CLASS_CAP_TRAP |
+ (OPA_VNIC_MAX_NUM_VPORT << 8)));
+
+ /*
+	 * Since a Get request is always sent by the EM first, we set
+	 * the expected response time to
+	 * 4.096 usec * 2^18 == 1.0737 sec here.
+ */
+ port_info->cap_mask2_resp_time = cpu_to_be32(18);
+}
+
+/**
+ * vema_set_class_port_info -- Set class info for port
+ * @port: Port on which MAD was received
+ * @recvd_mad: pointer to the received mad
+ * @rsp_mad: pointer to response mad
+ *
+ * This function updates the port class info for the specific vnic
+ * and sets up the response mad data
+ */
+static void vema_set_class_port_info(struct opa_vnic_vema_port *port,
+ struct opa_vnic_vema_mad *recvd_mad,
+ struct opa_vnic_vema_mad *rsp_mad)
+{
+ memcpy(&port->class_port_info, recvd_mad->data,
+ sizeof(port->class_port_info));
+
+ vema_get_class_port_info(port, recvd_mad, rsp_mad);
+}
+
+/**
+ * vema_get_veswport_info -- Get veswport info
+ * @port: source port on which MAD was received
+ * @recvd_mad: pointer to the received mad
+ * @rsp_mad: pointer to response mad
+ */
+static void vema_get_veswport_info(struct opa_vnic_vema_port *port,
+ struct opa_vnic_vema_mad *recvd_mad,
+ struct opa_vnic_vema_mad *rsp_mad)
+{
+ struct opa_veswport_info *port_info =
+ (struct opa_veswport_info *)rsp_mad->data;
+ struct opa_vnic_adapter *adapter;
+
+ adapter = vema_get_vport_adapter(recvd_mad, port);
+ if (adapter) {
+ memset(port_info, 0, sizeof(*port_info));
+ opa_vnic_get_vesw_info(adapter, &port_info->vesw);
+ opa_vnic_get_per_veswport_info(adapter,
+ &port_info->vport);
+ } else {
+ vema_get_pod_values(port_info);
+ }
+}
+
+/**
+ * vema_set_veswport_info -- Set veswport info
+ * @port: source port on which MAD was received
+ * @recvd_mad: pointer to the received mad
+ * @rsp_mad: pointer to response mad
+ *
+ * This function sets the veswport info for the given vnic port
+ */
+static void vema_set_veswport_info(struct opa_vnic_vema_port *port,
+ struct opa_vnic_vema_mad *recvd_mad,
+ struct opa_vnic_vema_mad *rsp_mad)
+{
+ struct opa_vnic_ctrl_port *cport = port->cport;
+ struct opa_veswport_info *port_info;
+ struct opa_vnic_adapter *adapter;
+ u8 vport_num;
+
+ vport_num = vema_get_vport_num(recvd_mad);
+
+ adapter = vema_get_vport_adapter(recvd_mad, port);
+ if (!adapter) {
+ adapter = vema_add_vport(port, vport_num);
+ if (IS_ERR(adapter)) {
+ c_err("failed to add vport %d: %ld\n",
+ vport_num, PTR_ERR(adapter));
+ goto err_exit;
+ }
+ }
+
+ port_info = (struct opa_veswport_info *)recvd_mad->data;
+ opa_vnic_set_vesw_info(adapter, &port_info->vesw);
+ opa_vnic_set_per_veswport_info(adapter, &port_info->vport);
+
+ /* Process the new config settings */
+ opa_vnic_process_vema_config(adapter);
+
+ vema_get_veswport_info(port, recvd_mad, rsp_mad);
+ return;
+
+err_exit:
+ rsp_mad->mad_hdr.status = OPA_VNIC_INVAL_ATTR;
+}
+
+/**
+ * vema_get_mac_entries -- Get MAC entries in VNIC MAC table
+ * @port: source port on which MAD was received
+ * @recvd_mad: pointer to the received mad
+ * @rsp_mad: pointer to response mad
+ *
+ * This function gets the MAC entries that are programmed into
+ * the VNIC MAC forwarding table. It checks for the validity of
+ * the index into the MAC table and the number of entries that
+ * are to be retrieved.
+ */
+static void vema_get_mac_entries(struct opa_vnic_vema_port *port,
+ struct opa_vnic_vema_mad *recvd_mad,
+ struct opa_vnic_vema_mad *rsp_mad)
+{
+ struct opa_veswport_mactable *mac_tbl_in, *mac_tbl_out;
+ struct opa_vnic_adapter *adapter;
+
+ adapter = vema_get_vport_adapter(recvd_mad, port);
+ if (!adapter) {
+ rsp_mad->mad_hdr.status = OPA_VNIC_INVAL_ATTR;
+ return;
+ }
+
+ mac_tbl_in = (struct opa_veswport_mactable *)recvd_mad->data;
+ mac_tbl_out = (struct opa_veswport_mactable *)rsp_mad->data;
+
+ if (vema_mac_tbl_req_ok(mac_tbl_in)) {
+ mac_tbl_out->offset = mac_tbl_in->offset;
+ mac_tbl_out->num_entries = mac_tbl_in->num_entries;
+ opa_vnic_query_mac_tbl(adapter, mac_tbl_out);
+ } else {
+ rsp_mad->mad_hdr.status = OPA_VNIC_INVAL_ATTR;
+ }
+}
+
+/**
+ * vema_set_mac_entries -- Set MAC entries in VNIC MAC table
+ * @port: source port on which MAD was received
+ * @recvd_mad: pointer to the received mad
+ * @rsp_mad: pointer to response mad
+ *
+ * This function sets the MAC entries in the VNIC forwarding table
+ * It checks for the validity of the index and the number of forwarding
+ * table entries to be programmed.
+ */
+static void vema_set_mac_entries(struct opa_vnic_vema_port *port,
+ struct opa_vnic_vema_mad *recvd_mad,
+ struct opa_vnic_vema_mad *rsp_mad)
+{
+ struct opa_veswport_mactable *mac_tbl;
+ struct opa_vnic_adapter *adapter;
+
+ adapter = vema_get_vport_adapter(recvd_mad, port);
+ if (!adapter) {
+ rsp_mad->mad_hdr.status = OPA_VNIC_INVAL_ATTR;
+ return;
+ }
+
+ mac_tbl = (struct opa_veswport_mactable *)recvd_mad->data;
+ if (vema_mac_tbl_req_ok(mac_tbl)) {
+ if (opa_vnic_update_mac_tbl(adapter, mac_tbl))
+ rsp_mad->mad_hdr.status = OPA_VNIC_UNSUP_ATTR;
+ } else {
+ rsp_mad->mad_hdr.status = OPA_VNIC_UNSUP_ATTR;
+ }
+ vema_get_mac_entries(port, recvd_mad, rsp_mad);
+}
+
+/**
+ * vema_set_delete_vesw -- Reset VESW info to POD values
+ * @port: source port on which MAD was received
+ * @recvd_mad: pointer to the received mad
+ * @rsp_mad: pointer to response mad
+ *
+ * This function clears all the fields of veswport info for the requested vesw
+ * and sets them back to the power-on default values. It does not delete the
+ * vesw.
+ */
+static void vema_set_delete_vesw(struct opa_vnic_vema_port *port,
+ struct opa_vnic_vema_mad *recvd_mad,
+ struct opa_vnic_vema_mad *rsp_mad)
+{
+ struct opa_veswport_info *port_info =
+ (struct opa_veswport_info *)rsp_mad->data;
+ struct opa_vnic_adapter *adapter;
+
+ adapter = vema_get_vport_adapter(recvd_mad, port);
+ if (!adapter) {
+ rsp_mad->mad_hdr.status = OPA_VNIC_INVAL_ATTR;
+ return;
+ }
+
+ vema_get_pod_values(port_info);
+ opa_vnic_set_vesw_info(adapter, &port_info->vesw);
+ opa_vnic_set_per_veswport_info(adapter, &port_info->vport);
+
+ /* Process the new config settings */
+ opa_vnic_process_vema_config(adapter);
+
+ opa_vnic_release_mac_tbl(adapter);
+
+ vema_get_veswport_info(port, recvd_mad, rsp_mad);
+}
+
+/**
+ * vema_get_mac_list -- Get the unicast/multicast macs.
+ * @port: source port on which MAD was received
+ * @recvd_mad: Received mad contains fields to set vnic parameters
+ * @rsp_mad: Response mad to be built
+ * @attr_id: Attribute ID indicating multicast or unicast mac list
+ */
+static void vema_get_mac_list(struct opa_vnic_vema_port *port,
+ struct opa_vnic_vema_mad *recvd_mad,
+ struct opa_vnic_vema_mad *rsp_mad,
+ u16 attr_id)
+{
+ struct opa_veswport_iface_macs *macs_in, *macs_out;
+ int max_entries = (OPA_VNIC_EMA_DATA - sizeof(*macs_out)) / ETH_ALEN;
+ struct opa_vnic_adapter *adapter;
+
+ adapter = vema_get_vport_adapter(recvd_mad, port);
+ if (!adapter) {
+ rsp_mad->mad_hdr.status = OPA_VNIC_INVAL_ATTR;
+ return;
+ }
+
+ macs_in = (struct opa_veswport_iface_macs *)recvd_mad->data;
+ macs_out = (struct opa_veswport_iface_macs *)rsp_mad->data;
+
+ macs_out->start_idx = macs_in->start_idx;
+ if (macs_in->num_macs_in_msg)
+ macs_out->num_macs_in_msg = macs_in->num_macs_in_msg;
+ else
+ macs_out->num_macs_in_msg = cpu_to_be16(max_entries);
+
+ if (attr_id == OPA_EM_ATTR_IFACE_MCAST_MACS)
+ opa_vnic_query_mcast_macs(adapter, macs_out);
+ else
+ opa_vnic_query_ucast_macs(adapter, macs_out);
+}
+
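+/*
+ * Paging example (illustrative): an EM may pass start_idx = 0 and
+ * num_macs_in_msg = 0 to receive up to max_entries addresses, then
+ * advance start_idx by the num_macs_in_msg echoed in each response
+ * until tot_macs_in_lst entries have been read.
+ */
+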
+/**
+ * vema_get_summary_counters -- Gets summary counters.
+ * @port: source port on which MAD was received
+ * @recvd_mad: Received mad contains fields to set vnic parameters
+ * @rsp_mad: Response mad to be built
+ */
+static void vema_get_summary_counters(struct opa_vnic_vema_port *port,
+ struct opa_vnic_vema_mad *recvd_mad,
+ struct opa_vnic_vema_mad *rsp_mad)
+{
+ struct opa_veswport_summary_counters *cntrs;
+ struct opa_vnic_adapter *adapter;
+
+ adapter = vema_get_vport_adapter(recvd_mad, port);
+ if (adapter) {
+ cntrs = (struct opa_veswport_summary_counters *)rsp_mad->data;
+ opa_vnic_get_summary_counters(adapter, cntrs);
+ } else {
+ rsp_mad->mad_hdr.status = OPA_VNIC_INVAL_ATTR;
+ }
+}
+
+/**
+ * vema_get_error_counters -- Gets error counters.
+ * @port: source port on which MAD was received
+ * @recvd_mad: Received mad contains fields to set vnic parameters
+ * @rsp_mad: Response mad to be built
+ */
+static void vema_get_error_counters(struct opa_vnic_vema_port *port,
+ struct opa_vnic_vema_mad *recvd_mad,
+ struct opa_vnic_vema_mad *rsp_mad)
+{
+ struct opa_veswport_error_counters *cntrs;
+ struct opa_vnic_adapter *adapter;
+
+ adapter = vema_get_vport_adapter(recvd_mad, port);
+ if (adapter) {
+ cntrs = (struct opa_veswport_error_counters *)rsp_mad->data;
+ opa_vnic_get_error_counters(adapter, cntrs);
+ } else {
+ rsp_mad->mad_hdr.status = OPA_VNIC_INVAL_ATTR;
+ }
+}
+
+/**
+ * vema_get -- Process received get MAD
+ * @port: source port on which MAD was received
+ * @recvd_mad: Received mad
+ * @rsp_mad: Response mad to be built
+ */
+static void vema_get(struct opa_vnic_vema_port *port,
+ struct opa_vnic_vema_mad *recvd_mad,
+ struct opa_vnic_vema_mad *rsp_mad)
+{
+ u16 attr_id = be16_to_cpu(recvd_mad->mad_hdr.attr_id);
+
+ switch (attr_id) {
+ case OPA_EM_ATTR_CLASS_PORT_INFO:
+ vema_get_class_port_info(port, recvd_mad, rsp_mad);
+ break;
+ case OPA_EM_ATTR_VESWPORT_INFO:
+ vema_get_veswport_info(port, recvd_mad, rsp_mad);
+ break;
+ case OPA_EM_ATTR_VESWPORT_MAC_ENTRIES:
+ vema_get_mac_entries(port, recvd_mad, rsp_mad);
+ break;
+ case OPA_EM_ATTR_IFACE_UCAST_MACS:
+ /* fall through */
+ case OPA_EM_ATTR_IFACE_MCAST_MACS:
+ vema_get_mac_list(port, recvd_mad, rsp_mad, attr_id);
+ break;
+ case OPA_EM_ATTR_VESWPORT_SUMMARY_COUNTERS:
+ vema_get_summary_counters(port, recvd_mad, rsp_mad);
+ break;
+ case OPA_EM_ATTR_VESWPORT_ERROR_COUNTERS:
+ vema_get_error_counters(port, recvd_mad, rsp_mad);
+ break;
+ default:
+ rsp_mad->mad_hdr.status = OPA_VNIC_UNSUP_ATTR;
+ break;
+ }
+}
+
+/**
+ * vema_set -- Process received set MAD
+ * @port: source port on which MAD was received
+ * @recvd_mad: Received mad contains fields to set vnic parameters
+ * @rsp_mad: Response mad to be built
+ */
+static void vema_set(struct opa_vnic_vema_port *port,
+ struct opa_vnic_vema_mad *recvd_mad,
+ struct opa_vnic_vema_mad *rsp_mad)
+{
+ u16 attr_id = be16_to_cpu(recvd_mad->mad_hdr.attr_id);
+
+ switch (attr_id) {
+ case OPA_EM_ATTR_CLASS_PORT_INFO:
+ vema_set_class_port_info(port, recvd_mad, rsp_mad);
+ break;
+ case OPA_EM_ATTR_VESWPORT_INFO:
+ vema_set_veswport_info(port, recvd_mad, rsp_mad);
+ break;
+ case OPA_EM_ATTR_VESWPORT_MAC_ENTRIES:
+ vema_set_mac_entries(port, recvd_mad, rsp_mad);
+ break;
+ case OPA_EM_ATTR_DELETE_VESW:
+ vema_set_delete_vesw(port, recvd_mad, rsp_mad);
+ break;
+ default:
+ rsp_mad->mad_hdr.status = OPA_VNIC_UNSUP_ATTR;
+ break;
+ }
+}
+
+/**
+ * vema_send -- Send handler for VEMA MAD agent
+ * @mad_agent: pointer to the mad agent
+ * @mad_wc: pointer to mad send work completion information
+ *
+ * Free all the data structures associated with the sent MAD
+ */
+static void vema_send(struct ib_mad_agent *mad_agent,
+ struct ib_mad_send_wc *mad_wc)
+{
+ rdma_destroy_ah(mad_wc->send_buf->ah);
+ ib_free_send_mad(mad_wc->send_buf);
+}
+
+/**
+ * vema_recv -- Recv handler for VEMA MAD agent
+ * @mad_agent: pointer to the mad agent
+ * @send_buf: Send buffer if found, else NULL
+ * @mad_wc: pointer to mad send work completion information
+ *
+ * Handle only set and get methods and respond to other methods
+ * as unsupported. Allocate response buffer and address handle
+ * for the response MAD.
+ */
+static void vema_recv(struct ib_mad_agent *mad_agent,
+ struct ib_mad_send_buf *send_buf,
+ struct ib_mad_recv_wc *mad_wc)
+{
+ struct opa_vnic_vema_port *port;
+ struct ib_ah *ah;
+ struct ib_mad_send_buf *rsp;
+ struct opa_vnic_vema_mad *vema_mad;
+
+ if (!mad_wc || !mad_wc->recv_buf.mad)
+ return;
+
+ port = mad_agent->context;
+ ah = ib_create_ah_from_wc(mad_agent->qp->pd, mad_wc->wc,
+ mad_wc->recv_buf.grh, mad_agent->port_num);
+ if (IS_ERR(ah))
+ goto free_recv_mad;
+
+ rsp = ib_create_send_mad(mad_agent, mad_wc->wc->src_qp,
+ mad_wc->wc->pkey_index, 0,
+ IB_MGMT_VENDOR_HDR, OPA_VNIC_EMA_DATA,
+ GFP_KERNEL, OPA_MGMT_BASE_VERSION);
+ if (IS_ERR(rsp))
+ goto err_rsp;
+
+ rsp->ah = ah;
+ vema_mad = rsp->mad;
+ memcpy(vema_mad, mad_wc->recv_buf.mad, IB_MGMT_VENDOR_HDR);
+ vema_mad->mad_hdr.method = IB_MGMT_METHOD_GET_RESP;
+ vema_mad->mad_hdr.status = 0;
+
+ /* Lock ensures network adapter is not removed */
+ mutex_lock(&port->lock);
+
+ switch (mad_wc->recv_buf.mad->mad_hdr.method) {
+ case IB_MGMT_METHOD_GET:
+ vema_get(port, (struct opa_vnic_vema_mad *)mad_wc->recv_buf.mad,
+ vema_mad);
+ break;
+ case IB_MGMT_METHOD_SET:
+ vema_set(port, (struct opa_vnic_vema_mad *)mad_wc->recv_buf.mad,
+ vema_mad);
+ break;
+ default:
+ vema_mad->mad_hdr.status = OPA_VNIC_UNSUP_ATTR;
+ break;
+ }
+ mutex_unlock(&port->lock);
+
+ if (!ib_post_send_mad(rsp, NULL)) {
+ /*
+		 * on a successful post, the ah and send mad are
+		 * destroyed in the send handler
+ */
+ goto free_recv_mad;
+ }
+
+ ib_free_send_mad(rsp);
+
+err_rsp:
+ rdma_destroy_ah(ah);
+free_recv_mad:
+ ib_free_recv_mad(mad_wc);
+}
+
+/**
+ * vema_get_port -- Gets the opa_vnic_vema_port
+ * @cport: pointer to control dev
+ * @port_num: Port number
+ *
+ * This function returns the opa_vnic_vema_port structure that is
+ * associated with the given OPA port number.
+ *
+ * Return: ptr to requested opa_vnic_vema_port structure on
+ * success, NULL otherwise
+ */
+static struct opa_vnic_vema_port *
+vema_get_port(struct opa_vnic_ctrl_port *cport, u8 port_num)
+{
+ struct opa_vnic_vema_port *port = (void *)cport + sizeof(*cport);
+
+ if (port_num > cport->num_ports)
+ return NULL;
+
+ return port + (port_num - 1);
+}
+
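+/*
+ * Layout assumed above (see opa_vnic_vema_add_one()): the per-port
+ * array immediately follows the control port structure in a single
+ * allocation, i.e.
+ *
+ *	kzalloc(sizeof(*cport) +
+ *		num_ports * sizeof(struct opa_vnic_vema_port), GFP_KERNEL);
+ */
+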
+/**
+ * opa_vnic_vema_send_trap -- This function sends a trap to the EM
+ * @adapter: pointer to vnic port adapter
+ * @data: pointer to trap data filled by calling function
+ * @lid: issuer's lid (encap_slid from vesw_port_info)
+ *
+ * This function is called from the VNIC driver to send a trap if there
+ * is something the EM should be notified about. These events currently
+ * are:
+ * 1) UNICAST INTERFACE MACADDRESS changes
+ * 2) MULTICAST INTERFACE MACADDRESS changes
+ * 3) ETHERNET LINK STATUS changes
+ * While allocating the send mad, the remote side qpn used is 1,
+ * as this is the well-known QP.
+ *
+ */
+void opa_vnic_vema_send_trap(struct opa_vnic_adapter *adapter,
+ struct __opa_veswport_trap *data, u32 lid)
+{
+ struct opa_vnic_ctrl_port *cport = adapter->cport;
+ struct ib_mad_send_buf *send_buf;
+ struct opa_vnic_vema_port *port;
+ struct ib_device *ibp;
+ struct opa_vnic_vema_mad_trap *trap_mad;
+ struct opa_class_port_info *class;
+ struct rdma_ah_attr ah_attr;
+ struct ib_ah *ah;
+ struct opa_veswport_trap *trap;
+ u32 trap_lid;
+ u16 pkey_idx;
+
+ if (!cport)
+ goto err_exit;
+ ibp = cport->ibdev;
+ port = vema_get_port(cport, data->opaportnum);
+ if (!port || !port->mad_agent)
+ goto err_exit;
+
+ if (time_before(jiffies, adapter->trap_timeout)) {
+ if (adapter->trap_count == OPA_VNIC_TRAP_BURST_LIMIT) {
+ v_warn("Trap rate exceeded\n");
+ goto err_exit;
+ } else {
+ adapter->trap_count++;
+ }
+ } else {
+ adapter->trap_count = 0;
+ }
+
+ class = &port->class_port_info;
+ /* Set up address handle */
+ memset(&ah_attr, 0, sizeof(ah_attr));
+ ah_attr.type = rdma_ah_find_type(ibp, port->port_num);
+ rdma_ah_set_sl(&ah_attr,
+ GET_TRAP_SL_FROM_CLASS_PORT_INFO(class->trap_sl_rsvd));
+ rdma_ah_set_port_num(&ah_attr, port->port_num);
+ trap_lid = be32_to_cpu(class->trap_lid);
+ /*
+	 * Check trap lid validity; it must not be zero.
+	 * The trap sink could change after we build the MAD, but since
+	 * trap delivery is not guaranteed anyway, we don't take a lock;
+	 * the change could occur even with locking.
+ */
+ if (!trap_lid) {
+ c_err("%s: Invalid dlid\n", __func__);
+ goto err_exit;
+ }
+
+ rdma_ah_set_dlid(&ah_attr, trap_lid);
+ ah = rdma_create_ah(port->mad_agent->qp->pd, &ah_attr);
+ if (IS_ERR(ah)) {
+ c_err("%s:Couldn't create new AH = %p\n", __func__, ah);
+ c_err("%s:dlid = %d, sl = %d, port = %d\n", __func__,
+ rdma_ah_get_dlid(&ah_attr), rdma_ah_get_sl(&ah_attr),
+ rdma_ah_get_port_num(&ah_attr));
+ goto err_exit;
+ }
+
+ if (ib_find_pkey(ibp, data->opaportnum, IB_DEFAULT_PKEY_FULL,
+ &pkey_idx) < 0) {
+ c_err("%s:full key not found, defaulting to partial\n",
+ __func__);
+ if (ib_find_pkey(ibp, data->opaportnum, IB_DEFAULT_PKEY_PARTIAL,
+ &pkey_idx) < 0)
+ pkey_idx = 1;
+ }
+
+ send_buf = ib_create_send_mad(port->mad_agent, 1, pkey_idx, 0,
+ IB_MGMT_VENDOR_HDR, IB_MGMT_MAD_DATA,
+ GFP_ATOMIC, OPA_MGMT_BASE_VERSION);
+ if (IS_ERR(send_buf)) {
+ c_err("%s:Couldn't allocate send buf\n", __func__);
+ goto err_sndbuf;
+ }
+
+ send_buf->ah = ah;
+
+ /* Set up common MAD hdr */
+ trap_mad = send_buf->mad;
+ trap_mad->mad_hdr.base_version = OPA_MGMT_BASE_VERSION;
+ trap_mad->mad_hdr.mgmt_class = OPA_MGMT_CLASS_INTEL_EMA;
+ trap_mad->mad_hdr.class_version = OPA_EMA_CLASS_VERSION;
+ trap_mad->mad_hdr.method = IB_MGMT_METHOD_TRAP;
+ port->tid++;
+ trap_mad->mad_hdr.tid = cpu_to_be64(port->tid);
+ trap_mad->mad_hdr.attr_id = IB_SMP_ATTR_NOTICE;
+
+ /* Set up vendor OUI */
+ trap_mad->oui[0] = INTEL_OUI_1;
+ trap_mad->oui[1] = INTEL_OUI_2;
+ trap_mad->oui[2] = INTEL_OUI_3;
+
+ /* Setup notice attribute portion */
+ trap_mad->notice.gen_type = OPA_INTEL_EMA_NOTICE_TYPE_INFO << 1;
+ trap_mad->notice.oui_1 = INTEL_OUI_1;
+ trap_mad->notice.oui_2 = INTEL_OUI_2;
+ trap_mad->notice.oui_3 = INTEL_OUI_3;
+ trap_mad->notice.issuer_lid = cpu_to_be32(lid);
+
+ /* copy the actual trap data */
+ trap = (struct opa_veswport_trap *)trap_mad->notice.raw_data;
+ trap->fabric_id = cpu_to_be16(data->fabric_id);
+ trap->veswid = cpu_to_be16(data->veswid);
+ trap->veswportnum = cpu_to_be32(data->veswportnum);
+ trap->opaportnum = cpu_to_be16(data->opaportnum);
+ trap->veswportindex = data->veswportindex;
+ trap->opcode = data->opcode;
+
+	/* On a successful send, set up the rate-limit timeout; else bail */
+ if (ib_post_send_mad(send_buf, NULL)) {
+ ib_free_send_mad(send_buf);
+ } else {
+ if (adapter->trap_count)
+ return;
+ adapter->trap_timeout = jiffies +
+ usecs_to_jiffies(OPA_VNIC_TRAP_TIMEOUT);
+ return;
+ }
+
+err_sndbuf:
+ rdma_destroy_ah(ah);
+err_exit:
+ v_err("Aborting trap\n");
+}
+
+static int vema_rem_vport(int id, void *p, void *data)
+{
+ struct opa_vnic_adapter *adapter = p;
+
+ opa_vnic_rem_netdev(adapter);
+ return 0;
+}
+
+static int vema_enable_vport(int id, void *p, void *data)
+{
+ struct opa_vnic_adapter *adapter = p;
+
+ netif_carrier_on(adapter->netdev);
+ return 0;
+}
+
+static int vema_disable_vport(int id, void *p, void *data)
+{
+ struct opa_vnic_adapter *adapter = p;
+
+ netif_carrier_off(adapter->netdev);
+ return 0;
+}
+
+static void opa_vnic_event(struct ib_event_handler *handler,
+ struct ib_event *record)
+{
+ struct opa_vnic_vema_port *port =
+ container_of(handler, struct opa_vnic_vema_port, event_handler);
+ struct opa_vnic_ctrl_port *cport = port->cport;
+
+ if (record->element.port_num != port->port_num)
+ return;
+
+ c_dbg("OPA_VNIC received event %d on device %s port %d\n",
+ record->event, record->device->name, record->element.port_num);
+
+ if (record->event == IB_EVENT_PORT_ERR)
+ idr_for_each(&port->vport_idr, vema_disable_vport, NULL);
+ if (record->event == IB_EVENT_PORT_ACTIVE)
+ idr_for_each(&port->vport_idr, vema_enable_vport, NULL);
+}
+
+/**
+ * vema_unregister -- Unregisters agent
+ * @cport: pointer to control port
+ *
+ * This function removes the vnic ports, unregisters the MAD agents
+ * registered by VEMA, and tears down the per-port event handlers
+ */
+static void vema_unregister(struct opa_vnic_ctrl_port *cport)
+{
+ int i;
+
+ for (i = 1; i <= cport->num_ports; i++) {
+ struct opa_vnic_vema_port *port = vema_get_port(cport, i);
+
+ if (!port->mad_agent)
+ continue;
+
+ /* Lock ensures no MAD is being processed */
+ mutex_lock(&port->lock);
+ idr_for_each(&port->vport_idr, vema_rem_vport, NULL);
+ mutex_unlock(&port->lock);
+
+ ib_unregister_mad_agent(port->mad_agent);
+ port->mad_agent = NULL;
+ mutex_destroy(&port->lock);
+ idr_destroy(&port->vport_idr);
+ ib_unregister_event_handler(&port->event_handler);
+ }
+}
+
+/**
+ * vema_register -- Registers agent
+ * @cport: pointer to control port
+ *
+ * This function registers the handlers for the VEMA MADs
+ *
+ * Return: 0 on success, non-zero otherwise
+ */
+static int vema_register(struct opa_vnic_ctrl_port *cport)
+{
+ struct ib_mad_reg_req reg_req = {
+ .mgmt_class = OPA_MGMT_CLASS_INTEL_EMA,
+ .mgmt_class_version = OPA_MGMT_BASE_VERSION,
+ .oui = { INTEL_OUI_1, INTEL_OUI_2, INTEL_OUI_3 }
+ };
+ int i;
+
+ set_bit(IB_MGMT_METHOD_GET, reg_req.method_mask);
+ set_bit(IB_MGMT_METHOD_SET, reg_req.method_mask);
+
+ /* register ib event handler and mad agent for each port on dev */
+ for (i = 1; i <= cport->num_ports; i++) {
+ struct opa_vnic_vema_port *port = vema_get_port(cport, i);
+ int ret;
+
+ port->cport = cport;
+ port->port_num = i;
+
+ INIT_IB_EVENT_HANDLER(&port->event_handler,
+ cport->ibdev, opa_vnic_event);
+ ret = ib_register_event_handler(&port->event_handler);
+ if (ret) {
+ c_err("port %d: event handler register failed\n", i);
+ vema_unregister(cport);
+ return ret;
+ }
+
+ idr_init(&port->vport_idr);
+ mutex_init(&port->lock);
+ port->mad_agent = ib_register_mad_agent(cport->ibdev, i,
+ IB_QPT_GSI, &reg_req,
+ IB_MGMT_RMPP_VERSION,
+ vema_send, vema_recv,
+ port, 0);
+ if (IS_ERR(port->mad_agent)) {
+ ret = PTR_ERR(port->mad_agent);
+ port->mad_agent = NULL;
+ mutex_destroy(&port->lock);
+ idr_destroy(&port->vport_idr);
+ vema_unregister(cport);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * opa_vnic_vema_add_one -- Handle new ib device
+ * @device: ib device pointer
+ *
+ * Allocate the vnic control port and initialize it.
+ */
+static void opa_vnic_vema_add_one(struct ib_device *device)
+{
+ struct opa_vnic_ctrl_port *cport;
+ int rc, size = sizeof(*cport);
+
+ if (!rdma_cap_opa_vnic(device))
+ return;
+
+ size += device->phys_port_cnt * sizeof(struct opa_vnic_vema_port);
+ cport = kzalloc(size, GFP_KERNEL);
+ if (!cport)
+ return;
+
+ cport->num_ports = device->phys_port_cnt;
+ cport->ibdev = device;
+
+ /* Initialize opa vnic management agent (vema) */
+ rc = vema_register(cport);
+ if (!rc)
+ c_info("VNIC client initialized\n");
+
+ ib_set_client_data(device, &opa_vnic_client, cport);
+}
+
+/**
+ * opa_vnic_vema_rem_one -- Handle ib device removal
+ * @device: ib device pointer
+ * @client_data: ib client data
+ *
+ * Uninitialize and free the vnic control port.
+ */
+static void opa_vnic_vema_rem_one(struct ib_device *device,
+ void *client_data)
+{
+ struct opa_vnic_ctrl_port *cport = client_data;
+
+ if (!cport)
+ return;
+
+ c_info("removing VNIC client\n");
+ vema_unregister(cport);
+ kfree(cport);
+}
+
+static int __init opa_vnic_init(void)
+{
+ int rc;
+
+ pr_info("OPA Virtual Network Driver - v%s\n",
+ opa_vnic_driver_version);
+
+ rc = ib_register_client(&opa_vnic_client);
+ if (rc)
+ pr_err("VNIC driver register failed %d\n", rc);
+
+ return rc;
+}
+module_init(opa_vnic_init);
+
+static void opa_vnic_deinit(void)
+{
+ ib_unregister_client(&opa_vnic_client);
+}
+module_exit(opa_vnic_deinit);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("Intel OPA Virtual Network driver");
+MODULE_VERSION(DRV_VERSION);
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c
new file mode 100644
index 000000000000..c2733964379c
--- /dev/null
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c
@@ -0,0 +1,390 @@
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This file contains OPA VNIC EMA Interface functions.
+ */
+
+#include "opa_vnic_internal.h"
+
+/**
+ * opa_vnic_vema_report_event - send trap to report the specified event
+ * @adapter: vnic port adapter
+ * @event: event to be reported
+ *
+ * This function calls the vema api to send a trap for the given event.
+ */
+void opa_vnic_vema_report_event(struct opa_vnic_adapter *adapter, u8 event)
+{
+ struct __opa_veswport_info *info = &adapter->info;
+ struct __opa_veswport_trap trap_data;
+
+ trap_data.fabric_id = info->vesw.fabric_id;
+ trap_data.veswid = info->vesw.vesw_id;
+ trap_data.veswportnum = info->vport.port_num;
+ trap_data.opaportnum = adapter->port_num;
+ trap_data.veswportindex = adapter->vport_num;
+ trap_data.opcode = event;
+
+ opa_vnic_vema_send_trap(adapter, &trap_data, info->vport.encap_slid);
+}
+
+/**
+ * opa_vnic_get_summary_counters - get summary counters
+ * @adapter: vnic port adapter
+ * @cntrs: pointer to destination summary counters structure
+ *
+ * This function populates the summary counters that are maintained by
+ * the given adapter into the destination structure provided.
+ */
+void opa_vnic_get_summary_counters(struct opa_vnic_adapter *adapter,
+ struct opa_veswport_summary_counters *cntrs)
+{
+ struct opa_vnic_stats vstats;
+ __be64 *dst;
+ u64 *src;
+
+ memset(&vstats, 0, sizeof(vstats));
+ spin_lock(&adapter->stats_lock);
+ adapter->rn_ops->ndo_get_stats64(adapter->netdev, &vstats.netstats);
+ spin_unlock(&adapter->stats_lock);
+
+ cntrs->vp_instance = cpu_to_be16(adapter->vport_num);
+ cntrs->vesw_id = cpu_to_be16(adapter->info.vesw.vesw_id);
+ cntrs->veswport_num = cpu_to_be32(adapter->port_num);
+
+ cntrs->tx_errors = cpu_to_be64(vstats.netstats.tx_errors);
+ cntrs->rx_errors = cpu_to_be64(vstats.netstats.rx_errors);
+ cntrs->tx_packets = cpu_to_be64(vstats.netstats.tx_packets);
+ cntrs->rx_packets = cpu_to_be64(vstats.netstats.rx_packets);
+ cntrs->tx_bytes = cpu_to_be64(vstats.netstats.tx_bytes);
+ cntrs->rx_bytes = cpu_to_be64(vstats.netstats.rx_bytes);
+
+ /*
+ * This loop depends on layout of
+ * opa_veswport_summary_counters opa_vnic_stats structures.
+ */
+ for (dst = &cntrs->tx_unicast, src = &vstats.tx_grp.unicast;
+ dst < &cntrs->reserved[0]; dst++, src++) {
+ *dst = cpu_to_be64(*src);
+ }
+}
+
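+/*
+ * The walk above is equivalent to this indexed form (illustrative),
+ * which is why both structures must keep their counter fields in the
+ * same order:
+ *
+ *	ptrdiff_t i, n = &cntrs->reserved[0] - &cntrs->tx_unicast;
+ *
+ *	for (i = 0; i < n; i++)
+ *		(&cntrs->tx_unicast)[i] =
+ *			cpu_to_be64((&vstats.tx_grp.unicast)[i]);
+ */
+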
+/**
+ * opa_vnic_get_error_counters - get error counters
+ * @adapter: vnic port adapter
+ * @cntrs: pointer to destination error counters structure
+ *
+ * This function populates the error counters that are maintained by
+ * the given adapter into the destination structure provided.
+ */
+void opa_vnic_get_error_counters(struct opa_vnic_adapter *adapter,
+ struct opa_veswport_error_counters *cntrs)
+{
+ struct opa_vnic_stats vstats;
+
+ memset(&vstats, 0, sizeof(vstats));
+ spin_lock(&adapter->stats_lock);
+ adapter->rn_ops->ndo_get_stats64(adapter->netdev, &vstats.netstats);
+ spin_unlock(&adapter->stats_lock);
+
+ cntrs->vp_instance = cpu_to_be16(adapter->vport_num);
+ cntrs->vesw_id = cpu_to_be16(adapter->info.vesw.vesw_id);
+ cntrs->veswport_num = cpu_to_be32(adapter->port_num);
+
+ cntrs->tx_errors = cpu_to_be64(vstats.netstats.tx_errors);
+ cntrs->rx_errors = cpu_to_be64(vstats.netstats.rx_errors);
+ cntrs->tx_dlid_zero = cpu_to_be64(vstats.tx_dlid_zero);
+ cntrs->tx_drop_state = cpu_to_be64(vstats.tx_drop_state);
+ cntrs->tx_logic = cpu_to_be64(vstats.netstats.tx_fifo_errors +
+ vstats.netstats.tx_carrier_errors);
+
+ cntrs->rx_bad_veswid = cpu_to_be64(vstats.netstats.rx_nohandler);
+ cntrs->rx_runt = cpu_to_be64(vstats.rx_runt);
+ cntrs->rx_oversize = cpu_to_be64(vstats.rx_oversize);
+ cntrs->rx_drop_state = cpu_to_be64(vstats.rx_drop_state);
+ cntrs->rx_logic = cpu_to_be64(vstats.netstats.rx_fifo_errors);
+}
+
+/**
+ * opa_vnic_get_vesw_info -- Get the vesw information
+ * @adapter: vnic port adapter
+ * @info: pointer to destination vesw info structure
+ *
+ * This function copies the vesw info that is maintained by the
+ * given adapter to destination address provided.
+ */
+void opa_vnic_get_vesw_info(struct opa_vnic_adapter *adapter,
+ struct opa_vesw_info *info)
+{
+ struct __opa_vesw_info *src = &adapter->info.vesw;
+ int i;
+
+ info->fabric_id = cpu_to_be16(src->fabric_id);
+ info->vesw_id = cpu_to_be16(src->vesw_id);
+ memcpy(info->rsvd0, src->rsvd0, ARRAY_SIZE(src->rsvd0));
+ info->def_port_mask = cpu_to_be16(src->def_port_mask);
+ memcpy(info->rsvd1, src->rsvd1, ARRAY_SIZE(src->rsvd1));
+ info->pkey = cpu_to_be16(src->pkey);
+
+ memcpy(info->rsvd2, src->rsvd2, ARRAY_SIZE(src->rsvd2));
+ info->u_mcast_dlid = cpu_to_be32(src->u_mcast_dlid);
+ for (i = 0; i < OPA_VESW_MAX_NUM_DEF_PORT; i++)
+ info->u_ucast_dlid[i] = cpu_to_be32(src->u_ucast_dlid[i]);
+
+ memcpy(info->rsvd3, src->rsvd3, ARRAY_SIZE(src->rsvd3));
+ for (i = 0; i < OPA_VNIC_MAX_NUM_PCP; i++)
+ info->eth_mtu[i] = cpu_to_be16(src->eth_mtu[i]);
+
+ info->eth_mtu_non_vlan = cpu_to_be16(src->eth_mtu_non_vlan);
+ memcpy(info->rsvd4, src->rsvd4, ARRAY_SIZE(src->rsvd4));
+}
+
+/**
+ * opa_vnic_set_vesw_info -- Set the vesw information
+ * @adapter: vnic port adapter
+ * @info: pointer to vesw info structure
+ *
+ * This function updates the vesw info that is maintained by the
+ * given adapter with vesw info provided. Reserved fields are stored
+ * and returned back to EM as is.
+ */
+void opa_vnic_set_vesw_info(struct opa_vnic_adapter *adapter,
+ struct opa_vesw_info *info)
+{
+ struct __opa_vesw_info *dst = &adapter->info.vesw;
+ int i;
+
+ dst->fabric_id = be16_to_cpu(info->fabric_id);
+ dst->vesw_id = be16_to_cpu(info->vesw_id);
+ memcpy(dst->rsvd0, info->rsvd0, ARRAY_SIZE(info->rsvd0));
+ dst->def_port_mask = be16_to_cpu(info->def_port_mask);
+ memcpy(dst->rsvd1, info->rsvd1, ARRAY_SIZE(info->rsvd1));
+ dst->pkey = be16_to_cpu(info->pkey);
+
+ memcpy(dst->rsvd2, info->rsvd2, ARRAY_SIZE(info->rsvd2));
+ dst->u_mcast_dlid = be32_to_cpu(info->u_mcast_dlid);
+ for (i = 0; i < OPA_VESW_MAX_NUM_DEF_PORT; i++)
+ dst->u_ucast_dlid[i] = be32_to_cpu(info->u_ucast_dlid[i]);
+
+ memcpy(dst->rsvd3, info->rsvd3, ARRAY_SIZE(info->rsvd3));
+ for (i = 0; i < OPA_VNIC_MAX_NUM_PCP; i++)
+ dst->eth_mtu[i] = be16_to_cpu(info->eth_mtu[i]);
+
+ dst->eth_mtu_non_vlan = be16_to_cpu(info->eth_mtu_non_vlan);
+ memcpy(dst->rsvd4, info->rsvd4, ARRAY_SIZE(info->rsvd4));
+}
+
+/**
+ * opa_vnic_get_per_veswport_info -- Get the vesw per port information
+ * @adapter: vnic port adapter
+ * @info: pointer to destination vport info structure
+ *
+ * This function copies the vesw per port info that is maintained by the
+ * given adapter to destination address provided.
+ * Note that the read only fields are not copied.
+ */
+void opa_vnic_get_per_veswport_info(struct opa_vnic_adapter *adapter,
+ struct opa_per_veswport_info *info)
+{
+ struct __opa_per_veswport_info *src = &adapter->info.vport;
+
+ info->port_num = cpu_to_be32(src->port_num);
+ info->eth_link_status = src->eth_link_status;
+ memcpy(info->rsvd0, src->rsvd0, ARRAY_SIZE(src->rsvd0));
+
+ memcpy(info->base_mac_addr, src->base_mac_addr,
+ ARRAY_SIZE(info->base_mac_addr));
+ info->config_state = src->config_state;
+ info->oper_state = src->oper_state;
+ info->max_mac_tbl_ent = cpu_to_be16(src->max_mac_tbl_ent);
+ info->max_smac_ent = cpu_to_be16(src->max_smac_ent);
+ info->mac_tbl_digest = cpu_to_be32(src->mac_tbl_digest);
+ memcpy(info->rsvd1, src->rsvd1, ARRAY_SIZE(src->rsvd1));
+
+ info->encap_slid = cpu_to_be32(src->encap_slid);
+ memcpy(info->pcp_to_sc_uc, src->pcp_to_sc_uc,
+ ARRAY_SIZE(info->pcp_to_sc_uc));
+ memcpy(info->pcp_to_vl_uc, src->pcp_to_vl_uc,
+ ARRAY_SIZE(info->pcp_to_vl_uc));
+ memcpy(info->pcp_to_sc_mc, src->pcp_to_sc_mc,
+ ARRAY_SIZE(info->pcp_to_sc_mc));
+ memcpy(info->pcp_to_vl_mc, src->pcp_to_vl_mc,
+ ARRAY_SIZE(info->pcp_to_vl_mc));
+ info->non_vlan_sc_uc = src->non_vlan_sc_uc;
+ info->non_vlan_vl_uc = src->non_vlan_vl_uc;
+ info->non_vlan_sc_mc = src->non_vlan_sc_mc;
+ info->non_vlan_vl_mc = src->non_vlan_vl_mc;
+ memcpy(info->rsvd2, src->rsvd2, ARRAY_SIZE(src->rsvd2));
+
+ info->uc_macs_gen_count = cpu_to_be16(src->uc_macs_gen_count);
+ info->mc_macs_gen_count = cpu_to_be16(src->mc_macs_gen_count);
+ memcpy(info->rsvd3, src->rsvd3, ARRAY_SIZE(src->rsvd3));
+}
+
+/**
+ * opa_vnic_set_per_veswport_info -- Set vesw per port information
+ * @adapter: vnic port adapter
+ * @info: pointer to vport info structure
+ *
+ * This function updates the vesw per port info that is maintained by the
+ * given adapter with vesw per port info provided. Reserved fields are
+ * stored and returned back to EM as is.
+ */
+void opa_vnic_set_per_veswport_info(struct opa_vnic_adapter *adapter,
+ struct opa_per_veswport_info *info)
+{
+ struct __opa_per_veswport_info *dst = &adapter->info.vport;
+
+ dst->port_num = be32_to_cpu(info->port_num);
+ memcpy(dst->rsvd0, info->rsvd0, ARRAY_SIZE(info->rsvd0));
+
+ memcpy(dst->base_mac_addr, info->base_mac_addr,
+ ARRAY_SIZE(dst->base_mac_addr));
+ dst->config_state = info->config_state;
+ memcpy(dst->rsvd1, info->rsvd1, ARRAY_SIZE(info->rsvd1));
+
+ dst->encap_slid = be32_to_cpu(info->encap_slid);
+ memcpy(dst->pcp_to_sc_uc, info->pcp_to_sc_uc,
+ ARRAY_SIZE(dst->pcp_to_sc_uc));
+ memcpy(dst->pcp_to_vl_uc, info->pcp_to_vl_uc,
+ ARRAY_SIZE(dst->pcp_to_vl_uc));
+ memcpy(dst->pcp_to_sc_mc, info->pcp_to_sc_mc,
+ ARRAY_SIZE(dst->pcp_to_sc_mc));
+ memcpy(dst->pcp_to_vl_mc, info->pcp_to_vl_mc,
+ ARRAY_SIZE(dst->pcp_to_vl_mc));
+ dst->non_vlan_sc_uc = info->non_vlan_sc_uc;
+ dst->non_vlan_vl_uc = info->non_vlan_vl_uc;
+ dst->non_vlan_sc_mc = info->non_vlan_sc_mc;
+ dst->non_vlan_vl_mc = info->non_vlan_vl_mc;
+ memcpy(dst->rsvd2, info->rsvd2, ARRAY_SIZE(info->rsvd2));
+ memcpy(dst->rsvd3, info->rsvd3, ARRAY_SIZE(info->rsvd3));
+}
+
+/**
+ * opa_vnic_query_mcast_macs - query multicast mac list
+ * @adapter: vnic port adapter
+ * @macs: pointer to mac list
+ *
+ * This function populates the provided mac list with the configured
+ * multicast addresses in the adapter.
+ */
+void opa_vnic_query_mcast_macs(struct opa_vnic_adapter *adapter,
+ struct opa_veswport_iface_macs *macs)
+{
+ u16 start_idx, num_macs, idx = 0, count = 0;
+ struct netdev_hw_addr *ha;
+
+ start_idx = be16_to_cpu(macs->start_idx);
+ num_macs = be16_to_cpu(macs->num_macs_in_msg);
+ netdev_for_each_mc_addr(ha, adapter->netdev) {
+ struct opa_vnic_iface_mac_entry *entry = &macs->entry[count];
+
+ if (start_idx > idx++)
+ continue;
+ else if (num_macs == count)
+ break;
+ memcpy(entry, ha->addr, sizeof(*entry));
+ count++;
+ }
+
+ macs->tot_macs_in_lst = cpu_to_be16(netdev_mc_count(adapter->netdev));
+ macs->num_macs_in_msg = cpu_to_be16(count);
+ macs->gen_count = cpu_to_be16(adapter->info.vport.mc_macs_gen_count);
+}
+
+/**
+ * opa_vnic_query_ucast_macs - query unicast mac list
+ * @adapter: vnic port adapter
+ * @macs: pointer to mac list
+ *
+ * This function populates the provided mac list with the configured
+ * unicast addresses in the adapter.
+ */
+void opa_vnic_query_ucast_macs(struct opa_vnic_adapter *adapter,
+ struct opa_veswport_iface_macs *macs)
+{
+ u16 start_idx, tot_macs, num_macs, idx = 0, count = 0;
+ struct netdev_hw_addr *ha;
+
+ start_idx = be16_to_cpu(macs->start_idx);
+ num_macs = be16_to_cpu(macs->num_macs_in_msg);
+ /* loop through dev_addrs list first */
+ for_each_dev_addr(adapter->netdev, ha) {
+ struct opa_vnic_iface_mac_entry *entry = &macs->entry[count];
+
+ /* Do not include EM specified MAC address */
+ if (!memcmp(adapter->info.vport.base_mac_addr, ha->addr,
+ ARRAY_SIZE(adapter->info.vport.base_mac_addr)))
+ continue;
+
+ if (start_idx > idx++)
+ continue;
+ else if (num_macs == count)
+ break;
+ memcpy(entry, ha->addr, sizeof(*entry));
+ count++;
+ }
+
+ /* loop through uc list */
+ netdev_for_each_uc_addr(ha, adapter->netdev) {
+ struct opa_vnic_iface_mac_entry *entry = &macs->entry[count];
+
+ if (start_idx > idx++)
+ continue;
+ else if (num_macs == count)
+ break;
+ memcpy(entry, ha->addr, sizeof(*entry));
+ count++;
+ }
+
+ tot_macs = netdev_hw_addr_list_count(&adapter->netdev->dev_addrs) +
+ netdev_uc_count(adapter->netdev);
+ macs->tot_macs_in_lst = cpu_to_be16(tot_macs);
+ macs->num_macs_in_msg = cpu_to_be16(count);
+ macs->gen_count = cpu_to_be16(adapter->info.vport.uc_macs_gen_count);
+}
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 79bf48477ddb..2354c742caa1 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -40,6 +40,7 @@
#include <linux/parser.h>
#include <linux/random.h>
#include <linux/jiffies.h>
+#include <linux/lockdep.h>
#include <rdma/ib_cache.h>
#include <linux/atomic.h>
@@ -311,6 +312,11 @@ static int srp_new_cm_id(struct srp_rdma_ch *ch)
if (ch->cm_id)
ib_destroy_cm_id(ch->cm_id);
ch->cm_id = new_cm_id;
+ if (rdma_cap_opa_ah(target->srp_host->srp_dev->dev,
+ target->srp_host->port))
+ ch->path.rec_type = SA_PATH_REC_TYPE_OPA;
+ else
+ ch->path.rec_type = SA_PATH_REC_TYPE_IB;
ch->path.sgid = target->sgid;
ch->path.dgid = target->orig_dgid;
ch->path.pkey = target->pkey;
@@ -371,7 +377,6 @@ static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device,
struct srp_fr_desc *d;
struct ib_mr *mr;
int i, ret = -EINVAL;
- enum ib_mr_type mr_type;
if (pool_size <= 0)
goto err;
@@ -385,13 +390,9 @@ static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device,
spin_lock_init(&pool->lock);
INIT_LIST_HEAD(&pool->free_list);
- if (device->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG)
- mr_type = IB_MR_TYPE_SG_GAPS;
- else
- mr_type = IB_MR_TYPE_MEM_REG;
-
for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
- mr = ib_alloc_mr(pd, mr_type, max_page_list_len);
+ mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG,
+ max_page_list_len);
if (IS_ERR(mr)) {
ret = PTR_ERR(mr);
if (ret == -ENOMEM)
@@ -470,9 +471,13 @@ static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
* completion handler can access the queue pair while it is
* being destroyed.
*/
-static void srp_destroy_qp(struct ib_qp *qp)
+static void srp_destroy_qp(struct srp_rdma_ch *ch, struct ib_qp *qp)
{
- ib_drain_rq(qp);
+ spin_lock_irq(&ch->lock);
+ ib_process_cq_direct(ch->send_cq, -1);
+ spin_unlock_irq(&ch->lock);
+
+ ib_drain_qp(qp);
ib_destroy_qp(qp);
}
@@ -546,7 +551,7 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
}
if (ch->qp)
- srp_destroy_qp(ch->qp);
+ srp_destroy_qp(ch, ch->qp);
if (ch->recv_cq)
ib_free_cq(ch->recv_cq);
if (ch->send_cq)
@@ -570,7 +575,7 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
return 0;
err_qp:
- srp_destroy_qp(qp);
+ ib_destroy_qp(qp);
err_send_cq:
ib_free_cq(send_cq);
@@ -613,7 +618,7 @@ static void srp_free_ch_ib(struct srp_target_port *target,
ib_destroy_fmr_pool(ch->fmr_pool);
}
- srp_destroy_qp(ch->qp);
+ srp_destroy_qp(ch, ch->qp);
ib_free_cq(ch->send_cq);
ib_free_cq(ch->recv_cq);
@@ -643,7 +648,7 @@ static void srp_free_ch_ib(struct srp_target_port *target,
}
static void srp_path_rec_completion(int status,
- struct ib_sa_path_rec *pathrec,
+ struct sa_path_rec *pathrec,
void *ch_ptr)
{
struct srp_rdma_ch *ch = ch_ptr;
@@ -1804,6 +1809,8 @@ static struct srp_iu *__srp_get_tx_iu(struct srp_rdma_ch *ch,
s32 rsv = (iu_type == SRP_IU_TSK_MGMT) ? 0 : SRP_TSK_MGMT_SQ_SIZE;
struct srp_iu *iu;
+ lockdep_assert_held(&ch->lock);
+
ib_process_cq_direct(ch->send_cq, -1);
if (list_empty(&ch->free_tx))
@@ -1824,6 +1831,11 @@ static struct srp_iu *__srp_get_tx_iu(struct srp_rdma_ch *ch,
return iu;
}
+/*
+ * Note: if this function is called from inside ib_drain_sq() then it will
+ * be called without ch->lock being held. If ib_drain_sq() dequeues a WQE
+ * with status IB_WC_SUCCESS then that's a bug.
+ */
static void srp_send_done(struct ib_cq *cq, struct ib_wc *wc)
{
struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe);
@@ -1834,6 +1846,8 @@ static void srp_send_done(struct ib_cq *cq, struct ib_wc *wc)
return;
}
+ lockdep_assert_held(&ch->lock);
+
list_add(&iu->list, &ch->free_tx);
}
@@ -1889,17 +1903,24 @@ static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp)
if (unlikely(rsp->tag & SRP_TAG_TSK_MGMT)) {
spin_lock_irqsave(&ch->lock, flags);
ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
+ if (rsp->tag == ch->tsk_mgmt_tag) {
+ ch->tsk_mgmt_status = -1;
+ if (be32_to_cpu(rsp->resp_data_len) >= 4)
+ ch->tsk_mgmt_status = rsp->data[3];
+ complete(&ch->tsk_mgmt_done);
+ } else {
+ shost_printk(KERN_ERR, target->scsi_host,
+ "Received tsk mgmt response too late for tag %#llx\n",
+ rsp->tag);
+ }
spin_unlock_irqrestore(&ch->lock, flags);
-
- ch->tsk_mgmt_status = -1;
- if (be32_to_cpu(rsp->resp_data_len) >= 4)
- ch->tsk_mgmt_status = rsp->data[3];
- complete(&ch->tsk_mgmt_done);
} else {
scmnd = scsi_host_find_tag(target->scsi_host, rsp->tag);
- if (scmnd) {
+ if (scmnd && scmnd->host_scribble) {
req = (void *)scmnd->host_scribble;
scmnd = srp_claim_req(ch, req, NULL, scmnd);
+ } else {
+ scmnd = NULL;
}
if (!scmnd) {
shost_printk(KERN_ERR, target->scsi_host,
@@ -2383,12 +2404,12 @@ static void srp_cm_rej_handler(struct ib_cm_id *cm_id,
switch (event->param.rej_rcvd.reason) {
case IB_CM_REJ_PORT_CM_REDIRECT:
cpi = event->param.rej_rcvd.ari;
- ch->path.dlid = cpi->redirect_lid;
+ sa_path_set_dlid(&ch->path, htonl(ntohs(cpi->redirect_lid)));
ch->path.pkey = cpi->redirect_pkey;
cm_id->remote_cm_qpn = be32_to_cpu(cpi->redirect_qp) & 0x00ffffff;
memcpy(ch->path.dgid.raw, cpi->redirect_gid, 16);
- ch->status = ch->path.dlid ?
+ ch->status = sa_path_get_dlid(&ch->path) ?
SRP_DLID_REDIRECT : SRP_PORT_REDIRECT;
break;
@@ -2531,19 +2552,18 @@ srp_change_queue_depth(struct scsi_device *sdev, int qdepth)
}
static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun,
- u8 func)
+ u8 func, u8 *status)
{
struct srp_target_port *target = ch->target;
struct srp_rport *rport = target->rport;
struct ib_device *dev = target->srp_host->srp_dev->dev;
struct srp_iu *iu;
struct srp_tsk_mgmt *tsk_mgmt;
+ int res;
if (!ch->connected || target->qp_in_error)
return -1;
- init_completion(&ch->tsk_mgmt_done);
-
/*
* Lock the rport mutex to avoid that srp_create_ch_ib() is
* invoked while a task management function is being sent.
@@ -2566,10 +2586,16 @@ static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun,
tsk_mgmt->opcode = SRP_TSK_MGMT;
int_to_scsilun(lun, &tsk_mgmt->lun);
- tsk_mgmt->tag = req_tag | SRP_TAG_TSK_MGMT;
tsk_mgmt->tsk_mgmt_func = func;
tsk_mgmt->task_tag = req_tag;
+ spin_lock_irq(&ch->lock);
+ ch->tsk_mgmt_tag = (ch->tsk_mgmt_tag + 1) | SRP_TAG_TSK_MGMT;
+ tsk_mgmt->tag = ch->tsk_mgmt_tag;
+ spin_unlock_irq(&ch->lock);
+
+ init_completion(&ch->tsk_mgmt_done);
+
ib_dma_sync_single_for_device(dev, iu->dma, sizeof *tsk_mgmt,
DMA_TO_DEVICE);
if (srp_post_send(ch, iu, sizeof(*tsk_mgmt))) {
@@ -2578,13 +2604,15 @@ static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun,
return -1;
}
+ res = wait_for_completion_timeout(&ch->tsk_mgmt_done,
+ msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS));
+ if (res > 0 && status)
+ *status = ch->tsk_mgmt_status;
mutex_unlock(&rport->mutex);
- if (!wait_for_completion_timeout(&ch->tsk_mgmt_done,
- msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS)))
- return -1;
+ WARN_ON_ONCE(res < 0);
- return 0;
+ return res > 0 ? 0 : -1;
}
static int srp_abort(struct scsi_cmnd *scmnd)
@@ -2610,7 +2638,7 @@ static int srp_abort(struct scsi_cmnd *scmnd)
shost_printk(KERN_ERR, target->scsi_host,
"Sending SRP abort for tag %#x\n", tag);
if (srp_send_tsk_mgmt(ch, tag, scmnd->device->lun,
- SRP_TSK_ABORT_TASK) == 0)
+ SRP_TSK_ABORT_TASK, NULL) == 0)
ret = SUCCESS;
else if (target->rport->state == SRP_RPORT_LOST)
ret = FAST_IO_FAIL;
@@ -2628,14 +2656,15 @@ static int srp_reset_device(struct scsi_cmnd *scmnd)
struct srp_target_port *target = host_to_target(scmnd->device->host);
struct srp_rdma_ch *ch;
int i;
+ u8 status;
shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n");
ch = &target->ch[0];
if (srp_send_tsk_mgmt(ch, SRP_TAG_NO_REQ, scmnd->device->lun,
- SRP_TSK_LUN_RESET))
+ SRP_TSK_LUN_RESET, &status))
return FAILED;
- if (ch->tsk_mgmt_status)
+ if (status)
return FAILED;
for (i = 0; i < target->ch_count; i++) {
@@ -2664,9 +2693,8 @@ static int srp_slave_alloc(struct scsi_device *sdev)
struct Scsi_Host *shost = sdev->host;
struct srp_target_port *target = host_to_target(shost);
struct srp_device *srp_dev = target->srp_host->srp_dev;
- struct ib_device *ibdev = srp_dev->dev;
- if (!(ibdev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG))
+ if (true)
blk_queue_virt_boundary(sdev->request_queue,
~srp_dev->mr_page_mask);
@@ -2869,6 +2897,7 @@ static struct scsi_host_template srp_template = {
.info = srp_target_info,
.queuecommand = srp_queuecommand,
.change_queue_depth = srp_change_queue_depth,
+ .eh_timed_out = srp_timed_out,
.eh_abort_handler = srp_abort,
.eh_device_reset_handler = srp_reset_device,
.eh_host_reset_handler = srp_reset_host,
@@ -2909,7 +2938,7 @@ static int srp_add_target(struct srp_host *host, struct srp_target_port *target)
sprintf(target->target_name, "SRP.T10:%016llX",
be64_to_cpu(target->id_ext));
- if (scsi_add_host(target->scsi_host, host->srp_dev->dev->dma_device))
+ if (scsi_add_host(target->scsi_host, host->srp_dev->dev->dev.parent))
return -ENODEV;
memcpy(ids.port_id, &target->id_ext, 8);
@@ -3421,11 +3450,12 @@ static ssize_t srp_create_target(struct device *dev,
ret = srp_connect_ch(ch, multich);
if (ret) {
shost_printk(KERN_ERR, target->scsi_host,
- PFX "Connection %d/%d failed\n",
+ PFX "Connection %d/%d to %pI6 failed\n",
ch_start + cpu_idx,
- target->ch_count);
+ target->ch_count,
+ ch->target->orig_dgid.raw);
if (node_idx == 0 && cpu_idx == 0) {
- goto err_disconnect;
+ goto free_ch;
} else {
srp_free_ch_ib(target, ch);
srp_free_req_data(target, ch);
@@ -3472,6 +3502,7 @@ put:
err_disconnect:
srp_disconnect_target(target);
+free_ch:
for (i = 0; i < target->ch_count; i++) {
ch = &target->ch[i];
srp_free_ch_ib(target, ch);
@@ -3520,7 +3551,7 @@ static struct srp_host *srp_add_port(struct srp_device *device, u8 port)
host->port = port;
host->dev.class = &srp_class;
- host->dev.parent = device->dev->dma_device;
+ host->dev.parent = device->dev->dev.parent;
dev_set_name(&host->dev, "srp-%s-%d", device->dev->name, port);
if (device_register(&host->dev))
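
Note on the DLID handling in the ib_srp.c hunks above: struct sa_path_rec stores the destination LID as a 32-bit big-endian value so that the same structure can also carry 32-bit OPA LIDs, while the CM redirect info still delivers a 16-bit big-endian IB LID. The htonl(ntohs(...)) pair performs exactly that widening. A minimal standalone sketch, with a hypothetical helper name (ib_lid_be16_to_be32 is not part of this patch):

#include <linux/types.h>
#include <asm/byteorder.h>

/* Widen a 16-bit big-endian IB LID to the 32-bit big-endian form. */
static inline __be32 ib_lid_be16_to_be32(__be16 lid)
{
	return cpu_to_be32(be16_to_cpu(lid));	/* same as htonl(ntohs(lid)) */
}
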
diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h
index 21c69695f9d4..ab9077b81d5a 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.h
+++ b/drivers/infiniband/ulp/srp/ib_srp.h
@@ -152,7 +152,7 @@ struct srp_rdma_ch {
struct completion done;
int status;
- struct ib_sa_path_rec path;
+ struct sa_path_rec path;
struct ib_sa_query *path_query;
int path_query_id;
@@ -163,6 +163,7 @@ struct srp_rdma_ch {
int max_ti_iu_len;
int comp_vector;
+ u64 tsk_mgmt_tag;
struct completion tsk_mgmt_done;
u8 tsk_mgmt_status;
bool connected;
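
The tsk_mgmt_tag field added above closes a race in which the response to a timed-out task-management request could be mistaken for the response to a later one: srp_send_tsk_mgmt() now allocates a fresh tag under ch->lock for every request, reinitializes the completion only after the tag has been published, and reports the SRP status through an out-parameter. A minimal sketch of the pattern with hypothetical names (tmf_ctx, TMF_TAG_FLAG, tmf_send, tmf_response); the response side is an assumption about how the matching looks, not code quoted from this patch:

#include <linux/completion.h>
#include <linux/spinlock.h>
#include <linux/jiffies.h>
#include <linux/types.h>

#define TMF_TAG_FLAG	(1ULL << 63)	/* plays the role of SRP_TAG_TSK_MGMT */

struct tmf_ctx {
	spinlock_t lock;
	u64 tag;			/* most recently issued tag */
	struct completion done;
	u8 status;
};

/* Issuer: hand out a unique tag, post the request, wait with a timeout. */
static int tmf_send(struct tmf_ctx *c, unsigned long timeout_ms)
{
	spin_lock_irq(&c->lock);
	c->tag = (c->tag + 1) | TMF_TAG_FLAG;
	spin_unlock_irq(&c->lock);

	init_completion(&c->done);
	/* ... post the request carrying c->tag to the target here ... */

	if (!wait_for_completion_timeout(&c->done,
					 msecs_to_jiffies(timeout_ms)))
		return -1;	/* timed out; a late reply is simply ignored */
	return c->status ? -1 : 0;
}

/* Response path: complete the waiter only if the tag still matches. */
static void tmf_response(struct tmf_ctx *c, u64 rsp_tag, u8 rsp_status)
{
	if (rsp_tag != c->tag)
		return;		/* stale reply from an earlier, abandoned request */
	c->status = rsp_status;
	complete(&c->done);
}
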
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index d21ba9d857c3..402275be0931 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -417,7 +417,7 @@ static void srpt_mgmt_method_get(struct srpt_port *sp, struct ib_mad *rq_mad,
static void srpt_mad_send_handler(struct ib_mad_agent *mad_agent,
struct ib_mad_send_wc *mad_wc)
{
- ib_destroy_ah(mad_wc->send_buf->ah);
+ rdma_destroy_ah(mad_wc->send_buf->ah);
ib_free_send_mad(mad_wc->send_buf);
}
@@ -481,7 +481,7 @@ static void srpt_mad_recv_handler(struct ib_mad_agent *mad_agent,
ib_free_send_mad(rsp);
err_rsp:
- ib_destroy_ah(ah);
+ rdma_destroy_ah(ah);
err:
ib_free_recv_mad(mad_wc);
}
@@ -500,6 +500,7 @@ static int srpt_refresh_port(struct srpt_port *sport)
struct ib_mad_reg_req reg_req;
struct ib_port_modify port_modify;
struct ib_port_attr port_attr;
+ __be16 *guid;
int ret;
memset(&port_modify, 0, sizeof(port_modify));
@@ -522,10 +523,17 @@ static int srpt_refresh_port(struct srpt_port *sport)
if (ret)
goto err_query_port;
+ sport->port_guid_wwn.priv = sport;
+ guid = (__be16 *)&sport->gid.global.interface_id;
snprintf(sport->port_guid, sizeof(sport->port_guid),
- "0x%016llx%016llx",
- be64_to_cpu(sport->gid.global.subnet_prefix),
- be64_to_cpu(sport->gid.global.interface_id));
+ "%04x:%04x:%04x:%04x",
+ be16_to_cpu(guid[0]), be16_to_cpu(guid[1]),
+ be16_to_cpu(guid[2]), be16_to_cpu(guid[3]));
+ sport->port_gid_wwn.priv = sport;
+ snprintf(sport->port_gid, sizeof(sport->port_gid),
+ "0x%016llx%016llx",
+ be64_to_cpu(sport->gid.global.subnet_prefix),
+ be64_to_cpu(sport->gid.global.interface_id));
if (!sport->mad_agent) {
memset(&reg_req, 0, sizeof(reg_req));
@@ -1149,8 +1157,8 @@ static int srpt_abort_cmd(struct srpt_send_ioctx *ioctx)
}
spin_unlock_irqrestore(&ioctx->spinlock, flags);
- pr_debug("Aborting cmd with state %d and tag %lld\n", state,
- ioctx->cmd.tag);
+ pr_debug("Aborting cmd with state %d -> %d and tag %lld\n", state,
+ ioctx->state, ioctx->cmd.tag);
switch (state) {
case SRPT_STATE_NEW:
@@ -1838,6 +1846,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
struct srp_login_rej *rej;
struct ib_cm_rep_param *rep_param;
struct srpt_rdma_ch *ch, *tmp_ch;
+ __be16 *guid;
u32 it_iu_len;
int i, ret = 0;
@@ -1983,26 +1992,30 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
goto destroy_ib;
}
- /*
- * Use the initator port identifier as the session name, when
- * checking against se_node_acl->initiatorname[] this can be
- * with or without preceeding '0x'.
- */
+ guid = (__be16 *)&param->primary_path->sgid.global.interface_id;
+ snprintf(ch->ini_guid, sizeof(ch->ini_guid), "%04x:%04x:%04x:%04x",
+ be16_to_cpu(guid[0]), be16_to_cpu(guid[1]),
+ be16_to_cpu(guid[2]), be16_to_cpu(guid[3]));
snprintf(ch->sess_name, sizeof(ch->sess_name), "0x%016llx%016llx",
be64_to_cpu(*(__be64 *)ch->i_port_id),
be64_to_cpu(*(__be64 *)(ch->i_port_id + 8)));
pr_debug("registering session %s\n", ch->sess_name);
- ch->sess = target_alloc_session(&sport->port_tpg_1, 0, 0,
+ if (sport->port_guid_tpg.se_tpg_wwn)
+ ch->sess = target_alloc_session(&sport->port_guid_tpg, 0, 0,
+ TARGET_PROT_NORMAL,
+ ch->ini_guid, ch, NULL);
+ if (sport->port_gid_tpg.se_tpg_wwn && IS_ERR_OR_NULL(ch->sess))
+ ch->sess = target_alloc_session(&sport->port_gid_tpg, 0, 0,
TARGET_PROT_NORMAL, ch->sess_name, ch,
NULL);
/* Retry without leading "0x" */
- if (IS_ERR(ch->sess))
- ch->sess = target_alloc_session(&sport->port_tpg_1, 0, 0,
+ if (sport->port_gid_tpg.se_tpg_wwn && IS_ERR_OR_NULL(ch->sess))
+ ch->sess = target_alloc_session(&sport->port_gid_tpg, 0, 0,
TARGET_PROT_NORMAL,
ch->sess_name + 2, ch, NULL);
- if (IS_ERR(ch->sess)) {
+ if (IS_ERR_OR_NULL(ch->sess)) {
pr_info("Rejected login because no ACL has been configured yet for initiator %s.\n",
ch->sess_name);
rej->reason = cpu_to_be32((PTR_ERR(ch->sess) == -ENOMEM) ?
@@ -2289,12 +2302,8 @@ static void srpt_queue_response(struct se_cmd *cmd)
}
spin_unlock_irqrestore(&ioctx->spinlock, flags);
- if (unlikely(transport_check_aborted_status(&ioctx->cmd, false)
- || WARN_ON_ONCE(state == SRPT_STATE_CMD_RSP_SENT))) {
- atomic_inc(&ch->req_lim_delta);
- srpt_abort_cmd(ioctx);
+ if (unlikely(WARN_ON_ONCE(state == SRPT_STATE_CMD_RSP_SENT)))
return;
- }
/* For read commands, transfer the data to the initiator. */
if (ioctx->cmd.data_direction == DMA_FROM_DEVICE &&
@@ -2420,7 +2429,7 @@ static int srpt_release_sdev(struct srpt_device *sdev)
return 0;
}
-static struct srpt_port *__srpt_lookup_port(const char *name)
+static struct se_wwn *__srpt_lookup_wwn(const char *name)
{
struct ib_device *dev;
struct srpt_device *sdev;
@@ -2435,23 +2444,25 @@ static struct srpt_port *__srpt_lookup_port(const char *name)
for (i = 0; i < dev->phys_port_cnt; i++) {
sport = &sdev->port[i];
- if (!strcmp(sport->port_guid, name))
- return sport;
+ if (strcmp(sport->port_guid, name) == 0)
+ return &sport->port_guid_wwn;
+ if (strcmp(sport->port_gid, name) == 0)
+ return &sport->port_gid_wwn;
}
}
return NULL;
}
-static struct srpt_port *srpt_lookup_port(const char *name)
+static struct se_wwn *srpt_lookup_wwn(const char *name)
{
- struct srpt_port *sport;
+ struct se_wwn *wwn;
spin_lock(&srpt_dev_lock);
- sport = __srpt_lookup_port(name);
+ wwn = __srpt_lookup_wwn(name);
spin_unlock(&srpt_dev_lock);
- return sport;
+ return wwn;
}
/**
@@ -2464,8 +2475,7 @@ static void srpt_add_one(struct ib_device *device)
struct ib_srq_init_attr srq_attr;
int i;
- pr_debug("device = %p, device->dma_ops = %p\n", device,
- device->dma_ops);
+ pr_debug("device = %p\n", device);
sdev = kzalloc(sizeof(*sdev), GFP_KERNEL);
if (!sdev)
@@ -2643,11 +2653,19 @@ static char *srpt_get_fabric_name(void)
return "srpt";
}
+static struct srpt_port *srpt_tpg_to_sport(struct se_portal_group *tpg)
+{
+ return tpg->se_tpg_wwn->priv;
+}
+
static char *srpt_get_fabric_wwn(struct se_portal_group *tpg)
{
- struct srpt_port *sport = container_of(tpg, struct srpt_port, port_tpg_1);
+ struct srpt_port *sport = srpt_tpg_to_sport(tpg);
- return sport->port_guid;
+ WARN_ON_ONCE(tpg != &sport->port_guid_tpg &&
+ tpg != &sport->port_gid_tpg);
+ return tpg == &sport->port_guid_tpg ? sport->port_guid :
+ sport->port_gid;
}
static u16 srpt_get_tag(struct se_portal_group *tpg)
@@ -2667,7 +2685,8 @@ static void srpt_release_cmd(struct se_cmd *se_cmd)
struct srpt_rdma_ch *ch = ioctx->ch;
unsigned long flags;
- WARN_ON(ioctx->state != SRPT_STATE_DONE);
+ WARN_ON_ONCE(ioctx->state != SRPT_STATE_DONE &&
+ !(ioctx->cmd.transport_state & CMD_T_ABORTED));
if (ioctx->n_rw_ctx) {
srpt_free_rw_ctxs(ch, ioctx);
@@ -2737,6 +2756,19 @@ static int srpt_get_tcm_cmd_state(struct se_cmd *se_cmd)
return srpt_get_cmd_state(ioctx);
}
+static int srpt_parse_guid(u64 *guid, const char *name)
+{
+ u16 w[4];
+ int ret = -EINVAL;
+
+ if (sscanf(name, "%hx:%hx:%hx:%hx", &w[0], &w[1], &w[2], &w[3]) != 4)
+ goto out;
+ *guid = get_unaligned_be64(w);
+ ret = 0;
+out:
+ return ret;
+}
+
/**
* srpt_parse_i_port_id() - Parse an initiator port ID.
* @name: ASCII representation of a 128-bit initiator port ID.
@@ -2772,20 +2804,23 @@ out:
*/
static int srpt_init_nodeacl(struct se_node_acl *se_nacl, const char *name)
{
+ u64 guid;
u8 i_port_id[16];
+ int ret;
- if (srpt_parse_i_port_id(i_port_id, name) < 0) {
+ ret = srpt_parse_guid(&guid, name);
+ if (ret < 0)
+ ret = srpt_parse_i_port_id(i_port_id, name);
+ if (ret < 0)
pr_err("invalid initiator port ID %s\n", name);
- return -EINVAL;
- }
- return 0;
+ return ret;
}
static ssize_t srpt_tpg_attrib_srp_max_rdma_size_show(struct config_item *item,
char *page)
{
struct se_portal_group *se_tpg = attrib_to_tpg(item);
- struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1);
+ struct srpt_port *sport = srpt_tpg_to_sport(se_tpg);
return sprintf(page, "%u\n", sport->port_attrib.srp_max_rdma_size);
}
@@ -2794,7 +2829,7 @@ static ssize_t srpt_tpg_attrib_srp_max_rdma_size_store(struct config_item *item,
const char *page, size_t count)
{
struct se_portal_group *se_tpg = attrib_to_tpg(item);
- struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1);
+ struct srpt_port *sport = srpt_tpg_to_sport(se_tpg);
unsigned long val;
int ret;
@@ -2822,7 +2857,7 @@ static ssize_t srpt_tpg_attrib_srp_max_rsp_size_show(struct config_item *item,
char *page)
{
struct se_portal_group *se_tpg = attrib_to_tpg(item);
- struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1);
+ struct srpt_port *sport = srpt_tpg_to_sport(se_tpg);
return sprintf(page, "%u\n", sport->port_attrib.srp_max_rsp_size);
}
@@ -2831,7 +2866,7 @@ static ssize_t srpt_tpg_attrib_srp_max_rsp_size_store(struct config_item *item,
const char *page, size_t count)
{
struct se_portal_group *se_tpg = attrib_to_tpg(item);
- struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1);
+ struct srpt_port *sport = srpt_tpg_to_sport(se_tpg);
unsigned long val;
int ret;
@@ -2859,7 +2894,7 @@ static ssize_t srpt_tpg_attrib_srp_sq_size_show(struct config_item *item,
char *page)
{
struct se_portal_group *se_tpg = attrib_to_tpg(item);
- struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1);
+ struct srpt_port *sport = srpt_tpg_to_sport(se_tpg);
return sprintf(page, "%u\n", sport->port_attrib.srp_sq_size);
}
@@ -2868,7 +2903,7 @@ static ssize_t srpt_tpg_attrib_srp_sq_size_store(struct config_item *item,
const char *page, size_t count)
{
struct se_portal_group *se_tpg = attrib_to_tpg(item);
- struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1);
+ struct srpt_port *sport = srpt_tpg_to_sport(se_tpg);
unsigned long val;
int ret;
@@ -2906,7 +2941,7 @@ static struct configfs_attribute *srpt_tpg_attrib_attrs[] = {
static ssize_t srpt_tpg_enable_show(struct config_item *item, char *page)
{
struct se_portal_group *se_tpg = to_tpg(item);
- struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1);
+ struct srpt_port *sport = srpt_tpg_to_sport(se_tpg);
return snprintf(page, PAGE_SIZE, "%d\n", (sport->enabled) ? 1: 0);
}
@@ -2915,7 +2950,7 @@ static ssize_t srpt_tpg_enable_store(struct config_item *item,
const char *page, size_t count)
{
struct se_portal_group *se_tpg = to_tpg(item);
- struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1);
+ struct srpt_port *sport = srpt_tpg_to_sport(se_tpg);
struct srpt_device *sdev = sport->sdev;
struct srpt_rdma_ch *ch;
unsigned long tmp;
@@ -2967,15 +3002,19 @@ static struct se_portal_group *srpt_make_tpg(struct se_wwn *wwn,
struct config_group *group,
const char *name)
{
- struct srpt_port *sport = container_of(wwn, struct srpt_port, port_wwn);
+ struct srpt_port *sport = wwn->priv;
+ static struct se_portal_group *tpg;
int res;
- /* Initialize sport->port_wwn and sport->port_tpg_1 */
- res = core_tpg_register(&sport->port_wwn, &sport->port_tpg_1, SCSI_PROTOCOL_SRP);
+ WARN_ON_ONCE(wwn != &sport->port_guid_wwn &&
+ wwn != &sport->port_gid_wwn);
+ tpg = wwn == &sport->port_guid_wwn ? &sport->port_guid_tpg :
+ &sport->port_gid_tpg;
+ res = core_tpg_register(wwn, tpg, SCSI_PROTOCOL_SRP);
if (res)
return ERR_PTR(res);
- return &sport->port_tpg_1;
+ return tpg;
}
/**
@@ -2984,11 +3023,10 @@ static struct se_portal_group *srpt_make_tpg(struct se_wwn *wwn,
*/
static void srpt_drop_tpg(struct se_portal_group *tpg)
{
- struct srpt_port *sport = container_of(tpg,
- struct srpt_port, port_tpg_1);
+ struct srpt_port *sport = srpt_tpg_to_sport(tpg);
sport->enabled = false;
- core_tpg_deregister(&sport->port_tpg_1);
+ core_tpg_deregister(tpg);
}
/**
@@ -2999,19 +3037,7 @@ static struct se_wwn *srpt_make_tport(struct target_fabric_configfs *tf,
struct config_group *group,
const char *name)
{
- struct srpt_port *sport;
- int ret;
-
- sport = srpt_lookup_port(name);
- pr_debug("make_tport(%s)\n", name);
- ret = -EINVAL;
- if (!sport)
- goto err;
-
- return &sport->port_wwn;
-
-err:
- return ERR_PTR(ret);
+ return srpt_lookup_wwn(name) ? : ERR_PTR(-EINVAL);
}
/**
@@ -3020,9 +3046,6 @@ err:
*/
static void srpt_drop_tport(struct se_wwn *wwn)
{
- struct srpt_port *sport = container_of(wwn, struct srpt_port, port_wwn);
-
- pr_debug("drop_tport(%s\n", config_item_name(&sport->port_wwn.wwn_group.cg_item));
}
static ssize_t srpt_wwn_version_show(struct config_item *item, char *buf)
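
srpt_refresh_port() above renders the port GUID as four colon-separated 16-bit groups ("%04x:%04x:%04x:%04x"), and srpt_init_nodeacl() now accepts that same format via srpt_parse_guid(), falling back to the 128-bit initiator-port-ID parser for compatibility. In the patch the parsed u64 only validates the format; the sketch below shows both directions with hypothetical helper names (fmt_port_guid, parse_port_guid) and a value-preserving parse:

#include <linux/kernel.h>	/* snprintf(), sscanf() */
#include <linux/errno.h>
#include <linux/types.h>
#include <asm/byteorder.h>

/* Format a big-endian 64-bit GUID as "xxxx:xxxx:xxxx:xxxx". */
static void fmt_port_guid(char *buf, size_t len, __be64 guid)
{
	__be16 *w = (__be16 *)&guid;

	snprintf(buf, len, "%04x:%04x:%04x:%04x",
		 be16_to_cpu(w[0]), be16_to_cpu(w[1]),
		 be16_to_cpu(w[2]), be16_to_cpu(w[3]));
}

/* Parse the same format back into a host-order u64. */
static int parse_port_guid(const char *name, u64 *guid)
{
	u16 w[4];

	if (sscanf(name, "%hx:%hx:%hx:%hx", &w[0], &w[1], &w[2], &w[3]) != 4)
		return -EINVAL;
	*guid = (u64)w[0] << 48 | (u64)w[1] << 32 | (u64)w[2] << 16 | w[3];
	return 0;
}
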
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h
index 581878782854..cc1183851af5 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.h
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.h
@@ -258,6 +258,7 @@ enum rdma_ch_state {
* against concurrent modification by the cm_id spinlock.
* @sess: Session information associated with this SRP channel.
* @sess_name: Session name.
+ * @ini_guid: Initiator port GUID.
* @release_work: Allows scheduling of srpt_release_channel().
* @release_done: Enables waiting for srpt_release_channel() completion.
*/
@@ -284,6 +285,7 @@ struct srpt_rdma_ch {
struct list_head cmd_wait_list;
struct se_session *sess;
u8 sess_name[36];
+ u8 ini_guid[24];
struct work_struct release_work;
struct completion *release_done;
};
@@ -306,28 +308,34 @@ struct srpt_port_attrib {
* @mad_agent: per-port management datagram processing information.
* @enabled: Whether or not this target port is enabled.
* @port_guid: ASCII representation of Port GUID
+ * @port_gid: ASCII representation of Port GID
* @port: one-based port number.
* @sm_lid: cached value of the port's sm_lid.
* @lid: cached value of the port's lid.
* @gid: cached value of the port's gid.
* @port_acl_lock spinlock for port_acl_list:
* @work: work structure for refreshing the aforementioned cached values.
- * @port_tpg_1 Target portal group = 1 data.
- * @port_wwn: Target core WWN data.
+ * @port_guid_tpg: TPG associated with target port GUID.
+ * @port_guid_wwn: WWN associated with target port GUID.
+ * @port_gid_tpg: TPG associated with target port GID.
+ * @port_gid_wwn: WWN associated with target port GID.
* @port_acl_list: Head of the list with all node ACLs for this port.
*/
struct srpt_port {
struct srpt_device *sdev;
struct ib_mad_agent *mad_agent;
bool enabled;
- u8 port_guid[64];
+ u8 port_guid[24];
+ u8 port_gid[64];
u8 port;
u16 sm_lid;
u16 lid;
union ib_gid gid;
struct work_struct work;
- struct se_portal_group port_tpg_1;
- struct se_wwn port_wwn;
+ struct se_portal_group port_guid_tpg;
+ struct se_wwn port_guid_wwn;
+ struct se_portal_group port_gid_tpg;
+ struct se_wwn port_gid_wwn;
struct srpt_port_attrib port_attrib;
};
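
With both a GUID-based and a GID-based WWN registered per port (port_guid_wwn and port_gid_wwn above), the same physical port can be configured under the target configfs tree by either name: srpt_make_tpg() selects the TPG matching the WWN it was invoked on, and srpt_tpg_to_sport() recovers the srpt_port through tpg->se_tpg_wwn->priv regardless of which WWN the TPG hangs off. Hypothetical example paths (made-up GUID/GID values) that would both resolve to the same port:

	/sys/kernel/config/target/srpt/5054:00ff:fe98:7654/tpg_1
	/sys/kernel/config/target/srpt/0xfe80000000000000505400fffe987654/tpg_1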