From f3781d2e89f12dd5afa046dc56032af6e39bd116 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Mon, 14 Jul 2008 23:48:44 -0700 Subject: RDMA: Remove subversion $Id tags They don't get updated by git and so they're worse than useless. Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers/infiniband/ulp/ipoib/ipoib.h') diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index ca126fc2b853..0dcbab3203c9 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -30,8 +30,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: ipoib.h 1358 2004-12-17 22:00:11Z roland $ */ #ifndef _IPOIB_H -- cgit v1.2.1 From f89271da32bc1a636cf4eb078e615930886cd013 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 14 Jul 2008 23:48:44 -0700 Subject: IPoIB: Copy small received SKBs in connected mode The connected mode implementation in the IPoIB driver has a large overhead in the way SKBs are handled in the receive flow. It usually allocates an SKB as big as the one used for the currently received SKB and moves unused fragments from the old SKB to the new one. This involves a loop on all the remaining fragments and incurs overhead on the CPU. This patch, for small SKBs, allocates an SKB just large enough to contain the received data and copies the data from the received SKB into it. The newly allocated SKB is passed to the stack and the old SKB is reposted. 
When running netperf, UDP small messages, without this patch I get: UDP UNIDIRECTIONAL SEND TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 14.4.3.178 (14.4.3.178) port 0 AF_INET Socket Message Elapsed Messages Size Size Time Okay Errors Throughput bytes bytes secs # # 10^6bits/sec 114688 128 10.00 5142034 0 526.31 114688 10.00 1130489 115.71 With this patch I get both send and receive at ~315 mbps. The reason that send performance actually slows down is as follows: When using this patch, the overhead of the CPU for handling RX packets is dramatically reduced. As a result, we do not experience RNR NAK messages from the receiver which cause the connection to be closed and reopened again; when the patch is not used, the receiver cannot handle the packets fast enough so there is less time to post new buffers and hence the mentioned RNR NAKs. So what happens is that the application *thinks* it posted a certain number of packets for transmission but these packets are flushed and do not really get transmitted. Since the connection gets opened and closed many times, each time netperf gets the CPU time that otherwise would have been given to IPoIB to actually transmit the packets. 
This can be verified when looking at the port counters -- the output of ifconfig and the output of netperf (this is for the case without the patch): tx packets ========== port counter: 1,543,996 ifconfig: 1,581,426 netperf: 5,142,034 rx packets ========== netperf: 1,130,489 Signed-off-by: Eli Cohen --- drivers/infiniband/ulp/ipoib/ipoib.h | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/infiniband/ulp/ipoib/ipoib.h') diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 0dcbab3203c9..8754b364f229 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -95,6 +95,7 @@ enum { IPOIB_MCAST_FLAG_ATTACHED = 3, MAX_SEND_CQE = 16, + IPOIB_CM_COPYBREAK = 256, }; #define IPOIB_OP_RECV (1ul << 31) -- cgit v1.2.1 From af40da894e96d5c826d38be3ea53ee00d9de0367 Mon Sep 17 00:00:00 2001 From: Vladimir Sokolovsky Date: Mon, 14 Jul 2008 23:48:48 -0700 Subject: IPoIB: add LRO support Add "ipoib_use_lro" module parameter to enable LRO and an "ipoib_lro_max_aggr" module parameter to set the max number of packets to be aggregated. Make LRO controllable and LRO statistics accessible through ethtool. 
Signed-off-by: Vladimir Sokolovsky Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'drivers/infiniband/ulp/ipoib/ipoib.h') diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 8754b364f229..2c522572e3c5 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -50,6 +50,7 @@ #include #include #include +#include /* constants */ @@ -94,6 +95,9 @@ enum { IPOIB_MCAST_FLAG_BUSY = 2, /* joining or already joined */ IPOIB_MCAST_FLAG_ATTACHED = 3, + IPOIB_MAX_LRO_DESCRIPTORS = 8, + IPOIB_LRO_MAX_AGGR = 64, + MAX_SEND_CQE = 16, IPOIB_CM_COPYBREAK = 256, }; @@ -248,6 +252,11 @@ struct ipoib_ethtool_st { u16 max_coalesced_frames; }; +struct ipoib_lro { + struct net_lro_mgr lro_mgr; + struct net_lro_desc lro_desc[IPOIB_MAX_LRO_DESCRIPTORS]; +}; + /* * Device private locking: tx_lock protects members used in TX fast * path (and we use LLTX so upper layers don't do extra locking). @@ -334,6 +343,8 @@ struct ipoib_dev_priv { int hca_caps; struct ipoib_ethtool_st ethtool; struct timer_list poll_timer; + + struct ipoib_lro lro; }; struct ipoib_ah { -- cgit v1.2.1 From ee1e2c82c245a5fb2864e9dbcdaab3390fde3fcc Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Mon, 14 Jul 2008 23:48:49 -0700 Subject: IPoIB: Refresh paths instead of flushing them on SM change events The patch tries to solve the problem of the device going down and paths being flushed on an SM change event. The method is to mark the paths as candidates for refresh (by setting the new valid flag to 0), and wait for an ARP probe to trigger a new path record query. The solution requires a different and less intrusive handling of SM change events. For that, the second argument of the flush function changes its meaning from a boolean flag to a level. In most cases, SM failover doesn't cause LID change so traffic won't stop. 
In the rare cases of LID change, the remote host (the one that hadn't changed its LID) will lose connectivity until paths are refreshed. This is no worse than the current state. In fact, preventing the device from going down saves packets that otherwise would be lost. Signed-off-by: Moni Levy Signed-off-by: Moni Shoua Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib.h | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband/ulp/ipoib/ipoib.h') diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 2c522572e3c5..bb19587c5eaf 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -54,6 +54,12 @@ /* constants */ +enum ipoib_flush_level { + IPOIB_FLUSH_LIGHT, + IPOIB_FLUSH_NORMAL, + IPOIB_FLUSH_HEAVY +}; + enum { IPOIB_ENCAP_LEN = 4, @@ -284,10 +290,11 @@ struct ipoib_dev_priv { struct delayed_work pkey_poll_task; struct delayed_work mcast_task; - struct work_struct flush_task; + struct work_struct flush_light; + struct work_struct flush_normal; + struct work_struct flush_heavy; struct work_struct restart_task; struct delayed_work ah_reap_task; - struct work_struct pkey_event_task; struct ib_device *ca; u8 port; @@ -369,6 +376,7 @@ struct ipoib_path { struct rb_node rb_node; struct list_head list; + int valid; }; struct ipoib_neigh { @@ -433,11 +441,14 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_ah *address, u32 qpn); void ipoib_reap_ah(struct work_struct *work); +void ipoib_mark_paths_invalid(struct net_device *dev); void ipoib_flush_paths(struct net_device *dev); struct ipoib_dev_priv *ipoib_intf_alloc(const char *format); int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port); -void ipoib_ib_dev_flush(struct work_struct *work); +void ipoib_ib_dev_flush_light(struct work_struct *work); +void ipoib_ib_dev_flush_normal(struct work_struct *work); +void 
ipoib_ib_dev_flush_heavy(struct work_struct *work); void ipoib_pkey_event(struct work_struct *work); void ipoib_ib_dev_cleanup(struct net_device *dev); -- cgit v1.2.1 From c03d4731b5b6de45b95a10bf1d510dde423d6757 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 14 Jul 2008 23:48:50 -0700 Subject: IPoIB: Remove unused IPOIB_MCAST_STARTED code The IPOIB_MCAST_STARTED flag is not used at all since commit b3e2749b ("IPoIB: Don't drop multicast sends when they can be queued"), so remove it. Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib.h | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/infiniband/ulp/ipoib/ipoib.h') diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index bb19587c5eaf..66a897567ea9 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -89,7 +89,6 @@ enum { IPOIB_FLAG_SUBINTERFACE = 5, IPOIB_MCAST_RUN = 6, IPOIB_STOP_REAPER = 7, - IPOIB_MCAST_STARTED = 8, IPOIB_FLAG_ADMIN_CM = 9, IPOIB_FLAG_UMCAST = 10, IPOIB_FLAG_CSUM = 11, -- cgit v1.2.1 From 5892eff91ad60ba365ae7f75050ce464036c5396 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 14 Jul 2008 23:48:50 -0700 Subject: IPoIB: Remove priv->mcast_mutex No need for a mutex around calls to ib_attach_mcast/ib_detach_mcast since these operations are synchronized at the HW driver layer. 
Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib.h | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/infiniband/ulp/ipoib/ipoib.h') diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 66a897567ea9..b8753222c870 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -277,7 +277,6 @@ struct ipoib_dev_priv { unsigned long flags; - struct mutex mcast_mutex; struct mutex vlan_mutex; struct rb_root path_tree; -- cgit v1.2.1 From d0de13622d5ac658efe7c51521dbdbe0752aa3dd Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 14 Jul 2008 23:48:50 -0700 Subject: IPoIB: Only set Q_Key once: after joining broadcast group The current code will set the Q_Key for any join of a non-sendonly multicast group. The operation involves a modify QP operation, which is fairly heavyweight, and is only really required after the join of the broadcast group. Fix this by adding a parameter to ipoib_mcast_attach() to control when the Q_Key is set. 
Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/ulp/ipoib/ipoib.h') diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index b8753222c870..7b46e2d7b3c2 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -485,7 +485,7 @@ void ipoib_path_iter_read(struct ipoib_path_iter *iter, #endif int ipoib_mcast_attach(struct net_device *dev, u16 mlid, - union ib_gid *mgid); + union ib_gid *mgid, int set_qkey); int ipoib_mcast_detach(struct net_device *dev, u16 mlid, union ib_gid *mgid); -- cgit v1.2.1 From 9eae554c171e086c89ab83da2a2d3c8bf958fcb5 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Mon, 14 Jul 2008 23:48:50 -0700 Subject: IPoIB: Get rid of ipoib_mcast_detach() wrapper ipoib_mcast_detach() does nothing except call ib_detach_mcast(), so just use the core API in the one place that does a multicast group detach. 
add/remove: 0/1 grow/shrink: 0/1 up/down: 0/-105 (-105) function old new delta ipoib_mcast_leave 357 319 -38 ipoib_mcast_detach 67 - -67 Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers/infiniband/ulp/ipoib/ipoib.h') diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 7b46e2d7b3c2..a89b9fbe1ef4 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -486,8 +486,6 @@ void ipoib_path_iter_read(struct ipoib_path_iter *iter, int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid, int set_qkey); -int ipoib_mcast_detach(struct net_device *dev, u16 mlid, - union ib_gid *mgid); int ipoib_init_qp(struct net_device *dev); int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca); -- cgit v1.2.1 From e112373fd6aa280bd2cbc0d5cc3809115325a1be Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 14 Jul 2008 23:48:52 -0700 Subject: IPoIB/cm: Reduce connected mode TX object size Since IPoIB connected mode does not support NETIF_F_SG, we only have one DMA mapping per send, so we don't need a mapping[] array. Define a new struct with a single u64 mapping member and use it for the CM tx_ring. 
Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband/ulp/ipoib/ipoib.h') diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index a89b9fbe1ef4..0281c8fecc90 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -157,6 +157,11 @@ struct ipoib_tx_buf { u64 mapping[MAX_SKB_FRAGS + 1]; }; +struct ipoib_cm_tx_buf { + struct sk_buff *skb; + u64 mapping; +}; + struct ib_cm_id; struct ipoib_cm_data { @@ -215,7 +220,7 @@ struct ipoib_cm_tx { struct net_device *dev; struct ipoib_neigh *neigh; struct ipoib_path *path; - struct ipoib_tx_buf *tx_ring; + struct ipoib_cm_tx_buf *tx_ring; unsigned tx_head; unsigned tx_tail; unsigned long flags; -- cgit v1.2.1 From bc3a290b51aaefc6a6af2d6e6d52ed32387c416c Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 14 Jul 2008 23:48:52 -0700 Subject: IPoIB: Double default RX/TX ring sizes Increase IPoIB ring sizes to twice their original sizes (RX: 128->256, TX: 64->128) to act as a shock absorber for high traffic peaks. With the current settings, we have seen cases that there are many calls to netif_stop_queue(), which causes degradation in throughput. Also, larger receive buffer sizes help IPoIB in CM mode to avoid experiencing RNR NAK conditions due to insufficient receive buffers at the SRQ. 
Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/ulp/ipoib/ipoib.h') diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 0281c8fecc90..b0ffc9abe8c0 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -70,8 +70,8 @@ enum { IPOIB_CM_BUF_SIZE = IPOIB_CM_MTU + IPOIB_ENCAP_LEN, IPOIB_CM_HEAD_SIZE = IPOIB_CM_BUF_SIZE % PAGE_SIZE, IPOIB_CM_RX_SG = ALIGN(IPOIB_CM_BUF_SIZE, PAGE_SIZE) / PAGE_SIZE, - IPOIB_RX_RING_SIZE = 128, - IPOIB_TX_RING_SIZE = 64, + IPOIB_RX_RING_SIZE = 256, + IPOIB_TX_RING_SIZE = 128, IPOIB_MAX_QUEUE_SIZE = 8192, IPOIB_MIN_QUEUE_SIZE = 2, IPOIB_CM_MAX_CONN_QP = 4096, -- cgit v1.2.1