Diffstat (limited to 'drivers/infiniband/hw/ipath')
-rw-r--r--  drivers/infiniband/hw/ipath/Kconfig | 21
-rw-r--r--  drivers/infiniband/hw/ipath/Makefile | 31
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_common.h | 219
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_cq.c | 213
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_debug.h | 3
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_diag.c | 253
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_driver.c | 1086
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_eeprom.c | 45
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_file_ops.c | 1215
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_fs.c | 28
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_iba6110.c (renamed from drivers/infiniband/hw/ipath/ipath_ht400.c) | 192
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_iba6120.c (renamed from drivers/infiniband/hw/ipath/ipath_pe800.c) | 344
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_init_chip.c | 143
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_intr.c | 536
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_kernel.h | 229
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_keys.c | 81
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_layer.c | 1175
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_layer.h | 112
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_mad.c | 472
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_mmap.c | 122
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_mr.c | 21
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_qp.c | 346
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_rc.c | 959
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_registers.h | 48
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_ruc.c | 417
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_srq.c | 261
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_stats.c | 29
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_sysfs.c | 177
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_uc.c | 172
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_ud.c | 219
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_user_pages.c | 68
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_verbs.c | 860
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_verbs.h | 350
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_verbs_mcast.c | 46
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_wc_ppc64.c | 62
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_wc_x86_64.c | 14
-rw-r--r--  drivers/infiniband/hw/ipath/ips_common.h | 263
-rw-r--r--  drivers/infiniband/hw/ipath/verbs_debug.h | 107
38 files changed, 6046 insertions, 4893 deletions
diff --git a/drivers/infiniband/hw/ipath/Kconfig b/drivers/infiniband/hw/ipath/Kconfig
index 9ea67c409b6d..574a678e7fdd 100644
--- a/drivers/infiniband/hw/ipath/Kconfig
+++ b/drivers/infiniband/hw/ipath/Kconfig
@@ -1,16 +1,9 @@
-config IPATH_CORE
- tristate "PathScale InfiniPath Driver"
- depends on 64BIT && PCI_MSI && NET
- ---help---
- This is a low-level driver for PathScale InfiniPath host channel
- adapters (HCAs) based on the HT-400 and PE-800 chips.
-
config INFINIBAND_IPATH
- tristate "PathScale InfiniPath Verbs Driver"
- depends on IPATH_CORE && INFINIBAND
+ tristate "QLogic InfiniPath Driver"
+ depends on PCI_MSI && 64BIT && INFINIBAND
---help---
- This is a driver that provides InfiniBand verbs support for
- PathScale InfiniPath host channel adapters (HCAs). This
- allows these devices to be used with both kernel upper level
- protocols such as IP-over-InfiniBand as well as with userspace
- applications (in conjunction with InfiniBand userspace access).
+ This is a driver for QLogic InfiniPath host channel adapters,
+ including InfiniBand verbs support. This driver allows these
+ devices to be used with both kernel upper level protocols such
+ as IP-over-InfiniBand as well as with userspace applications
+ (in conjunction with InfiniBand userspace access).
diff --git a/drivers/infiniband/hw/ipath/Makefile b/drivers/infiniband/hw/ipath/Makefile
index b4d084abfd22..5e29cb0095e5 100644
--- a/drivers/infiniband/hw/ipath/Makefile
+++ b/drivers/infiniband/hw/ipath/Makefile
@@ -1,36 +1,35 @@
-EXTRA_CFLAGS += -DIPATH_IDSTR='"PathScale kernel.org driver"' \
+EXTRA_CFLAGS += -DIPATH_IDSTR='"QLogic kernel.org driver"' \
-DIPATH_KERN_TYPE=0
-obj-$(CONFIG_IPATH_CORE) += ipath_core.o
obj-$(CONFIG_INFINIBAND_IPATH) += ib_ipath.o
-ipath_core-y := \
+ib_ipath-y := \
+ ipath_cq.o \
ipath_diag.o \
ipath_driver.o \
ipath_eeprom.o \
ipath_file_ops.o \
ipath_fs.o \
- ipath_ht400.o \
+ ipath_iba6110.o \
+ ipath_iba6120.o \
ipath_init_chip.o \
ipath_intr.o \
- ipath_layer.o \
- ipath_pe800.o \
- ipath_stats.o \
- ipath_sysfs.o \
- ipath_user_pages.o
-
-ipath_core-$(CONFIG_X86_64) += ipath_wc_x86_64.o
-
-ib_ipath-y := \
- ipath_cq.o \
ipath_keys.o \
+ ipath_layer.o \
ipath_mad.o \
+ ipath_mmap.o \
ipath_mr.o \
ipath_qp.o \
ipath_rc.o \
ipath_ruc.o \
ipath_srq.o \
+ ipath_stats.o \
+ ipath_sysfs.o \
ipath_uc.o \
ipath_ud.o \
- ipath_verbs.o \
- ipath_verbs_mcast.o
+ ipath_user_pages.o \
+ ipath_verbs_mcast.o \
+ ipath_verbs.o
+
+ib_ipath-$(CONFIG_X86_64) += ipath_wc_x86_64.o
+ib_ipath-$(CONFIG_PPC64) += ipath_wc_ppc64.o
diff --git a/drivers/infiniband/hw/ipath/ipath_common.h b/drivers/infiniband/hw/ipath/ipath_common.h
index 48a55247b832..54139d398181 100644
--- a/drivers/infiniband/hw/ipath/ipath_common.h
+++ b/drivers/infiniband/hw/ipath/ipath_common.h
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2006 QLogic, Inc. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -38,7 +39,8 @@
* to communicate between kernel and user code.
*/
-/* This is the IEEE-assigned OUI for PathScale, Inc. */
+
+/* This is the IEEE-assigned OUI for QLogic Inc. InfiniPath */
#define IPATH_SRC_OUI_1 0x00
#define IPATH_SRC_OUI_2 0x11
#define IPATH_SRC_OUI_3 0x75
@@ -96,17 +98,17 @@ struct infinipath_stats {
__u64 sps_hwerrs;
/* number of times IB link changed state unexpectedly */
__u64 sps_iblink;
- /* no longer used; left for compatibility */
- __u64 sps_unused3;
+ /* kernel receive interrupts that didn't read intstat */
+ __u64 sps_fastrcvint;
/* number of kernel (port0) packets received */
__u64 sps_port0pkts;
/* number of "ethernet" packets sent by driver */
__u64 sps_ether_spkts;
/* number of "ethernet" packets received by driver */
__u64 sps_ether_rpkts;
- /* number of SMA packets sent by driver */
+ /* number of SMA packets sent by driver. Obsolete. */
__u64 sps_sma_spkts;
- /* number of SMA packets received by driver */
+ /* number of SMA packets received by driver. Obsolete. */
__u64 sps_sma_rpkts;
/* number of times all ports rcvhdrq was full and packet dropped */
__u64 sps_hdrqfull;
@@ -121,8 +123,7 @@ struct infinipath_stats {
__u64 sps_ports;
/* list of pkeys (other than default) accepted (0 means not set) */
__u16 sps_pkeys[4];
- /* lids for up to 4 infinipaths, indexed by infinipath # */
- __u16 sps_lid[4];
+ __u16 sps_unused16[4]; /* available; maintaining compatible layout */
/* number of user ports per chip (not IB ports) */
__u32 sps_nports;
/* not our interrupt, or already handled */
@@ -137,11 +138,10 @@ struct infinipath_stats {
__u64 sps_pageunlocks;
/*
* Number of packets dropped in kernel other than errors (ether
- * packets if ipath not configured, sma/mad, etc.)
+ * packets if ipath not configured, etc.)
*/
__u64 sps_krdrops;
- /* mlids for up to 4 infinipaths, indexed by infinipath # */
- __u16 sps_mlid[4];
+ __u64 sps_txeparity; /* PIO buffer parity error, recovered */
/* pad for future growth */
__u64 __sps_pad[45];
};
@@ -154,8 +154,6 @@ struct infinipath_stats {
#define IPATH_STATUS_DISABLED 0x2 /* hardware disabled */
/* Device has been disabled via admin request */
#define IPATH_STATUS_ADMIN_DISABLED 0x4
-#define IPATH_STATUS_OIB_SMA 0x8 /* ipath_mad kernel SMA running */
-#define IPATH_STATUS_SMA 0x10 /* user SMA running */
/* Chip has been found and initted */
#define IPATH_STATUS_CHIP_PRESENT 0x20
/* IB link is at ACTIVE, usable for data traffic */
@@ -188,6 +186,9 @@ typedef enum _ipath_ureg {
#define IPATH_RUNTIME_PCIE 0x2
#define IPATH_RUNTIME_FORCE_WC_ORDER 0x4
#define IPATH_RUNTIME_RCVHDR_COPY 0x8
+#define IPATH_RUNTIME_MASTER 0x10
+#define IPATH_RUNTIME_PBC_REWRITE 0x20
+#define IPATH_RUNTIME_LOOSE_DMA_ALIGN 0x40
/*
* This structure is returned by ipath_userinit() immediately after
@@ -205,7 +206,8 @@ struct ipath_base_info {
/* version of software, for feature checking. */
__u32 spi_sw_version;
/* InfiniPath port assigned, goes into sent packets */
- __u32 spi_port;
+ __u16 spi_port;
+ __u16 spi_subport;
/*
* IB MTU, packets IB data must be less than this.
* The MTU is in bytes, and will be a multiple of 4 bytes.
@@ -221,7 +223,7 @@ struct ipath_base_info {
__u32 spi_tidcnt;
/* size of the TID Eager list in infinipath, in entries */
__u32 spi_tidegrcnt;
- /* size of a single receive header queue entry. */
+ /* size of a single receive header queue entry in words. */
__u32 spi_rcvhdrent_size;
/*
* Count of receive header queue entries allocated.
@@ -310,6 +312,15 @@ struct ipath_base_info {
__u32 spi_rcv_egrchunksize;
/* total size of mmap to cover full rcvegrbuffers */
__u32 spi_rcv_egrbuftotlen;
+ __u32 spi_filler_for_align;
+ /* address of readonly memory copy of the rcvhdrq tail register. */
+ __u64 spi_rcvhdr_tailaddr;
+
+ /* shared memory pages for subports if IPATH_RUNTIME_MASTER is set */
+ __u64 spi_subport_uregbase;
+ __u64 spi_subport_rcvegrbuf;
+ __u64 spi_subport_rcvhdr_base;
+
} __attribute__ ((aligned(8)));
@@ -328,12 +339,12 @@ struct ipath_base_info {
/*
* Minor version differences are always compatible
- * a within a major version, however if if user software is larger
+ * a within a major version, however if user software is larger
* than driver software, some new features and/or structure fields
* may not be implemented; the user code must deal with this if it
- * cares, or it must abort after initialization reports the difference
+ * cares, or it must abort after initialization reports the difference.
*/
-#define IPATH_USER_SWMINOR 2
+#define IPATH_USER_SWMINOR 3
#define IPATH_USER_SWVERSION ((IPATH_USER_SWMAJOR<<16) | IPATH_USER_SWMINOR)
@@ -342,9 +353,9 @@ struct ipath_base_info {
/*
* Similarly, this is the kernel version going back to the user. It's
* slightly different, in that we want to tell if the driver was built as
- * part of a PathScale release, or from the driver from OpenIB, kernel.org,
- * or a standard distribution, for support reasons. The high bit is 0 for
- * non-PathScale, and 1 for PathScale-built/supplied.
+ * part of a QLogic release, or from the driver from openfabrics.org,
+ * kernel.org, or a standard distribution, for support reasons.
+ * The high bit is 0 for non-QLogic and 1 for QLogic-built/supplied.
*
* It's returned by the driver to the user code during initialization in the
* spi_sw_version field of ipath_base_info, so the user code can in turn
@@ -380,12 +391,15 @@ struct ipath_user_info {
__u32 spu_rcvhdrsize;
/*
- * cache line aligned (64 byte) user address to
- * which the rcvhdrtail register will be written by infinipath
- * whenever it changes, so that no chip registers are read in
- * the performance path.
+ * If two or more processes wish to share a port, each process
+ * must set the spu_subport_cnt and spu_subport_id to the same
+ * values. The only restriction on the spu_subport_id is that
+ * it be unique for a given node.
*/
- __u64 spu_rcvhdraddr;
+ __u16 spu_subport_cnt;
+ __u16 spu_subport_id;
+
+ __u32 spu_unused; /* kept for compatible layout */
/*
* address of struct base_info to write to
@@ -398,19 +412,25 @@ struct ipath_user_info {
#define IPATH_CMD_MIN 16
-#define IPATH_CMD_USER_INIT 16 /* set up userspace */
+#define __IPATH_CMD_USER_INIT 16 /* old set up userspace (for old user code) */
#define IPATH_CMD_PORT_INFO 17 /* find out what resources we got */
#define IPATH_CMD_RECV_CTRL 18 /* control receipt of packets */
#define IPATH_CMD_TID_UPDATE 19 /* update expected TID entries */
#define IPATH_CMD_TID_FREE 20 /* free expected TID entries */
#define IPATH_CMD_SET_PART_KEY 21 /* add partition key */
+#define IPATH_CMD_SLAVE_INFO 22 /* return info on slave processes */
+#define IPATH_CMD_ASSIGN_PORT 23 /* allocate HCA and port */
+#define IPATH_CMD_USER_INIT 24 /* set up userspace */
-#define IPATH_CMD_MAX 21
+#define IPATH_CMD_MAX 24
struct ipath_port_info {
__u32 num_active; /* number of active units */
__u32 unit; /* unit (chip) assigned to caller */
- __u32 port; /* port on unit assigned to caller */
+ __u16 port; /* port on unit assigned to caller */
+ __u16 subport; /* subport on unit assigned to caller */
+ __u16 num_ports; /* number of ports available on unit */
+ __u16 num_subports; /* number of subport slaves opened on port */
};
struct ipath_tid_info {
@@ -441,6 +461,8 @@ struct ipath_cmd {
__u32 recv_ctrl;
/* partition key to set */
__u16 part_key;
+ /* user address of __u32 bitmask of active slaves */
+ __u64 slave_mask_addr;
} cmd;
};
@@ -469,19 +491,18 @@ struct __ipath_sendpkt {
struct ipath_iovec sps_iov[4];
};
-/* Passed into SMA special file's ->read and ->write methods. */
-struct ipath_sma_pkt
-{
- __u32 unit; /* unit on which to send packet */
- __u64 data; /* address of payload in userspace */
- __u32 len; /* length of payload */
+/* Passed into diag data special file's ->write method. */
+struct ipath_diag_pkt {
+ __u32 unit;
+ __u64 data;
+ __u32 len;
};
/*
* Data layout in I2C flash (for GUID, etc.)
* All fields are little-endian binary unless otherwise stated
*/
-#define IPATH_FLASH_VERSION 1
+#define IPATH_FLASH_VERSION 2
struct ipath_flash {
/* flash layout version (IPATH_FLASH_VERSION) */
__u8 if_fversion;
@@ -489,14 +510,14 @@ struct ipath_flash {
__u8 if_csum;
/*
* valid length (in use, protected by if_csum), including
- * if_fversion and if_sum themselves)
+ * if_fversion and if_csum themselves)
*/
__u8 if_length;
/* the GUID, in network order */
__u8 if_guid[8];
/* number of GUIDs to use, starting from if_guid */
__u8 if_numguid;
- /* the board serial number, in ASCII */
+ /* the (last 10 characters of) board serial number, in ASCII */
char if_serial[12];
/* board mfg date (YYYYMMDD ASCII) */
char if_mfgdate[8];
@@ -508,8 +529,10 @@ struct ipath_flash {
__u8 if_powerhour[2];
/* ASCII free-form comment field */
char if_comment[32];
- /* 78 bytes used, min flash size is 128 bytes */
- __u8 if_future[50];
+ /* Backwards compatible prefix for longer QLogic Serial Numbers */
+ char if_sprefix[4];
+ /* 82 bytes used, min flash size is 128 bytes */
+ __u8 if_future[46];
};
/*
@@ -601,16 +624,124 @@ struct infinipath_counters {
/* K_PktFlags bits */
#define INFINIPATH_KPF_INTR 0x1
+#define INFINIPATH_KPF_SUBPORT_MASK 0x3
+#define INFINIPATH_KPF_SUBPORT_SHIFT 1
+
+#define INFINIPATH_MAX_SUBPORT 4
/* SendPIO per-buffer control */
-#define INFINIPATH_SP_LENGTHP1_MASK 0x3FF
-#define INFINIPATH_SP_LENGTHP1_SHIFT 0
-#define INFINIPATH_SP_INTR 0x80000000
-#define INFINIPATH_SP_TEST 0x40000000
-#define INFINIPATH_SP_TESTEBP 0x20000000
+#define INFINIPATH_SP_TEST 0x40
+#define INFINIPATH_SP_TESTEBP 0x20
/* SendPIOAvail bits */
#define INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT 1
#define INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT 0
+/* infinipath header format */
+struct ipath_header {
+ /*
+ * Version - 4 bits, Port - 4 bits, TID - 10 bits and Offset -
+ * 14 bits before ECO change ~28 Dec 03. After that, Vers 4,
+ * Port 4, TID 11, offset 13.
+ */
+ __le32 ver_port_tid_offset;
+ __le16 chksum;
+ __le16 pkt_flags;
+};
+
+/* infinipath user message header format.
+ * This structure contains the first 4 fields common to all protocols
+ * that employ infinipath.
+ */
+struct ipath_message_header {
+ __be16 lrh[4];
+ __be32 bth[3];
+ /* fields below this point are in host byte order */
+ struct ipath_header iph;
+ __u8 sub_opcode;
+};
+
+/* infinipath ethernet header format */
+struct ether_header {
+ __be16 lrh[4];
+ __be32 bth[3];
+ struct ipath_header iph;
+ __u8 sub_opcode;
+ __u8 cmd;
+ __be16 lid;
+ __u16 mac[3];
+ __u8 frag_num;
+ __u8 seq_num;
+ __le32 len;
+ /* MUST be of word size due to PIO write requirements */
+ __le32 csum;
+ __le16 csum_offset;
+ __le16 flags;
+ __u16 first_2_bytes;
+ __u8 unused[2]; /* currently unused */
+};
+
+
+/* IB - LRH header consts */
+#define IPATH_LRH_GRH 0x0003 /* 1. word of IB LRH - next header: GRH */
+#define IPATH_LRH_BTH 0x0002 /* 1. word of IB LRH - next header: BTH */
+
+/* misc. */
+#define SIZE_OF_CRC 1
+
+#define IPATH_DEFAULT_P_KEY 0xFFFF
+#define IPATH_PERMISSIVE_LID 0xFFFF
+#define IPATH_AETH_CREDIT_SHIFT 24
+#define IPATH_AETH_CREDIT_MASK 0x1F
+#define IPATH_AETH_CREDIT_INVAL 0x1F
+#define IPATH_PSN_MASK 0xFFFFFF
+#define IPATH_MSN_MASK 0xFFFFFF
+#define IPATH_QPN_MASK 0xFFFFFF
+#define IPATH_MULTICAST_LID_BASE 0xC000
+#define IPATH_MULTICAST_QPN 0xFFFFFF
+
+/* Receive Header Queue: receive type (from infinipath) */
+#define RCVHQ_RCV_TYPE_EXPECTED 0
+#define RCVHQ_RCV_TYPE_EAGER 1
+#define RCVHQ_RCV_TYPE_NON_KD 2
+#define RCVHQ_RCV_TYPE_ERROR 3
+
+
+/* sub OpCodes - ith4x */
+#define IPATH_ITH4X_OPCODE_ENCAP 0x81
+#define IPATH_ITH4X_OPCODE_LID_ARP 0x82
+
+#define IPATH_HEADER_QUEUE_WORDS 9
+
+/* functions for extracting fields from rcvhdrq entries for the driver.
+ */
+static inline __u32 ipath_hdrget_err_flags(const __le32 * rbuf)
+{
+ return __le32_to_cpu(rbuf[1]);
+}
+
+static inline __u32 ipath_hdrget_rcv_type(const __le32 * rbuf)
+{
+ return (__le32_to_cpu(rbuf[0]) >> INFINIPATH_RHF_RCVTYPE_SHIFT)
+ & INFINIPATH_RHF_RCVTYPE_MASK;
+}
+
+static inline __u32 ipath_hdrget_length_in_bytes(const __le32 * rbuf)
+{
+ return ((__le32_to_cpu(rbuf[0]) >> INFINIPATH_RHF_LENGTH_SHIFT)
+ & INFINIPATH_RHF_LENGTH_MASK) << 2;
+}
+
+static inline __u32 ipath_hdrget_index(const __le32 * rbuf)
+{
+ return (__le32_to_cpu(rbuf[0]) >> INFINIPATH_RHF_EGRINDEX_SHIFT)
+ & INFINIPATH_RHF_EGRINDEX_MASK;
+}
+
+static inline __u32 ipath_hdrget_ipath_ver(__le32 hdrword)
+{
+ return (__le32_to_cpu(hdrword) >> INFINIPATH_I_VERS_SHIFT)
+ & INFINIPATH_I_VERS_MASK;
+}
+
#endif /* _IPATH_COMMON_H */
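
The new ipath_hdrget_*() inlines added above all perform the same shift-and-mask decode of a little-endian receive-header word. The standalone sketch below illustrates that decode only; the DEMO_* shifts, masks, and the sample word are hypothetical stand-ins, not the driver's INFINIPATH_RHF_* constants (those are defined in the existing driver headers and are not shown in this hunk).

/*
 * Illustration only -- not part of the patch.  Mimics the pattern of the
 * ipath_hdrget_*() helpers: extract a bit-field from a receive-header
 * word with a shift and a mask.  All DEMO_* values are hypothetical.
 */
#include <stdint.h>
#include <stdio.h>

#define DEMO_RCVTYPE_SHIFT 5     /* hypothetical */
#define DEMO_RCVTYPE_MASK  0x7   /* hypothetical */
#define DEMO_LENGTH_SHIFT  0     /* hypothetical */
#define DEMO_LENGTH_MASK   0x1F  /* hypothetical */

static uint32_t demo_hdrget_rcv_type(uint32_t rhf)
{
	return (rhf >> DEMO_RCVTYPE_SHIFT) & DEMO_RCVTYPE_MASK;
}

static uint32_t demo_hdrget_length_in_bytes(uint32_t rhf)
{
	/* the length field counts 32-bit words, hence the "<< 2" */
	return ((rhf >> DEMO_LENGTH_SHIFT) & DEMO_LENGTH_MASK) << 2;
}

int main(void)
{
	/* sample word: type 1 (RCVHQ_RCV_TYPE_EAGER), length 9 words */
	uint32_t rhf = (1u << DEMO_RCVTYPE_SHIFT) | 9u;

	printf("type=%u len=%u bytes\n",
	       (unsigned) demo_hdrget_rcv_type(rhf),
	       (unsigned) demo_hdrget_length_in_bytes(rhf));
	return 0;
}
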
diff --git a/drivers/infiniband/hw/ipath/ipath_cq.c b/drivers/infiniband/hw/ipath/ipath_cq.c
index 7ece1135ddfe..87462e0cb4d2 100644
--- a/drivers/infiniband/hw/ipath/ipath_cq.c
+++ b/drivers/infiniband/hw/ipath/ipath_cq.c
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2006 QLogic, Inc. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -41,20 +42,29 @@
* @entry: work completion entry to add
* @sig: true if @entry is a solicitated entry
*
- * This may be called with one of the qp->s_lock or qp->r_rq.lock held.
+ * This may be called with qp->s_lock held.
*/
void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
{
+ struct ipath_cq_wc *wc;
unsigned long flags;
+ u32 head;
u32 next;
spin_lock_irqsave(&cq->lock, flags);
- if (cq->head == cq->ibcq.cqe)
+ /*
+ * Note that the head pointer might be writable by user processes.
+ * Take care to verify it is a sane value.
+ */
+ wc = cq->queue;
+ head = wc->head;
+ if (head >= (unsigned) cq->ibcq.cqe) {
+ head = cq->ibcq.cqe;
next = 0;
- else
- next = cq->head + 1;
- if (unlikely(next == cq->tail)) {
+ } else
+ next = head + 1;
+ if (unlikely(next == wc->tail)) {
spin_unlock_irqrestore(&cq->lock, flags);
if (cq->ibcq.event_handler) {
struct ib_event ev;
@@ -66,8 +76,8 @@ void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
}
return;
}
- cq->queue[cq->head] = *entry;
- cq->head = next;
+ wc->queue[head] = *entry;
+ wc->head = next;
if (cq->notify == IB_CQ_NEXT_COMP ||
(cq->notify == IB_CQ_SOLICITED && solicited)) {
@@ -100,20 +110,27 @@ void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
{
struct ipath_cq *cq = to_icq(ibcq);
+ struct ipath_cq_wc *wc;
unsigned long flags;
int npolled;
+ u32 tail;
spin_lock_irqsave(&cq->lock, flags);
+ wc = cq->queue;
+ tail = wc->tail;
+ if (tail > (u32) cq->ibcq.cqe)
+ tail = (u32) cq->ibcq.cqe;
for (npolled = 0; npolled < num_entries; ++npolled, ++entry) {
- if (cq->tail == cq->head)
+ if (tail == wc->head)
break;
- *entry = cq->queue[cq->tail];
- if (cq->tail == cq->ibcq.cqe)
- cq->tail = 0;
+ *entry = wc->queue[tail];
+ if (tail >= cq->ibcq.cqe)
+ tail = 0;
else
- cq->tail++;
+ tail++;
}
+ wc->tail = tail;
spin_unlock_irqrestore(&cq->lock, flags);
@@ -157,29 +174,81 @@ struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries,
struct ib_ucontext *context,
struct ib_udata *udata)
{
+ struct ipath_ibdev *dev = to_idev(ibdev);
struct ipath_cq *cq;
- struct ib_wc *wc;
+ struct ipath_cq_wc *wc;
struct ib_cq *ret;
- /*
- * Need to use vmalloc() if we want to support large #s of
- * entries.
- */
+ if (entries < 1 || entries > ib_ipath_max_cqes) {
+ ret = ERR_PTR(-EINVAL);
+ goto done;
+ }
+
+ /* Allocate the completion queue structure. */
cq = kmalloc(sizeof(*cq), GFP_KERNEL);
if (!cq) {
ret = ERR_PTR(-ENOMEM);
- goto bail;
+ goto done;
}
/*
- * Need to use vmalloc() if we want to support large #s of entries.
+ * Allocate the completion queue entries and head/tail pointers.
+ * This is allocated separately so that it can be resized and
+ * also mapped into user space.
+ * We need to use vmalloc() in order to support mmap and large
+ * numbers of entries.
*/
- wc = vmalloc(sizeof(*wc) * (entries + 1));
+ wc = vmalloc_user(sizeof(*wc) + sizeof(struct ib_wc) * entries);
if (!wc) {
- kfree(cq);
ret = ERR_PTR(-ENOMEM);
- goto bail;
+ goto bail_cq;
}
+
+ /*
+ * Return the address of the WC as the offset to mmap.
+ * See ipath_mmap() for details.
+ */
+ if (udata && udata->outlen >= sizeof(__u64)) {
+ struct ipath_mmap_info *ip;
+ __u64 offset = (__u64) wc;
+ int err;
+
+ err = ib_copy_to_udata(udata, &offset, sizeof(offset));
+ if (err) {
+ ret = ERR_PTR(err);
+ goto bail_wc;
+ }
+
+ /* Allocate info for ipath_mmap(). */
+ ip = kmalloc(sizeof(*ip), GFP_KERNEL);
+ if (!ip) {
+ ret = ERR_PTR(-ENOMEM);
+ goto bail_wc;
+ }
+ cq->ip = ip;
+ ip->context = context;
+ ip->obj = wc;
+ kref_init(&ip->ref);
+ ip->mmap_cnt = 0;
+ ip->size = PAGE_ALIGN(sizeof(*wc) +
+ sizeof(struct ib_wc) * entries);
+ spin_lock_irq(&dev->pending_lock);
+ ip->next = dev->pending_mmaps;
+ dev->pending_mmaps = ip;
+ spin_unlock_irq(&dev->pending_lock);
+ } else
+ cq->ip = NULL;
+
+ spin_lock(&dev->n_cqs_lock);
+ if (dev->n_cqs_allocated == ib_ipath_max_cqs) {
+ spin_unlock(&dev->n_cqs_lock);
+ ret = ERR_PTR(-ENOMEM);
+ goto bail_wc;
+ }
+
+ dev->n_cqs_allocated++;
+ spin_unlock(&dev->n_cqs_lock);
+
/*
* ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe.
* The number of entries should be >= the number requested or return
@@ -190,13 +259,21 @@ struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries,
cq->triggered = 0;
spin_lock_init(&cq->lock);
tasklet_init(&cq->comptask, send_complete, (unsigned long)cq);
- cq->head = 0;
- cq->tail = 0;
+ wc->head = 0;
+ wc->tail = 0;
cq->queue = wc;
ret = &cq->ibcq;
-bail:
+ goto done;
+
+bail_wc:
+ vfree(wc);
+
+bail_cq:
+ kfree(cq);
+
+done:
return ret;
}
@@ -210,10 +287,17 @@ bail:
*/
int ipath_destroy_cq(struct ib_cq *ibcq)
{
+ struct ipath_ibdev *dev = to_idev(ibcq->device);
struct ipath_cq *cq = to_icq(ibcq);
tasklet_kill(&cq->comptask);
- vfree(cq->queue);
+ spin_lock(&dev->n_cqs_lock);
+ dev->n_cqs_allocated--;
+ spin_unlock(&dev->n_cqs_lock);
+ if (cq->ip)
+ kref_put(&cq->ip->ref, ipath_release_mmap_info);
+ else
+ vfree(cq->queue);
kfree(cq);
return 0;
@@ -237,7 +321,7 @@ int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
spin_lock_irqsave(&cq->lock, flags);
/*
* Don't change IB_CQ_NEXT_COMP to IB_CQ_SOLICITED but allow
- * any other transitions.
+ * any other transitions (see C11-31 and C11-32 in ch. 11.4.2.2).
*/
if (cq->notify != IB_CQ_NEXT_COMP)
cq->notify = notify;
@@ -245,49 +329,96 @@ int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
return 0;
}
+/**
+ * ipath_resize_cq - change the size of the CQ
+ * @ibcq: the completion queue
+ *
+ * Returns 0 for success.
+ */
int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
{
struct ipath_cq *cq = to_icq(ibcq);
- struct ib_wc *wc, *old_wc;
- u32 n;
+ struct ipath_cq_wc *old_wc;
+ struct ipath_cq_wc *wc;
+ u32 head, tail, n;
int ret;
+ if (cqe < 1 || cqe > ib_ipath_max_cqes) {
+ ret = -EINVAL;
+ goto bail;
+ }
+
/*
* Need to use vmalloc() if we want to support large #s of entries.
*/
- wc = vmalloc(sizeof(*wc) * (cqe + 1));
+ wc = vmalloc_user(sizeof(*wc) + sizeof(struct ib_wc) * cqe);
if (!wc) {
ret = -ENOMEM;
goto bail;
}
+ /*
+ * Return the address of the WC as the offset to mmap.
+ * See ipath_mmap() for details.
+ */
+ if (udata && udata->outlen >= sizeof(__u64)) {
+ __u64 offset = (__u64) wc;
+
+ ret = ib_copy_to_udata(udata, &offset, sizeof(offset));
+ if (ret)
+ goto bail;
+ }
+
spin_lock_irq(&cq->lock);
- if (cq->head < cq->tail)
- n = cq->ibcq.cqe + 1 + cq->head - cq->tail;
+ /*
+ * Make sure head and tail are sane since they
+ * might be user writable.
+ */
+ old_wc = cq->queue;
+ head = old_wc->head;
+ if (head > (u32) cq->ibcq.cqe)
+ head = (u32) cq->ibcq.cqe;
+ tail = old_wc->tail;
+ if (tail > (u32) cq->ibcq.cqe)
+ tail = (u32) cq->ibcq.cqe;
+ if (head < tail)
+ n = cq->ibcq.cqe + 1 + head - tail;
else
- n = cq->head - cq->tail;
+ n = head - tail;
if (unlikely((u32)cqe < n)) {
spin_unlock_irq(&cq->lock);
vfree(wc);
ret = -EOVERFLOW;
goto bail;
}
- for (n = 0; cq->tail != cq->head; n++) {
- wc[n] = cq->queue[cq->tail];
- if (cq->tail == cq->ibcq.cqe)
- cq->tail = 0;
+ for (n = 0; tail != head; n++) {
+ wc->queue[n] = old_wc->queue[tail];
+ if (tail == (u32) cq->ibcq.cqe)
+ tail = 0;
else
- cq->tail++;
+ tail++;
}
cq->ibcq.cqe = cqe;
- cq->head = n;
- cq->tail = 0;
- old_wc = cq->queue;
+ wc->head = n;
+ wc->tail = 0;
cq->queue = wc;
spin_unlock_irq(&cq->lock);
vfree(old_wc);
+ if (cq->ip) {
+ struct ipath_ibdev *dev = to_idev(ibcq->device);
+ struct ipath_mmap_info *ip = cq->ip;
+
+ ip->obj = wc;
+ ip->size = PAGE_ALIGN(sizeof(*wc) +
+ sizeof(struct ib_wc) * cqe);
+ spin_lock_irq(&dev->pending_lock);
+ ip->next = dev->pending_mmaps;
+ dev->pending_mmaps = ip;
+ spin_unlock_irq(&dev->pending_lock);
+ }
+
ret = 0;
bail:
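
Because the ipath_cq_wc queue introduced above can be mmapped into user space, its head and tail indices are untrusted and are clamped before use (see ipath_cq_enter(), ipath_poll_cq(), and ipath_resize_cq() in this hunk). A minimal standalone sketch of that clamp-then-use pattern follows; all names in it are hypothetical and it is not the driver's code.

/*
 * Illustration only -- not part of the patch.  Shows the pattern used
 * above for indices that live in memory shared with user space: read
 * once, clamp to the queue bound, then operate on the local copy.
 */
#include <stdint.h>

struct demo_shared_cq {
	uint32_t head;   /* user-writable once mmapped */
	uint32_t tail;   /* user-writable once mmapped */
	/* completion entries follow in the real layout */
};

/* Return a sanitized copy of a possibly hostile index. */
static uint32_t demo_clamp_index(uint32_t idx, uint32_t max_cqe)
{
	return idx > max_cqe ? max_cqe : idx;
}

/* Count queued entries the way ipath_resize_cq() does, on clamped copies. */
static uint32_t demo_queued_entries(const struct demo_shared_cq *wc,
				    uint32_t max_cqe)
{
	uint32_t head = demo_clamp_index(wc->head, max_cqe);
	uint32_t tail = demo_clamp_index(wc->tail, max_cqe);

	return head < tail ? max_cqe + 1 + head - tail : head - tail;
}
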
diff --git a/drivers/infiniband/hw/ipath/ipath_debug.h b/drivers/infiniband/hw/ipath/ipath_debug.h
index 46762387f5f8..df69f0d80b8b 100644
--- a/drivers/infiniband/hw/ipath/ipath_debug.h
+++ b/drivers/infiniband/hw/ipath/ipath_debug.h
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2006 QLogic, Inc. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -59,7 +60,6 @@
#define __IPATH_USER_SEND 0x1000 /* use user mode send */
#define __IPATH_KERNEL_SEND 0x2000 /* use kernel mode send */
#define __IPATH_EPKTDBG 0x4000 /* print ethernet packet data */
-#define __IPATH_SMADBG 0x8000 /* sma packet debug */
#define __IPATH_IPATHDBG 0x10000 /* Ethernet (IPATH) gen debug */
#define __IPATH_IPATHWARN 0x20000 /* Ethernet (IPATH) warnings */
#define __IPATH_IPATHERR 0x40000 /* Ethernet (IPATH) errors */
@@ -83,7 +83,6 @@
/* print mmap/nopage stuff, not using VDBG any more */
#define __IPATH_MMDBG 0x0
#define __IPATH_EPKTDBG 0x0 /* print ethernet packet data */
-#define __IPATH_SMADBG 0x0 /* process startup (init)/exit messages */
#define __IPATH_IPATHDBG 0x0 /* Ethernet (IPATH) table dump on */
#define __IPATH_IPATHWARN 0x0 /* Ethernet (IPATH) warnings on */
#define __IPATH_IPATHERR 0x0 /* Ethernet (IPATH) errors on */
diff --git a/drivers/infiniband/hw/ipath/ipath_diag.c b/drivers/infiniband/hw/ipath/ipath_diag.c
index 28ddceb260e8..29958b6e0214 100644
--- a/drivers/infiniband/hw/ipath/ipath_diag.c
+++ b/drivers/infiniband/hw/ipath/ipath_diag.c
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2006 QLogic, Inc. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -40,13 +41,13 @@
* through the /sys/bus/pci resource mmap interface.
*/
+#include <linux/io.h>
#include <linux/pci.h>
+#include <linux/vmalloc.h>
#include <asm/uaccess.h>
-#include "ipath_common.h"
#include "ipath_kernel.h"
-#include "ips_common.h"
-#include "ipath_layer.h"
+#include "ipath_common.h"
int ipath_diag_inuse;
static int diag_set_link;
@@ -66,18 +67,20 @@ static struct file_operations diag_file_ops = {
.release = ipath_diag_release
};
-static struct cdev *diag_cdev;
-static struct class_device *diag_class_dev;
-
-int ipath_diag_init(void)
+int ipath_diag_add(struct ipath_devdata *dd)
{
- return ipath_cdev_init(IPATH_DIAG_MINOR, "ipath_diag",
- &diag_file_ops, &diag_cdev, &diag_class_dev);
+ char name[16];
+
+ snprintf(name, sizeof(name), "ipath_diag%d", dd->ipath_unit);
+
+ return ipath_cdev_init(IPATH_DIAG_MINOR_BASE + dd->ipath_unit, name,
+ &diag_file_ops, &dd->diag_cdev,
+ &dd->diag_class_dev);
}
-void ipath_diag_cleanup(void)
+void ipath_diag_remove(struct ipath_devdata *dd)
{
- ipath_cdev_cleanup(&diag_cdev, &diag_class_dev);
+ ipath_cdev_cleanup(&dd->diag_cdev, &dd->diag_class_dev);
}
/**
@@ -101,8 +104,7 @@ static int ipath_read_umem64(struct ipath_devdata *dd, void __user *uaddr,
int ret;
/* not very efficient, but it works for now */
- if (reg_addr < dd->ipath_kregbase ||
- reg_end > dd->ipath_kregend) {
+ if (reg_addr < dd->ipath_kregbase || reg_end > dd->ipath_kregend) {
ret = -EINVAL;
goto bail;
}
@@ -113,7 +115,7 @@ static int ipath_read_umem64(struct ipath_devdata *dd, void __user *uaddr,
goto bail;
}
reg_addr++;
- uaddr++;
+ uaddr += sizeof(u64);
}
ret = 0;
bail:
@@ -139,8 +141,7 @@ static int ipath_write_umem64(struct ipath_devdata *dd, void __iomem *caddr,
int ret;
/* not very efficient, but it works for now */
- if (reg_addr < dd->ipath_kregbase ||
- reg_end > dd->ipath_kregend) {
+ if (reg_addr < dd->ipath_kregbase || reg_end > dd->ipath_kregend) {
ret = -EINVAL;
goto bail;
}
@@ -153,7 +154,7 @@ static int ipath_write_umem64(struct ipath_devdata *dd, void __iomem *caddr,
writeq(data, reg_addr);
reg_addr++;
- uaddr++;
+ uaddr += sizeof(u64);
}
ret = 0;
bail:
@@ -191,7 +192,8 @@ static int ipath_read_umem32(struct ipath_devdata *dd, void __user *uaddr,
}
reg_addr++;
- uaddr++;
+ uaddr += sizeof(u32);
+
}
ret = 0;
bail:
@@ -230,7 +232,7 @@ static int ipath_write_umem32(struct ipath_devdata *dd, void __iomem *caddr,
writel(data, reg_addr);
reg_addr++;
- uaddr++;
+ uaddr += sizeof(u32);
}
ret = 0;
bail:
@@ -239,59 +241,197 @@ bail:
static int ipath_diag_open(struct inode *in, struct file *fp)
{
+ int unit = iminor(in) - IPATH_DIAG_MINOR_BASE;
struct ipath_devdata *dd;
- int unit = 0; /* XXX this is bogus */
- unsigned long flags;
int ret;
- dd = ipath_lookup(unit);
-
mutex_lock(&ipath_mutex);
- spin_lock_irqsave(&ipath_devs_lock, flags);
if (ipath_diag_inuse) {
ret = -EBUSY;
goto bail;
}
- list_for_each_entry(dd, &ipath_dev_list, ipath_list) {
- /*
- * we need at least one infinipath device to be present
- * (don't use INITTED, because we want to be able to open
- * even if device is in freeze mode, which cleared INITTED).
- * There is a small amount of risk to this, which is why we
- * also verify kregbase is set.
- */
-
- if (!(dd->ipath_flags & IPATH_PRESENT) ||
- !dd->ipath_kregbase)
- continue;
-
- ipath_diag_inuse = 1;
- diag_set_link = 0;
- ret = 0;
+ dd = ipath_lookup(unit);
+
+ if (dd == NULL || !(dd->ipath_flags & IPATH_PRESENT) ||
+ !dd->ipath_kregbase) {
+ ret = -ENODEV;
goto bail;
}
- ret = -ENODEV;
-
-bail:
- spin_unlock_irqrestore(&ipath_devs_lock, flags);
+ fp->private_data = dd;
+ ipath_diag_inuse = 1;
+ diag_set_link = 0;
+ ret = 0;
/* Only expose a way to reset the device if we
make it into diag mode. */
- if (ret == 0)
- ipath_expose_reset(&dd->pcidev->dev);
+ ipath_expose_reset(&dd->pcidev->dev);
+bail:
mutex_unlock(&ipath_mutex);
return ret;
}
-static int ipath_diag_release(struct inode *i, struct file *f)
+static ssize_t ipath_diagpkt_write(struct file *fp,
+ const char __user *data,
+ size_t count, loff_t *off);
+
+static struct file_operations diagpkt_file_ops = {
+ .owner = THIS_MODULE,
+ .write = ipath_diagpkt_write,
+};
+
+static struct cdev *diagpkt_cdev;
+static struct class_device *diagpkt_class_dev;
+
+int __init ipath_diagpkt_add(void)
+{
+ return ipath_cdev_init(IPATH_DIAGPKT_MINOR,
+ "ipath_diagpkt", &diagpkt_file_ops,
+ &diagpkt_cdev, &diagpkt_class_dev);
+}
+
+void __exit ipath_diagpkt_remove(void)
+{
+ ipath_cdev_cleanup(&diagpkt_cdev, &diagpkt_class_dev);
+}
+
+/**
+ * ipath_diagpkt_write - write an IB packet
+ * @fp: the diag data device file pointer
+ * @data: ipath_diag_pkt structure saying where to get the packet
+ * @count: size of data to write
+ * @off: unused by this code
+ */
+static ssize_t ipath_diagpkt_write(struct file *fp,
+ const char __user *data,
+ size_t count, loff_t *off)
+{
+ u32 __iomem *piobuf;
+ u32 plen, clen, pbufn;
+ struct ipath_diag_pkt dp;
+ u32 *tmpbuf = NULL;
+ struct ipath_devdata *dd;
+ ssize_t ret = 0;
+ u64 val;
+
+ if (count < sizeof(dp)) {
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ if (copy_from_user(&dp, data, sizeof(dp))) {
+ ret = -EFAULT;
+ goto bail;
+ }
+
+ /* send count must be an exact number of dwords */
+ if (dp.len & 3) {
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ clen = dp.len >> 2;
+
+ dd = ipath_lookup(dp.unit);
+ if (!dd || !(dd->ipath_flags & IPATH_PRESENT) ||
+ !dd->ipath_kregbase) {
+ ipath_cdbg(VERBOSE, "illegal unit %u for diag data send\n",
+ dp.unit);
+ ret = -ENODEV;
+ goto bail;
+ }
+
+ if (ipath_diag_inuse && !diag_set_link &&
+ !(dd->ipath_flags & IPATH_LINKACTIVE)) {
+ diag_set_link = 1;
+ ipath_cdbg(VERBOSE, "Trying to set to set link active for "
+ "diag pkt\n");
+ ipath_set_linkstate(dd, IPATH_IB_LINKARM);
+ ipath_set_linkstate(dd, IPATH_IB_LINKACTIVE);
+ }
+
+ if (!(dd->ipath_flags & IPATH_INITTED)) {
+ /* no hardware, freeze, etc. */
+ ipath_cdbg(VERBOSE, "unit %u not usable\n", dd->ipath_unit);
+ ret = -ENODEV;
+ goto bail;
+ }
+ val = dd->ipath_lastibcstat & IPATH_IBSTATE_MASK;
+ if (val != IPATH_IBSTATE_INIT && val != IPATH_IBSTATE_ARM &&
+ val != IPATH_IBSTATE_ACTIVE) {
+ ipath_cdbg(VERBOSE, "unit %u not ready (state %llx)\n",
+ dd->ipath_unit, (unsigned long long) val);
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ /* need total length before first word written */
+ /* +1 word is for the qword padding */
+ plen = sizeof(u32) + dp.len;
+
+ if ((plen + 4) > dd->ipath_ibmaxlen) {
+ ipath_dbg("Pkt len 0x%x > ibmaxlen %x\n",
+ plen - 4, dd->ipath_ibmaxlen);
+ ret = -EINVAL;
+ goto bail; /* before writing pbc */
+ }
+ tmpbuf = vmalloc(plen);
+ if (!tmpbuf) {
+ dev_info(&dd->pcidev->dev, "Unable to allocate tmp buffer, "
+ "failing\n");
+ ret = -ENOMEM;
+ goto bail;
+ }
+
+ if (copy_from_user(tmpbuf,
+ (const void __user *) (unsigned long) dp.data,
+ dp.len)) {
+ ret = -EFAULT;
+ goto bail;
+ }
+
+ piobuf = ipath_getpiobuf(dd, &pbufn);
+ if (!piobuf) {
+ ipath_cdbg(VERBOSE, "No PIO buffers avail unit for %u\n",
+ dd->ipath_unit);
+ ret = -EBUSY;
+ goto bail;
+ }
+
+ plen >>= 2; /* in dwords */
+
+ if (ipath_debug & __IPATH_PKTDBG)
+ ipath_cdbg(VERBOSE, "unit %u 0x%x+1w pio%d\n",
+ dd->ipath_unit, plen - 1, pbufn);
+
+ /* we have to flush after the PBC for correctness on some cpus
+ * or WC buffer can be written out of order */
+ writeq(plen, piobuf);
+ ipath_flush_wc();
+ /* copy all by the trigger word, then flush, so it's written
+ * to chip before trigger word, then write trigger word, then
+ * flush again, so packet is sent. */
+ __iowrite32_copy(piobuf + 2, tmpbuf, clen - 1);
+ ipath_flush_wc();
+ __raw_writel(tmpbuf[clen - 1], piobuf + clen + 1);
+ ipath_flush_wc();
+
+ ret = sizeof(dp);
+
+bail:
+ vfree(tmpbuf);
+ return ret;
+}
+
+static int ipath_diag_release(struct inode *in, struct file *fp)
{
mutex_lock(&ipath_mutex);
ipath_diag_inuse = 0;
+ fp->private_data = NULL;
mutex_unlock(&ipath_mutex);
return 0;
}
@@ -299,17 +439,10 @@ static int ipath_diag_release(struct inode *i, struct file *f)
static ssize_t ipath_diag_read(struct file *fp, char __user *data,
size_t count, loff_t *off)
{
- int unit = 0; /* XXX provide for reads on other units some day */
- struct ipath_devdata *dd;
+ struct ipath_devdata *dd = fp->private_data;
void __iomem *kreg_base;
ssize_t ret;
- dd = ipath_lookup(unit);
- if (!dd) {
- ret = -ENODEV;
- goto bail;
- }
-
kreg_base = dd->ipath_kregbase;
if (count == 0)
@@ -328,23 +461,16 @@ static ssize_t ipath_diag_read(struct file *fp, char __user *data,
ret = count;
}
-bail:
return ret;
}
static ssize_t ipath_diag_write(struct file *fp, const char __user *data,
size_t count, loff_t *off)
{
- int unit = 0; /* XXX this is bogus */
- struct ipath_devdata *dd;
+ struct ipath_devdata *dd = fp->private_data;
void __iomem *kreg_base;
ssize_t ret;
- dd = ipath_lookup(unit);
- if (!dd) {
- ret = -ENODEV;
- goto bail;
- }
kreg_base = dd->ipath_kregbase;
if (count == 0)
@@ -363,6 +489,5 @@ static ssize_t ipath_diag_write(struct file *fp, const char __user *data,
ret = count;
}
-bail:
return ret;
}
diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c
index dddcdae736ac..12cefa658f3b 100644
--- a/drivers/infiniband/hw/ipath/ipath_driver.c
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2006 QLogic, Inc. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -38,8 +39,8 @@
#include <linux/vmalloc.h>
#include "ipath_kernel.h"
-#include "ips_common.h"
-#include "ipath_layer.h"
+#include "ipath_verbs.h"
+#include "ipath_common.h"
static void ipath_update_pio_bufs(struct ipath_devdata *);
@@ -50,22 +51,20 @@ const char *ipath_get_unit_name(int unit)
return iname;
}
-EXPORT_SYMBOL_GPL(ipath_get_unit_name);
-
-#define DRIVER_LOAD_MSG "PathScale " IPATH_DRV_NAME " loaded: "
+#define DRIVER_LOAD_MSG "QLogic " IPATH_DRV_NAME " loaded: "
#define PFX IPATH_DRV_NAME ": "
/*
* The size has to be longer than this string, so we can append
* board/chip information to it in the init code.
*/
-const char ipath_core_version[] = IPATH_IDSTR "\n";
+const char ib_ipath_version[] = IPATH_IDSTR "\n";
static struct idr unit_table;
DEFINE_SPINLOCK(ipath_devs_lock);
LIST_HEAD(ipath_dev_list);
-wait_queue_head_t ipath_sma_state_wait;
+wait_queue_head_t ipath_state_wait;
unsigned ipath_debug = __IPATH_INFO;
@@ -74,8 +73,8 @@ MODULE_PARM_DESC(debug, "mask for debug prints");
EXPORT_SYMBOL_GPL(ipath_debug);
MODULE_LICENSE("GPL");
-MODULE_AUTHOR("PathScale <support@pathscale.com>");
-MODULE_DESCRIPTION("Pathscale InfiniPath driver");
+MODULE_AUTHOR("QLogic <support@pathscale.com>");
+MODULE_DESCRIPTION("QLogic InfiniPath driver");
const char *ipath_ibcstatus_str[] = {
"Disabled",
@@ -96,16 +95,6 @@ const char *ipath_ibcstatus_str[] = {
"RecovIdle",
};
-/*
- * These variables are initialized in the chip-specific files
- * but are defined here.
- */
-u16 ipath_gpio_sda_num, ipath_gpio_scl_num;
-u64 ipath_gpio_sda, ipath_gpio_scl;
-u64 infinipath_i_bitsextant;
-ipath_err_t infinipath_e_bitsextant, infinipath_hwe_bitsextant;
-u32 infinipath_i_rcvavail_mask, infinipath_i_rcvurg_mask;
-
static void __devexit ipath_remove_one(struct pci_dev *);
static int __devinit ipath_init_one(struct pci_dev *,
const struct pci_device_id *);
@@ -130,14 +119,6 @@ static struct pci_driver ipath_driver = {
.id_table = ipath_pci_tbl,
};
-/*
- * This is where port 0's rcvhdrtail register is written back; we also
- * want nothing else sharing the cache line, so make it a cache line
- * in size. Used for all units.
- */
-volatile __le64 *ipath_port0_rcvhdrtail;
-dma_addr_t ipath_port0_rcvhdrtail_dma;
-static int port0_rcvhdrtail_refs;
static inline void read_bars(struct ipath_devdata *dd, struct pci_dev *dev,
u32 *bar0, u32 *bar1)
@@ -170,14 +151,13 @@ static void ipath_free_devdata(struct pci_dev *pdev,
list_del(&dd->ipath_list);
spin_unlock_irqrestore(&ipath_devs_lock, flags);
}
- dma_free_coherent(&pdev->dev, sizeof(*dd), dd, dd->ipath_dma_addr);
+ vfree(dd);
}
static struct ipath_devdata *ipath_alloc_devdata(struct pci_dev *pdev)
{
unsigned long flags;
struct ipath_devdata *dd;
- dma_addr_t dma_addr;
int ret;
if (!idr_pre_get(&unit_table, GFP_KERNEL)) {
@@ -185,15 +165,12 @@ static struct ipath_devdata *ipath_alloc_devdata(struct pci_dev *pdev)
goto bail;
}
- dd = dma_alloc_coherent(&pdev->dev, sizeof(*dd), &dma_addr,
- GFP_KERNEL);
-
+ dd = vmalloc(sizeof(*dd));
if (!dd) {
dd = ERR_PTR(-ENOMEM);
goto bail;
}
-
- dd->ipath_dma_addr = dma_addr;
+ memset(dd, 0, sizeof(*dd));
dd->ipath_unit = -1;
spin_lock_irqsave(&ipath_devs_lock, flags);
@@ -271,47 +248,6 @@ int ipath_count_units(int *npresentp, int *nupp, u32 *maxportsp)
return nunits;
}
-static int init_port0_rcvhdrtail(struct pci_dev *pdev)
-{
- int ret;
-
- mutex_lock(&ipath_mutex);
-
- if (!ipath_port0_rcvhdrtail) {
- ipath_port0_rcvhdrtail =
- dma_alloc_coherent(&pdev->dev,
- IPATH_PORT0_RCVHDRTAIL_SIZE,
- &ipath_port0_rcvhdrtail_dma,
- GFP_KERNEL);
-
- if (!ipath_port0_rcvhdrtail) {
- ret = -ENOMEM;
- goto bail;
- }
- }
- port0_rcvhdrtail_refs++;
- ret = 0;
-
-bail:
- mutex_unlock(&ipath_mutex);
-
- return ret;
-}
-
-static void cleanup_port0_rcvhdrtail(struct pci_dev *pdev)
-{
- mutex_lock(&ipath_mutex);
-
- if (!--port0_rcvhdrtail_refs) {
- dma_free_coherent(&pdev->dev, IPATH_PORT0_RCVHDRTAIL_SIZE,
- (void *) ipath_port0_rcvhdrtail,
- ipath_port0_rcvhdrtail_dma);
- ipath_port0_rcvhdrtail = NULL;
- }
-
- mutex_unlock(&ipath_mutex);
-}
-
/*
* These next two routines are placeholders in case we don't have per-arch
* code for controlling write combining. If explicit control of write
@@ -336,20 +272,12 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
u32 bar0 = 0, bar1 = 0;
u8 rev;
- ret = init_port0_rcvhdrtail(pdev);
- if (ret < 0) {
- printk(KERN_ERR IPATH_DRV_NAME
- ": Could not allocate port0_rcvhdrtail: error %d\n",
- -ret);
- goto bail;
- }
-
dd = ipath_alloc_devdata(pdev);
if (IS_ERR(dd)) {
ret = PTR_ERR(dd);
printk(KERN_ERR IPATH_DRV_NAME
": Could not allocate devdata: error %d\n", -ret);
- goto bail_rcvhdrtail;
+ goto bail;
}
ipath_cdbg(VERBOSE, "initializing unit #%u\n", dd->ipath_unit);
@@ -424,12 +352,29 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
*/
ret = pci_set_dma_mask(pdev, DMA_32BIT_MASK);
if (ret) {
- dev_info(&pdev->dev, "pci_set_dma_mask unit %u "
- "fails: %d\n", dd->ipath_unit, ret);
+ dev_info(&pdev->dev,
+ "Unable to set DMA mask for unit %u: %d\n",
+ dd->ipath_unit, ret);
goto bail_regions;
}
- else
+ else {
ipath_dbg("No 64bit DMA mask, used 32 bit mask\n");
+ ret = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK);
+ if (ret)
+ dev_info(&pdev->dev,
+ "Unable to set DMA consistent mask "
+ "for unit %u: %d\n",
+ dd->ipath_unit, ret);
+
+ }
+ }
+ else {
+ ret = pci_set_consistent_dma_mask(pdev, DMA_64BIT_MASK);
+ if (ret)
+ dev_info(&pdev->dev,
+ "Unable to set DMA consistent mask "
+ "for unit %u: %d\n",
+ dd->ipath_unit, ret);
}
pci_set_master(pdev);
@@ -446,13 +391,13 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
/* setup the chip-specific functions, as early as possible. */
switch (ent->device) {
case PCI_DEVICE_ID_INFINIPATH_HT:
- ipath_init_ht400_funcs(dd);
+ ipath_init_iba6110_funcs(dd);
break;
case PCI_DEVICE_ID_INFINIPATH_PE800:
- ipath_init_pe800_funcs(dd);
+ ipath_init_iba6120_funcs(dd);
break;
default:
- ipath_dev_err(dd, "Found unknown PathScale deviceid 0x%x, "
+ ipath_dev_err(dd, "Found unknown QLogic deviceid 0x%x, "
"failing\n", ent->device);
return -ENODEV;
}
@@ -460,10 +405,10 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
for (j = 0; j < 6; j++) {
if (!pdev->resource[j].start)
continue;
- ipath_cdbg(VERBOSE, "BAR %d start %lx, end %lx, len %lx\n",
- j, pdev->resource[j].start,
- pdev->resource[j].end,
- pci_resource_len(pdev, j));
+ ipath_cdbg(VERBOSE, "BAR %d start %llx, end %llx, len %llx\n",
+ j, (unsigned long long)pdev->resource[j].start,
+ (unsigned long long)pdev->resource[j].end,
+ (unsigned long long)pci_resource_len(pdev, j));
}
if (!addr) {
@@ -483,7 +428,13 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
}
dd->ipath_pcirev = rev;
+#if defined(__powerpc__)
+ /* There isn't a generic way to specify writethrough mappings */
+ dd->ipath_kregbase = __ioremap(addr, len,
+ (_PAGE_NO_CACHE|_PAGE_WRITETHRU));
+#else
dd->ipath_kregbase = ioremap_nocache(addr, len);
+#endif
if (!dd->ipath_kregbase) {
ipath_dbg("Unable to map io addr %llx to kvirt, failing\n",
@@ -495,23 +446,23 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
((void __iomem *)dd->ipath_kregbase + len);
dd->ipath_physaddr = addr; /* used for io_remap, etc. */
/* for user mmap */
- dd->ipath_kregvirt = (u64 __iomem *) phys_to_virt(addr);
- ipath_cdbg(VERBOSE, "mapped io addr %llx to kregbase %p "
- "kregvirt %p\n", addr, dd->ipath_kregbase,
- dd->ipath_kregvirt);
+ ipath_cdbg(VERBOSE, "mapped io addr %llx to kregbase %p\n",
+ addr, dd->ipath_kregbase);
/*
* clear ipath_flags here instead of in ipath_init_chip as it is set
* by ipath_setup_htconfig.
*/
dd->ipath_flags = 0;
+ dd->ipath_lli_counter = 0;
+ dd->ipath_lli_errors = 0;
if (dd->ipath_f_bus(dd, pdev))
ipath_dev_err(dd, "Failed to setup config space; "
"continuing anyway\n");
/*
- * set up our interrupt handler; SA_SHIRQ probably not needed,
+ * set up our interrupt handler; IRQF_SHARED probably not needed,
* since MSI interrupts shouldn't be shared but won't hurt for now.
* check 0 irq after we return from chip-specific bus setup, since
* that can affect this due to setup
@@ -520,7 +471,7 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
ipath_dev_err(dd, "irq is 0, BIOS error? Interrupts won't "
"work\n");
else {
- ret = request_irq(pdev->irq, ipath_intr, SA_SHIRQ,
+ ret = request_irq(pdev->irq, ipath_intr, IRQF_SHARED,
IPATH_DRV_NAME, dd);
if (ret) {
ipath_dev_err(dd, "Couldn't setup irq handler, "
@@ -545,7 +496,8 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
ipath_device_create_group(&pdev->dev, dd);
ipathfs_add_device(dd);
ipath_user_add(dd);
- ipath_layer_add(dd);
+ ipath_diag_add(dd);
+ ipath_register_ib_device(dd);
goto bail;
@@ -561,40 +513,155 @@ bail_disable:
bail_devdata:
ipath_free_devdata(pdev, dd);
-bail_rcvhdrtail:
- cleanup_port0_rcvhdrtail(pdev);
-
bail:
return ret;
}
+static void __devexit cleanup_device(struct ipath_devdata *dd)
+{
+ int port;
+
+ ipath_shutdown_device(dd);
+
+ if (*dd->ipath_statusp & IPATH_STATUS_CHIP_PRESENT) {
+ /* can't do anything more with chip; needs re-init */
+ *dd->ipath_statusp &= ~IPATH_STATUS_CHIP_PRESENT;
+ if (dd->ipath_kregbase) {
+ /*
+ * if we haven't already cleaned up before these are
+ * to ensure any register reads/writes "fail" until
+ * re-init
+ */
+ dd->ipath_kregbase = NULL;
+ dd->ipath_uregbase = 0;
+ dd->ipath_sregbase = 0;
+ dd->ipath_cregbase = 0;
+ dd->ipath_kregsize = 0;
+ }
+ ipath_disable_wc(dd);
+ }
+
+ if (dd->ipath_pioavailregs_dma) {
+ dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
+ (void *) dd->ipath_pioavailregs_dma,
+ dd->ipath_pioavailregs_phys);
+ dd->ipath_pioavailregs_dma = NULL;
+ }
+ if (dd->ipath_dummy_hdrq) {
+ dma_free_coherent(&dd->pcidev->dev,
+ dd->ipath_pd[0]->port_rcvhdrq_size,
+ dd->ipath_dummy_hdrq, dd->ipath_dummy_hdrq_phys);
+ dd->ipath_dummy_hdrq = NULL;
+ }
+
+ if (dd->ipath_pageshadow) {
+ struct page **tmpp = dd->ipath_pageshadow;
+ dma_addr_t *tmpd = dd->ipath_physshadow;
+ int i, cnt = 0;
+
+ ipath_cdbg(VERBOSE, "Unlocking any expTID pages still "
+ "locked\n");
+ for (port = 0; port < dd->ipath_cfgports; port++) {
+ int port_tidbase = port * dd->ipath_rcvtidcnt;
+ int maxtid = port_tidbase + dd->ipath_rcvtidcnt;
+ for (i = port_tidbase; i < maxtid; i++) {
+ if (!tmpp[i])
+ continue;
+ pci_unmap_page(dd->pcidev, tmpd[i],
+ PAGE_SIZE, PCI_DMA_FROMDEVICE);
+ ipath_release_user_pages(&tmpp[i], 1);
+ tmpp[i] = NULL;
+ cnt++;
+ }
+ }
+ if (cnt) {
+ ipath_stats.sps_pageunlocks += cnt;
+ ipath_cdbg(VERBOSE, "There were still %u expTID "
+ "entries locked\n", cnt);
+ }
+ if (ipath_stats.sps_pagelocks ||
+ ipath_stats.sps_pageunlocks)
+ ipath_cdbg(VERBOSE, "%llu pages locked, %llu "
+ "unlocked via ipath_m{un}lock\n",
+ (unsigned long long)
+ ipath_stats.sps_pagelocks,
+ (unsigned long long)
+ ipath_stats.sps_pageunlocks);
+
+ ipath_cdbg(VERBOSE, "Free shadow page tid array at %p\n",
+ dd->ipath_pageshadow);
+ vfree(dd->ipath_pageshadow);
+ dd->ipath_pageshadow = NULL;
+ }
+
+ /*
+ * free any resources still in use (usually just kernel ports)
+ * at unload; we do for portcnt, not cfgports, because cfgports
+ * could have changed while we were loaded.
+ */
+ for (port = 0; port < dd->ipath_portcnt; port++) {
+ struct ipath_portdata *pd = dd->ipath_pd[port];
+ dd->ipath_pd[port] = NULL;
+ ipath_free_pddata(dd, pd);
+ }
+ kfree(dd->ipath_pd);
+ /*
+ * debuggability, in case some cleanup path tries to use it
+ * after this
+ */
+ dd->ipath_pd = NULL;
+}
+
static void __devexit ipath_remove_one(struct pci_dev *pdev)
{
- struct ipath_devdata *dd;
+ struct ipath_devdata *dd = pci_get_drvdata(pdev);
- ipath_cdbg(VERBOSE, "removing, pdev=%p\n", pdev);
- if (!pdev)
- return;
+ ipath_cdbg(VERBOSE, "removing, pdev=%p, dd=%p\n", pdev, dd);
- dd = pci_get_drvdata(pdev);
- ipath_layer_del(dd);
- ipath_user_del(dd);
+ if (dd->verbs_dev)
+ ipath_unregister_ib_device(dd->verbs_dev);
+
+ ipath_diag_remove(dd);
+ ipath_user_remove(dd);
ipathfs_remove_device(dd);
ipath_device_remove_group(&pdev->dev, dd);
+
ipath_cdbg(VERBOSE, "Releasing pci memory regions, dd %p, "
"unit %u\n", dd, (u32) dd->ipath_unit);
- if (dd->ipath_kregbase) {
- ipath_cdbg(VERBOSE, "Unmapping kregbase %p\n",
- dd->ipath_kregbase);
- iounmap((volatile void __iomem *) dd->ipath_kregbase);
- dd->ipath_kregbase = NULL;
- }
+
+ cleanup_device(dd);
+
+ /*
+ * turn off rcv, send, and interrupts for all ports, all drivers
+ * should also hard reset the chip here?
+ * free up port 0 (kernel) rcvhdr, egr bufs, and eventually tid bufs
+ * for all versions of the driver, if they were allocated
+ */
+ if (pdev->irq) {
+ ipath_cdbg(VERBOSE,
+ "unit %u free_irq of irq %x\n",
+ dd->ipath_unit, pdev->irq);
+ free_irq(pdev->irq, dd);
+ } else
+ ipath_dbg("irq is 0, not doing free_irq "
+ "for unit %u\n", dd->ipath_unit);
+ /*
+ * we check for NULL here, because it's outside
+ * the kregbase check, and we need to call it
+ * after the free_irq. Thus it's possible that
+ * the function pointers were never initialized.
+ */
+ if (dd->ipath_f_cleanup)
+ /* clean up chip-specific stuff */
+ dd->ipath_f_cleanup(dd);
+
+ ipath_cdbg(VERBOSE, "Unmapping kregbase %p\n", dd->ipath_kregbase);
+ iounmap((volatile void __iomem *) dd->ipath_kregbase);
pci_release_regions(pdev);
ipath_cdbg(VERBOSE, "calling pci_disable_device\n");
pci_disable_device(pdev);
ipath_free_devdata(pdev, dd);
- cleanup_port0_rcvhdrtail(pdev);
}
/* general driver use */
@@ -652,21 +719,23 @@ void ipath_disarm_piobufs(struct ipath_devdata *dd, unsigned first,
*
* wait up to msecs milliseconds for IB link state change to occur for
* now, take the easy polling route. Currently used only by
- * ipath_layer_set_linkstate. Returns 0 if state reached, otherwise
+ * ipath_set_linkstate. Returns 0 if state reached, otherwise
* -ETIMEDOUT state can have multiple states set, for any of several
* transitions.
*/
-int ipath_wait_linkstate(struct ipath_devdata *dd, u32 state, int msecs)
+static int ipath_wait_linkstate(struct ipath_devdata *dd, u32 state,
+ int msecs)
{
- dd->ipath_sma_state_wanted = state;
- wait_event_interruptible_timeout(ipath_sma_state_wait,
+ dd->ipath_state_wanted = state;
+ wait_event_interruptible_timeout(ipath_state_wait,
(dd->ipath_flags & state),
msecs_to_jiffies(msecs));
- dd->ipath_sma_state_wanted = 0;
+ dd->ipath_state_wanted = 0;
if (!(dd->ipath_flags & state)) {
u64 val;
- ipath_cdbg(SMA, "Didn't reach linkstate %s within %u ms\n",
+ ipath_cdbg(VERBOSE, "Didn't reach linkstate %s within %u"
+ " ms\n",
/* test INIT ahead of DOWN, both can be set */
(state & IPATH_LINKINIT) ? "INIT" :
((state & IPATH_LINKDOWN) ? "DOWN" :
@@ -799,8 +868,8 @@ static void get_rhf_errstring(u32 err, char *msg, size_t len)
static inline void *ipath_get_egrbuf(struct ipath_devdata *dd, u32 bufnum,
int err)
{
- return dd->ipath_port0_skbs ?
- (void *)dd->ipath_port0_skbs[bufnum]->data : NULL;
+ return dd->ipath_port0_skbinfo ?
+ (void *) dd->ipath_port0_skbinfo[bufnum].skb->data : NULL;
}
/**
@@ -822,85 +891,69 @@ struct sk_buff *ipath_alloc_skb(struct ipath_devdata *dd,
*/
/*
- * We need 4 extra bytes for unaligned transfer copying
+ * We need 2 extra bytes for ipath_ether data sent in the
+ * key header. In order to keep everything dword aligned,
+ * we'll reserve 4 bytes.
*/
+ len = dd->ipath_ibmaxlen + 4;
+
if (dd->ipath_flags & IPATH_4BYTE_TID) {
- /* we need a 4KB multiple alignment, and there is no way
+ /* We need a 2KB multiple alignment, and there is no way
* to do it except to allocate extra and then skb_reserve
* enough to bring it up to the right alignment.
*/
- len = dd->ipath_ibmaxlen + 4 + (1 << 11) - 1;
+ len += 2047;
}
- else
- len = dd->ipath_ibmaxlen + 4;
+
skb = __dev_alloc_skb(len, gfp_mask);
if (!skb) {
ipath_dev_err(dd, "Failed to allocate skbuff, length %u\n",
len);
goto bail;
}
+
+ skb_reserve(skb, 4);
+
if (dd->ipath_flags & IPATH_4BYTE_TID) {
- u32 una = ((1 << 11) - 1) & (unsigned long)(skb->data + 4);
+ u32 una = (unsigned long)skb->data & 2047;
if (una)
- skb_reserve(skb, 4 + (1 << 11) - una);
- else
- skb_reserve(skb, 4);
- } else
- skb_reserve(skb, 4);
+ skb_reserve(skb, 2048 - una);
+ }
bail:
return skb;
}
-/**
- * ipath_rcv_layer - receive a packet for the layered (ethernet) driver
- * @dd: the infinipath device
- * @etail: the sk_buff number
- * @tlen: the total packet length
- * @hdr: the ethernet header
- *
- * Separate routine for better overall optimization
- */
-static void ipath_rcv_layer(struct ipath_devdata *dd, u32 etail,
- u32 tlen, struct ether_header *hdr)
+static void ipath_rcv_hdrerr(struct ipath_devdata *dd,
+ u32 eflags,
+ u32 l,
+ u32 etail,
+ u64 *rc)
{
- u32 elen;
- u8 pad, *bthbytes;
- struct sk_buff *skb, *nskb;
-
- if (dd->ipath_port0_skbs && hdr->sub_opcode == OPCODE_ENCAP) {
- /*
- * Allocate a new sk_buff to replace the one we give
- * to the network stack.
- */
- nskb = ipath_alloc_skb(dd, GFP_ATOMIC);
- if (!nskb) {
- /* count OK packets that we drop */
- ipath_stats.sps_krdrops++;
- return;
+ char emsg[128];
+ struct ipath_message_header *hdr;
+
+ get_rhf_errstring(eflags, emsg, sizeof emsg);
+ hdr = (struct ipath_message_header *)&rc[1];
+ ipath_cdbg(PKT, "RHFerrs %x hdrqtail=%x typ=%u "
+ "tlen=%x opcode=%x egridx=%x: %s\n",
+ eflags, l,
+ ipath_hdrget_rcv_type((__le32 *) rc),
+ ipath_hdrget_length_in_bytes((__le32 *) rc),
+ be32_to_cpu(hdr->bth[0]) >> 24,
+ etail, emsg);
+
+ /* Count local link integrity errors. */
+ if (eflags & (INFINIPATH_RHF_H_ICRCERR | INFINIPATH_RHF_H_VCRCERR)) {
+ u8 n = (dd->ipath_ibcctrl >>
+ INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) &
+ INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK;
+
+ if (++dd->ipath_lli_counter > n) {
+ dd->ipath_lli_counter = 0;
+ dd->ipath_lli_errors++;
}
-
- bthbytes = (u8 *) hdr->bth;
- pad = (bthbytes[1] >> 4) & 3;
- /* +CRC32 */
- elen = tlen - (sizeof(*hdr) + pad + sizeof(u32));
-
- skb = dd->ipath_port0_skbs[etail];
- dd->ipath_port0_skbs[etail] = nskb;
- skb_put(skb, elen);
-
- dd->ipath_f_put_tid(dd, etail + (u64 __iomem *)
- ((char __iomem *) dd->ipath_kregbase
- + dd->ipath_rcvegrbase), 0,
- virt_to_phys(nskb->data));
-
- __ipath_layer_rcv(dd, hdr, skb);
-
- /* another ether packet received */
- ipath_stats.sps_ether_rpkts++;
}
- else if (hdr->sub_opcode == OPCODE_LID_ARP)
- __ipath_layer_rcv_lid(dd, hdr);
}
/*
@@ -916,10 +969,9 @@ void ipath_kreceive(struct ipath_devdata *dd)
const u32 rsize = dd->ipath_rcvhdrentsize; /* words */
const u32 maxcnt = dd->ipath_rcvhdrcnt * rsize; /* words */
u32 etail = -1, l, hdrqtail;
- struct ips_message_header *hdr;
- u32 eflags, i, etype, tlen, pkttot = 0;
+ struct ipath_message_header *hdr;
+ u32 eflags, i, etype, tlen, pkttot = 0, updegr=0, reloop=0;
static u64 totcalls; /* stats, may eventually remove */
- char emsg[128];
if (!dd->ipath_hdrqtailptr) {
ipath_dev_err(dd,
@@ -931,24 +983,18 @@ void ipath_kreceive(struct ipath_devdata *dd)
if (test_and_set_bit(0, &dd->ipath_rcv_pending))
goto bail;
- if (dd->ipath_port0head ==
- (u32)le64_to_cpu(*dd->ipath_hdrqtailptr))
+ l = dd->ipath_port0head;
+ hdrqtail = (u32) le64_to_cpu(*dd->ipath_hdrqtailptr);
+ if (l == hdrqtail)
goto done;
-gotmore:
- /*
- * read only once at start. If in flood situation, this helps
- * performance slightly. If more arrive while we are processing,
- * we'll come back here and do them
- */
- hdrqtail = (u32)le64_to_cpu(*dd->ipath_hdrqtailptr);
-
- for (i = 0, l = dd->ipath_port0head; l != hdrqtail; i++) {
+reloop:
+ for (i = 0; l != hdrqtail; i++) {
u32 qp;
u8 *bthbytes;
rc = (u64 *) (dd->ipath_pd[0]->port_rcvhdrq + (l << 2));
- hdr = (struct ips_message_header *)&rc[1];
+ hdr = (struct ipath_message_header *)&rc[1];
/*
* could make a network order version of IPATH_KD_QP, and
* do the obvious shift before masking to speed this up.
@@ -956,10 +1002,10 @@ gotmore:
qp = ntohl(hdr->bth[1]) & 0xffffff;
bthbytes = (u8 *) hdr->bth;
- eflags = ips_get_hdr_err_flags((__le32 *) rc);
- etype = ips_get_rcv_type((__le32 *) rc);
+ eflags = ipath_hdrget_err_flags((__le32 *) rc);
+ etype = ipath_hdrget_rcv_type((__le32 *) rc);
/* total length */
- tlen = ips_get_length_in_bytes((__le32 *) rc);
+ tlen = ipath_hdrget_length_in_bytes((__le32 *) rc);
ebuf = NULL;
if (etype != RCVHQ_RCV_TYPE_EXPECTED) {
/*
@@ -969,7 +1015,7 @@ gotmore:
* set ebuf (so we try to copy data) unless the
* length requires it.
*/
- etail = ips_get_index((__le32 *) rc);
+ etail = ipath_hdrget_index((__le32 *) rc);
if (tlen > sizeof(*hdr) ||
etype == RCVHQ_RCV_TYPE_NON_KD)
ebuf = ipath_get_egrbuf(dd, etail, 0);
@@ -981,60 +1027,31 @@ gotmore:
*/
if (etype != RCVHQ_RCV_TYPE_NON_KD && etype !=
- RCVHQ_RCV_TYPE_ERROR && ips_get_ipath_ver(
+ RCVHQ_RCV_TYPE_ERROR && ipath_hdrget_ipath_ver(
hdr->iph.ver_port_tid_offset) !=
IPS_PROTO_VERSION) {
ipath_cdbg(PKT, "Bad InfiniPath protocol version "
"%x\n", etype);
}
- if (eflags & ~(INFINIPATH_RHF_H_TIDERR |
- INFINIPATH_RHF_H_IHDRERR)) {
- get_rhf_errstring(eflags, emsg, sizeof emsg);
- ipath_cdbg(PKT, "RHFerrs %x hdrqtail=%x typ=%u "
- "tlen=%x opcode=%x egridx=%x: %s\n",
- eflags, l, etype, tlen, bthbytes[0],
- ips_get_index((__le32 *) rc), emsg);
- } else if (etype == RCVHQ_RCV_TYPE_NON_KD) {
- int ret = __ipath_verbs_rcv(dd, rc + 1,
- ebuf, tlen);
- if (ret == -ENODEV)
- ipath_cdbg(VERBOSE,
- "received IB packet, "
- "not SMA (QP=%x)\n", qp);
- } else if (etype == RCVHQ_RCV_TYPE_EAGER) {
- if (qp == IPATH_KD_QP &&
- bthbytes[0] == ipath_layer_rcv_opcode &&
- ebuf)
- ipath_rcv_layer(dd, etail, tlen,
- (struct ether_header *)hdr);
- else
- ipath_cdbg(PKT, "typ %x, opcode %x (eager, "
- "qp=%x), len %x; ignored\n",
- etype, bthbytes[0], qp, tlen);
+ if (unlikely(eflags))
+ ipath_rcv_hdrerr(dd, eflags, l, etail, rc);
+ else if (etype == RCVHQ_RCV_TYPE_NON_KD) {
+ ipath_ib_rcv(dd->verbs_dev, rc + 1, ebuf, tlen);
+ if (dd->ipath_lli_counter)
+ dd->ipath_lli_counter--;
}
+ else if (etype == RCVHQ_RCV_TYPE_EAGER)
+ ipath_cdbg(PKT, "typ %x, opcode %x (eager, "
+ "qp=%x), len %x; ignored\n",
+ etype, bthbytes[0], qp, tlen);
else if (etype == RCVHQ_RCV_TYPE_EXPECTED)
ipath_dbg("Bug: Expected TID, opcode %x; ignored\n",
be32_to_cpu(hdr->bth[0]) & 0xff);
- else if (eflags & (INFINIPATH_RHF_H_TIDERR |
- INFINIPATH_RHF_H_IHDRERR)) {
- /*
- * This is a type 3 packet, only the LRH is in the
- * rcvhdrq, the rest of the header is in the eager
- * buffer.
- */
- u8 opcode;
- if (ebuf) {
- bthbytes = (u8 *) ebuf;
- opcode = *bthbytes;
- }
- else
- opcode = 0;
- get_rhf_errstring(eflags, emsg, sizeof emsg);
- ipath_dbg("Err %x (%s), opcode %x, egrbuf %x, "
- "len %x\n", eflags, emsg, opcode, etail,
- tlen);
- } else {
+ else {
/*
* error packet, type of error unknown.
* Probably type 3, but we don't know, so don't
@@ -1054,25 +1071,50 @@ gotmore:
l += rsize;
if (l >= maxcnt)
l = 0;
+ if (etype != RCVHQ_RCV_TYPE_EXPECTED)
+ updegr = 1;
/*
- * update for each packet, to help prevent overflows if we
- * have lots of packets.
+ * update head regs on last packet, and every 16 packets.
+ * Reduce bus traffic, while still trying to prevent
+ * rcvhdrq overflows, for when the queue is nearly full
*/
- (void)ipath_write_ureg(dd, ur_rcvhdrhead,
- dd->ipath_rhdrhead_intr_off | l, 0);
- if (etype != RCVHQ_RCV_TYPE_EXPECTED)
- (void)ipath_write_ureg(dd, ur_rcvegrindexhead,
- etail, 0);
+ if (l == hdrqtail || (i && !(i&0xf))) {
+ u64 lval;
+ if (l == hdrqtail)
+ /* request IBA6120 interrupt only on last */
+ lval = dd->ipath_rhdrhead_intr_off | l;
+ else
+ lval = l;
+ (void)ipath_write_ureg(dd, ur_rcvhdrhead, lval, 0);
+ if (updegr) {
+ (void)ipath_write_ureg(dd, ur_rcvegrindexhead,
+ etail, 0);
+ updegr = 0;
+ }
+ }
+ }
+
+ if (!dd->ipath_rhdrhead_intr_off && !reloop) {
+ /* IBA6110 workaround; we can have a race clearing chip
+ * interrupt with another interrupt about to be delivered,
+ * and can clear it before it is delivered on the GPIO
+ * workaround. By doing the extra check here for the
+ * in-memory tail register updating while we were doing
+ * earlier packets, we "almost" guarantee we have covered
+ * that case.
+ */
+ u32 hqtail = (u32)le64_to_cpu(*dd->ipath_hdrqtailptr);
+ if (hqtail != hdrqtail) {
+ hdrqtail = hqtail;
+ reloop = 1; /* loop 1 extra time at most */
+ goto reloop;
+ }
}
pkttot += i;
dd->ipath_port0head = l;
- if (hdrqtail != (u32)le64_to_cpu(*dd->ipath_hdrqtailptr))
- /* more arrived while we handled first batch */
- goto gotmore;
-
if (pkttot > ipath_stats.sps_maxpkts_call)
ipath_stats.sps_maxpkts_call = pkttot;
ipath_stats.sps_port0pkts += pkttot;
@@ -1226,7 +1268,7 @@ int ipath_setrcvhdrsize(struct ipath_devdata *dd, unsigned rhdrsize)
*
* do appropriate marking as busy, etc.
* returns buffer number if one found (>=0), negative number is error.
- * Used by ipath_sma_send_pkt and ipath_layer_send
+ * Used by ipath_layer_send
*/
u32 __iomem *ipath_getpiobuf(struct ipath_devdata *dd, u32 * pbufnum)
{
@@ -1332,13 +1374,6 @@ rescan:
goto bail;
}
- if (updated)
- /*
- * ran out of bufs, now some (at least this one we just
- * got) are now available, so tell the layered driver.
- */
- __ipath_layer_intr(dd, IPATH_LAYER_INT_SEND_CONTINUE);
-
/*
* set next starting place. Since it's just an optimization,
* it doesn't matter who wins on this, so no locking
@@ -1369,26 +1404,20 @@ bail:
* @dd: the infinipath device
* @pd: the port data
*
- * this *must* be physically contiguous memory, and for now,
- * that limits it to what kmalloc can do.
+ * this must be contiguous memory (from an i/o perspective), and must be
+ * DMA'able (which means for some systems, it will go through an IOMMU,
+ * or be forced into a low address range).
*/
int ipath_create_rcvhdrq(struct ipath_devdata *dd,
struct ipath_portdata *pd)
{
- int ret = 0, amt;
+ int ret = 0;
- amt = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize *
- sizeof(u32), PAGE_SIZE);
if (!pd->port_rcvhdrq) {
- /*
- * not using REPEAT isn't viable; at 128KB, we can easily
- * fail this. The problem with REPEAT is we can block here
- * "forever". There isn't an inbetween, unfortunately. We
- * could reduce the risk by never freeing the rcvhdrq except
- * at unload, but even then, the first time a port is used,
- * we could delay for some time...
- */
+ dma_addr_t phys_hdrqtail;
gfp_t gfp_flags = GFP_USER | __GFP_COMP;
+ int amt = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize *
+ sizeof(u32), PAGE_SIZE);
pd->port_rcvhdrq = dma_alloc_coherent(
&dd->pcidev->dev, amt, &pd->port_rcvhdrq_phys,
@@ -1401,6 +1430,19 @@ int ipath_create_rcvhdrq(struct ipath_devdata *dd,
ret = -ENOMEM;
goto bail;
}
+ pd->port_rcvhdrtail_kvaddr = dma_alloc_coherent(
+ &dd->pcidev->dev, PAGE_SIZE, &phys_hdrqtail, GFP_KERNEL);
+ if (!pd->port_rcvhdrtail_kvaddr) {
+ ipath_dev_err(dd, "attempt to allocate 1 page "
+ "for port %u rcvhdrqtailaddr failed\n",
+ pd->port_port);
+ ret = -ENOMEM;
+ dma_free_coherent(&dd->pcidev->dev, amt,
+ pd->port_rcvhdrq, pd->port_rcvhdrq_phys);
+ pd->port_rcvhdrq = NULL;
+ goto bail;
+ }
+ pd->port_rcvhdrqtailaddr_phys = phys_hdrqtail;
pd->port_rcvhdrq_size = amt;
@@ -1410,20 +1452,29 @@ int ipath_create_rcvhdrq(struct ipath_devdata *dd,
(unsigned long) pd->port_rcvhdrq_phys,
(unsigned long) pd->port_rcvhdrq_size,
pd->port_port);
- } else {
- /*
- * clear for security, sanity, and/or debugging, each
- * time we reuse
- */
- memset(pd->port_rcvhdrq, 0, amt);
+
+ ipath_cdbg(VERBOSE, "port %d hdrtailaddr, %llx physical\n",
+ pd->port_port,
+ (unsigned long long) phys_hdrqtail);
}
+ else
+ ipath_cdbg(VERBOSE, "reuse port %d rcvhdrq @%p %llx phys; "
+ "hdrtailaddr@%p %llx physical\n",
+ pd->port_port, pd->port_rcvhdrq,
+ (unsigned long long) pd->port_rcvhdrq_phys,
+ pd->port_rcvhdrtail_kvaddr, (unsigned long long)
+ pd->port_rcvhdrqtailaddr_phys);
+
+ /* clear for security and sanity on each use */
+ memset(pd->port_rcvhdrq, 0, pd->port_rcvhdrq_size);
+ memset(pd->port_rcvhdrtail_kvaddr, 0, PAGE_SIZE);
/*
* tell chip each time we init it, even if we are re-using previous
- * memory (we zero it at process close)
+ * memory (we zero the register at process close)
*/
- ipath_cdbg(VERBOSE, "writing port %d rcvhdraddr as %lx\n",
- pd->port_port, (unsigned long) pd->port_rcvhdrq_phys);
+ ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdrtailaddr,
+ pd->port_port, pd->port_rcvhdrqtailaddr_phys);
ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdraddr,
pd->port_port, pd->port_rcvhdrq_phys);
@@ -1503,7 +1554,7 @@ int ipath_waitfor_mdio_cmdready(struct ipath_devdata *dd)
return ret;
}
-void ipath_set_ib_lstate(struct ipath_devdata *dd, int which)
+static void ipath_set_ib_lstate(struct ipath_devdata *dd, int which)
{
static const char *what[4] = {
[0] = "DOWN",
@@ -1511,20 +1562,206 @@ void ipath_set_ib_lstate(struct ipath_devdata *dd, int which)
[INFINIPATH_IBCC_LINKCMD_ARMED] = "ARMED",
[INFINIPATH_IBCC_LINKCMD_ACTIVE] = "ACTIVE"
};
- ipath_cdbg(SMA, "Trying to move unit %u to %s, current ltstate "
+ int linkcmd = (which >> INFINIPATH_IBCC_LINKCMD_SHIFT) &
+ INFINIPATH_IBCC_LINKCMD_MASK;
+
+ ipath_cdbg(VERBOSE, "Trying to move unit %u to %s, current ltstate "
"is %s\n", dd->ipath_unit,
- what[(which >> INFINIPATH_IBCC_LINKCMD_SHIFT) &
- INFINIPATH_IBCC_LINKCMD_MASK],
+ what[linkcmd],
ipath_ibcstatus_str[
(ipath_read_kreg64
(dd, dd->ipath_kregs->kr_ibcstatus) >>
INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) &
INFINIPATH_IBCS_LINKTRAININGSTATE_MASK]);
+ /* flush all queued sends when going to DOWN or INIT, to be sure that
+ * they don't block MAD packets */
+ if (!linkcmd || linkcmd == INFINIPATH_IBCC_LINKCMD_INIT) {
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
+ INFINIPATH_S_ABORT);
+ ipath_disarm_piobufs(dd, dd->ipath_lastport_piobuf,
+ (unsigned)(dd->ipath_piobcnt2k +
+ dd->ipath_piobcnt4k) -
+ dd->ipath_lastport_piobuf);
+ }
ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
dd->ipath_ibcctrl | which);
}
+int ipath_set_linkstate(struct ipath_devdata *dd, u8 newstate)
+{
+ u32 lstate;
+ int ret;
+
+ switch (newstate) {
+ case IPATH_IB_LINKDOWN:
+ ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_POLL <<
+ INFINIPATH_IBCC_LINKINITCMD_SHIFT);
+ /* don't wait */
+ ret = 0;
+ goto bail;
+
+ case IPATH_IB_LINKDOWN_SLEEP:
+ ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_SLEEP <<
+ INFINIPATH_IBCC_LINKINITCMD_SHIFT);
+ /* don't wait */
+ ret = 0;
+ goto bail;
+
+ case IPATH_IB_LINKDOWN_DISABLE:
+ ipath_set_ib_lstate(dd,
+ INFINIPATH_IBCC_LINKINITCMD_DISABLE <<
+ INFINIPATH_IBCC_LINKINITCMD_SHIFT);
+ /* don't wait */
+ ret = 0;
+ goto bail;
+
+ case IPATH_IB_LINKINIT:
+ if (dd->ipath_flags & IPATH_LINKINIT) {
+ ret = 0;
+ goto bail;
+ }
+ ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_INIT <<
+ INFINIPATH_IBCC_LINKCMD_SHIFT);
+ lstate = IPATH_LINKINIT;
+ break;
+
+ case IPATH_IB_LINKARM:
+ if (dd->ipath_flags & IPATH_LINKARMED) {
+ ret = 0;
+ goto bail;
+ }
+ if (!(dd->ipath_flags &
+ (IPATH_LINKINIT | IPATH_LINKACTIVE))) {
+ ret = -EINVAL;
+ goto bail;
+ }
+ ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ARMED <<
+ INFINIPATH_IBCC_LINKCMD_SHIFT);
+ /*
+ * Since the port can transition to ACTIVE by receiving
+ * a non VL 15 packet, wait for either state.
+ */
+ lstate = IPATH_LINKARMED | IPATH_LINKACTIVE;
+ break;
+
+ case IPATH_IB_LINKACTIVE:
+ if (dd->ipath_flags & IPATH_LINKACTIVE) {
+ ret = 0;
+ goto bail;
+ }
+ if (!(dd->ipath_flags & IPATH_LINKARMED)) {
+ ret = -EINVAL;
+ goto bail;
+ }
+ ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ACTIVE <<
+ INFINIPATH_IBCC_LINKCMD_SHIFT);
+ lstate = IPATH_LINKACTIVE;
+ break;
+
+ default:
+ ipath_dbg("Invalid linkstate 0x%x requested\n", newstate);
+ ret = -EINVAL;
+ goto bail;
+ }
+ ret = ipath_wait_linkstate(dd, lstate, 2000);
+
+bail:
+ return ret;
+}
+
+/**
+ * ipath_set_mtu - set the MTU
+ * @dd: the infinipath device
+ * @arg: the new MTU
+ *
+ * we can handle "any" incoming size, the issue here is whether we
+ * need to restrict our outgoing size. For now, we don't do any
+ * sanity checking on this, and we don't deal with what happens to
+ * programs that are already running when the size changes.
+ * NOTE: changing the MTU will usually cause the IBC to go back to
+ * link initialize (IPATH_IBSTATE_INIT) state...
+ */
+int ipath_set_mtu(struct ipath_devdata *dd, u16 arg)
+{
+ u32 piosize;
+ int changed = 0;
+ int ret;
+
+ /*
+ * mtu is IB data payload max. It's the largest power of 2 less
+ * than piosize (or even larger, since it only really controls the
+ * largest we can receive; we can send the max of the mtu and
+ * piosize). We check that it's one of the valid IB sizes.
+ */
+ if (arg != 256 && arg != 512 && arg != 1024 && arg != 2048 &&
+ arg != 4096) {
+ ipath_dbg("Trying to set invalid mtu %u, failing\n", arg);
+ ret = -EINVAL;
+ goto bail;
+ }
+ if (dd->ipath_ibmtu == arg) {
+ ret = 0; /* same as current */
+ goto bail;
+ }
+
+ piosize = dd->ipath_ibmaxlen;
+ dd->ipath_ibmtu = arg;
+
+ if (arg >= (piosize - IPATH_PIO_MAXIBHDR)) {
+ /* Only if it's not the initial value (or reset to it) */
+ if (piosize != dd->ipath_init_ibmaxlen) {
+ dd->ipath_ibmaxlen = piosize;
+ changed = 1;
+ }
+ } else if ((arg + IPATH_PIO_MAXIBHDR) != dd->ipath_ibmaxlen) {
+ piosize = arg + IPATH_PIO_MAXIBHDR;
+ ipath_cdbg(VERBOSE, "ibmaxlen was 0x%x, setting to 0x%x "
+ "(mtu 0x%x)\n", dd->ipath_ibmaxlen, piosize,
+ arg);
+ dd->ipath_ibmaxlen = piosize;
+ changed = 1;
+ }
+
+ if (changed) {
+ /*
+ * set the IBC maxpktlength to the size of our pio
+ * buffers in words
+ */
+ u64 ibc = dd->ipath_ibcctrl;
+ ibc &= ~(INFINIPATH_IBCC_MAXPKTLEN_MASK <<
+ INFINIPATH_IBCC_MAXPKTLEN_SHIFT);
+
+ piosize = piosize - 2 * sizeof(u32); /* ignore pbc */
+ dd->ipath_ibmaxlen = piosize;
+ piosize /= sizeof(u32); /* in words */
+ /*
+ * for ICRC, which we only send in diag test pkt mode, and
+ * we don't need to worry about that for mtu
+ */
+ piosize += 1;
+
+ ibc |= piosize << INFINIPATH_IBCC_MAXPKTLEN_SHIFT;
+ dd->ipath_ibcctrl = ibc;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
+ dd->ipath_ibcctrl);
+ dd->ipath_f_tidtemplate(dd);
+ }
+
+ ret = 0;
+
+bail:
+ return ret;
+}
+
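/*
 * A minimal sketch of the MAXPKTLEN arithmetic in ipath_set_mtu() above,
 * assuming max_ib_hdr stands in for IPATH_PIO_MAXIBHDR (the largest IB
 * header, in bytes, the driver prepends); illustrative only.
 */
static u32 ibc_maxpktlen_words(u32 mtu, u32 max_ib_hdr)
{
	u32 bytes = mtu + max_ib_hdr;	/* payload plus worst-case header */

	bytes -= 2 * sizeof(u32);	/* the two PBC words are not counted */
	return bytes / sizeof(u32) + 1;	/* in 32-bit words, plus one word for ICRC */
}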
+int ipath_set_lid(struct ipath_devdata *dd, u32 arg, u8 lmc)
+{
+ dd->ipath_lid = arg;
+ dd->ipath_lmc = lmc;
+
+ return 0;
+}
+
/**
* ipath_read_kreg64_port - read a device's per-port 64-bit kernel register
* @dd: the infinipath device
@@ -1628,17 +1865,10 @@ void ipath_shutdown_device(struct ipath_devdata *dd)
ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_DISABLE <<
INFINIPATH_IBCC_LINKINITCMD_SHIFT);
- /*
- * we are shutting down, so tell the layered driver. We don't do
- * this on just a link state change, much like ethernet, a cable
- * unplug, etc. doesn't change driver state
- */
- ipath_layer_intr(dd, IPATH_LAYER_INT_IF_DOWN);
-
/* disable IBC */
dd->ipath_control &= ~INFINIPATH_C_LINKENABLE;
ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
- dd->ipath_control);
+ dd->ipath_control | INFINIPATH_C_FREEZEMODE);
/*
* clear SerdesEnable and turn the leds off; do this here because
@@ -1667,82 +1897,82 @@ void ipath_shutdown_device(struct ipath_devdata *dd)
/**
* ipath_free_pddata - free a port's allocated data
* @dd: the infinipath device
- * @port: the port
- * @freehdrq: free the port data structure if true
+ * @pd: the portdata structure
*
- * when closing, free up any allocated data for a port, if the
- * reference count goes to zero
- * Note: this also optionally frees the portdata itself!
- * Any changes here have to be matched up with the reinit case
- * of ipath_init_chip(), which calls this routine on reinit after reset.
+ * free up any allocated data for a port
+ * This should not touch anything that would affect a simultaneous
+ * re-allocation of port data, because it is called after ipath_mutex
+ * is released (and can be called from reinit as well).
+ * It should never change any chip state, or global driver state.
+ * (The only exception to global state is freeing the port0 port0_skbs.)
*/
-void ipath_free_pddata(struct ipath_devdata *dd, u32 port, int freehdrq)
+void ipath_free_pddata(struct ipath_devdata *dd, struct ipath_portdata *pd)
{
- struct ipath_portdata *pd = dd->ipath_pd[port];
-
if (!pd)
return;
- if (freehdrq)
- /*
- * only clear and free portdata if we are going to also
- * release the hdrq, otherwise we leak the hdrq on each
- * open/close cycle
- */
- dd->ipath_pd[port] = NULL;
- if (freehdrq && pd->port_rcvhdrq) {
+
+ if (pd->port_rcvhdrq) {
ipath_cdbg(VERBOSE, "free closed port %d rcvhdrq @ %p "
"(size=%lu)\n", pd->port_port, pd->port_rcvhdrq,
(unsigned long) pd->port_rcvhdrq_size);
dma_free_coherent(&dd->pcidev->dev, pd->port_rcvhdrq_size,
pd->port_rcvhdrq, pd->port_rcvhdrq_phys);
pd->port_rcvhdrq = NULL;
+ if (pd->port_rcvhdrtail_kvaddr) {
+ dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
+ pd->port_rcvhdrtail_kvaddr,
+ pd->port_rcvhdrqtailaddr_phys);
+ pd->port_rcvhdrtail_kvaddr = NULL;
+ }
}
- if (port && pd->port_rcvegrbuf) {
- /* always free this */
- if (pd->port_rcvegrbuf) {
- unsigned e;
-
- for (e = 0; e < pd->port_rcvegrbuf_chunks; e++) {
- void *base = pd->port_rcvegrbuf[e];
- size_t size = pd->port_rcvegrbuf_size;
-
- ipath_cdbg(VERBOSE, "egrbuf free(%p, %lu), "
- "chunk %u/%u\n", base,
- (unsigned long) size,
- e, pd->port_rcvegrbuf_chunks);
- dma_free_coherent(
- &dd->pcidev->dev, size, base,
- pd->port_rcvegrbuf_phys[e]);
- }
- vfree(pd->port_rcvegrbuf);
- pd->port_rcvegrbuf = NULL;
- vfree(pd->port_rcvegrbuf_phys);
- pd->port_rcvegrbuf_phys = NULL;
+ if (pd->port_port && pd->port_rcvegrbuf) {
+ unsigned e;
+
+ for (e = 0; e < pd->port_rcvegrbuf_chunks; e++) {
+ void *base = pd->port_rcvegrbuf[e];
+ size_t size = pd->port_rcvegrbuf_size;
+
+ ipath_cdbg(VERBOSE, "egrbuf free(%p, %lu), "
+ "chunk %u/%u\n", base,
+ (unsigned long) size,
+ e, pd->port_rcvegrbuf_chunks);
+ dma_free_coherent(&dd->pcidev->dev, size,
+ base, pd->port_rcvegrbuf_phys[e]);
}
+ kfree(pd->port_rcvegrbuf);
+ pd->port_rcvegrbuf = NULL;
+ kfree(pd->port_rcvegrbuf_phys);
+ pd->port_rcvegrbuf_phys = NULL;
pd->port_rcvegrbuf_chunks = 0;
- } else if (port == 0 && dd->ipath_port0_skbs) {
+ } else if (pd->port_port == 0 && dd->ipath_port0_skbinfo) {
unsigned e;
- struct sk_buff **skbs = dd->ipath_port0_skbs;
+ struct ipath_skbinfo *skbinfo = dd->ipath_port0_skbinfo;
- dd->ipath_port0_skbs = NULL;
- ipath_cdbg(VERBOSE, "free closed port %d ipath_port0_skbs "
- "@ %p\n", pd->port_port, skbs);
+ dd->ipath_port0_skbinfo = NULL;
+ ipath_cdbg(VERBOSE, "free closed port %d "
+ "ipath_port0_skbinfo @ %p\n", pd->port_port,
+ skbinfo);
for (e = 0; e < dd->ipath_rcvegrcnt; e++)
- if (skbs[e])
- dev_kfree_skb(skbs[e]);
- vfree(skbs);
- }
- if (freehdrq) {
- kfree(pd->port_tid_pg_list);
- kfree(pd);
+ if (skbinfo[e].skb) {
+ pci_unmap_single(dd->pcidev, skbinfo[e].phys,
+ dd->ipath_ibmaxlen,
+ PCI_DMA_FROMDEVICE);
+ dev_kfree_skb(skbinfo[e].skb);
+ }
+ vfree(skbinfo);
}
+ kfree(pd->port_tid_pg_list);
+ vfree(pd->subport_uregbase);
+ vfree(pd->subport_rcvegrbuf);
+ vfree(pd->subport_rcvhdr_base);
+ kfree(pd);
}
static int __init infinipath_init(void)
{
int ret;
- ipath_dbg(KERN_INFO DRIVER_LOAD_MSG "%s", ipath_core_version);
+ ipath_dbg(KERN_INFO DRIVER_LOAD_MSG "%s", ib_ipath_version);
/*
* These must be called before the driver is registered with
@@ -1775,8 +2005,18 @@ static int __init infinipath_init(void)
goto bail_group;
}
+ ret = ipath_diagpkt_add();
+ if (ret < 0) {
+ printk(KERN_ERR IPATH_DRV_NAME ": Unable to create "
+ "diag data device: error %d\n", -ret);
+ goto bail_ipathfs;
+ }
+
goto bail;
+bail_ipathfs:
+ ipath_exit_ipathfs();
+
bail_group:
ipath_driver_remove_group(&ipath_driver.driver);
@@ -1790,139 +2030,12 @@ bail:
return ret;
}
-static void cleanup_device(struct ipath_devdata *dd)
-{
- int port;
-
- ipath_shutdown_device(dd);
-
- if (*dd->ipath_statusp & IPATH_STATUS_CHIP_PRESENT) {
- /* can't do anything more with chip; needs re-init */
- *dd->ipath_statusp &= ~IPATH_STATUS_CHIP_PRESENT;
- if (dd->ipath_kregbase) {
- /*
- * if we haven't already cleaned up before these are
- * to ensure any register reads/writes "fail" until
- * re-init
- */
- dd->ipath_kregbase = NULL;
- dd->ipath_kregvirt = NULL;
- dd->ipath_uregbase = 0;
- dd->ipath_sregbase = 0;
- dd->ipath_cregbase = 0;
- dd->ipath_kregsize = 0;
- }
- ipath_disable_wc(dd);
- }
-
- if (dd->ipath_pioavailregs_dma) {
- dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
- (void *) dd->ipath_pioavailregs_dma,
- dd->ipath_pioavailregs_phys);
- dd->ipath_pioavailregs_dma = NULL;
- }
-
- if (dd->ipath_pageshadow) {
- struct page **tmpp = dd->ipath_pageshadow;
- int i, cnt = 0;
-
- ipath_cdbg(VERBOSE, "Unlocking any expTID pages still "
- "locked\n");
- for (port = 0; port < dd->ipath_cfgports; port++) {
- int port_tidbase = port * dd->ipath_rcvtidcnt;
- int maxtid = port_tidbase + dd->ipath_rcvtidcnt;
- for (i = port_tidbase; i < maxtid; i++) {
- if (!tmpp[i])
- continue;
- ipath_release_user_pages(&tmpp[i], 1);
- tmpp[i] = NULL;
- cnt++;
- }
- }
- if (cnt) {
- ipath_stats.sps_pageunlocks += cnt;
- ipath_cdbg(VERBOSE, "There were still %u expTID "
- "entries locked\n", cnt);
- }
- if (ipath_stats.sps_pagelocks ||
- ipath_stats.sps_pageunlocks)
- ipath_cdbg(VERBOSE, "%llu pages locked, %llu "
- "unlocked via ipath_m{un}lock\n",
- (unsigned long long)
- ipath_stats.sps_pagelocks,
- (unsigned long long)
- ipath_stats.sps_pageunlocks);
-
- ipath_cdbg(VERBOSE, "Free shadow page tid array at %p\n",
- dd->ipath_pageshadow);
- vfree(dd->ipath_pageshadow);
- dd->ipath_pageshadow = NULL;
- }
-
- /*
- * free any resources still in use (usually just kernel ports)
- * at unload
- */
- for (port = 0; port < dd->ipath_cfgports; port++)
- ipath_free_pddata(dd, port, 1);
- kfree(dd->ipath_pd);
- /*
- * debuggability, in case some cleanup path tries to use it
- * after this
- */
- dd->ipath_pd = NULL;
-}
-
static void __exit infinipath_cleanup(void)
{
- struct ipath_devdata *dd, *tmp;
- unsigned long flags;
-
ipath_exit_ipathfs();
ipath_driver_remove_group(&ipath_driver.driver);
- spin_lock_irqsave(&ipath_devs_lock, flags);
-
- /*
- * turn off rcv, send, and interrupts for all ports, all drivers
- * should also hard reset the chip here?
- * free up port 0 (kernel) rcvhdr, egr bufs, and eventually tid bufs
- * for all versions of the driver, if they were allocated
- */
- list_for_each_entry_safe(dd, tmp, &ipath_dev_list, ipath_list) {
- spin_unlock_irqrestore(&ipath_devs_lock, flags);
-
- if (dd->ipath_kregbase)
- cleanup_device(dd);
-
- if (dd->pcidev) {
- if (dd->pcidev->irq) {
- ipath_cdbg(VERBOSE,
- "unit %u free_irq of irq %x\n",
- dd->ipath_unit, dd->pcidev->irq);
- free_irq(dd->pcidev->irq, dd);
- } else
- ipath_dbg("irq is 0, not doing free_irq "
- "for unit %u\n", dd->ipath_unit);
-
- /*
- * we check for NULL here, because it's outside
- * the kregbase check, and we need to call it
- * after the free_irq. Thus it's possible that
- * the function pointers were never initialized.
- */
- if (dd->ipath_f_cleanup)
- /* clean up chip-specific stuff */
- dd->ipath_f_cleanup(dd);
-
- dd->pcidev = NULL;
- }
- spin_lock_irqsave(&ipath_devs_lock, flags);
- }
-
- spin_unlock_irqrestore(&ipath_devs_lock, flags);
-
ipath_cdbg(VERBOSE, "Unregistering pci driver\n");
pci_unregister_driver(&ipath_driver);
@@ -1988,5 +2101,22 @@ bail:
return ret;
}
+int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv)
+{
+ u64 val;
+	if (new_pol_inv > INFINIPATH_XGXS_RX_POL_MASK)
+		return -1;
+	if (dd->ipath_rx_pol_inv != new_pol_inv) {
+ dd->ipath_rx_pol_inv = new_pol_inv;
+ val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);
+ val &= ~(INFINIPATH_XGXS_RX_POL_MASK <<
+ INFINIPATH_XGXS_RX_POL_SHIFT);
+ val |= ((u64)dd->ipath_rx_pol_inv) <<
+ INFINIPATH_XGXS_RX_POL_SHIFT;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
+ }
+ return 0;
+}
module_init(infinipath_init);
module_exit(infinipath_cleanup);
diff --git a/drivers/infiniband/hw/ipath/ipath_eeprom.c b/drivers/infiniband/hw/ipath/ipath_eeprom.c
index a2f1ceafcca9..a4019a6b7560 100644
--- a/drivers/infiniband/hw/ipath/ipath_eeprom.c
+++ b/drivers/infiniband/hw/ipath/ipath_eeprom.c
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2006 QLogic, Inc. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -99,9 +100,9 @@ static int i2c_gpio_set(struct ipath_devdata *dd,
gpioval = &dd->ipath_gpio_out;
read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extctrl);
if (line == i2c_line_scl)
- mask = ipath_gpio_scl;
+ mask = dd->ipath_gpio_scl;
else
- mask = ipath_gpio_sda;
+ mask = dd->ipath_gpio_sda;
if (new_line_state == i2c_line_high)
/* tri-state the output rather than force high */
@@ -118,12 +119,12 @@ static int i2c_gpio_set(struct ipath_devdata *dd,
write_val = 0x0UL;
if (line == i2c_line_scl) {
- write_val <<= ipath_gpio_scl_num;
- *gpioval = *gpioval & ~(1UL << ipath_gpio_scl_num);
+ write_val <<= dd->ipath_gpio_scl_num;
+ *gpioval = *gpioval & ~(1UL << dd->ipath_gpio_scl_num);
*gpioval |= write_val;
} else {
- write_val <<= ipath_gpio_sda_num;
- *gpioval = *gpioval & ~(1UL << ipath_gpio_sda_num);
+ write_val <<= dd->ipath_gpio_sda_num;
+ *gpioval = *gpioval & ~(1UL << dd->ipath_gpio_sda_num);
*gpioval |= write_val;
}
ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_out, *gpioval);
@@ -156,9 +157,9 @@ static int i2c_gpio_get(struct ipath_devdata *dd,
read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extctrl);
/* config line to be an input */
if (line == i2c_line_scl)
- mask = ipath_gpio_scl;
+ mask = dd->ipath_gpio_scl;
else
- mask = ipath_gpio_sda;
+ mask = dd->ipath_gpio_sda;
write_val = read_val & ~mask;
ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, write_val);
read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extstatus);
@@ -186,6 +187,7 @@ bail:
static void i2c_wait_for_writes(struct ipath_devdata *dd)
{
(void)ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
+ rmb();
}
static void scl_out(struct ipath_devdata *dd, u8 bit)
@@ -600,8 +602,31 @@ void ipath_get_eeprom_info(struct ipath_devdata *dd)
guid = *(__be64 *) ifp->if_guid;
dd->ipath_guid = guid;
dd->ipath_nguid = ifp->if_numguid;
- memcpy(dd->ipath_serial, ifp->if_serial,
- sizeof(ifp->if_serial));
+ /*
+ * Things are slightly complicated by the desire to transparently
+ * support both the Pathscale 10-digit serial number and the QLogic
+ * 13-character version.
+ */
+ if ((ifp->if_fversion > 1) && ifp->if_sprefix[0]
+ && ((u8 *)ifp->if_sprefix)[0] != 0xFF) {
+ /* This board has a Serial-prefix, which is stored
+ * elsewhere for backward-compatibility.
+ */
+ char *snp = dd->ipath_serial;
+ int len;
+ memcpy(snp, ifp->if_sprefix, sizeof ifp->if_sprefix);
+ snp[sizeof ifp->if_sprefix] = '\0';
+ len = strlen(snp);
+ snp += len;
+ len = (sizeof dd->ipath_serial) - len;
+ if (len > sizeof ifp->if_serial) {
+ len = sizeof ifp->if_serial;
+ }
+ memcpy(snp, ifp->if_serial, len);
+ } else
+ memcpy(dd->ipath_serial, ifp->if_serial,
+ sizeof ifp->if_serial);
+
ipath_cdbg(VERBOSE, "Initted GUID to %llx from eeprom\n",
(unsigned long long) be64_to_cpu(dd->ipath_guid));
diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c
index ada267e41f6c..a9ddc6911f66 100644
--- a/drivers/infiniband/hw/ipath/ipath_file_ops.c
+++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2006 QLogic, Inc. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -38,8 +39,13 @@
#include <asm/pgtable.h>
#include "ipath_kernel.h"
-#include "ips_common.h"
-#include "ipath_layer.h"
+#include "ipath_common.h"
+
+/*
+ * mmap64 doesn't allow all 64 bits for 32-bit applications
+ * so only use the low 43 bits.
+ */
+#define MMAP64_MASK 0x7FFFFFFFFFFUL
static int ipath_open(struct inode *, struct file *);
static int ipath_close(struct inode *, struct file *);
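/*
 * A minimal sketch of how the 43-bit MMAP64_MASK above is meant to be
 * used: a kernel virtual address is reduced to a token that fits the
 * offset a 32-bit mmap64() caller can pass, and the same masking is
 * applied when matching the offset back to an address. The helper name
 * is illustrative.
 */
static inline int mmap64_token_matches(u64 pgaddr, void *kvaddr)
{
	/* compare the masked kernel address against the user-supplied offset */
	return pgaddr == ((u64) (unsigned long) kvaddr & MMAP64_MASK);
}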
@@ -57,18 +63,35 @@ static struct file_operations ipath_file_ops = {
.mmap = ipath_mmap
};
-static int ipath_get_base_info(struct ipath_portdata *pd,
+static int ipath_get_base_info(struct file *fp,
void __user *ubase, size_t ubase_size)
{
+ struct ipath_portdata *pd = port_fp(fp);
int ret = 0;
struct ipath_base_info *kinfo = NULL;
struct ipath_devdata *dd = pd->port_dd;
+ unsigned subport_cnt;
+ int shared, master;
+ size_t sz;
+
+ subport_cnt = pd->port_subport_cnt;
+ if (!subport_cnt) {
+ shared = 0;
+ master = 0;
+ subport_cnt = 1;
+ } else {
+ shared = 1;
+ master = !subport_fp(fp);
+ }
- if (ubase_size < sizeof(*kinfo)) {
+ sz = sizeof(*kinfo);
+ /* If port sharing is not requested, allow the old size structure */
+ if (!shared)
+ sz -= 3 * sizeof(u64);
+ if (ubase_size < sz) {
ipath_cdbg(PROC,
- "Base size %lu, need %lu (version mismatch?)\n",
- (unsigned long) ubase_size,
- (unsigned long) sizeof(*kinfo));
+ "Base size %zu, need %zu (version mismatch?)\n",
+ ubase_size, sz);
ret = -EINVAL;
goto bail;
}
@@ -95,7 +118,9 @@ static int ipath_get_base_info(struct ipath_portdata *pd,
kinfo->spi_rcv_egrperchunk = pd->port_rcvegrbufs_perchunk;
kinfo->spi_rcv_egrchunksize = kinfo->spi_rcv_egrbuftotlen /
pd->port_rcvegrbuf_chunks;
- kinfo->spi_tidcnt = dd->ipath_rcvtidcnt;
+ kinfo->spi_tidcnt = dd->ipath_rcvtidcnt / subport_cnt;
+ if (master)
+ kinfo->spi_tidcnt += dd->ipath_rcvtidcnt % subport_cnt;
/*
* for this use, may be ipath_cfgports summed over all chips that
	 * are configured and present
@@ -118,30 +143,75 @@ static int ipath_get_base_info(struct ipath_portdata *pd,
* page_address() macro worked, but in 2.6.11, even that returns the
* full 64 bit address (upper bits all 1's). So far, using the
* physical addresses (or chip offsets, for chip mapping) works, but
- * no doubt some future kernel release will chang that, and we'll be
- * on to yet another method of dealing with this
+ * no doubt some future kernel release will change that, and we'll be
+ * on to yet another method of dealing with this.
*/
kinfo->spi_rcvhdr_base = (u64) pd->port_rcvhdrq_phys;
+ kinfo->spi_rcvhdr_tailaddr = (u64) pd->port_rcvhdrqtailaddr_phys;
kinfo->spi_rcv_egrbufs = (u64) pd->port_rcvegr_phys;
kinfo->spi_pioavailaddr = (u64) dd->ipath_pioavailregs_phys;
kinfo->spi_status = (u64) kinfo->spi_pioavailaddr +
(void *) dd->ipath_statusp -
(void *) dd->ipath_pioavailregs_dma;
- kinfo->spi_piobufbase = (u64) pd->port_piobufs;
- kinfo->__spi_uregbase =
- dd->ipath_uregbase + dd->ipath_palign * pd->port_port;
+ if (!shared) {
+ kinfo->spi_piocnt = dd->ipath_pbufsport;
+ kinfo->spi_piobufbase = (u64) pd->port_piobufs;
+ kinfo->__spi_uregbase = (u64) dd->ipath_uregbase +
+ dd->ipath_palign * pd->port_port;
+ } else if (master) {
+ kinfo->spi_piocnt = (dd->ipath_pbufsport / subport_cnt) +
+ (dd->ipath_pbufsport % subport_cnt);
+ /* Master's PIO buffers are after all the slave's */
+ kinfo->spi_piobufbase = (u64) pd->port_piobufs +
+ dd->ipath_palign *
+ (dd->ipath_pbufsport - kinfo->spi_piocnt);
+ kinfo->__spi_uregbase = (u64) dd->ipath_uregbase +
+ dd->ipath_palign * pd->port_port;
+ } else {
+ unsigned slave = subport_fp(fp) - 1;
+
+ kinfo->spi_piocnt = dd->ipath_pbufsport / subport_cnt;
+ kinfo->spi_piobufbase = (u64) pd->port_piobufs +
+ dd->ipath_palign * kinfo->spi_piocnt * slave;
+ kinfo->__spi_uregbase = ((u64) pd->subport_uregbase +
+ PAGE_SIZE * slave) & MMAP64_MASK;
- kinfo->spi_pioindex = dd->ipath_pbufsport * (pd->port_port - 1);
- kinfo->spi_piocnt = dd->ipath_pbufsport;
+ kinfo->spi_rcvhdr_base = ((u64) pd->subport_rcvhdr_base +
+ pd->port_rcvhdrq_size * slave) & MMAP64_MASK;
+ kinfo->spi_rcvhdr_tailaddr =
+ (u64) pd->port_rcvhdrqtailaddr_phys & MMAP64_MASK;
+ kinfo->spi_rcv_egrbufs = ((u64) pd->subport_rcvegrbuf +
+ dd->ipath_rcvegrcnt * dd->ipath_rcvegrbufsize * slave) &
+ MMAP64_MASK;
+ }
+
+ kinfo->spi_pioindex = (kinfo->spi_piobufbase - dd->ipath_piobufbase) /
+ dd->ipath_palign;
kinfo->spi_pioalign = dd->ipath_palign;
kinfo->spi_qpair = IPATH_KD_QP;
kinfo->spi_piosize = dd->ipath_ibmaxlen;
kinfo->spi_mtu = dd->ipath_ibmaxlen; /* maxlen, not ibmtu */
kinfo->spi_port = pd->port_port;
+ kinfo->spi_subport = subport_fp(fp);
kinfo->spi_sw_version = IPATH_KERN_SWVERSION;
kinfo->spi_hw_version = dd->ipath_revision;
+ if (master) {
+ kinfo->spi_runtime_flags |= IPATH_RUNTIME_MASTER;
+ kinfo->spi_subport_uregbase =
+ (u64) pd->subport_uregbase & MMAP64_MASK;
+ kinfo->spi_subport_rcvegrbuf =
+ (u64) pd->subport_rcvegrbuf & MMAP64_MASK;
+ kinfo->spi_subport_rcvhdr_base =
+ (u64) pd->subport_rcvhdr_base & MMAP64_MASK;
+ ipath_cdbg(PROC, "port %u flags %x %llx %llx %llx\n",
+ kinfo->spi_port, kinfo->spi_runtime_flags,
+ (unsigned long long) kinfo->spi_subport_uregbase,
+ (unsigned long long) kinfo->spi_subport_rcvegrbuf,
+ (unsigned long long) kinfo->spi_subport_rcvhdr_base);
+ }
+
if (copy_to_user(ubase, kinfo, sizeof(*kinfo)))
ret = -EFAULT;
@@ -153,6 +223,7 @@ bail:
/**
* ipath_tid_update - update a port TID
* @pd: the port
+ * @fp: the ipath device file
* @ti: the TID information
*
* The new implementation as of Oct 2004 is that the driver assigns
@@ -175,11 +246,11 @@ bail:
* virtually contiguous pages, that should change to improve
* performance.
*/
-static int ipath_tid_update(struct ipath_portdata *pd,
+static int ipath_tid_update(struct ipath_portdata *pd, struct file *fp,
const struct ipath_tid_info *ti)
{
int ret = 0, ntids;
- u32 tid, porttid, cnt, i, tidcnt;
+ u32 tid, porttid, cnt, i, tidcnt, tidoff;
u16 *tidlist;
struct ipath_devdata *dd = pd->port_dd;
u64 physaddr;
@@ -187,6 +258,7 @@ static int ipath_tid_update(struct ipath_portdata *pd,
u64 __iomem *tidbase;
unsigned long tidmap[8];
struct page **pagep = NULL;
+ unsigned subport = subport_fp(fp);
if (!dd->ipath_pageshadow) {
ret = -ENOMEM;
@@ -203,20 +275,34 @@ static int ipath_tid_update(struct ipath_portdata *pd,
ret = -EFAULT;
goto done;
}
- tidcnt = dd->ipath_rcvtidcnt;
- if (cnt >= tidcnt) {
+ porttid = pd->port_port * dd->ipath_rcvtidcnt;
+ if (!pd->port_subport_cnt) {
+ tidcnt = dd->ipath_rcvtidcnt;
+ tid = pd->port_tidcursor;
+ tidoff = 0;
+ } else if (!subport) {
+ tidcnt = (dd->ipath_rcvtidcnt / pd->port_subport_cnt) +
+ (dd->ipath_rcvtidcnt % pd->port_subport_cnt);
+ tidoff = dd->ipath_rcvtidcnt - tidcnt;
+ porttid += tidoff;
+ tid = tidcursor_fp(fp);
+ } else {
+ tidcnt = dd->ipath_rcvtidcnt / pd->port_subport_cnt;
+ tidoff = tidcnt * (subport - 1);
+ porttid += tidoff;
+ tid = tidcursor_fp(fp);
+ }
+ if (cnt > tidcnt) {
/* make sure it all fits in port_tid_pg_list */
dev_info(&dd->pcidev->dev, "Process tried to allocate %u "
"TIDs, only trying max (%u)\n", cnt, tidcnt);
cnt = tidcnt;
}
- pagep = (struct page **)pd->port_tid_pg_list;
- tidlist = (u16 *) (&pagep[cnt]);
+ pagep = &((struct page **) pd->port_tid_pg_list)[tidoff];
+ tidlist = &((u16 *) &pagep[dd->ipath_rcvtidcnt])[tidoff];
memset(tidmap, 0, sizeof(tidmap));
- tid = pd->port_tidcursor;
/* before decrement; chip actual # */
- porttid = pd->port_port * tidcnt;
ntids = tidcnt;
tidbase = (u64 __iomem *) (((char __iomem *) dd->ipath_kregbase) +
dd->ipath_rcvtidbase +
@@ -273,16 +359,19 @@ static int ipath_tid_update(struct ipath_portdata *pd,
ret = -ENOMEM;
break;
}
- tidlist[i] = tid;
+ tidlist[i] = tid + tidoff;
ipath_cdbg(VERBOSE, "Updating idx %u to TID %u, "
- "vaddr %lx\n", i, tid, vaddr);
+ "vaddr %lx\n", i, tid + tidoff, vaddr);
/* we "know" system pages and TID pages are same size */
dd->ipath_pageshadow[porttid + tid] = pagep[i];
+ dd->ipath_physshadow[porttid + tid] = ipath_map_page(
+ dd->pcidev, pagep[i], 0, PAGE_SIZE,
+ PCI_DMA_FROMDEVICE);
/*
* don't need atomic or it's overhead
*/
__set_bit(tid, tidmap);
- physaddr = page_to_phys(pagep[i]);
+ physaddr = dd->ipath_physshadow[porttid + tid];
ipath_stats.sps_pagelocks++;
ipath_cdbg(VERBOSE,
"TID %u, vaddr %lx, physaddr %llx pgp %p\n",
@@ -316,6 +405,9 @@ static int ipath_tid_update(struct ipath_portdata *pd,
tid);
dd->ipath_f_put_tid(dd, &tidbase[tid], 1,
dd->ipath_tidinvalid);
+ pci_unmap_page(dd->pcidev,
+ dd->ipath_physshadow[porttid + tid],
+ PAGE_SIZE, PCI_DMA_FROMDEVICE);
dd->ipath_pageshadow[porttid + tid] = NULL;
ipath_stats.sps_pageunlocks++;
}
@@ -340,7 +432,10 @@ static int ipath_tid_update(struct ipath_portdata *pd,
}
if (tid == tidcnt)
tid = 0;
- pd->port_tidcursor = tid;
+ if (!pd->port_subport_cnt)
+ pd->port_tidcursor = tid;
+ else
+ tidcursor_fp(fp) = tid;
}
done:
@@ -353,6 +448,7 @@ done:
/**
* ipath_tid_free - free a port TID
* @pd: the port
+ * @subport: the subport
* @ti: the TID info
*
* right now we are unlocking one page at a time, but since
@@ -366,7 +462,7 @@ done:
* they pass in to us.
*/
-static int ipath_tid_free(struct ipath_portdata *pd,
+static int ipath_tid_free(struct ipath_portdata *pd, unsigned subport,
const struct ipath_tid_info *ti)
{
int ret = 0;
@@ -387,11 +483,20 @@ static int ipath_tid_free(struct ipath_portdata *pd,
}
porttid = pd->port_port * dd->ipath_rcvtidcnt;
+ if (!pd->port_subport_cnt)
+ tidcnt = dd->ipath_rcvtidcnt;
+ else if (!subport) {
+ tidcnt = (dd->ipath_rcvtidcnt / pd->port_subport_cnt) +
+ (dd->ipath_rcvtidcnt % pd->port_subport_cnt);
+ porttid += dd->ipath_rcvtidcnt - tidcnt;
+ } else {
+ tidcnt = dd->ipath_rcvtidcnt / pd->port_subport_cnt;
+ porttid += tidcnt * (subport - 1);
+ }
tidbase = (u64 __iomem *) ((char __iomem *)(dd->ipath_kregbase) +
dd->ipath_rcvtidbase +
porttid * sizeof(*tidbase));
- tidcnt = dd->ipath_rcvtidcnt;
limit = sizeof(tidmap) * BITS_PER_BYTE;
if (limit > tidcnt)
/* just in case size changes in future */
@@ -416,6 +521,9 @@ static int ipath_tid_free(struct ipath_portdata *pd,
pd->port_pid, tid);
dd->ipath_f_put_tid(dd, &tidbase[tid], 1,
dd->ipath_tidinvalid);
+ pci_unmap_page(dd->pcidev,
+ dd->ipath_physshadow[porttid + tid],
+ PAGE_SIZE, PCI_DMA_FROMDEVICE);
ipath_release_user_pages(
&dd->ipath_pageshadow[porttid + tid], 1);
dd->ipath_pageshadow[porttid + tid] = NULL;
@@ -456,7 +564,7 @@ static int ipath_set_part_key(struct ipath_portdata *pd, u16 key)
u16 lkey = key & 0x7FFF;
int ret;
- if (lkey == (IPS_DEFAULT_P_KEY & 0x7FFF)) {
+ if (lkey == (IPATH_DEFAULT_P_KEY & 0x7FFF)) {
/* nothing to do; this key always valid */
ret = 0;
goto bail;
@@ -580,20 +688,24 @@ bail:
/**
* ipath_manage_rcvq - manage a port's receive queue
* @pd: the port
+ * @subport: the subport
* @start_stop: action to carry out
*
* start_stop == 0 disables receive on the port, for use in queue
* overflow conditions. start_stop==1 re-enables, to be used to
* re-init the software copy of the head register
*/
-static int ipath_manage_rcvq(struct ipath_portdata *pd, int start_stop)
+static int ipath_manage_rcvq(struct ipath_portdata *pd, unsigned subport,
+ int start_stop)
{
struct ipath_devdata *dd = pd->port_dd;
u64 tval;
- ipath_cdbg(PROC, "%sabling rcv for unit %u port %u\n",
+ ipath_cdbg(PROC, "%sabling rcv for unit %u port %u:%u\n",
start_stop ? "en" : "dis", dd->ipath_unit,
- pd->port_port);
+ pd->port_port, subport);
+ if (subport)
+ goto bail;
/* atomically clear receive enable port. */
if (start_stop) {
/*
@@ -608,7 +720,7 @@ static int ipath_manage_rcvq(struct ipath_portdata *pd, int start_stop)
* updated and correct itself, even in the face of software
* bugs.
*/
- *pd->port_rcvhdrtail_kvaddr = 0;
+ *(volatile u64 *)pd->port_rcvhdrtail_kvaddr = 0;
set_bit(INFINIPATH_R_PORTENABLE_SHIFT + pd->port_port,
&dd->ipath_rcvctrl);
} else
@@ -629,6 +741,7 @@ static int ipath_manage_rcvq(struct ipath_portdata *pd, int start_stop)
tval = ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port);
}
/* always; new head should be equal to new tail; see above */
+bail:
return 0;
}
@@ -686,6 +799,36 @@ static void ipath_clean_part_key(struct ipath_portdata *pd,
}
}
+/*
+ * Initialize the port data with the receive buffer sizes
+ * so this can be done while the master port is locked.
+ * Otherwise, there is a race with a slave opening the port
+ * and seeing these fields uninitialized.
+ */
+static void init_user_egr_sizes(struct ipath_portdata *pd)
+{
+ struct ipath_devdata *dd = pd->port_dd;
+ unsigned egrperchunk, egrcnt, size;
+
+ /*
+ * to avoid wasting a lot of memory, we allocate 32KB chunks of
+ * physically contiguous memory, advance through it until used up
+ * and then allocate more. Of course, we need memory to store those
+ * extra pointers, now. Started out with 256KB, but under heavy
+ * memory pressure (creating large files and then copying them over
+ * NFS while doing lots of MPI jobs), we hit some allocation
+ * failures, even though we can sleep... (2.6.10) Still get
+ * failures at 64K. 32K is the lowest we can go without wasting
+ * additional memory.
+ */
+ size = 0x8000;
+ egrperchunk = size / dd->ipath_rcvegrbufsize;
+ egrcnt = dd->ipath_rcvegrcnt;
+ pd->port_rcvegrbuf_chunks = (egrcnt + egrperchunk - 1) / egrperchunk;
+ pd->port_rcvegrbufs_perchunk = egrperchunk;
+ pd->port_rcvegrbuf_size = size;
+}
+
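/*
 * A minimal sketch of the chunking math in init_user_egr_sizes() above:
 * buffers per chunk is the chunk size divided by the eager buffer size,
 * and the chunk count is the eager TID count divided by that, rounded
 * up. Names here are illustrative.
 */
static unsigned egr_chunks_needed(unsigned egrcnt, unsigned egrbufsize,
				  unsigned chunksize)
{
	unsigned perchunk = chunksize / egrbufsize;	/* buffers that fit in one chunk */

	return (egrcnt + perchunk - 1) / perchunk;	/* round up so every buffer has a home */
}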
/**
* ipath_create_user_egr - allocate eager TID buffers
* @pd: the port to allocate TID buffers for
@@ -701,9 +844,18 @@ static void ipath_clean_part_key(struct ipath_portdata *pd,
static int ipath_create_user_egr(struct ipath_portdata *pd)
{
struct ipath_devdata *dd = pd->port_dd;
- unsigned e, egrcnt, alloced, egrperchunk, chunk, egrsize, egroff;
+ unsigned e, egrcnt, egrperchunk, chunk, egrsize, egroff;
size_t size;
int ret;
+ gfp_t gfp_flags;
+
+ /*
+ * GFP_USER, but without GFP_FS, so buffer cache can be
+ * coalesced (we hope); otherwise, even at order 4,
+ * heavy filesystem activity makes these fail, and we can
+ * use compound pages.
+ */
+ gfp_flags = __GFP_WAIT | __GFP_IO | __GFP_COMP;
egrcnt = dd->ipath_rcvegrcnt;
/* TID number offset for this port */
@@ -712,44 +864,23 @@ static int ipath_create_user_egr(struct ipath_portdata *pd)
ipath_cdbg(VERBOSE, "Allocating %d egr buffers, at egrtid "
"offset %x, egrsize %u\n", egrcnt, egroff, egrsize);
- /*
- * to avoid wasting a lot of memory, we allocate 32KB chunks of
- * physically contiguous memory, advance through it until used up
- * and then allocate more. Of course, we need memory to store those
- * extra pointers, now. Started out with 256KB, but under heavy
- * memory pressure (creating large files and then copying them over
- * NFS while doing lots of MPI jobs), we hit some allocation
- * failures, even though we can sleep... (2.6.10) Still get
- * failures at 64K. 32K is the lowest we can go without waiting
- * more memory again. It seems likely that the coalescing in
- * free_pages, etc. still has issues (as it has had previously
- * during 2.6.x development).
- */
- size = 0x8000;
- alloced = ALIGN(egrsize * egrcnt, size);
- egrperchunk = size / egrsize;
- chunk = (egrcnt + egrperchunk - 1) / egrperchunk;
- pd->port_rcvegrbuf_chunks = chunk;
- pd->port_rcvegrbufs_perchunk = egrperchunk;
- pd->port_rcvegrbuf_size = size;
- pd->port_rcvegrbuf = vmalloc(chunk * sizeof(pd->port_rcvegrbuf[0]));
+ chunk = pd->port_rcvegrbuf_chunks;
+ egrperchunk = pd->port_rcvegrbufs_perchunk;
+ size = pd->port_rcvegrbuf_size;
+ pd->port_rcvegrbuf = kmalloc(chunk * sizeof(pd->port_rcvegrbuf[0]),
+ GFP_KERNEL);
if (!pd->port_rcvegrbuf) {
ret = -ENOMEM;
goto bail;
}
pd->port_rcvegrbuf_phys =
- vmalloc(chunk * sizeof(pd->port_rcvegrbuf_phys[0]));
+ kmalloc(chunk * sizeof(pd->port_rcvegrbuf_phys[0]),
+ GFP_KERNEL);
if (!pd->port_rcvegrbuf_phys) {
ret = -ENOMEM;
goto bail_rcvegrbuf;
}
for (e = 0; e < pd->port_rcvegrbuf_chunks; e++) {
- /*
- * GFP_USER, but without GFP_FS, so buffer cache can be
- * coalesced (we hope); otherwise, even at order 4,
- * heavy filesystem activity makes these fail
- */
- gfp_t gfp_flags = __GFP_WAIT | __GFP_IO | __GFP_COMP;
pd->port_rcvegrbuf[e] = dma_alloc_coherent(
&dd->pcidev->dev, size, &pd->port_rcvegrbuf_phys[e],
@@ -783,154 +914,63 @@ static int ipath_create_user_egr(struct ipath_portdata *pd)
bail_rcvegrbuf_phys:
for (e = 0; e < pd->port_rcvegrbuf_chunks &&
- pd->port_rcvegrbuf[e]; e++)
+ pd->port_rcvegrbuf[e]; e++) {
dma_free_coherent(&dd->pcidev->dev, size,
pd->port_rcvegrbuf[e],
pd->port_rcvegrbuf_phys[e]);
- vfree(pd->port_rcvegrbuf_phys);
+ }
+ kfree(pd->port_rcvegrbuf_phys);
pd->port_rcvegrbuf_phys = NULL;
bail_rcvegrbuf:
- vfree(pd->port_rcvegrbuf);
+ kfree(pd->port_rcvegrbuf);
pd->port_rcvegrbuf = NULL;
bail:
return ret;
}
-static int ipath_do_user_init(struct ipath_portdata *pd,
- const struct ipath_user_info *uinfo)
+
+/* common code for the mappings on dma_alloc_coherent mem */
+static int ipath_mmap_mem(struct vm_area_struct *vma,
+ struct ipath_portdata *pd, unsigned len, int write_ok,
+ void *kvaddr, char *what)
{
- int ret = 0;
struct ipath_devdata *dd = pd->port_dd;
- u64 physaddr, uaddr, off, atmp;
- struct page *pagep;
- u32 head32;
- u64 head;
+ unsigned long pfn;
+ int ret;
- /* for now, if major version is different, bail */
- if ((uinfo->spu_userversion >> 16) != IPATH_USER_SWMAJOR) {
+ if ((vma->vm_end - vma->vm_start) > len) {
dev_info(&dd->pcidev->dev,
- "User major version %d not same as driver "
- "major %d\n", uinfo->spu_userversion >> 16,
- IPATH_USER_SWMAJOR);
- ret = -ENODEV;
- goto done;
- }
-
- if ((uinfo->spu_userversion & 0xffff) != IPATH_USER_SWMINOR)
- ipath_dbg("User minor version %d not same as driver "
- "minor %d\n", uinfo->spu_userversion & 0xffff,
- IPATH_USER_SWMINOR);
-
- if (uinfo->spu_rcvhdrsize) {
- ret = ipath_setrcvhdrsize(dd, uinfo->spu_rcvhdrsize);
- if (ret)
- goto done;
+ "FAIL on %s: len %lx > %x\n", what,
+ vma->vm_end - vma->vm_start, len);
+ ret = -EFAULT;
+ goto bail;
}
- /* for now we do nothing with rcvhdrcnt: uinfo->spu_rcvhdrcnt */
-
- /* set up for the rcvhdr Q tail register writeback to user memory */
- if (!uinfo->spu_rcvhdraddr ||
- !access_ok(VERIFY_WRITE, (u64 __user *) (unsigned long)
- uinfo->spu_rcvhdraddr, sizeof(u64))) {
- ipath_dbg("Port %d rcvhdrtail addr %llx not valid\n",
- pd->port_port,
- (unsigned long long) uinfo->spu_rcvhdraddr);
- ret = -EINVAL;
- goto done;
- }
+ if (!write_ok) {
+ if (vma->vm_flags & VM_WRITE) {
+ dev_info(&dd->pcidev->dev,
+ "%s must be mapped readonly\n", what);
+ ret = -EPERM;
+ goto bail;
+ }
- off = offset_in_page(uinfo->spu_rcvhdraddr);
- uaddr = PAGE_MASK & (unsigned long) uinfo->spu_rcvhdraddr;
- ret = ipath_get_user_pages_nocopy(uaddr, &pagep);
- if (ret) {
- dev_info(&dd->pcidev->dev, "Failed to lookup and lock "
- "address %llx for rcvhdrtail: errno %d\n",
- (unsigned long long) uinfo->spu_rcvhdraddr, -ret);
- goto done;
+ /* don't allow them to later change with mprotect */
+ vma->vm_flags &= ~VM_MAYWRITE;
}
- ipath_stats.sps_pagelocks++;
- pd->port_rcvhdrtail_uaddr = uaddr;
- pd->port_rcvhdrtail_pagep = pagep;
- pd->port_rcvhdrtail_kvaddr =
- page_address(pagep);
- pd->port_rcvhdrtail_kvaddr += off;
- physaddr = page_to_phys(pagep) + off;
- ipath_cdbg(VERBOSE, "port %d user addr %llx hdrtailaddr, %llx "
- "physical (off=%llx)\n",
- pd->port_port,
- (unsigned long long) uinfo->spu_rcvhdraddr,
- (unsigned long long) physaddr, (unsigned long long) off);
- ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdrtailaddr,
- pd->port_port, physaddr);
- atmp = ipath_read_kreg64_port(dd,
- dd->ipath_kregs->kr_rcvhdrtailaddr,
- pd->port_port);
- if (physaddr != atmp) {
- ipath_dev_err(dd,
- "Catastrophic software error, "
- "RcvHdrTailAddr%u written as %llx, "
- "read back as %llx\n", pd->port_port,
- (unsigned long long) physaddr,
- (unsigned long long) atmp);
- ret = -EINVAL;
- goto done;
- }
-
- /* for right now, kernel piobufs are at end, so port 1 is at 0 */
- pd->port_piobufs = dd->ipath_piobufbase +
- dd->ipath_pbufsport * (pd->port_port -
- 1) * dd->ipath_palign;
- ipath_cdbg(VERBOSE, "Set base of piobufs for port %u to 0x%x\n",
- pd->port_port, pd->port_piobufs);
- /*
- * Now allocate the rcvhdr Q and eager TIDs; skip the TID
- * array for time being. If pd->port_port > chip-supported,
- * we need to do extra stuff here to handle by handling overflow
- * through port 0, someday
- */
- ret = ipath_create_rcvhdrq(dd, pd);
- if (!ret)
- ret = ipath_create_user_egr(pd);
+ pfn = virt_to_phys(kvaddr) >> PAGE_SHIFT;
+ ret = remap_pfn_range(vma, vma->vm_start, pfn,
+ len, vma->vm_page_prot);
if (ret)
- goto done;
- /* enable receives now */
- /* atomically set enable bit for this port */
- set_bit(INFINIPATH_R_PORTENABLE_SHIFT + pd->port_port,
- &dd->ipath_rcvctrl);
-
- /*
- * set the head registers for this port to the current values
- * of the tail pointers, since we don't know if they were
- * updated on last use of the port.
- */
- head32 = ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port);
- head = (u64) head32;
- ipath_write_ureg(dd, ur_rcvhdrhead, head, pd->port_port);
- head32 = ipath_read_ureg32(dd, ur_rcvegrindextail, pd->port_port);
- ipath_write_ureg(dd, ur_rcvegrindexhead, head32, pd->port_port);
- dd->ipath_lastegrheads[pd->port_port] = -1;
- dd->ipath_lastrcvhdrqtails[pd->port_port] = -1;
- ipath_cdbg(VERBOSE, "Wrote port%d head %llx, egrhead %x from "
- "tail regs\n", pd->port_port,
- (unsigned long long) head, head32);
- pd->port_tidcursor = 0; /* start at beginning after open */
- /*
- * now enable the port; the tail registers will be written to memory
- * by the chip as soon as it sees the write to
- * dd->ipath_kregs->kr_rcvctrl. The update only happens on
- * transition from 0 to 1, so clear it first, then set it as part of
- * enabling the port. This will (very briefly) affect any other
- * open ports, but it shouldn't be long enough to be an issue.
- */
- ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
- dd->ipath_rcvctrl & ~INFINIPATH_R_TAILUPD);
- ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
- dd->ipath_rcvctrl);
-
-done:
+ dev_info(&dd->pcidev->dev, "%s port%u mmap of %lx, %x "
+ "bytes r%c failed: %d\n", what, pd->port_port,
+ pfn, len, write_ok?'w':'o', ret);
+ else
+ ipath_cdbg(VERBOSE, "%s port%u mmaped %lx, %x bytes "
+ "r%c\n", what, pd->port_port, pfn, len,
+ write_ok?'w':'o');
+bail:
return ret;
}
@@ -940,8 +980,11 @@ static int mmap_ureg(struct vm_area_struct *vma, struct ipath_devdata *dd,
unsigned long phys;
int ret;
- /* it's the real hardware, so io_remap works */
-
+ /*
+ * This is real hardware, so use io_remap. This is the mechanism
+ * for the user process to update the head registers for their port
+ * in the chip.
+ */
if ((vma->vm_end - vma->vm_start) > PAGE_SIZE) {
dev_info(&dd->pcidev->dev, "FAIL mmap userreg: reqlen "
"%lx > PAGE\n", vma->vm_end - vma->vm_start);
@@ -961,44 +1004,45 @@ static int mmap_ureg(struct vm_area_struct *vma, struct ipath_devdata *dd,
static int mmap_piobufs(struct vm_area_struct *vma,
struct ipath_devdata *dd,
- struct ipath_portdata *pd)
+ struct ipath_portdata *pd,
+ unsigned piobufs, unsigned piocnt)
{
unsigned long phys;
int ret;
/*
- * When we map the PIO buffers, we want to map them as writeonly, no
- * read possible.
+ * When we map the PIO buffers in the chip, we want to map them as
+ * writeonly, no read possible. This prevents access to previous
+ * process data, and catches users who might try to read the i/o
+ * space due to a bug.
*/
-
- if ((vma->vm_end - vma->vm_start) >
- (dd->ipath_pbufsport * dd->ipath_palign)) {
+ if ((vma->vm_end - vma->vm_start) > (piocnt * dd->ipath_palign)) {
dev_info(&dd->pcidev->dev, "FAIL mmap piobufs: "
"reqlen %lx > PAGE\n",
vma->vm_end - vma->vm_start);
- ret = -EFAULT;
+ ret = -EINVAL;
goto bail;
}
- phys = dd->ipath_physaddr + pd->port_piobufs;
+ phys = dd->ipath_physaddr + piobufs;
+
/*
- * Do *NOT* mark this as non-cached (PWT bit), or we don't get the
+ * Don't mark this as non-cached, or we don't get the
* write combining behavior we want on the PIO buffers!
- * vma->vm_page_prot =
- * pgprot_noncached(vma->vm_page_prot);
*/
- if (vma->vm_flags & VM_READ) {
- dev_info(&dd->pcidev->dev,
- "Can't map piobufs as readable (flags=%lx)\n",
- vma->vm_flags);
- ret = -EPERM;
- goto bail;
- }
-
- /* don't allow them to later change to readable with mprotect */
+#if defined(__powerpc__)
+ /* There isn't a generic way to specify writethrough mappings */
+ pgprot_val(vma->vm_page_prot) |= _PAGE_NO_CACHE;
+ pgprot_val(vma->vm_page_prot) |= _PAGE_WRITETHRU;
+ pgprot_val(vma->vm_page_prot) &= ~_PAGE_GUARDED;
+#endif
- vma->vm_flags &= ~VM_MAYWRITE;
+ /*
+ * don't allow them to later change to readable with mprotect (for when
+ * not initially mapped readable, as is normally the case)
+ */
+ vma->vm_flags &= ~VM_MAYREAD;
vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
ret = io_remap_pfn_range(vma, vma->vm_start, phys >> PAGE_SHIFT,
@@ -1014,14 +1058,9 @@ static int mmap_rcvegrbufs(struct vm_area_struct *vma,
struct ipath_devdata *dd = pd->port_dd;
unsigned long start, size;
size_t total_size, i;
- dma_addr_t *phys;
+ unsigned long pfn;
int ret;
- if (!pd->port_rcvegrbuf) {
- ret = -EFAULT;
- goto bail;
- }
-
size = pd->port_rcvegrbuf_size;
total_size = pd->port_rcvegrbuf_chunks * size;
if ((vma->vm_end - vma->vm_start) > total_size) {
@@ -1029,7 +1068,7 @@ static int mmap_rcvegrbufs(struct vm_area_struct *vma,
"reqlen %lx > actual %lx\n",
vma->vm_end - vma->vm_start,
(unsigned long) total_size);
- ret = -EFAULT;
+ ret = -EINVAL;
goto bail;
}
@@ -1039,16 +1078,15 @@ static int mmap_rcvegrbufs(struct vm_area_struct *vma,
ret = -EPERM;
goto bail;
}
-
- start = vma->vm_start;
- phys = pd->port_rcvegrbuf_phys;
-
/* don't allow them to later change to writeable with mprotect */
vma->vm_flags &= ~VM_MAYWRITE;
+ start = vma->vm_start;
+
for (i = 0; i < pd->port_rcvegrbuf_chunks; i++, start += size) {
- ret = remap_pfn_range(vma, start, phys[i] >> PAGE_SHIFT,
- size, vma->vm_page_prot);
+ pfn = virt_to_phys(pd->port_rcvegrbuf[i]) >> PAGE_SHIFT;
+ ret = remap_pfn_range(vma, start, pfn, size,
+ vma->vm_page_prot);
if (ret < 0)
goto bail;
}
@@ -1058,74 +1096,118 @@ bail:
return ret;
}
-static int mmap_rcvhdrq(struct vm_area_struct *vma,
- struct ipath_portdata *pd)
+/*
+ * ipath_file_vma_nopage - handle a VMA page fault.
+ */
+static struct page *ipath_file_vma_nopage(struct vm_area_struct *vma,
+ unsigned long address, int *type)
{
- struct ipath_devdata *dd = pd->port_dd;
- size_t total_size;
- int ret;
+ unsigned long offset = address - vma->vm_start;
+ struct page *page = NOPAGE_SIGBUS;
+ void *pageptr;
/*
- * kmalloc'ed memory, physically contiguous; this is from
- * spi_rcvhdr_base; we allow user to map read-write so they can
- * write hdrq entries to allow protocol code to directly poll
- * whether a hdrq entry has been written.
+ * Convert the vmalloc address into a struct page.
*/
- total_size = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize *
- sizeof(u32), PAGE_SIZE);
- if ((vma->vm_end - vma->vm_start) > total_size) {
- dev_info(&dd->pcidev->dev,
- "FAIL on rcvhdrq: reqlen %lx > actual %lx\n",
- vma->vm_end - vma->vm_start,
- (unsigned long) total_size);
- ret = -EFAULT;
- goto bail;
- }
-
- ret = remap_pfn_range(vma, vma->vm_start,
- pd->port_rcvhdrq_phys >> PAGE_SHIFT,
- vma->vm_end - vma->vm_start,
- vma->vm_page_prot);
-bail:
- return ret;
+ pageptr = (void *)(offset + (vma->vm_pgoff << PAGE_SHIFT));
+ page = vmalloc_to_page(pageptr);
+ if (!page)
+ goto out;
+
+ /* Increment the reference count. */
+ get_page(page);
+ if (type)
+ *type = VM_FAULT_MINOR;
+out:
+ return page;
}
-static int mmap_pioavailregs(struct vm_area_struct *vma,
- struct ipath_portdata *pd)
+static struct vm_operations_struct ipath_file_vm_ops = {
+ .nopage = ipath_file_vma_nopage,
+};
+
+static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr,
+ struct ipath_portdata *pd, unsigned subport)
{
- struct ipath_devdata *dd = pd->port_dd;
+ unsigned long len;
+ struct ipath_devdata *dd;
+ void *addr;
+ size_t size;
int ret;
+ /* If the port is not shared, all addresses should be physical */
+ if (!pd->port_subport_cnt) {
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ dd = pd->port_dd;
+ size = pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size;
+
/*
- * when we map the PIO bufferavail registers, we want to map them as
- * readonly, no write possible.
- *
- * kmalloc'ed memory, physically contiguous, one page only, readonly
+ * Master has all the slave uregbase, rcvhdrq, and
+ * rcvegrbufs mmapped.
*/
-
- if ((vma->vm_end - vma->vm_start) > PAGE_SIZE) {
- dev_info(&dd->pcidev->dev, "FAIL on pioavailregs_dma: "
- "reqlen %lx > actual %lx\n",
- vma->vm_end - vma->vm_start,
- (unsigned long) PAGE_SIZE);
- ret = -EFAULT;
+ if (subport == 0) {
+ unsigned num_slaves = pd->port_subport_cnt - 1;
+
+ if (pgaddr == ((u64) pd->subport_uregbase & MMAP64_MASK)) {
+ addr = pd->subport_uregbase;
+ size = PAGE_SIZE * num_slaves;
+ } else if (pgaddr == ((u64) pd->subport_rcvhdr_base &
+ MMAP64_MASK)) {
+ addr = pd->subport_rcvhdr_base;
+ size = pd->port_rcvhdrq_size * num_slaves;
+ } else if (pgaddr == ((u64) pd->subport_rcvegrbuf &
+ MMAP64_MASK)) {
+ addr = pd->subport_rcvegrbuf;
+ size *= num_slaves;
+ } else {
+ ret = -EINVAL;
+ goto bail;
+ }
+ } else if (pgaddr == (((u64) pd->subport_uregbase +
+ PAGE_SIZE * (subport - 1)) & MMAP64_MASK)) {
+ addr = pd->subport_uregbase + PAGE_SIZE * (subport - 1);
+ size = PAGE_SIZE;
+ } else if (pgaddr == (((u64) pd->subport_rcvhdr_base +
+ pd->port_rcvhdrq_size * (subport - 1)) &
+ MMAP64_MASK)) {
+ addr = pd->subport_rcvhdr_base +
+ pd->port_rcvhdrq_size * (subport - 1);
+ size = pd->port_rcvhdrq_size;
+ } else if (pgaddr == (((u64) pd->subport_rcvegrbuf +
+ size * (subport - 1)) & MMAP64_MASK)) {
+ addr = pd->subport_rcvegrbuf + size * (subport - 1);
+ /* rcvegrbufs are read-only on the slave */
+ if (vma->vm_flags & VM_WRITE) {
+ dev_info(&dd->pcidev->dev,
+ "Can't map eager buffers as "
+ "writable (flags=%lx)\n", vma->vm_flags);
+ ret = -EPERM;
+ goto bail;
+ }
+ /*
+ * Don't allow permission to later change to writeable
+ * with mprotect.
+ */
+ vma->vm_flags &= ~VM_MAYWRITE;
+ } else {
+ ret = -EINVAL;
goto bail;
}
-
- if (vma->vm_flags & VM_WRITE) {
- dev_info(&dd->pcidev->dev,
- "Can't map pioavailregs as writable (flags=%lx)\n",
- vma->vm_flags);
- ret = -EPERM;
+ len = vma->vm_end - vma->vm_start;
+ if (len > size) {
+ ipath_cdbg(MM, "FAIL: reqlen %lx > %zx\n", len, size);
+ ret = -EINVAL;
goto bail;
}
- /* don't allow them to later change with mprotect */
- vma->vm_flags &= ~VM_MAYWRITE;
+ vma->vm_pgoff = (unsigned long) addr >> PAGE_SHIFT;
+ vma->vm_ops = &ipath_file_vm_ops;
+ vma->vm_flags |= VM_RESERVED | VM_DONTEXPAND;
+ ret = 0;
- ret = remap_pfn_range(vma, vma->vm_start,
- dd->ipath_pioavailregs_phys >> PAGE_SHIFT,
- PAGE_SIZE, vma->vm_page_prot);
bail:
return ret;
}
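
The mmap_kvaddr()/nopage pair above is the general "export vmalloc() memory to userspace" idiom of this kernel generation: stash the kernel virtual address in vm_pgoff at mmap time, then translate faulting addresses back into struct pages with vmalloc_to_page(). A stripped-down sketch with placeholder names, using the 2.6.18-era ->nopage interface (later kernels use ->fault instead):

#include <linux/mm.h>		/* NOPAGE_SIGBUS, VM_FAULT_MINOR, get_page() */
#include <linux/vmalloc.h>	/* vmalloc_to_page() */

static struct page *vmalloc_nopage(struct vm_area_struct *vma,
				   unsigned long address, int *type)
{
	/* vm_pgoff was set to (kernel vaddr >> PAGE_SHIFT) at mmap time */
	void *kvaddr = (void *)((vma->vm_pgoff << PAGE_SHIFT) +
				(address - vma->vm_start));
	struct page *page = vmalloc_to_page(kvaddr);

	if (!page)
		return NOPAGE_SIGBUS;
	get_page(page);		/* reference for the user pte being installed */
	if (type)
		*type = VM_FAULT_MINOR;
	return page;
}
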
@@ -1145,41 +1227,99 @@ static int ipath_mmap(struct file *fp, struct vm_area_struct *vma)
struct ipath_portdata *pd;
struct ipath_devdata *dd;
u64 pgaddr, ureg;
+ unsigned piobufs, piocnt;
int ret;
pd = port_fp(fp);
+ if (!pd) {
+ ret = -EINVAL;
+ goto bail;
+ }
dd = pd->port_dd;
+
/*
* This is the ipath_do_user_init() code, mapping the shared buffers
* into the user process. The address referred to by vm_pgoff is the
- * virtual, not physical, address; we only do one mmap for each
- * space mapped.
+ * file offset passed via mmap(). For shared ports, this is the
+ * kernel vmalloc() address of the pages to share with the master.
+ * For non-shared or master ports, this is a physical address.
+ * We only do one mmap for each space mapped.
*/
pgaddr = vma->vm_pgoff << PAGE_SHIFT;
/*
- * note that ureg does *NOT* have the kregvirt as part of it, to be
- * sure that for 32 bit programs, we don't end up trying to map a >
- * 44 address. Has to match ipath_get_base_info() code that sets
- * __spi_uregbase
+ * Check for 0 in case one of the allocations failed, but user
+ * called mmap anyway.
*/
+ if (!pgaddr) {
+ ret = -EINVAL;
+ goto bail;
+ }
- ureg = dd->ipath_uregbase + dd->ipath_palign * pd->port_port;
-
- ipath_cdbg(MM, "ushare: pgaddr %llx vm_start=%lx, vmlen %lx\n",
+ ipath_cdbg(MM, "pgaddr %llx vm_start=%lx len %lx port %u:%u:%u\n",
(unsigned long long) pgaddr, vma->vm_start,
- vma->vm_end - vma->vm_start);
+ vma->vm_end - vma->vm_start, dd->ipath_unit,
+ pd->port_port, subport_fp(fp));
+
+ /*
+ * Physical addresses must fit in 40 bits for our hardware.
+ * Check for kernel virtual addresses first; anything else must
+ * match a HW or memory address.
+ */
+ if (pgaddr >= (1ULL<<40)) {
+ ret = mmap_kvaddr(vma, pgaddr, pd, subport_fp(fp));
+ goto bail;
+ }
+
+ if (!pd->port_subport_cnt) {
+ /* port is not shared */
+ ureg = dd->ipath_uregbase + dd->ipath_palign * pd->port_port;
+ piocnt = dd->ipath_pbufsport;
+ piobufs = pd->port_piobufs;
+ } else if (!subport_fp(fp)) {
+ /* caller is the master */
+ ureg = dd->ipath_uregbase + dd->ipath_palign * pd->port_port;
+ piocnt = (dd->ipath_pbufsport / pd->port_subport_cnt) +
+ (dd->ipath_pbufsport % pd->port_subport_cnt);
+ piobufs = pd->port_piobufs +
+ dd->ipath_palign * (dd->ipath_pbufsport - piocnt);
+ } else {
+ unsigned slave = subport_fp(fp) - 1;
+
+ /* caller is a slave */
+ ureg = 0;
+ piocnt = dd->ipath_pbufsport / pd->port_subport_cnt;
+ piobufs = pd->port_piobufs + dd->ipath_palign * piocnt * slave;
+ }
if (pgaddr == ureg)
ret = mmap_ureg(vma, dd, ureg);
- else if (pgaddr == pd->port_piobufs)
- ret = mmap_piobufs(vma, dd, pd);
- else if (pgaddr == (u64) pd->port_rcvegr_phys)
+ else if (pgaddr == piobufs)
+ ret = mmap_piobufs(vma, dd, pd, piobufs, piocnt);
+ else if (pgaddr == dd->ipath_pioavailregs_phys)
+ /* in-memory copy of pioavail registers */
+ ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0,
+ (void *) dd->ipath_pioavailregs_dma,
+ "pioavail registers");
+ else if (subport_fp(fp))
+ /* Subports don't mmap the physical receive buffers */
+ ret = -EINVAL;
+ else if (pgaddr == pd->port_rcvegr_phys)
ret = mmap_rcvegrbufs(vma, pd);
else if (pgaddr == (u64) pd->port_rcvhdrq_phys)
- ret = mmap_rcvhdrq(vma, pd);
- else if (pgaddr == dd->ipath_pioavailregs_phys)
- ret = mmap_pioavailregs(vma, pd);
+ /*
+ * The rcvhdrq itself; readonly except on HT (so have
+ * to allow writable mapping), multiple pages, contiguous
+ * from an i/o perspective.
+ */
+ ret = ipath_mmap_mem(vma, pd, pd->port_rcvhdrq_size, 1,
+ pd->port_rcvhdrq,
+ "rcvhdrq");
+ else if (pgaddr == (u64) pd->port_rcvhdrqtailaddr_phys)
+ /* in-memory copy of rcvhdrq tail register */
+ ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0,
+ pd->port_rcvhdrtail_kvaddr,
+ "rcvhdrq tail");
else
ret = -EINVAL;
@@ -1187,9 +1327,10 @@ static int ipath_mmap(struct file *fp, struct vm_area_struct *vma)
if (ret < 0)
dev_info(&dd->pcidev->dev,
- "Failure %d on addr %lx, off %lx\n",
- -ret, vma->vm_start, vma->vm_pgoff);
-
+ "Failure %d on off %llx len %lx\n",
+ -ret, (unsigned long long)pgaddr,
+ vma->vm_end - vma->vm_start);
+bail:
return ret;
}
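
From userspace, each region dispatched above is obtained with an ordinary mmap() whose file offset is the address the driver advertised in the base-info structure (a physical address, or a kernel-virtual one for shared-port slaves). A hypothetical helper; which field the offset comes from and the protection flags depend on the region, and the large offsets require a 64-bit off_t (e.g. building with -D_FILE_OFFSET_BITS=64):

#include <stdint.h>
#include <sys/mman.h>

/* Map one driver region; 'offset' is the value reported by the driver. */
static void *map_region(int fd, uint64_t offset, size_t len, int prot)
{
	void *p = mmap(NULL, len, prot, MAP_SHARED, fd, (off_t)offset);

	return p == MAP_FAILED ? NULL : p;
}

/* e.g. the in-memory pioavail copy would be mapped read-only and the
 * rcvhdrq read-write; the exact offsets come from the base info. */
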
@@ -1199,9 +1340,12 @@ static unsigned int ipath_poll(struct file *fp,
struct ipath_portdata *pd;
u32 head, tail;
int bit;
+ unsigned pollflag = 0;
struct ipath_devdata *dd;
pd = port_fp(fp);
+ if (!pd)
+ goto bail;
dd = pd->port_dd;
bit = pd->port_port + INFINIPATH_R_INTRAVAIL_SHIFT;
@@ -1224,7 +1368,7 @@ static unsigned int ipath_poll(struct file *fp,
if (tail == head) {
set_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag);
- if(dd->ipath_rhdrhead_intr_off) /* arm rcv interrupt */
+ if (dd->ipath_rhdrhead_intr_off) /* arm rcv interrupt */
(void)ipath_write_ureg(dd, ur_rcvhdrhead,
dd->ipath_rhdrhead_intr_off
| head, pd->port_port);
@@ -1235,9 +1379,12 @@ static unsigned int ipath_poll(struct file *fp,
clear_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag);
pd->port_rcvwait_to++;
}
+ else
+ pollflag = POLLIN | POLLRDNORM;
}
else {
/* it's already happened; don't do wait_event overhead */
+ pollflag = POLLIN | POLLRDNORM;
pd->port_rcvnowait++;
}
@@ -1245,18 +1392,80 @@ static unsigned int ipath_poll(struct file *fp,
ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
dd->ipath_rcvctrl);
- return 0;
+bail:
+ return pollflag;
+}
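
With the change above, ipath_poll() finally reports readability instead of always returning 0, so a user process can block until the receive header queue has entries. A hypothetical userspace wait loop:

#include <poll.h>

/* Returns 1 when the header queue has entries, 0 on timeout, -1 on error. */
static int wait_for_rcv(int fd, int timeout_ms)
{
	struct pollfd pfd = { .fd = fd, .events = POLLIN };
	int n = poll(&pfd, 1, timeout_ms);

	if (n <= 0)
		return n;	/* 0 on timeout, -1 on error */
	return (pfd.revents & POLLIN) ? 1 : 0;
}
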
+
+static int init_subports(struct ipath_devdata *dd,
+ struct ipath_portdata *pd,
+ const struct ipath_user_info *uinfo)
+{
+ int ret = 0;
+ unsigned num_slaves;
+ size_t size;
+
+ /* Old user binaries don't know about subports */
+ if ((uinfo->spu_userversion & 0xffff) != IPATH_USER_SWMINOR)
+ goto bail;
+ /*
+ * If the user is requesting zero or one port,
+ * skip the subport allocation.
+ */
+ if (uinfo->spu_subport_cnt <= 1)
+ goto bail;
+ if (uinfo->spu_subport_cnt > 4) {
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ num_slaves = uinfo->spu_subport_cnt - 1;
+ pd->subport_uregbase = vmalloc(PAGE_SIZE * num_slaves);
+ if (!pd->subport_uregbase) {
+ ret = -ENOMEM;
+ goto bail;
+ }
+ /* Note: pd->port_rcvhdrq_size isn't initialized yet. */
+ size = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize *
+ sizeof(u32), PAGE_SIZE) * num_slaves;
+ pd->subport_rcvhdr_base = vmalloc(size);
+ if (!pd->subport_rcvhdr_base) {
+ ret = -ENOMEM;
+ goto bail_ureg;
+ }
+
+ pd->subport_rcvegrbuf = vmalloc(pd->port_rcvegrbuf_chunks *
+ pd->port_rcvegrbuf_size *
+ num_slaves);
+ if (!pd->subport_rcvegrbuf) {
+ ret = -ENOMEM;
+ goto bail_rhdr;
+ }
+
+ pd->port_subport_cnt = uinfo->spu_subport_cnt;
+ pd->port_subport_id = uinfo->spu_subport_id;
+ pd->active_slaves = 1;
+ goto bail;
+
+bail_rhdr:
+ vfree(pd->subport_rcvhdr_base);
+bail_ureg:
+ vfree(pd->subport_uregbase);
+ pd->subport_uregbase = NULL;
+bail:
+ return ret;
}
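
init_subports() above follows the usual allocate-then-unwind idiom: each vmalloc() gets its own bail label so earlier allocations are released in reverse order if a later one fails. A generic sketch of that shape, with illustrative names and sizes rather than the driver's fields:

#include <linux/vmalloc.h>	/* vmalloc(), vfree() */
#include <linux/errno.h>

static int alloc_three(void **a, void **b, void **c,
		       size_t sa, size_t sb, size_t sc)
{
	int ret = 0;

	*a = vmalloc(sa);
	if (!*a) {
		ret = -ENOMEM;
		goto bail;
	}
	*b = vmalloc(sb);
	if (!*b) {
		ret = -ENOMEM;
		goto bail_a;
	}
	*c = vmalloc(sc);
	if (!*c) {
		ret = -ENOMEM;
		goto bail_b;
	}
	return 0;

bail_b:
	vfree(*b);
bail_a:
	vfree(*a);
bail:
	return ret;
}
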
static int try_alloc_port(struct ipath_devdata *dd, int port,
- struct file *fp)
+ struct file *fp,
+ const struct ipath_user_info *uinfo)
{
+ struct ipath_portdata *pd;
int ret;
- if (!dd->ipath_pd[port]) {
- void *p, *ptmp;
+ if (!(pd = dd->ipath_pd[port])) {
+ void *ptmp;
- p = kzalloc(sizeof(struct ipath_portdata), GFP_KERNEL);
+ pd = kzalloc(sizeof(struct ipath_portdata), GFP_KERNEL);
/*
* Allocate memory for use in ipath_tid_update() just once
@@ -1266,34 +1475,36 @@ static int try_alloc_port(struct ipath_devdata *dd, int port,
ptmp = kmalloc(dd->ipath_rcvtidcnt * sizeof(u16) +
dd->ipath_rcvtidcnt * sizeof(struct page **),
GFP_KERNEL);
- if (!p || !ptmp) {
+ if (!pd || !ptmp) {
ipath_dev_err(dd, "Unable to allocate portdata "
"memory, failing open\n");
ret = -ENOMEM;
- kfree(p);
+ kfree(pd);
kfree(ptmp);
goto bail;
}
- dd->ipath_pd[port] = p;
+ dd->ipath_pd[port] = pd;
dd->ipath_pd[port]->port_port = port;
dd->ipath_pd[port]->port_dd = dd;
dd->ipath_pd[port]->port_tid_pg_list = ptmp;
init_waitqueue_head(&dd->ipath_pd[port]->port_wait);
}
- if (!dd->ipath_pd[port]->port_cnt) {
- dd->ipath_pd[port]->port_cnt = 1;
- fp->private_data = (void *) dd->ipath_pd[port];
+ if (!pd->port_cnt) {
+ pd->userversion = uinfo->spu_userversion;
+ init_user_egr_sizes(pd);
+ if ((ret = init_subports(dd, pd, uinfo)) != 0)
+ goto bail;
ipath_cdbg(PROC, "%s[%u] opened unit:port %u:%u\n",
current->comm, current->pid, dd->ipath_unit,
port);
- dd->ipath_pd[port]->port_pid = current->pid;
- strncpy(dd->ipath_pd[port]->port_comm, current->comm,
- sizeof(dd->ipath_pd[port]->port_comm));
+ pd->port_cnt = 1;
+ port_fp(fp) = pd;
+ pd->port_pid = current->pid;
+ strncpy(pd->port_comm, current->comm, sizeof(pd->port_comm));
ipath_stats.sps_ports++;
ret = 0;
- goto bail;
- }
- ret = -EBUSY;
+ } else
+ ret = -EBUSY;
bail:
return ret;
@@ -1309,7 +1520,8 @@ static inline int usable(struct ipath_devdata *dd)
| IPATH_LINKUNK));
}
-static int find_free_port(int unit, struct file *fp)
+static int find_free_port(int unit, struct file *fp,
+ const struct ipath_user_info *uinfo)
{
struct ipath_devdata *dd = ipath_lookup(unit);
int ret, i;
@@ -1324,8 +1536,8 @@ static int find_free_port(int unit, struct file *fp)
goto bail;
}
- for (i = 0; i < dd->ipath_cfgports; i++) {
- ret = try_alloc_port(dd, i, fp);
+ for (i = 1; i < dd->ipath_cfgports; i++) {
+ ret = try_alloc_port(dd, i, fp, uinfo);
if (ret != -EBUSY)
goto bail;
}
@@ -1335,26 +1547,27 @@ bail:
return ret;
}
-static int find_best_unit(struct file *fp)
+static int find_best_unit(struct file *fp,
+ const struct ipath_user_info *uinfo)
{
int ret = 0, i, prefunit = -1, devmax;
int maxofallports, npresent, nup;
int ndev;
- (void) ipath_count_units(&npresent, &nup, &maxofallports);
+ devmax = ipath_count_units(&npresent, &nup, &maxofallports);
/*
* This code is present to allow a knowledgeable person to
* specify the layout of processes to processors before opening
* this driver, and then we'll assign the process to the "closest"
- * HT-400 to that processor (we assume reasonable connectivity,
+ * InfiniPath chip to that processor (we assume reasonable connectivity,
* for now). This code assumes that if affinity has been set
* before this point, that at most one cpu is set; for now this
* is reasonable. I check for both cpus_empty() and cpus_full(),
* in case some kernel variant sets none of the bits when no
* affinity is set. 2.6.11 and 12 kernels have all present
* cpus set. Some day we'll have to fix it up further to handle
- * a cpu subset. This algorithm fails for two HT-400's connected
+ * a cpu subset. This algorithm fails for two HT chips connected
* in tunnel fashion. Eventually this needs real topology
* information. There may be some issues with dual core numbering
* as well. This needs more work prior to release.
@@ -1388,8 +1601,6 @@ static int find_best_unit(struct file *fp)
if (prefunit != -1)
devmax = prefunit + 1;
- else
- devmax = ipath_count_units(NULL, NULL, NULL);
recheck:
for (i = 1; i < maxofallports; i++) {
for (ndev = prefunit != -1 ? prefunit : 0; ndev < devmax;
@@ -1404,7 +1615,7 @@ recheck:
* next.
*/
continue;
- ret = try_alloc_port(dd, i, fp);
+ ret = try_alloc_port(dd, i, fp, uinfo);
if (!ret)
goto done;
}
@@ -1440,22 +1651,183 @@ done:
return ret;
}
+static int find_shared_port(struct file *fp,
+ const struct ipath_user_info *uinfo)
+{
+ int devmax, ndev, i;
+ int ret = 0;
+
+ devmax = ipath_count_units(NULL, NULL, NULL);
+
+ for (ndev = 0; ndev < devmax; ndev++) {
+ struct ipath_devdata *dd = ipath_lookup(ndev);
+
+ if (!dd)
+ continue;
+ for (i = 1; i < dd->ipath_cfgports; i++) {
+ struct ipath_portdata *pd = dd->ipath_pd[i];
+
+ /* Skip ports which are not yet open */
+ if (!pd || !pd->port_cnt)
+ continue;
+ /* Skip port if it doesn't match the requested one */
+ if (pd->port_subport_id != uinfo->spu_subport_id)
+ continue;
+ /* Verify the sharing process matches the master */
+ if (pd->port_subport_cnt != uinfo->spu_subport_cnt ||
+ pd->userversion != uinfo->spu_userversion ||
+ pd->port_cnt >= pd->port_subport_cnt) {
+ ret = -EINVAL;
+ goto done;
+ }
+ port_fp(fp) = pd;
+ subport_fp(fp) = pd->port_cnt++;
+ tidcursor_fp(fp) = 0;
+ pd->active_slaves |= 1 << subport_fp(fp);
+ ipath_cdbg(PROC,
+ "%s[%u] %u sharing %s[%u] unit:port %u:%u\n",
+ current->comm, current->pid,
+ subport_fp(fp),
+ pd->port_comm, pd->port_pid,
+ dd->ipath_unit, pd->port_port);
+ ret = 1;
+ goto done;
+ }
+ }
+
+done:
+ return ret;
+}
+
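
The sharing test above boils down to three conditions a prospective slave must satisfy against an already-open master port. A tiny illustrative predicate (parameter names are placeholders, not driver fields):

/* A slave may attach only if it asked for the same geometry and
 * software version as the master, and a subport slot is still free. */
static int can_share_port(unsigned master_subport_cnt,
			  unsigned master_userversion,
			  unsigned master_open_cnt,
			  unsigned want_subport_cnt,
			  unsigned want_userversion)
{
	return master_subport_cnt == want_subport_cnt &&
	       master_userversion == want_userversion &&
	       master_open_cnt < master_subport_cnt;
}
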
static int ipath_open(struct inode *in, struct file *fp)
{
- int ret, minor;
+ /* The real work is performed later in ipath_assign_port() */
+ fp->private_data = kzalloc(sizeof(struct ipath_filedata), GFP_KERNEL);
+ return fp->private_data ? 0 : -ENOMEM;
+}
+
+
+/* Get the port early, so we can set affinity prior to memory allocation */
+static int ipath_assign_port(struct file *fp,
+ const struct ipath_user_info *uinfo)
+{
+ int ret;
+ int i_minor;
+ unsigned swminor;
+
+ /* Check to be sure we haven't already initialized this file */
+ if (port_fp(fp)) {
+ ret = -EINVAL;
+ goto done;
+ }
+
+ /* for now, if major version is different, bail */
+ if ((uinfo->spu_userversion >> 16) != IPATH_USER_SWMAJOR) {
+ ipath_dbg("User major version %d not same as driver "
+ "major %d\n", uinfo->spu_userversion >> 16,
+ IPATH_USER_SWMAJOR);
+ ret = -ENODEV;
+ goto done;
+ }
+
+ swminor = uinfo->spu_userversion & 0xffff;
+ if (swminor != IPATH_USER_SWMINOR)
+ ipath_dbg("User minor version %d not same as driver "
+ "minor %d\n", swminor, IPATH_USER_SWMINOR);
mutex_lock(&ipath_mutex);
- minor = iminor(in);
+ if (swminor == IPATH_USER_SWMINOR && uinfo->spu_subport_cnt &&
+ (ret = find_shared_port(fp, uinfo))) {
+ mutex_unlock(&ipath_mutex);
+ if (ret > 0)
+ ret = 0;
+ goto done;
+ }
+
+ i_minor = iminor(fp->f_dentry->d_inode) - IPATH_USER_MINOR_BASE;
ipath_cdbg(VERBOSE, "open on dev %lx (minor %d)\n",
- (long)in->i_rdev, minor);
+ (long)fp->f_dentry->d_inode->i_rdev, i_minor);
- if (minor)
- ret = find_free_port(minor - 1, fp);
+ if (i_minor)
+ ret = find_free_port(i_minor - 1, fp, uinfo);
else
- ret = find_best_unit(fp);
+ ret = find_best_unit(fp, uinfo);
mutex_unlock(&ipath_mutex);
+
+done:
+ return ret;
+}
+
+
+static int ipath_do_user_init(struct file *fp,
+ const struct ipath_user_info *uinfo)
+{
+ int ret;
+ struct ipath_portdata *pd;
+ struct ipath_devdata *dd;
+ u32 head32;
+
+ pd = port_fp(fp);
+ dd = pd->port_dd;
+
+ if (uinfo->spu_rcvhdrsize) {
+ ret = ipath_setrcvhdrsize(dd, uinfo->spu_rcvhdrsize);
+ if (ret)
+ goto done;
+ }
+
+ /* for now we do nothing with rcvhdrcnt: uinfo->spu_rcvhdrcnt */
+
+ /* for right now, kernel piobufs are at end, so port 1 is at 0 */
+ pd->port_piobufs = dd->ipath_piobufbase +
+ dd->ipath_pbufsport * (pd->port_port - 1) * dd->ipath_palign;
+ ipath_cdbg(VERBOSE, "Set base of piobufs for port %u to 0x%x\n",
+ pd->port_port, pd->port_piobufs);
+
+ /*
+ * Now allocate the rcvhdr Q and eager TIDs; skip the TID
+ * array for the time being. If pd->port_port > chip-supported,
+ * we will need extra handling here for the overflow
+ * through port 0, someday
+ */
+ ret = ipath_create_rcvhdrq(dd, pd);
+ if (!ret)
+ ret = ipath_create_user_egr(pd);
+ if (ret)
+ goto done;
+
+ /*
+ * set the eager head register for this port to the current values
+ * of the tail pointers, since we don't know if they were
+ * updated on last use of the port.
+ */
+ head32 = ipath_read_ureg32(dd, ur_rcvegrindextail, pd->port_port);
+ ipath_write_ureg(dd, ur_rcvegrindexhead, head32, pd->port_port);
+ dd->ipath_lastegrheads[pd->port_port] = -1;
+ dd->ipath_lastrcvhdrqtails[pd->port_port] = -1;
+ ipath_cdbg(VERBOSE, "Wrote port%d egrhead %x from tail regs\n",
+ pd->port_port, head32);
+ pd->port_tidcursor = 0; /* start at beginning after open */
+ /*
+ * now enable the port; the tail registers will be written to memory
+ * by the chip as soon as it sees the write to
+ * dd->ipath_kregs->kr_rcvctrl. The update only happens on
+ * transition from 0 to 1, so clear it first, then set it as part of
+ * enabling the port. This will (very briefly) affect any other
+ * open ports, but it shouldn't be long enough to be an issue.
+ * We explicitly set the in-memory copy to 0 beforehand, so we don't
+ * have to wait to be sure the DMA update has happened.
+ */
+ *(volatile u64 *)pd->port_rcvhdrtail_kvaddr = 0ULL;
+ set_bit(INFINIPATH_R_PORTENABLE_SHIFT + pd->port_port,
+ &dd->ipath_rcvctrl);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
+ dd->ipath_rcvctrl & ~INFINIPATH_R_TAILUPD);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
+ dd->ipath_rcvctrl);
+done:
return ret;
}
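
The double write of kr_rcvctrl above exists because the chip only dumps the tail pointer to memory on a 0 -> 1 transition of the tail-update bit. A schematic sketch of that sequence with placeholder register access (the driver itself goes through ipath_write_kreg()):

#include <linux/io.h>		/* writeq(); this driver is 64BIT-only */
#include <linux/types.h>

/* 'shadow' is the soft copy of rcvctrl and is assumed to already have
 * the tail-update bit set; 'port_bit' is the enable bit for this port. */
static void enable_port_rcv(void __iomem *rcvctrl_reg, u64 *shadow,
			    u64 port_bit, u64 tailupd_bit)
{
	*shadow |= port_bit;
	writeq(*shadow & ~tailupd_bit, rcvctrl_reg);	/* tail update low */
	writeq(*shadow, rcvctrl_reg);	/* 0 -> 1 forces the tail DMA */
}
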
@@ -1478,6 +1850,8 @@ static void unlock_expected_tids(struct ipath_portdata *pd)
if (!dd->ipath_pageshadow[i])
continue;
+ pci_unmap_page(dd->pcidev, dd->ipath_physshadow[i],
+ PAGE_SIZE, PCI_DMA_FROMDEVICE);
ipath_release_user_pages_on_close(&dd->ipath_pageshadow[i],
1);
dd->ipath_pageshadow[i] = NULL;
@@ -1498,6 +1872,7 @@ static void unlock_expected_tids(struct ipath_portdata *pd)
static int ipath_close(struct inode *in, struct file *fp)
{
int ret = 0;
+ struct ipath_filedata *fd;
struct ipath_portdata *pd;
struct ipath_devdata *dd;
unsigned port;
@@ -1507,9 +1882,24 @@ static int ipath_close(struct inode *in, struct file *fp)
mutex_lock(&ipath_mutex);
- pd = port_fp(fp);
- port = pd->port_port;
+ fd = (struct ipath_filedata *) fp->private_data;
fp->private_data = NULL;
+ pd = fd->pd;
+ if (!pd) {
+ mutex_unlock(&ipath_mutex);
+ goto bail;
+ }
+ if (--pd->port_cnt) {
+ /*
+ * XXX If the master closes the port before the slave(s),
+ * revoke the mmap for the eager receive queue so
+ * the slave(s) don't wait for receive data forever.
+ */
+ pd->active_slaves &= ~(1 << fd->subport);
+ mutex_unlock(&ipath_mutex);
+ goto bail;
+ }
+ port = pd->port_port;
dd = pd->port_dd;
if (pd->port_hdrqfull) {
@@ -1536,70 +1926,79 @@ static int ipath_close(struct inode *in, struct file *fp)
}
if (dd->ipath_kregbase) {
- if (pd->port_rcvhdrtail_uaddr) {
- pd->port_rcvhdrtail_uaddr = 0;
- pd->port_rcvhdrtail_kvaddr = NULL;
- ipath_release_user_pages_on_close(
- &pd->port_rcvhdrtail_pagep, 1);
- pd->port_rcvhdrtail_pagep = NULL;
- ipath_stats.sps_pageunlocks++;
- }
- ipath_write_kreg_port(
- dd, dd->ipath_kregs->kr_rcvhdrtailaddr,
- port, 0ULL);
- ipath_write_kreg_port(
- dd, dd->ipath_kregs->kr_rcvhdraddr,
- pd->port_port, 0);
+ int i;
+ /* atomically clear receive enable port. */
+ clear_bit(INFINIPATH_R_PORTENABLE_SHIFT + port,
+ &dd->ipath_rcvctrl);
+ ipath_write_kreg( dd, dd->ipath_kregs->kr_rcvctrl,
+ dd->ipath_rcvctrl);
+ /* and read back from chip to be sure that nothing
+ * else is in flight when we do the rest */
+ (void)ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
/* clean up the pkeys for this port user */
ipath_clean_part_key(pd, dd);
+ /*
+ * be paranoid, and never write 0's to these, just use an
+ * unused part of the port 0 tail page. Of course,
+ * rcvhdraddr points to a large chunk of memory, so this
+ * could still trash things, but at least it won't trash
+ * page 0, and by disabling the port, it should stop "soon",
+ * even if a packet or two is already in flight after we
+ * disabled the port.
+ */
+ ipath_write_kreg_port(dd,
+ dd->ipath_kregs->kr_rcvhdrtailaddr, port,
+ dd->ipath_dummy_hdrq_phys);
+ ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdraddr,
+ pd->port_port, dd->ipath_dummy_hdrq_phys);
- if (port < dd->ipath_cfgports) {
- int i = dd->ipath_pbufsport * (port - 1);
- ipath_disarm_piobufs(dd, i, dd->ipath_pbufsport);
+ i = dd->ipath_pbufsport * (port - 1);
+ ipath_disarm_piobufs(dd, i, dd->ipath_pbufsport);
- /* atomically clear receive enable port. */
- clear_bit(INFINIPATH_R_PORTENABLE_SHIFT + port,
- &dd->ipath_rcvctrl);
- ipath_write_kreg(
- dd,
- dd->ipath_kregs->kr_rcvctrl,
- dd->ipath_rcvctrl);
-
- if (dd->ipath_pageshadow)
- unlock_expected_tids(pd);
- ipath_stats.sps_ports--;
- ipath_cdbg(PROC, "%s[%u] closed port %u:%u\n",
- pd->port_comm, pd->port_pid,
- dd->ipath_unit, port);
- }
+ dd->ipath_f_clear_tids(dd, pd->port_port);
+
+ if (dd->ipath_pageshadow)
+ unlock_expected_tids(pd);
+ ipath_stats.sps_ports--;
+ ipath_cdbg(PROC, "%s[%u] closed port %u:%u\n",
+ pd->port_comm, pd->port_pid,
+ dd->ipath_unit, port);
}
- pd->port_cnt = 0;
pd->port_pid = 0;
-
- dd->ipath_f_clear_tids(dd, pd->port_port);
-
- ipath_free_pddata(dd, pd->port_port, 0);
-
+ dd->ipath_pd[pd->port_port] = NULL; /* before releasing mutex */
mutex_unlock(&ipath_mutex);
+ ipath_free_pddata(dd, pd); /* after releasing the mutex */
+bail:
+ kfree(fd);
return ret;
}
-static int ipath_port_info(struct ipath_portdata *pd,
+static int ipath_port_info(struct ipath_portdata *pd, u16 subport,
struct ipath_port_info __user *uinfo)
{
struct ipath_port_info info;
int nup;
int ret;
+ size_t sz;
(void) ipath_count_units(NULL, &nup, NULL);
info.num_active = nup;
info.unit = pd->port_dd->ipath_unit;
info.port = pd->port_port;
+ info.subport = subport;
+ /* Don't return new fields if old library opened the port. */
+ if ((pd->userversion & 0xffff) == IPATH_USER_SWMINOR) {
+ /* Number of user ports available for this device. */
+ info.num_ports = pd->port_dd->ipath_cfgports - 1;
+ info.num_subports = pd->port_subport_cnt;
+ sz = sizeof(info);
+ } else
+ sz = sizeof(info) - 2 * sizeof(u16);
- if (copy_to_user(uinfo, &info, sizeof(info))) {
+ if (copy_to_user(uinfo, &info, sz)) {
ret = -EFAULT;
goto bail;
}
@@ -1609,6 +2008,16 @@ bail:
return ret;
}
+static int ipath_get_slave_info(struct ipath_portdata *pd,
+ void __user *slave_mask_addr)
+{
+ int ret = 0;
+
+ if (copy_to_user(slave_mask_addr, &pd->active_slaves, sizeof(u32)))
+ ret = -EFAULT;
+ return ret;
+}
+
static ssize_t ipath_write(struct file *fp, const char __user *data,
size_t count, loff_t *off)
{
@@ -1635,6 +2044,8 @@ static ssize_t ipath_write(struct file *fp, const char __user *data,
consumed = sizeof(cmd.type);
switch (cmd.type) {
+ case IPATH_CMD_ASSIGN_PORT:
+ case __IPATH_CMD_USER_INIT:
case IPATH_CMD_USER_INIT:
copy = sizeof(cmd.cmd.user_info);
dest = &cmd.cmd.user_info;
@@ -1661,6 +2072,11 @@ static ssize_t ipath_write(struct file *fp, const char __user *data,
dest = &cmd.cmd.part_key;
src = &ucmd->cmd.part_key;
break;
+ case IPATH_CMD_SLAVE_INFO:
+ copy = sizeof(cmd.cmd.slave_mask_addr);
+ dest = &cmd.cmd.slave_mask_addr;
+ src = &ucmd->cmd.slave_mask_addr;
+ break;
default:
ret = -EINVAL;
goto bail;
@@ -1678,34 +2094,55 @@ static ssize_t ipath_write(struct file *fp, const char __user *data,
consumed += copy;
pd = port_fp(fp);
+ if (!pd && cmd.type != __IPATH_CMD_USER_INIT &&
+ cmd.type != IPATH_CMD_ASSIGN_PORT) {
+ ret = -EINVAL;
+ goto bail;
+ }
switch (cmd.type) {
+ case IPATH_CMD_ASSIGN_PORT:
+ ret = ipath_assign_port(fp, &cmd.cmd.user_info);
+ if (ret)
+ goto bail;
+ break;
+ case __IPATH_CMD_USER_INIT:
+ /* backwards compatibility, get port first */
+ ret = ipath_assign_port(fp, &cmd.cmd.user_info);
+ if (ret)
+ goto bail;
+ /* and fall through to current version. */
case IPATH_CMD_USER_INIT:
- ret = ipath_do_user_init(pd, &cmd.cmd.user_info);
- if (ret < 0)
+ ret = ipath_do_user_init(fp, &cmd.cmd.user_info);
+ if (ret)
goto bail;
ret = ipath_get_base_info(
- pd, (void __user *) (unsigned long)
+ fp, (void __user *) (unsigned long)
cmd.cmd.user_info.spu_base_info,
cmd.cmd.user_info.spu_base_info_size);
break;
case IPATH_CMD_RECV_CTRL:
- ret = ipath_manage_rcvq(pd, cmd.cmd.recv_ctrl);
+ ret = ipath_manage_rcvq(pd, subport_fp(fp), cmd.cmd.recv_ctrl);
break;
case IPATH_CMD_PORT_INFO:
- ret = ipath_port_info(pd,
+ ret = ipath_port_info(pd, subport_fp(fp),
(struct ipath_port_info __user *)
(unsigned long) cmd.cmd.port_info);
break;
case IPATH_CMD_TID_UPDATE:
- ret = ipath_tid_update(pd, &cmd.cmd.tid_info);
+ ret = ipath_tid_update(pd, fp, &cmd.cmd.tid_info);
break;
case IPATH_CMD_TID_FREE:
- ret = ipath_tid_free(pd, &cmd.cmd.tid_info);
+ ret = ipath_tid_free(pd, subport_fp(fp), &cmd.cmd.tid_info);
break;
case IPATH_CMD_SET_PART_KEY:
ret = ipath_set_part_key(pd, cmd.cmd.part_key);
break;
+ case IPATH_CMD_SLAVE_INFO:
+ ret = ipath_get_slave_info(pd,
+ (void __user *) (unsigned long)
+ cmd.cmd.slave_mask_addr);
+ break;
}
if (ret >= 0)
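
Userspace drives all of this through write() on the device file: with the split above, a process first sends IPATH_CMD_ASSIGN_PORT (cheap, so it can bind CPU affinity before buffers are allocated) and then IPATH_CMD_USER_INIT. A hypothetical sketch; the structure layout and field names below are placeholders, not the real ipath_common.h definitions:

#include <stdint.h>
#include <unistd.h>

/* Placeholder mirror of the command the driver parses from write(). */
struct fake_user_info {
	uint32_t spu_userversion;
	uint32_t spu_subport_cnt;
	uint32_t spu_subport_id;
	uint64_t spu_base_info;
};

struct fake_cmd {
	uint32_t type;			/* e.g. ASSIGN_PORT or USER_INIT */
	struct fake_user_info ui;
};

static int send_cmd(int fd, uint32_t type, const struct fake_user_info *ui)
{
	struct fake_cmd c = { .type = type, .ui = *ui };

	/* commands go in through write(), not ioctl() */
	return write(fd, &c, sizeof(c)) == sizeof(c) ? 0 : -1;
}
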
@@ -1859,19 +2296,12 @@ int ipath_user_add(struct ipath_devdata *dd)
"error %d\n", -ret);
goto bail;
}
- ret = ipath_diag_init();
- if (ret < 0) {
- ipath_dev_err(dd, "Unable to set up diag support: "
- "error %d\n", -ret);
- goto bail_sma;
- }
-
ret = init_cdev(0, "ipath", &ipath_file_ops, &wildcard_cdev,
&wildcard_class_dev);
if (ret < 0) {
ipath_dev_err(dd, "Could not create wildcard "
"minor: error %d\n", -ret);
- goto bail_diag;
+ goto bail_user;
}
atomic_set(&user_setup, 1);
@@ -1880,31 +2310,28 @@ int ipath_user_add(struct ipath_devdata *dd)
snprintf(name, sizeof(name), "ipath%d", dd->ipath_unit);
ret = init_cdev(dd->ipath_unit + 1, name, &ipath_file_ops,
- &dd->cdev, &dd->class_dev);
+ &dd->user_cdev, &dd->user_class_dev);
if (ret < 0)
ipath_dev_err(dd, "Could not create user minor %d, %s\n",
dd->ipath_unit + 1, name);
goto bail;
-bail_diag:
- ipath_diag_cleanup();
-bail_sma:
+bail_user:
user_cleanup();
bail:
return ret;
}
-void ipath_user_del(struct ipath_devdata *dd)
+void ipath_user_remove(struct ipath_devdata *dd)
{
- cleanup_cdev(&dd->cdev, &dd->class_dev);
+ cleanup_cdev(&dd->user_cdev, &dd->user_class_dev);
if (atomic_dec_return(&user_count) == 0) {
if (atomic_read(&user_setup) == 0)
goto bail;
cleanup_cdev(&wildcard_cdev, &wildcard_class_dev);
- ipath_diag_cleanup();
user_cleanup();
atomic_set(&user_setup, 0);
diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c
index 63de3046aff3..a507d0b5be6c 100644
--- a/drivers/infiniband/hw/ipath/ipath_fs.c
+++ b/drivers/infiniband/hw/ipath/ipath_fs.c
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2006 QLogic, Inc. All rights reserved.
* Copyright (c) 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -31,7 +32,6 @@
*/
#include <linux/version.h>
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/mount.h>
@@ -61,10 +61,9 @@ static int ipathfs_mknod(struct inode *dir, struct dentry *dentry,
inode->i_mode = mode;
inode->i_uid = 0;
inode->i_gid = 0;
- inode->i_blksize = PAGE_CACHE_SIZE;
inode->i_blocks = 0;
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
- inode->u.generic_ip = data;
+ inode->i_private = data;
if ((mode & S_IFMT) == S_IFDIR) {
inode->i_op = &simple_dir_inode_operations;
inode->i_nlink++;
@@ -119,7 +118,7 @@ static ssize_t atomic_counters_read(struct file *file, char __user *buf,
u16 i;
struct ipath_devdata *dd;
- dd = file->f_dentry->d_inode->u.generic_ip;
+ dd = file->f_dentry->d_inode->i_private;
for (i = 0; i < NUM_COUNTERS; i++)
counters[i] = ipath_snap_cntr(dd, i);
@@ -139,7 +138,7 @@ static ssize_t atomic_node_info_read(struct file *file, char __user *buf,
struct ipath_devdata *dd;
u64 guid;
- dd = file->f_dentry->d_inode->u.generic_ip;
+ dd = file->f_dentry->d_inode->i_private;
guid = be64_to_cpu(dd->ipath_guid);
@@ -178,7 +177,7 @@ static ssize_t atomic_port_info_read(struct file *file, char __user *buf,
u32 tmp, tmp2;
struct ipath_devdata *dd;
- dd = file->f_dentry->d_inode->u.generic_ip;
+ dd = file->f_dentry->d_inode->i_private;
/* so we only initialize non-zero fields. */
memset(portinfo, 0, sizeof portinfo);
@@ -191,8 +190,8 @@ static ssize_t atomic_port_info_read(struct file *file, char __user *buf,
portinfo[4] = (dd->ipath_lid << 16);
/*
- * Notimpl yet SMLID (should we store this in the driver, in case
- * SMA dies?) CapabilityMask is 0, we don't support any of these
+ * Notimpl yet SMLID.
+ * CapabilityMask is 0, we don't support any of these
* DiagCode is 0; we don't store any diag info for now Notimpl yet
* M_KeyLeasePeriod (we don't support M_Key)
*/
@@ -325,7 +324,7 @@ static ssize_t flash_read(struct file *file, char __user *buf,
goto bail;
}
- dd = file->f_dentry->d_inode->u.generic_ip;
+ dd = file->f_dentry->d_inode->i_private;
if (ipath_eeprom_read(dd, pos, tmp, count)) {
ipath_dev_err(dd, "failed to read from flash\n");
ret = -ENXIO;
@@ -357,19 +356,16 @@ static ssize_t flash_write(struct file *file, const char __user *buf,
pos = *ppos;
- if ( pos < 0) {
+ if (pos != 0) {
ret = -EINVAL;
goto bail;
}
- if (pos >= sizeof(struct ipath_flash)) {
- ret = 0;
+ if (count != sizeof(struct ipath_flash)) {
+ ret = -EINVAL;
goto bail;
}
- if (count > sizeof(struct ipath_flash) - pos)
- count = sizeof(struct ipath_flash) - pos;
-
tmp = kmalloc(count, GFP_KERNEL);
if (!tmp) {
ret = -ENOMEM;
@@ -381,7 +377,7 @@ static ssize_t flash_write(struct file *file, const char __user *buf,
goto bail_tmp;
}
- dd = file->f_dentry->d_inode->u.generic_ip;
+ dd = file->f_dentry->d_inode->i_private;
if (ipath_eeprom_write(dd, pos, tmp, count)) {
ret = -ENXIO;
ipath_dev_err(dd, "failed to write to flash\n");
diff --git a/drivers/infiniband/hw/ipath/ipath_ht400.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c
index fac0a2b74de2..9e4e8d4c6e20 100644
--- a/drivers/infiniband/hw/ipath/ipath_ht400.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6110.c
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2006 QLogic, Inc. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -32,7 +33,7 @@
/*
* This file contains all of the code that is specific to the InfiniPath
- * HT-400 chip.
+ * HT chip.
*/
#include <linux/pci.h>
@@ -42,7 +43,7 @@
#include "ipath_registers.h"
/*
- * This lists the InfiniPath HT400 registers, in the actual chip layout.
+ * This lists the InfiniPath registers, in the actual chip layout.
* This structure should never be directly accessed.
*
* The names are in InterCap form because they're taken straight from
@@ -251,8 +252,8 @@ static const struct ipath_cregs ipath_ht_cregs = {
};
/* kr_intstatus, kr_intclear, kr_intmask bits */
-#define INFINIPATH_I_RCVURG_MASK 0x1FF
-#define INFINIPATH_I_RCVAVAIL_MASK 0x1FF
+#define INFINIPATH_I_RCVURG_MASK ((1U<<9)-1)
+#define INFINIPATH_I_RCVAVAIL_MASK ((1U<<9)-1)
/* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */
#define INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT 0
@@ -337,7 +338,7 @@ static void hwerr_crcbits(struct ipath_devdata *dd, ipath_err_t hwerrs,
if (crcbits) {
u16 ctrl0, ctrl1;
snprintf(bitsmsg, sizeof bitsmsg,
- "[HT%s lane %s CRC (%llx); ignore till reload]",
+ "[HT%s lane %s CRC (%llx); powercycle to completely clear]",
!(crcbits & _IPATH_HTLINK1_CRCBITS) ?
"0 (A)" : (!(crcbits & _IPATH_HTLINK0_CRCBITS)
? "1 (B)" : "0+1 (A+B)"),
@@ -388,17 +389,28 @@ static void hwerr_crcbits(struct ipath_devdata *dd, ipath_err_t hwerrs,
_IPATH_HTLINK1_CRCBITS)));
}
+/* 6110 specific hardware errors... */
+static const struct ipath_hwerror_msgs ipath_6110_hwerror_msgs[] = {
+ INFINIPATH_HWE_MSG(HTCBUSIREQPARITYERR, "HTC Ireq Parity"),
+ INFINIPATH_HWE_MSG(HTCBUSTREQPARITYERR, "HTC Treq Parity"),
+ INFINIPATH_HWE_MSG(HTCBUSTRESPPARITYERR, "HTC Tresp Parity"),
+ INFINIPATH_HWE_MSG(HTCMISCERR5, "HT core Misc5"),
+ INFINIPATH_HWE_MSG(HTCMISCERR6, "HT core Misc6"),
+ INFINIPATH_HWE_MSG(HTCMISCERR7, "HT core Misc7"),
+ INFINIPATH_HWE_MSG(RXDSYNCMEMPARITYERR, "Rx Dsync"),
+ INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"),
+};
+
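
The table above (and its 6120 counterpart later in this patch) replaces long chains of per-bit strlcat() calls; ipath_format_hwerrors() simply walks the table and appends the name of every set bit. A minimal sketch of such a consumer, with an illustrative entry type rather than the driver's ipath_hwerror_msgs:

#include <linux/string.h>	/* strlcat() */
#include <linux/types.h>

struct hwerr_msg {
	u64 mask;
	const char *name;
};

/* Append "[name]" to 'msg' for every error bit present in 'hwerrs'. */
static void format_hwerrs(u64 hwerrs, const struct hwerr_msg *tbl,
			  size_t n, char *msg, size_t msgl)
{
	size_t i;

	for (i = 0; i < n; i++) {
		if (!(hwerrs & tbl[i].mask))
			continue;
		strlcat(msg, "[", msgl);
		strlcat(msg, tbl[i].name, msgl);
		strlcat(msg, "]", msgl);
	}
}
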
/**
- * ipath_ht_handle_hwerrors - display hardware errors
+ * ipath_ht_handle_hwerrors - display hardware errors.
* @dd: the infinipath device
* @msg: the output buffer
* @msgl: the size of the output buffer
*
- * Use same msg buffer as regular errors to avoid
- * excessive stack use. Most hardware errors are catastrophic, but for
- * right now, we'll print them and continue.
- * We reuse the same message buffer as ipath_handle_errors() to avoid
- * excessive stack usage.
+ * Use same msg buffer as regular errors to avoid excessive stack
+ * use. Most hardware errors are catastrophic, but for right now,
+ * we'll print them and continue. We reuse the same message buffer as
+ * ipath_handle_errors() to avoid excessive stack usage.
*/
static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
size_t msgl)
@@ -439,19 +451,49 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
* make sure we get this much out, unless told to be quiet,
* or it's occurred within the last 5 seconds
*/
- if ((hwerrs & ~dd->ipath_lasthwerror) ||
+ if ((hwerrs & ~(dd->ipath_lasthwerror |
+ ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
+ INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
+ << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT))) ||
(ipath_debug & __IPATH_VERBDBG))
dev_info(&dd->pcidev->dev, "Hardware error: hwerr=0x%llx "
"(cleared)\n", (unsigned long long) hwerrs);
dd->ipath_lasthwerror |= hwerrs;
- if (hwerrs & ~infinipath_hwe_bitsextant)
+ if (hwerrs & ~dd->ipath_hwe_bitsextant)
ipath_dev_err(dd, "hwerror interrupt with unknown errors "
"%llx set\n", (unsigned long long)
- (hwerrs & ~infinipath_hwe_bitsextant));
+ (hwerrs & ~dd->ipath_hwe_bitsextant));
ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control);
if (ctrl & INFINIPATH_C_FREEZEMODE) {
+ /*
+ * parity errors in send memory are recoverable,
+ * just cancel the send (if indicated in sendbuffererror),
+ * count the occurrence, unfreeze (if no other handled
+ * hardware error bits are set), and continue. They can
+ * occur if a processor speculative read is done to the PIO
+ * buffer while we are sending a packet, for example.
+ */
+ if (hwerrs & ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
+ INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
+ << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) {
+ ipath_stats.sps_txeparity++;
+ ipath_dbg("Recovering from TXE parity error (%llu), "
+ "hwerrstatus=%llx\n",
+ (unsigned long long) ipath_stats.sps_txeparity,
+ (unsigned long long) hwerrs);
+ ipath_disarm_senderrbufs(dd);
+ hwerrs &= ~((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
+ INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
+ << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT);
+ if (!hwerrs) { /* else leave in freeze mode */
+ ipath_write_kreg(dd,
+ dd->ipath_kregs->kr_control,
+ dd->ipath_control);
+ return;
+ }
+ }
if (hwerrs) {
/*
* if any set that we aren't ignoring; only
@@ -460,8 +502,9 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
* times.
*/
if (dd->ipath_flags & IPATH_INITTED) {
- ipath_dev_err(dd, "Fatal Error (freeze "
- "mode), no longer usable\n");
+ ipath_dev_err(dd, "Fatal Hardware Error (freeze "
+ "mode), no longer usable, SN %.16s\n",
+ dd->ipath_serial);
isfatal = 1;
}
*dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
@@ -497,46 +540,18 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
bits);
strlcat(msg, bitsmsg, msgl);
}
- if (hwerrs & (INFINIPATH_HWE_RXEMEMPARITYERR_MASK
- << INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT)) {
- bits = (u32) ((hwerrs >>
- INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) &
- INFINIPATH_HWE_RXEMEMPARITYERR_MASK);
- snprintf(bitsmsg, sizeof bitsmsg, "[RXE Parity Errs %x] ",
- bits);
- strlcat(msg, bitsmsg, msgl);
- }
- if (hwerrs & (INFINIPATH_HWE_TXEMEMPARITYERR_MASK
- << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) {
- bits = (u32) ((hwerrs >>
- INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) &
- INFINIPATH_HWE_TXEMEMPARITYERR_MASK);
- snprintf(bitsmsg, sizeof bitsmsg, "[TXE Parity Errs %x] ",
- bits);
- strlcat(msg, bitsmsg, msgl);
- }
- if (hwerrs & INFINIPATH_HWE_IBCBUSTOSPCPARITYERR)
- strlcat(msg, "[IB2IPATH Parity]", msgl);
- if (hwerrs & INFINIPATH_HWE_IBCBUSFRSPCPARITYERR)
- strlcat(msg, "[IPATH2IB Parity]", msgl);
- if (hwerrs & INFINIPATH_HWE_HTCBUSIREQPARITYERR)
- strlcat(msg, "[HTC Ireq Parity]", msgl);
- if (hwerrs & INFINIPATH_HWE_HTCBUSTREQPARITYERR)
- strlcat(msg, "[HTC Treq Parity]", msgl);
- if (hwerrs & INFINIPATH_HWE_HTCBUSTRESPPARITYERR)
- strlcat(msg, "[HTC Tresp Parity]", msgl);
+
+ ipath_format_hwerrors(hwerrs,
+ ipath_6110_hwerror_msgs,
+ sizeof(ipath_6110_hwerror_msgs) /
+ sizeof(ipath_6110_hwerror_msgs[0]),
+ msg, msgl);
if (hwerrs & (_IPATH_HTLINK0_CRCBITS | _IPATH_HTLINK1_CRCBITS))
hwerr_crcbits(dd, hwerrs, msg, msgl);
- if (hwerrs & INFINIPATH_HWE_HTCMISCERR5)
- strlcat(msg, "[HT core Misc5]", msgl);
- if (hwerrs & INFINIPATH_HWE_HTCMISCERR6)
- strlcat(msg, "[HT core Misc6]", msgl);
- if (hwerrs & INFINIPATH_HWE_HTCMISCERR7)
- strlcat(msg, "[HT core Misc7]", msgl);
if (hwerrs & INFINIPATH_HWE_MEMBISTFAILED) {
- strlcat(msg, "[Memory BIST test failed, HT-400 unusable]",
+ strlcat(msg, "[Memory BIST test failed, InfiniPath hardware unusable]",
msgl);
/* ignore from now on, so disable until driver reloaded */
dd->ipath_hwerrmask &= ~INFINIPATH_HWE_MEMBISTFAILED;
@@ -552,7 +567,7 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
if (hwerrs & _IPATH_PLL_FAIL) {
snprintf(bitsmsg, sizeof bitsmsg,
- "[PLL failed (%llx), HT-400 unusable]",
+ "[PLL failed (%llx), InfiniPath hardware unusable]",
(unsigned long long) (hwerrs & _IPATH_PLL_FAIL));
strlcat(msg, bitsmsg, msgl);
/* ignore from now on, so disable until driver reloaded */
@@ -571,11 +586,6 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
dd->ipath_hwerrmask);
}
- if (hwerrs & INFINIPATH_HWE_RXDSYNCMEMPARITYERR)
- strlcat(msg, "[Rx Dsync]", msgl);
- if (hwerrs & INFINIPATH_HWE_SERDESPLLFAILED)
- strlcat(msg, "[SerDes PLL]", msgl);
-
ipath_dev_err(dd, "%s hardware error\n", msg);
if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg)
/*
@@ -609,18 +619,18 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name,
break;
case 5:
/*
- * HT-460 original production board; two production levels, with
+ * original production board; two production levels, with
* different serial number ranges. See ipath_ht_early_init() for
* case where we enable IPATH_GPIO_INTR for later serial # range.
*/
- n = "InfiniPath_HT-460";
+ n = "InfiniPath_QHT7040";
break;
case 6:
n = "OEM_Board_3";
break;
case 7:
- /* HT-460 small form factor production board */
- n = "InfiniPath_HT-465";
+ /* small form factor production board */
+ n = "InfiniPath_QHT7140";
break;
case 8:
n = "LS/X-1";
@@ -632,7 +642,7 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name,
n = "OEM_Board_2";
break;
case 11:
- n = "InfiniPath_HT-470";
+ n = "InfiniPath_HT-470"; /* obsoleted */
break;
case 12:
n = "OEM_Board_4";
@@ -640,7 +650,7 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name,
default: /* don't know, just print the number */
ipath_dev_err(dd, "Don't yet know about board "
"with ID %u\n", boardrev);
- snprintf(name, namelen, "Unknown_InfiniPath_HT-4xx_%u",
+ snprintf(name, namelen, "Unknown_InfiniPath_QHT7xxx_%u",
boardrev);
break;
}
@@ -649,11 +659,10 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name,
if (dd->ipath_majrev != 3 || (dd->ipath_minrev < 2 || dd->ipath_minrev > 3)) {
/*
- * This version of the driver only supports the HT-400
- * Rev 3.2
+ * This version of the driver only supports Rev 3.2 and 3.3
*/
ipath_dev_err(dd,
- "Unsupported HT-400 revision %u.%u!\n",
+ "Unsupported InfiniPath hardware revision %u.%u!\n",
dd->ipath_majrev, dd->ipath_minrev);
ret = 1;
goto bail;
@@ -737,11 +746,10 @@ static void ipath_check_htlink(struct ipath_devdata *dd)
static int ipath_setup_ht_reset(struct ipath_devdata *dd)
{
- ipath_dbg("No reset possible for HT-400\n");
+ ipath_dbg("No reset possible for this InfiniPath hardware\n");
return 0;
}
-#define HT_CAPABILITY_ID 0x08 /* HT capabilities not defined in kernel */
#define HT_INTR_DISC_CONFIG 0x80 /* HT interrupt and discovery cap */
#define HT_INTR_REG_INDEX 2 /* intconfig requires indirect accesses */
@@ -924,7 +932,7 @@ static int set_int_handler(struct ipath_devdata *dd, struct pci_dev *pdev,
/*
* kernels with CONFIG_PCI_MSI set the vector in the irq field of
- * struct pci_device, so we use that to program the HT-400 internal
+ * struct pci_device, so we use that to program the internal
* interrupt register (not config space) with that value. The BIOS
* must still have done the basic MSI setup.
*/
@@ -972,7 +980,7 @@ static int ipath_setup_ht_config(struct ipath_devdata *dd,
* do this early, before we ever enable errors or hardware errors,
* mostly to avoid causing the chip to enter freeze mode.
*/
- pos = pci_find_capability(pdev, HT_CAPABILITY_ID);
+ pos = pci_find_capability(pdev, PCI_CAP_ID_HT);
if (!pos) {
ipath_dev_err(dd, "Couldn't find HyperTransport "
"capability; no interrupts\n");
@@ -995,7 +1003,7 @@ static int ipath_setup_ht_config(struct ipath_devdata *dd,
else if (cap_type == HT_INTR_DISC_CONFIG)
ihandler = set_int_handler(dd, pdev, pos);
} while ((pos = pci_find_next_capability(pdev, pos,
- HT_CAPABILITY_ID)));
+ PCI_CAP_ID_HT)));
if (!ihandler) {
ipath_dev_err(dd, "Couldn't find interrupt handler in "
@@ -1012,7 +1020,7 @@ bail:
* @dd: the infinipath device
*
* Called during driver unload.
- * This is currently a nop for the HT-400, not for all chips
+ * This is currently a nop for the HT chip, not for all chips
*/
static void ipath_setup_ht_cleanup(struct ipath_devdata *dd)
{
@@ -1080,21 +1088,21 @@ static void ipath_setup_ht_setextled(struct ipath_devdata *dd,
ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, extctl);
}
-static void ipath_init_ht_variables(void)
+static void ipath_init_ht_variables(struct ipath_devdata *dd)
{
- ipath_gpio_sda_num = _IPATH_GPIO_SDA_NUM;
- ipath_gpio_scl_num = _IPATH_GPIO_SCL_NUM;
- ipath_gpio_sda = IPATH_GPIO_SDA;
- ipath_gpio_scl = IPATH_GPIO_SCL;
+ dd->ipath_gpio_sda_num = _IPATH_GPIO_SDA_NUM;
+ dd->ipath_gpio_scl_num = _IPATH_GPIO_SCL_NUM;
+ dd->ipath_gpio_sda = IPATH_GPIO_SDA;
+ dd->ipath_gpio_scl = IPATH_GPIO_SCL;
- infinipath_i_bitsextant =
+ dd->ipath_i_bitsextant =
(INFINIPATH_I_RCVURG_MASK << INFINIPATH_I_RCVURG_SHIFT) |
(INFINIPATH_I_RCVAVAIL_MASK <<
INFINIPATH_I_RCVAVAIL_SHIFT) |
INFINIPATH_I_ERROR | INFINIPATH_I_SPIOSENT |
INFINIPATH_I_SPIOBUFAVAIL | INFINIPATH_I_GPIO;
- infinipath_e_bitsextant =
+ dd->ipath_e_bitsextant =
INFINIPATH_E_RFORMATERR | INFINIPATH_E_RVCRC |
INFINIPATH_E_RICRC | INFINIPATH_E_RMINPKTLEN |
INFINIPATH_E_RMAXPKTLEN | INFINIPATH_E_RLONGPKTLEN |
@@ -1112,7 +1120,7 @@ static void ipath_init_ht_variables(void)
INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET |
INFINIPATH_E_HARDWARE;
- infinipath_hwe_bitsextant =
+ dd->ipath_hwe_bitsextant =
(INFINIPATH_HWE_HTCMEMPARITYERR_MASK <<
INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT) |
(INFINIPATH_HWE_TXEMEMPARITYERR_MASK <<
@@ -1141,8 +1149,8 @@ static void ipath_init_ht_variables(void)
INFINIPATH_HWE_IBCBUSTOSPCPARITYERR |
INFINIPATH_HWE_IBCBUSFRSPCPARITYERR;
- infinipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK;
- infinipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK;
+ dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK;
+ dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK;
}
/**
@@ -1289,6 +1297,15 @@ static int ipath_ht_bringup_serdes(struct ipath_devdata *dd)
val &= ~INFINIPATH_XGXS_RESET;
change = 1;
}
+ if (((val >> INFINIPATH_XGXS_RX_POL_SHIFT) &
+ INFINIPATH_XGXS_RX_POL_MASK) != dd->ipath_rx_pol_inv ) {
+ /* need to compensate for Tx inversion in partner */
+ val &= ~(INFINIPATH_XGXS_RX_POL_MASK <<
+ INFINIPATH_XGXS_RX_POL_SHIFT);
+ val |= dd->ipath_rx_pol_inv <<
+ INFINIPATH_XGXS_RX_POL_SHIFT;
+ change = 1;
+ }
if (change)
ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
@@ -1469,7 +1486,7 @@ static int ipath_ht_early_init(struct ipath_devdata *dd)
dd->ipath_rcvhdrsize = IPATH_DFLT_RCVHDRSIZE;
/*
- * For HT-400, we allocate a somewhat overly large eager buffer,
+ * For HT, we allocate a somewhat overly large eager buffer,
* such that we can guarantee that we can receive the largest
* packet that we can send out. To truly support a 4KB MTU,
* we need to bump this to a large value. To date, other than
@@ -1530,7 +1547,7 @@ static int ipath_ht_early_init(struct ipath_devdata *dd)
if(dd->ipath_boardrev == 5 && dd->ipath_serial[0] == '1' &&
dd->ipath_serial[1] == '2' && dd->ipath_serial[2] == '8') {
/*
- * Later production HT-460 has same changes as HT-465, so
+ * Later production QHT7040 has same changes as QHT7140, so
* can use GPIO interrupts. They have serial #'s starting
* with 128, rather than 112.
*/
@@ -1559,20 +1576,19 @@ static int ipath_ht_get_base_info(struct ipath_portdata *pd, void *kbase)
}
/**
- * ipath_init_ht400_funcs - set up the chip-specific function pointers
+ * ipath_init_iba6110_funcs - set up the chip-specific function pointers
* @dd: the infinipath device
*
* This is global, and is called directly at init to set up the
* chip-specific function pointers for later use.
*/
-void ipath_init_ht400_funcs(struct ipath_devdata *dd)
+void ipath_init_iba6110_funcs(struct ipath_devdata *dd)
{
dd->ipath_f_intrsetup = ipath_ht_intconfig;
dd->ipath_f_bus = ipath_setup_ht_config;
dd->ipath_f_reset = ipath_setup_ht_reset;
dd->ipath_f_get_boardname = ipath_ht_boardname;
dd->ipath_f_init_hwerrors = ipath_ht_init_hwerrors;
- dd->ipath_f_init_hwerrors = ipath_ht_init_hwerrors;
dd->ipath_f_early_init = ipath_ht_early_init;
dd->ipath_f_handle_hwerrors = ipath_ht_handle_hwerrors;
dd->ipath_f_quiet_serdes = ipath_ht_quiet_serdes;
@@ -1599,5 +1615,5 @@ void ipath_init_ht400_funcs(struct ipath_devdata *dd)
* do very early init that is needed before ipath_f_bus is
* called
*/
- ipath_init_ht_variables();
+ ipath_init_ht_variables(dd);
}
diff --git a/drivers/infiniband/hw/ipath/ipath_pe800.c b/drivers/infiniband/hw/ipath/ipath_iba6120.c
index 02e8c75b24f6..a72ab9de386a 100644
--- a/drivers/infiniband/hw/ipath/ipath_pe800.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6120.c
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2006 QLogic, Inc. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -31,7 +32,7 @@
*/
/*
* This file contains all of the code that is specific to the
- * InfiniPath PE-800 chip.
+ * InfiniPath PCIe chip.
*/
#include <linux/interrupt.h>
@@ -44,9 +45,9 @@
/*
* This file contains all the chip-specific register information and
- * access functions for the PathScale PE800, the PCI-Express chip.
+ * access functions for the QLogic InfiniPath PCI-Express chip.
*
- * This lists the InfiniPath PE800 registers, in the actual chip layout.
+ * This lists the InfiniPath registers, in the actual chip layout.
* This structure should never be directly accessed.
*/
struct _infinipath_do_not_use_kernel_regs {
@@ -212,7 +213,6 @@ static const struct ipath_kregs ipath_pe_kregs = {
.kr_rcvhdraddr = IPATH_KREG_OFFSET(RcvHdrAddr0),
.kr_rcvhdrtailaddr = IPATH_KREG_OFFSET(RcvHdrTailAddr0),
- /* This group is pe-800-specific; and used only in this file */
/* The rcvpktled register controls one of the debug port signals, so
* a packet activity LED can be connected to it. */
.kr_rcvpktledcnt = IPATH_KREG_OFFSET(RcvPktLEDCnt),
@@ -263,8 +263,8 @@ static const struct ipath_cregs ipath_pe_cregs = {
};
/* kr_intstatus, kr_intclear, kr_intmask bits */
-#define INFINIPATH_I_RCVURG_MASK 0x1F
-#define INFINIPATH_I_RCVAVAIL_MASK 0x1F
+#define INFINIPATH_I_RCVURG_MASK ((1U<<5)-1)
+#define INFINIPATH_I_RCVAVAIL_MASK ((1U<<5)-1)
/* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */
#define INFINIPATH_HWE_PCIEMEMPARITYERR_MASK 0x000000000000003fULL
@@ -294,6 +294,33 @@ static const struct ipath_cregs ipath_pe_cregs = {
#define IPATH_GPIO_SCL (1ULL << \
(_IPATH_GPIO_SCL_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT))
+/*
+ * Rev2 silicon allows suppressing the check for ArmLaunch errors.
+ * This can speed up short packet sends on systems that do
+ * not guarantee write ordering.
+ */
+#define INFINIPATH_XGXS_SUPPRESS_ARMLAUNCH_ERR (1ULL<<63)
+
+/* 6120 specific hardware errors... */
+static const struct ipath_hwerror_msgs ipath_6120_hwerror_msgs[] = {
+ INFINIPATH_HWE_MSG(PCIEPOISONEDTLP, "PCIe Poisoned TLP"),
+ INFINIPATH_HWE_MSG(PCIECPLTIMEOUT, "PCIe completion timeout"),
+ /*
+ * In practice, it's unlikely that we'll see PCIe PLL, or bus
+ * parity or memory parity error failures, because most likely we
+ * won't be able to talk to the core of the chip. Nonetheless, we
+ * might see them, if they are in parts of the PCIe core that aren't
+ * essential.
+ */
+ INFINIPATH_HWE_MSG(PCIE1PLLFAILED, "PCIePLL1"),
+ INFINIPATH_HWE_MSG(PCIE0PLLFAILED, "PCIePLL0"),
+ INFINIPATH_HWE_MSG(PCIEBUSPARITYXTLH, "PCIe XTLH core parity"),
+ INFINIPATH_HWE_MSG(PCIEBUSPARITYXADM, "PCIe ADM TX core parity"),
+ INFINIPATH_HWE_MSG(PCIEBUSPARITYRADM, "PCIe ADM RX core parity"),
+ INFINIPATH_HWE_MSG(RXDSYNCMEMPARITYERR, "Rx Dsync"),
+ INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"),
+};
+
/**
* ipath_pe_handle_hwerrors - display hardware errors.
* @dd: the infinipath device
@@ -343,19 +370,49 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
* make sure we get this much out, unless told to be quiet,
* or it's occurred within the last 5 seconds
*/
- if ((hwerrs & ~dd->ipath_lasthwerror) ||
+ if ((hwerrs & ~(dd->ipath_lasthwerror |
+ ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
+ INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
+ << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT))) ||
(ipath_debug & __IPATH_VERBDBG))
dev_info(&dd->pcidev->dev, "Hardware error: hwerr=0x%llx "
"(cleared)\n", (unsigned long long) hwerrs);
dd->ipath_lasthwerror |= hwerrs;
- if (hwerrs & ~infinipath_hwe_bitsextant)
+ if (hwerrs & ~dd->ipath_hwe_bitsextant)
ipath_dev_err(dd, "hwerror interrupt with unknown errors "
"%llx set\n", (unsigned long long)
- (hwerrs & ~infinipath_hwe_bitsextant));
+ (hwerrs & ~dd->ipath_hwe_bitsextant));
ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control);
if (ctrl & INFINIPATH_C_FREEZEMODE) {
+ /*
+ * parity errors in send memory are recoverable,
+ * just cancel the send (if indicated in sendbuffererror),
+ * count the occurrence, unfreeze (if no other handled
+ * hardware error bits are set), and continue. They can
+ * occur if a processor speculative read is done to the PIO
+ * buffer while we are sending a packet, for example.
+ */
+ if (hwerrs & ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
+ INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
+ << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) {
+ ipath_stats.sps_txeparity++;
+ ipath_dbg("Recovering from TXE parity error (%llu), "
+ "hwerrstatus=%llx\n",
+ (unsigned long long) ipath_stats.sps_txeparity,
+ (unsigned long long) hwerrs);
+ ipath_disarm_senderrbufs(dd);
+ hwerrs &= ~((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
+ INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
+ << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT);
+ if (!hwerrs) { /* else leave in freeze mode */
+ ipath_write_kreg(dd,
+ dd->ipath_kregs->kr_control,
+ dd->ipath_control);
+ return;
+ }
+ }
if (hwerrs) {
/*
* if any set that we aren't ignoring only make the
@@ -363,8 +420,9 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
* and we get here multiple times
*/
if (dd->ipath_flags & IPATH_INITTED) {
- ipath_dev_err(dd, "Fatal Error (freeze "
- "mode), no longer usable\n");
+ ipath_dev_err(dd, "Fatal Hardware Error (freeze "
+ "mode), no longer usable, SN %.16s\n",
+ dd->ipath_serial);
isfatal = 1;
}
/*
@@ -378,16 +436,15 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
} else {
ipath_dbg("Clearing freezemode on ignored hardware "
"error\n");
- ctrl &= ~INFINIPATH_C_FREEZEMODE;
ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
- ctrl);
+ dd->ipath_control);
}
}
*msg = '\0';
if (hwerrs & INFINIPATH_HWE_MEMBISTFAILED) {
- strlcat(msg, "[Memory BIST test failed, PE-800 unusable]",
+ strlcat(msg, "[Memory BIST test failed, InfiniPath hardware unusable]",
msgl);
/* ignore from now on, so disable until driver reloaded */
*dd->ipath_statusp |= IPATH_STATUS_HWERROR;
@@ -395,24 +452,13 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
dd->ipath_hwerrmask);
}
- if (hwerrs & (INFINIPATH_HWE_RXEMEMPARITYERR_MASK
- << INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT)) {
- bits = (u32) ((hwerrs >>
- INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) &
- INFINIPATH_HWE_RXEMEMPARITYERR_MASK);
- snprintf(bitsmsg, sizeof bitsmsg, "[RXE Parity Errs %x] ",
- bits);
- strlcat(msg, bitsmsg, msgl);
- }
- if (hwerrs & (INFINIPATH_HWE_TXEMEMPARITYERR_MASK
- << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) {
- bits = (u32) ((hwerrs >>
- INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) &
- INFINIPATH_HWE_TXEMEMPARITYERR_MASK);
- snprintf(bitsmsg, sizeof bitsmsg, "[TXE Parity Errs %x] ",
- bits);
- strlcat(msg, bitsmsg, msgl);
- }
+
+ ipath_format_hwerrors(hwerrs,
+ ipath_6120_hwerror_msgs,
+ sizeof(ipath_6120_hwerror_msgs)/
+ sizeof(ipath_6120_hwerror_msgs[0]),
+ msg, msgl);
+
if (hwerrs & (INFINIPATH_HWE_PCIEMEMPARITYERR_MASK
<< INFINIPATH_HWE_PCIEMEMPARITYERR_SHIFT)) {
bits = (u32) ((hwerrs >>
@@ -422,17 +468,13 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
"[PCIe Mem Parity Errs %x] ", bits);
strlcat(msg, bitsmsg, msgl);
}
- if (hwerrs & INFINIPATH_HWE_IBCBUSTOSPCPARITYERR)
- strlcat(msg, "[IB2IPATH Parity]", msgl);
- if (hwerrs & INFINIPATH_HWE_IBCBUSFRSPCPARITYERR)
- strlcat(msg, "[IPATH2IB Parity]", msgl);
#define _IPATH_PLL_FAIL (INFINIPATH_HWE_COREPLL_FBSLIP | \
INFINIPATH_HWE_COREPLL_RFSLIP )
if (hwerrs & _IPATH_PLL_FAIL) {
snprintf(bitsmsg, sizeof bitsmsg,
- "[PLL failed (%llx), PE-800 unusable]",
+ "[PLL failed (%llx), InfiniPath hardware unusable]",
(unsigned long long) hwerrs & _IPATH_PLL_FAIL);
strlcat(msg, bitsmsg, msgl);
/* ignore from now on, so disable until driver reloaded */
@@ -451,34 +493,6 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
dd->ipath_hwerrmask);
}
- if (hwerrs & INFINIPATH_HWE_PCIEPOISONEDTLP)
- strlcat(msg, "[PCIe Poisoned TLP]", msgl);
- if (hwerrs & INFINIPATH_HWE_PCIECPLTIMEOUT)
- strlcat(msg, "[PCIe completion timeout]", msgl);
-
- /*
- * In practice, it's unlikely wthat we'll see PCIe PLL, or bus
- * parity or memory parity error failures, because most likely we
- * won't be able to talk to the core of the chip. Nonetheless, we
- * might see them, if they are in parts of the PCIe core that aren't
- * essential.
- */
- if (hwerrs & INFINIPATH_HWE_PCIE1PLLFAILED)
- strlcat(msg, "[PCIePLL1]", msgl);
- if (hwerrs & INFINIPATH_HWE_PCIE0PLLFAILED)
- strlcat(msg, "[PCIePLL0]", msgl);
- if (hwerrs & INFINIPATH_HWE_PCIEBUSPARITYXTLH)
- strlcat(msg, "[PCIe XTLH core parity]", msgl);
- if (hwerrs & INFINIPATH_HWE_PCIEBUSPARITYXADM)
- strlcat(msg, "[PCIe ADM TX core parity]", msgl);
- if (hwerrs & INFINIPATH_HWE_PCIEBUSPARITYRADM)
- strlcat(msg, "[PCIe ADM RX core parity]", msgl);
-
- if (hwerrs & INFINIPATH_HWE_RXDSYNCMEMPARITYERR)
- strlcat(msg, "[Rx Dsync]", msgl);
- if (hwerrs & INFINIPATH_HWE_SERDESPLLFAILED)
- strlcat(msg, "[SerDes PLL]", msgl);
-
ipath_dev_err(dd, "%s hardware error\n", msg);
if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg) {
/*
@@ -510,30 +524,36 @@ static int ipath_pe_boardname(struct ipath_devdata *dd, char *name,
n = "InfiniPath_Emulation";
break;
case 1:
- n = "InfiniPath_PE-800-Bringup";
+ n = "InfiniPath_QLE7140-Bringup";
break;
case 2:
- n = "InfiniPath_PE-880";
+ n = "InfiniPath_QLE7140";
break;
case 3:
- n = "InfiniPath_PE-850";
+ n = "InfiniPath_QMI7140";
break;
case 4:
- n = "InfiniPath_PE-860";
+ n = "InfiniPath_QEM7140";
+ break;
+ case 5:
+ n = "InfiniPath_QMH7140";
+ break;
+ case 6:
+ n = "InfiniPath_QLE7142";
break;
default:
ipath_dev_err(dd,
"Don't yet know about board with ID %u\n",
boardrev);
- snprintf(name, namelen, "Unknown_InfiniPath_PE-8xx_%u",
+ snprintf(name, namelen, "Unknown_InfiniPath_PCIe_%u",
boardrev);
break;
}
if (n)
snprintf(name, namelen, "%s", n);
- if (dd->ipath_majrev != 4 || dd->ipath_minrev != 1) {
- ipath_dev_err(dd, "Unsupported PE-800 revision %u.%u!\n",
+ if (dd->ipath_majrev != 4 || !dd->ipath_minrev || dd->ipath_minrev>2) {
+ ipath_dev_err(dd, "Unsupported InfiniPath hardware revision %u.%u!\n",
dd->ipath_majrev, dd->ipath_minrev);
ret = 1;
} else
@@ -567,9 +587,12 @@ static void ipath_pe_init_hwerrors(struct ipath_devdata *dd)
if (!dd->ipath_boardrev) // no PLL for Emulator
val &= ~INFINIPATH_HWE_SERDESPLLFAILED;
- /* workaround bug 9460 in internal interface bus parity checking */
- val &= ~INFINIPATH_HWE_PCIEBUSPARITYRADM;
-
+ if (dd->ipath_minrev < 2) {
+ /* workaround bug 9460 in internal interface bus parity
+ * checking. Fixed (HW bug 9490) in Rev2.
+ */
+ val &= ~INFINIPATH_HWE_PCIEBUSPARITYRADM;
+ }
dd->ipath_hwerrmask = val;
}
@@ -579,8 +602,8 @@ static void ipath_pe_init_hwerrors(struct ipath_devdata *dd)
*/
static int ipath_pe_bringup_serdes(struct ipath_devdata *dd)
{
- u64 val, tmp, config1;
- int ret = 0, change = 0;
+ u64 val, tmp, config1, prev_val;
+ int ret = 0;
ipath_dbg("Trying to bringup serdes\n");
@@ -637,6 +660,7 @@ static int ipath_pe_bringup_serdes(struct ipath_devdata *dd)
val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);
+ prev_val = val;
if (((val >> INFINIPATH_XGXS_MDIOADDR_SHIFT) &
INFINIPATH_XGXS_MDIOADDR_MASK) != 3) {
val &=
@@ -644,13 +668,30 @@ static int ipath_pe_bringup_serdes(struct ipath_devdata *dd)
INFINIPATH_XGXS_MDIOADDR_SHIFT);
/* MDIO address 3 */
val |= 3ULL << INFINIPATH_XGXS_MDIOADDR_SHIFT;
- change = 1;
}
if (val & INFINIPATH_XGXS_RESET) {
val &= ~INFINIPATH_XGXS_RESET;
- change = 1;
}
- if (change)
+ if (((val >> INFINIPATH_XGXS_RX_POL_SHIFT) &
+ INFINIPATH_XGXS_RX_POL_MASK) != dd->ipath_rx_pol_inv ) {
+ /* need to compensate for Tx inversion in partner */
+ val &= ~(INFINIPATH_XGXS_RX_POL_MASK <<
+ INFINIPATH_XGXS_RX_POL_SHIFT);
+ val |= dd->ipath_rx_pol_inv <<
+ INFINIPATH_XGXS_RX_POL_SHIFT;
+ }
+ if (dd->ipath_minrev >= 2) {
+ /* Rev 2. can tolerate multiple writes to PBC, and
+ * allowing them can provide lower latency on some
+ * CPUs, but this feature is off by default, only
+ * turned on by setting D63 of XGXSconfig reg.
+ * May want to make this conditional more
+ * fine-grained in future. This is not exactly
+ * related to XGXS, but where the bit ended up.
+ */
+ val |= INFINIPATH_XGXS_SUPPRESS_ARMLAUNCH_ERR;
+ }
+ if (val != prev_val)
ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0);
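
The MDIO-address and RX-polarity updates in the hunk above follow the usual clear-then-set register-field idiom. The following is only a stand-alone userspace sketch of that idiom; the field positions and values are invented for the example, not the real INFINIPATH_XGXS_* definitions.

    #include <stdint.h>
    #include <stdio.h>

    /* Replace the field (mask << shift) in reg with val: clear, then set. */
    static uint64_t set_field(uint64_t reg, uint64_t mask, unsigned shift,
                              uint64_t val)
    {
            reg &= ~(mask << shift);
            return reg | ((val & mask) << shift);
    }

    int main(void)
    {
            uint64_t xgxs = 0;

            xgxs = set_field(xgxs, 0x1f, 4, 3);   /* e.g. an MDIO address field */
            xgxs = set_field(xgxs, 0x1, 23, 1);   /* e.g. an RX polarity bit */
            printf("xgxs = 0x%016llx\n", (unsigned long long)xgxs);
            return 0;
    }
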
@@ -704,9 +745,25 @@ static void ipath_pe_quiet_serdes(struct ipath_devdata *dd)
ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val);
}
-/* this is not yet needed on the PE800, so just return 0. */
static int ipath_pe_intconfig(struct ipath_devdata *dd)
{
+ u64 val;
+ u32 chiprev;
+
+ /*
+ * If the chip supports added error indication via GPIO pins,
+ * enable interrupts on those bits so the interrupt routine
+ * can count the events. Also set flag so interrupt routine
+ * can know they are expected.
+ */
+ chiprev = dd->ipath_revision >> INFINIPATH_R_CHIPREVMINOR_SHIFT;
+ if ((chiprev & INFINIPATH_R_CHIPREVMINOR_MASK) > 1) {
+ /* Rev2+ reports extra errors via internal GPIO pins */
+ dd->ipath_flags |= IPATH_GPIO_ERRINTRS;
+ val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_gpio_mask);
+ val |= IPATH_GPIO_ERRINTR_MASK;
+ ipath_write_kreg( dd, dd->ipath_kregs->kr_gpio_mask, val);
+ }
return 0;
}
@@ -758,8 +815,8 @@ static void ipath_setup_pe_setextled(struct ipath_devdata *dd, u64 lst,
*
* This is called during driver unload.
* We do the pci_disable_msi here, not in generic code, because it
- * isn't used for the HT-400. If we do end up needing pci_enable_msi
- * at some point in the future for HT-400, we'll move the call back
+ * isn't used for the HT chips. If we do end up needing pci_enable_msi
+ * at some point in the future for HT, we'll move the call back
* into the main init_one code.
*/
static void ipath_setup_pe_cleanup(struct ipath_devdata *dd)
@@ -779,10 +836,10 @@ static void ipath_setup_pe_cleanup(struct ipath_devdata *dd)
* late in 2.6.16).
* All that can be done is to edit the kernel source to remove the quirk
* check until that is fixed.
- * We do not need to call enable_msi() for our HyperTransport chip (HT-400),
- * even those it uses MSI, and we want to avoid the quirk warning, so
- * So we call enable_msi only for the PE-800. If we do end up needing
- * pci_enable_msi at some point in the future for HT-400, we'll move the
+ * We do not need to call enable_msi() for our HyperTransport chip,
+ * even though it uses MSI, and we want to avoid the quirk warning, so
+ * So we call enable_msi only for PCIe. If we do end up needing
+ * pci_enable_msi at some point in the future for HT, we'll move the
* call back into the main init_one code.
* We save the msi lo and hi values, so we can restore them after
* chip reset (the kernel PCI infrastructure doesn't yet handle that
@@ -840,21 +897,23 @@ static int ipath_setup_pe_config(struct ipath_devdata *dd,
return 0;
}
-static void ipath_init_pe_variables(void)
+static void ipath_init_pe_variables(struct ipath_devdata *dd)
{
/*
* bits for selecting i2c direction and values,
* used for I2C serial flash
*/
- ipath_gpio_sda_num = _IPATH_GPIO_SDA_NUM;
- ipath_gpio_scl_num = _IPATH_GPIO_SCL_NUM;
- ipath_gpio_sda = IPATH_GPIO_SDA;
- ipath_gpio_scl = IPATH_GPIO_SCL;
+ dd->ipath_gpio_sda_num = _IPATH_GPIO_SDA_NUM;
+ dd->ipath_gpio_scl_num = _IPATH_GPIO_SCL_NUM;
+ dd->ipath_gpio_sda = IPATH_GPIO_SDA;
+ dd->ipath_gpio_scl = IPATH_GPIO_SCL;
/* variables for sanity checking interrupt and errors */
- infinipath_hwe_bitsextant =
+ dd->ipath_hwe_bitsextant =
(INFINIPATH_HWE_RXEMEMPARITYERR_MASK <<
INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) |
+ (INFINIPATH_HWE_TXEMEMPARITYERR_MASK <<
+ INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) |
(INFINIPATH_HWE_PCIEMEMPARITYERR_MASK <<
INFINIPATH_HWE_PCIEMEMPARITYERR_SHIFT) |
INFINIPATH_HWE_PCIE1PLLFAILED |
@@ -870,13 +929,13 @@ static void ipath_init_pe_variables(void)
INFINIPATH_HWE_SERDESPLLFAILED |
INFINIPATH_HWE_IBCBUSTOSPCPARITYERR |
INFINIPATH_HWE_IBCBUSFRSPCPARITYERR;
- infinipath_i_bitsextant =
+ dd->ipath_i_bitsextant =
(INFINIPATH_I_RCVURG_MASK << INFINIPATH_I_RCVURG_SHIFT) |
(INFINIPATH_I_RCVAVAIL_MASK <<
INFINIPATH_I_RCVAVAIL_SHIFT) |
INFINIPATH_I_ERROR | INFINIPATH_I_SPIOSENT |
INFINIPATH_I_SPIOBUFAVAIL | INFINIPATH_I_GPIO;
- infinipath_e_bitsextant =
+ dd->ipath_e_bitsextant =
INFINIPATH_E_RFORMATERR | INFINIPATH_E_RVCRC |
INFINIPATH_E_RICRC | INFINIPATH_E_RMINPKTLEN |
INFINIPATH_E_RMAXPKTLEN | INFINIPATH_E_RLONGPKTLEN |
@@ -894,8 +953,8 @@ static void ipath_init_pe_variables(void)
INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET |
INFINIPATH_E_HARDWARE;
- infinipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK;
- infinipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK;
+ dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK;
+ dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK;
}
/* setup the MSI stuff again after a reset. I'd like to just call
@@ -970,8 +1029,7 @@ static int ipath_setup_pe_reset(struct ipath_devdata *dd)
int ret;
/* Use ERROR so it shows up in logs, etc. */
- ipath_dev_err(dd, "Resetting PE-800 unit %u\n",
- dd->ipath_unit);
+ ipath_dev_err(dd, "Resetting InfiniPath unit %u\n", dd->ipath_unit);
/* keep chip from being accessed in a few places */
dd->ipath_flags &= ~(IPATH_INITTED|IPATH_PRESENT);
val = dd->ipath_control | INFINIPATH_C_RESET;
@@ -1070,6 +1128,45 @@ static void ipath_pe_put_tid(struct ipath_devdata *dd, u64 __iomem *tidptr,
mmiowb();
spin_unlock_irqrestore(&dd->ipath_tid_lock, flags);
}
+/**
+ * ipath_pe_put_tid_2 - write a TID in chip, Revision 2 or higher
+ * @dd: the infinipath device
+ * @tidptr: pointer to the expected TID (in chip) to update
+ * @tidtype: 0 for eager, 1 for expected
+ * @pa: physical address of in memory buffer; ipath_tidinvalid if freeing
+ *
+ * This exists as a separate routine to allow for selection of the
+ * appropriate "flavor". The static calls in cleanup just use the
+ * revision-agnostic form, as they are not performance critical.
+ */
+static void ipath_pe_put_tid_2(struct ipath_devdata *dd, u64 __iomem *tidptr,
+ u32 type, unsigned long pa)
+{
+ u32 __iomem *tidp32 = (u32 __iomem *)tidptr;
+
+ if (pa != dd->ipath_tidinvalid) {
+ if (pa & ((1U << 11) - 1)) {
+ dev_info(&dd->pcidev->dev, "BUG: physaddr %lx "
+ "not 2KB aligned!\n", pa);
+ return;
+ }
+ pa >>= 11;
+ /* paranoia check */
+ if (pa & (7<<29))
+ ipath_dev_err(dd,
+ "BUG: Physical page address 0x%lx "
+ "has bits set in 31-29\n", pa);
+
+ if (type == 0)
+ pa |= dd->ipath_tidtemplate;
+ else /* for now, always full 4KB page */
+ pa |= 2 << 29;
+ }
+ if (dd->ipath_kregbase)
+ writel(pa, tidp32);
+ mmiowb();
+}
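
For illustration, the 32-bit TID word that ipath_pe_put_tid_2() writes can be sketched in plain userspace C. This is only a sketch of the packing described in the comments above; the template constant and the sample address are made up and do not come from the driver.

    #include <stdint.h>
    #include <stdio.h>

    #define TID_TEMPLATE_EAGER 0x1u   /* stand-in for dd->ipath_tidtemplate */

    /* Pack a 2KB-aligned physical address into the 32-bit TID word. */
    static uint32_t pack_tid(uint64_t pa, int expected)
    {
            if (pa & ((1u << 11) - 1))        /* must be 2KB aligned */
                    return 0;
            pa >>= 11;                        /* address field */
            if (pa & (7u << 29))              /* bits 31:29 hold the type bits */
                    return 0;
            return (uint32_t)(pa | (expected ? (2u << 29) : TID_TEMPLATE_EAGER));
    }

    int main(void)
    {
            printf("eager    TID word: 0x%08x\n", pack_tid(0x12346000ull, 0));
            printf("expected TID word: 0x%08x\n", pack_tid(0x12346000ull, 1));
            return 0;
    }
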
+
/**
* ipath_pe_clear_tid - clear all TID entries for a port, expected and eager
@@ -1077,7 +1174,7 @@ static void ipath_pe_put_tid(struct ipath_devdata *dd, u64 __iomem *tidptr,
* @port: the port
*
* clear all TID entries for a port, expected and eager.
- * Used from ipath_close(). On PE800, TIDs are only 32 bits,
+ * Used from ipath_close(). On this chip, TIDs are only 32 bits,
* not 64, but they are still on 64 bit boundaries, so tidbase
* is declared as u64 * for the pointer math, even though we write 32 bits
*/
@@ -1147,9 +1244,9 @@ static int ipath_pe_early_init(struct ipath_devdata *dd)
dd->ipath_flags |= IPATH_4BYTE_TID;
/*
- * For openib, we need to be able to handle an IB header of 96 bytes
- * or 24 dwords. HT-400 has arbitrary sized receive buffers, so we
- * made them the same size as the PIO buffers. The PE-800 does not
+ * For openfabrics, we need to be able to handle an IB header of
+ * 24 dwords. HT chip has arbitrary sized receive buffers, so we
+ * made them the same size as the PIO buffers. This chip does not
* handle arbitrary size buffers, so we need the header large enough
* to handle largest IB header, but still have room for a 2KB MTU
* standard IB packet.
@@ -1157,11 +1254,10 @@ static int ipath_pe_early_init(struct ipath_devdata *dd)
dd->ipath_rcvhdrentsize = 24;
dd->ipath_rcvhdrsize = IPATH_DFLT_RCVHDRSIZE;
- /* For HT-400, we allocate a somewhat overly large eager buffer,
- * such that we can guarantee that we can receive the largest packet
- * that we can send out. To truly support a 4KB MTU, we need to
- * bump this to a larger value. We'll do this when I get around to
- * testing 4KB sends on the PE-800, which I have not yet done.
+ /*
+ * To truly support a 4KB MTU (for usermode), we need to
+ * bump this to a larger value. For now, we use them for
+ * the kernel only.
*/
dd->ipath_rcvegrbufsize = 2048;
/*
@@ -1174,9 +1270,9 @@ static int ipath_pe_early_init(struct ipath_devdata *dd)
dd->ipath_init_ibmaxlen = dd->ipath_ibmaxlen;
/*
- * For PE-800, we can request a receive interrupt for 1 or
+ * We can request a receive interrupt for 1 or
* more packets from current offset. For now, we set this
- * up for a single packet, to match the HT-400 behavior.
+ * up for a single packet.
*/
dd->ipath_rhdrhead_intr_off = 1ULL<<32;
@@ -1192,7 +1288,7 @@ int __attribute__((weak)) ipath_unordered_wc(void)
/**
* ipath_init_pe_get_base_info - set chip-specific flags for user code
- * @dd: the infinipath device
+ * @pd: the infinipath port
* @kbase: ipath_base_info pointer
*
* We set the PCIE flag because the lower bandwidth on PCIe vs
@@ -1201,6 +1297,7 @@ int __attribute__((weak)) ipath_unordered_wc(void)
static int ipath_pe_get_base_info(struct ipath_portdata *pd, void *kbase)
{
struct ipath_base_info *kinfo = kbase;
+ struct ipath_devdata *dd;
if (ipath_unordered_wc()) {
kinfo->spi_runtime_flags |= IPATH_RUNTIME_FORCE_WC_ORDER;
@@ -1209,19 +1306,31 @@ static int ipath_pe_get_base_info(struct ipath_portdata *pd, void *kbase)
else
ipath_cdbg(PROC, "Not Intel processor, WC ordered\n");
- kinfo->spi_runtime_flags |= IPATH_RUNTIME_PCIE;
+ if (pd == NULL)
+ goto done;
+ dd = pd->port_dd;
+
+ if (dd != NULL && dd->ipath_minrev >= 2) {
+ ipath_cdbg(PROC, "IBA6120 Rev2, allow multiple PBC write\n");
+ kinfo->spi_runtime_flags |= IPATH_RUNTIME_PBC_REWRITE;
+ ipath_cdbg(PROC, "IBA6120 Rev2, allow loose DMA alignment\n");
+ kinfo->spi_runtime_flags |= IPATH_RUNTIME_LOOSE_DMA_ALIGN;
+ }
+
+done:
+ kinfo->spi_runtime_flags |= IPATH_RUNTIME_PCIE;
return 0;
}
/**
- * ipath_init_pe800_funcs - set up the chip-specific function pointers
+ * ipath_init_iba6120_funcs - set up the chip-specific function pointers
* @dd: the infinipath device
*
* This is global, and is called directly at init to set up the
* chip-specific function pointers for later use.
*/
-void ipath_init_pe800_funcs(struct ipath_devdata *dd)
+void ipath_init_iba6120_funcs(struct ipath_devdata *dd)
{
dd->ipath_f_intrsetup = ipath_pe_intconfig;
dd->ipath_f_bus = ipath_setup_pe_config;
@@ -1233,7 +1342,10 @@ void ipath_init_pe800_funcs(struct ipath_devdata *dd)
dd->ipath_f_quiet_serdes = ipath_pe_quiet_serdes;
dd->ipath_f_bringup_serdes = ipath_pe_bringup_serdes;
dd->ipath_f_clear_tids = ipath_pe_clear_tids;
- dd->ipath_f_put_tid = ipath_pe_put_tid;
+ if (dd->ipath_minrev >= 2)
+ dd->ipath_f_put_tid = ipath_pe_put_tid_2;
+ else
+ dd->ipath_f_put_tid = ipath_pe_put_tid;
dd->ipath_f_cleanup = ipath_setup_pe_cleanup;
dd->ipath_f_setextled = ipath_setup_pe_setextled;
dd->ipath_f_get_base_info = ipath_pe_get_base_info;
@@ -1248,6 +1360,6 @@ void ipath_init_pe800_funcs(struct ipath_devdata *dd)
dd->ipath_kregs = &ipath_pe_kregs;
dd->ipath_cregs = &ipath_pe_cregs;
- ipath_init_pe_variables();
+ ipath_init_pe_variables(dd);
}
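
ipath_init_iba6120_funcs() is the chip-specific half of a simple dispatch pattern: detection code fills in a set of function pointers once (choosing, for example, ipath_pe_put_tid_2 on Rev2 and later), and the generic driver then calls through dd->ipath_f_* without knowing which chip it has. Below is a minimal stand-alone sketch of that pattern; all names and the revision test are illustrative, not the driver's.

    #include <stdio.h>

    struct chip {
            int minrev;
            void (*put_tid)(struct chip *c, unsigned long pa);
    };

    static void put_tid_rev1(struct chip *c, unsigned long pa)
    {
            (void)c;
            printf("rev1 TID write, pa=%#lx\n", pa);
    }

    static void put_tid_rev2(struct chip *c, unsigned long pa)
    {
            (void)c;
            printf("rev2 TID write, pa=%#lx\n", pa);
    }

    /* Pick the implementation once, based on the detected revision. */
    static void init_chip_funcs(struct chip *c)
    {
            c->put_tid = (c->minrev >= 2) ? put_tid_rev2 : put_tid_rev1;
    }

    int main(void)
    {
            struct chip c = { .minrev = 2 };

            init_chip_funcs(&c);
            c.put_tid(&c, 0x12346000ul);
            return 0;
    }
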
diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c
index dc83250d26a6..d819cca524cd 100644
--- a/drivers/infiniband/hw/ipath/ipath_init_chip.c
+++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2006 QLogic, Inc. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -35,7 +36,7 @@
#include <linux/vmalloc.h>
#include "ipath_kernel.h"
-#include "ips_common.h"
+#include "ipath_common.h"
/*
* min buffers we want to have per port, after driver
@@ -52,8 +53,8 @@ module_param_named(cfgports, ipath_cfgports, ushort, S_IRUGO);
MODULE_PARM_DESC(cfgports, "Set max number of ports to use");
/*
- * Number of buffers reserved for driver (layered drivers and SMA
- * send). Reserved at end of buffer list. Initialized based on
+ * Number of buffers reserved for driver (verbs and layered drivers.)
+ * Reserved at end of buffer list. Initialized based on
* number of PIO buffers if not set via module interface.
* The problem with this is that it's global, but we'll use different
* numbers for different chip types. So the default value is not
@@ -79,7 +80,7 @@ MODULE_PARM_DESC(kpiobufs, "Set number of PIO buffers for driver");
*
* Allocate the eager TID buffers and program them into infinipath.
* We use the network layer alloc_skb() allocator to allocate the
- * memory, and either use the buffers as is for things like SMA
+ * memory, and either use the buffers as is for things like verbs
* packets, or pass the buffers up to the ipath layered driver and
* thence the network layer, replacing them as we do so (see
* ipath_rcv_layer()).
@@ -87,13 +88,13 @@ MODULE_PARM_DESC(kpiobufs, "Set number of PIO buffers for driver");
static int create_port0_egr(struct ipath_devdata *dd)
{
unsigned e, egrcnt;
- struct sk_buff **skbs;
+ struct ipath_skbinfo *skbinfo;
int ret;
egrcnt = dd->ipath_rcvegrcnt;
- skbs = vmalloc(sizeof(*dd->ipath_port0_skbs) * egrcnt);
- if (skbs == NULL) {
+ skbinfo = vmalloc(sizeof(*dd->ipath_port0_skbinfo) * egrcnt);
+ if (skbinfo == NULL) {
ipath_dev_err(dd, "allocation error for eager TID "
"skb array\n");
ret = -ENOMEM;
@@ -108,12 +109,13 @@ static int create_port0_egr(struct ipath_devdata *dd)
* 4 bytes so that the data buffer stays word aligned.
* See ipath_kreceive() for more details.
*/
- skbs[e] = ipath_alloc_skb(dd, GFP_KERNEL);
- if (!skbs[e]) {
+ skbinfo[e].skb = ipath_alloc_skb(dd, GFP_KERNEL);
+ if (!skbinfo[e].skb) {
ipath_dev_err(dd, "SKB allocation error for "
"eager TID %u\n", e);
while (e != 0)
- dev_kfree_skb(skbs[--e]);
+ dev_kfree_skb(skbinfo[--e].skb);
+ vfree(skbinfo);
ret = -ENOMEM;
goto bail;
}
@@ -122,14 +124,17 @@ static int create_port0_egr(struct ipath_devdata *dd)
* After loop above, so we can test non-NULL to see if ready
* to use at receive, etc.
*/
- dd->ipath_port0_skbs = skbs;
+ dd->ipath_port0_skbinfo = skbinfo;
for (e = 0; e < egrcnt; e++) {
- unsigned long phys =
- virt_to_phys(dd->ipath_port0_skbs[e]->data);
+ dd->ipath_port0_skbinfo[e].phys =
+ ipath_map_single(dd->pcidev,
+ dd->ipath_port0_skbinfo[e].skb->data,
+ dd->ipath_ibmaxlen, PCI_DMA_FROMDEVICE);
dd->ipath_f_put_tid(dd, e + (u64 __iomem *)
((char __iomem *) dd->ipath_kregbase +
- dd->ipath_rcvegrbase), 0, phys);
+ dd->ipath_rcvegrbase), 0,
+ dd->ipath_port0_skbinfo[e].phys);
}
ret = 0;
@@ -238,7 +243,11 @@ static int init_chip_first(struct ipath_devdata *dd,
"only supports %u\n", ipath_cfgports,
dd->ipath_portcnt);
}
- dd->ipath_pd = kzalloc(sizeof(*dd->ipath_pd) * dd->ipath_cfgports,
+ /*
+ * Allocate full portcnt array, rather than just cfgports, because
+ * cleanup iterates across all possible ports.
+ */
+ dd->ipath_pd = kzalloc(sizeof(*dd->ipath_pd) * dd->ipath_portcnt,
GFP_KERNEL);
if (!dd->ipath_pd) {
@@ -275,7 +284,7 @@ static int init_chip_first(struct ipath_devdata *dd,
pd->port_port = 0;
pd->port_cnt = 1;
/* The port 0 pkey table is used by the layer interface. */
- pd->port_pkeys[0] = IPS_DEFAULT_P_KEY;
+ pd->port_pkeys[0] = IPATH_DEFAULT_P_KEY;
dd->ipath_rcvtidcnt =
ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvtidcnt);
dd->ipath_rcvtidbase =
@@ -409,17 +418,8 @@ static int init_pioavailregs(struct ipath_devdata *dd)
/* and its length */
dd->ipath_freezelen = L1_CACHE_BYTES - sizeof(dd->ipath_statusp[0]);
- if (dd->ipath_unit * 64 > (IPATH_PORT0_RCVHDRTAIL_SIZE - 64)) {
- ipath_dev_err(dd, "unit %u too large for port 0 "
- "rcvhdrtail buffer size\n", dd->ipath_unit);
- ret = -ENODEV;
- }
- else
- ret = 0;
+ ret = 0;
- /* so we can get current tail in ipath_kreceive(), per chip */
- dd->ipath_hdrqtailptr = &ipath_port0_rcvhdrtail[
- dd->ipath_unit * (64 / sizeof(*ipath_port0_rcvhdrtail))];
done:
return ret;
}
@@ -435,16 +435,33 @@ done:
*/
static void init_shadow_tids(struct ipath_devdata *dd)
{
- dd->ipath_pageshadow = (struct page **)
- vmalloc(dd->ipath_cfgports * dd->ipath_rcvtidcnt *
+ struct page **pages;
+ dma_addr_t *addrs;
+
+ pages = vmalloc(dd->ipath_cfgports * dd->ipath_rcvtidcnt *
sizeof(struct page *));
- if (!dd->ipath_pageshadow)
+ if (!pages) {
ipath_dev_err(dd, "failed to allocate shadow page * "
"array, no expected sends!\n");
- else
- memset(dd->ipath_pageshadow, 0,
- dd->ipath_cfgports * dd->ipath_rcvtidcnt *
- sizeof(struct page *));
+ dd->ipath_pageshadow = NULL;
+ return;
+ }
+
+ addrs = vmalloc(dd->ipath_cfgports * dd->ipath_rcvtidcnt *
+ sizeof(dma_addr_t));
+ if (!addrs) {
+ ipath_dev_err(dd, "failed to allocate shadow dma handle "
+ "array, no expected sends!\n");
+		vfree(pages);
+ dd->ipath_pageshadow = NULL;
+ return;
+ }
+
+ memset(pages, 0, dd->ipath_cfgports * dd->ipath_rcvtidcnt *
+ sizeof(struct page *));
+
+ dd->ipath_pageshadow = pages;
+ dd->ipath_physshadow = addrs;
}
static void enable_chip(struct ipath_devdata *dd,
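
init_shadow_tids() above allocates two parallel shadow arrays and keeps neither unless both succeed. The following is only a stand-alone sketch of that paired-allocation-with-rollback pattern in plain userspace C, with invented names and calloc() standing in for vmalloc()+memset().

    #include <stdio.h>
    #include <stdlib.h>

    struct shadow {
            void **pages;           /* stand-in for the struct page * shadow */
            unsigned long *addrs;   /* stand-in for the dma_addr_t shadow */
    };

    /* Allocate both arrays, or leave the structure untouched on failure. */
    static int alloc_shadow(struct shadow *s, size_t n)
    {
            void **pages = calloc(n, sizeof(*pages));
            unsigned long *addrs;

            if (!pages)
                    return -1;
            addrs = calloc(n, sizeof(*addrs));
            if (!addrs) {
                    free(pages);    /* roll back the first allocation */
                    return -1;
            }
            s->pages = pages;
            s->addrs = addrs;
            return 0;
    }

    int main(void)
    {
            struct shadow s = { 0 };

            printf("alloc_shadow: %d\n", alloc_shadow(&s, 128));
            free(s.pages);
            free(s.addrs);
            return 0;
    }
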
@@ -453,9 +470,9 @@ static void enable_chip(struct ipath_devdata *dd,
u32 val;
int i;
- if (!reinit) {
- init_waitqueue_head(&ipath_sma_state_wait);
- }
+ if (!reinit)
+ init_waitqueue_head(&ipath_state_wait);
+
ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
dd->ipath_rcvctrl);
@@ -652,8 +669,9 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
{
int ret = 0, i;
u32 val32, kpiobufs;
- u64 val, atmp;
+ u64 val;
struct ipath_portdata *pd = NULL; /* keep gcc4 happy */
+ gfp_t gfp_flags = GFP_USER | __GFP_COMP;
ret = init_housekeeping(dd, &pd, reinit);
if (ret)
@@ -693,7 +711,7 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
dd->ipath_pioavregs = ALIGN(val, sizeof(u64) * BITS_PER_BYTE / 2)
/ (sizeof(u64) * BITS_PER_BYTE / 2);
if (ipath_kpiobufs == 0) {
- /* not set by user, or set explictly to default */
+ /* not set by user (this is default) */
if ((dd->ipath_piobcnt2k + dd->ipath_piobcnt4k) > 128)
kpiobufs = 32;
else
@@ -775,24 +793,6 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
goto done;
}
- val = ipath_port0_rcvhdrtail_dma + dd->ipath_unit * 64;
-
- /* verify that the alignment requirement was met */
- ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdrtailaddr,
- 0, val);
- atmp = ipath_read_kreg64_port(
- dd, dd->ipath_kregs->kr_rcvhdrtailaddr, 0);
- if (val != atmp) {
- ipath_dev_err(dd, "Catastrophic software error, "
- "RcvHdrTailAddr0 written as %llx, "
- "read back as %llx from %x\n",
- (unsigned long long) val,
- (unsigned long long) atmp,
- dd->ipath_kregs->kr_rcvhdrtailaddr);
- ret = -EINVAL;
- goto done;
- }
-
ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvbthqp, IPATH_KD_QP);
/*
@@ -836,25 +836,45 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
/* clear any interrups up to this point (ints still not enabled) */
ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, -1LL);
- ipath_stats.sps_lid[dd->ipath_unit] = dd->ipath_lid;
-
/*
* Set up the port 0 (kernel) rcvhdr q and egr TIDs. If doing
* re-init, the simplest way to handle this is to free
* existing, and re-allocate.
*/
- if (reinit)
- ipath_free_pddata(dd, 0, 0);
+ if (reinit) {
+ struct ipath_portdata *pd = dd->ipath_pd[0];
+ dd->ipath_pd[0] = NULL;
+ ipath_free_pddata(dd, pd);
+ }
dd->ipath_f_tidtemplate(dd);
ret = ipath_create_rcvhdrq(dd, pd);
- if (!ret)
+ if (!ret) {
+ dd->ipath_hdrqtailptr =
+ (volatile __le64 *)pd->port_rcvhdrtail_kvaddr;
ret = create_port0_egr(dd);
+ }
if (ret)
ipath_dev_err(dd, "failed to allocate port 0 (kernel) "
"rcvhdrq and/or egr bufs\n");
else
enable_chip(dd, pd, reinit);
+
+ if (!ret && !reinit) {
+ /* used when we close a port, for DMA already in flight at close */
+ dd->ipath_dummy_hdrq = dma_alloc_coherent(
+ &dd->pcidev->dev, pd->port_rcvhdrq_size,
+ &dd->ipath_dummy_hdrq_phys,
+ gfp_flags);
+ if (!dd->ipath_dummy_hdrq ) {
+ dev_info(&dd->pcidev->dev,
+ "Couldn't allocate 0x%lx bytes for dummy hdrq\n",
+ pd->port_rcvhdrq_size);
+ /* fallback to just 0'ing */
+ dd->ipath_dummy_hdrq_phys = 0UL;
+ }
+ }
+
/*
* cause retrigger of pending interrupts ignored during init,
* even if we had errors
@@ -950,6 +970,7 @@ static int ipath_set_kpiobufs(const char *str, struct kernel_param *kp)
dd->ipath_piobcnt2k + dd->ipath_piobcnt4k - val;
}
+ ipath_kpiobufs = val;
ret = 0;
bail:
spin_unlock_irqrestore(&ipath_devs_lock, flags);
diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c
index 3e72a1fe3d73..6bee53ce5f33 100644
--- a/drivers/infiniband/hw/ipath/ipath_intr.c
+++ b/drivers/infiniband/hw/ipath/ipath_intr.c
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2006 QLogic, Inc. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -33,31 +34,21 @@
#include <linux/pci.h>
#include "ipath_kernel.h"
-#include "ips_common.h"
-#include "ipath_layer.h"
+#include "ipath_verbs.h"
+#include "ipath_common.h"
-#define E_SUM_PKTERRS \
- (INFINIPATH_E_RHDRLEN | INFINIPATH_E_RBADTID | \
- INFINIPATH_E_RBADVERSION | INFINIPATH_E_RHDR | \
- INFINIPATH_E_RLONGPKTLEN | INFINIPATH_E_RSHORTPKTLEN | \
- INFINIPATH_E_RMAXPKTLEN | INFINIPATH_E_RMINPKTLEN | \
- INFINIPATH_E_RFORMATERR | INFINIPATH_E_RUNSUPVL | \
- INFINIPATH_E_RUNEXPCHAR | INFINIPATH_E_REBP)
-
-#define E_SUM_ERRS \
- (INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SUNEXPERRPKTNUM | \
- INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_SDROPPEDSMPPKT | \
- INFINIPATH_E_SMAXPKTLEN | INFINIPATH_E_SUNSUPVL | \
- INFINIPATH_E_SMINPKTLEN | INFINIPATH_E_SPKTLEN | \
- INFINIPATH_E_INVALIDADDR)
-
-static u64 handle_e_sum_errs(struct ipath_devdata *dd, ipath_err_t errs)
+/*
+ * Called when we might have an error that is specific to a particular
+ * PIO buffer, and may need to cancel that buffer, so it can be re-used.
+ */
+void ipath_disarm_senderrbufs(struct ipath_devdata *dd)
{
- unsigned long sbuf[4];
- u64 ignore_this_time = 0;
u32 piobcnt;
-
- /* if possible that sendbuffererror could be valid */
+ unsigned long sbuf[4];
+ /*
+ * it's possible that sendbuffererror could have bits set; might
+ * have already done this as a result of hardware error handling
+ */
piobcnt = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
/* read these before writing errorclear */
sbuf[0] = ipath_read_kreg64(
@@ -73,36 +64,58 @@ static u64 handle_e_sum_errs(struct ipath_devdata *dd, ipath_err_t errs)
if (sbuf[0] || sbuf[1] || (piobcnt > 128 && (sbuf[2] || sbuf[3]))) {
int i;
+ if (ipath_debug & (__IPATH_PKTDBG|__IPATH_DBG)) {
+ __IPATH_DBG_WHICH(__IPATH_PKTDBG|__IPATH_DBG,
+ "SendbufErrs %lx %lx", sbuf[0],
+ sbuf[1]);
+ if (ipath_debug & __IPATH_PKTDBG && piobcnt > 128)
+ printk(" %lx %lx ", sbuf[2], sbuf[3]);
+ printk("\n");
+ }
- ipath_cdbg(PKT, "SendbufErrs %lx %lx ", sbuf[0], sbuf[1]);
- if (ipath_debug & __IPATH_PKTDBG && piobcnt > 128)
- printk("%lx %lx ", sbuf[2], sbuf[3]);
- for (i = 0; i < piobcnt; i++) {
- if (test_bit(i, sbuf)) {
- u32 __iomem *piobuf;
- if (i < dd->ipath_piobcnt2k)
- piobuf = (u32 __iomem *)
- (dd->ipath_pio2kbase +
- i * dd->ipath_palign);
- else
- piobuf = (u32 __iomem *)
- (dd->ipath_pio4kbase +
- (i - dd->ipath_piobcnt2k) *
- dd->ipath_4kalign);
-
- ipath_cdbg(PKT,
- "PIObuf[%u] @%p pbc is %x; ",
- i, piobuf, readl(piobuf));
-
+ for (i = 0; i < piobcnt; i++)
+ if (test_bit(i, sbuf))
ipath_disarm_piobufs(dd, i, 1);
- }
- }
- if (ipath_debug & __IPATH_PKTDBG)
- printk("\n");
+ dd->ipath_lastcancel = jiffies+3; /* no armlaunch for a bit */
}
- if ((errs & (INFINIPATH_E_SDROPPEDDATAPKT |
- INFINIPATH_E_SDROPPEDSMPPKT |
- INFINIPATH_E_SMINPKTLEN)) &&
+}
+
+
+/* These are all rcv-related errors which we want to count for stats */
+#define E_SUM_PKTERRS \
+ (INFINIPATH_E_RHDRLEN | INFINIPATH_E_RBADTID | \
+ INFINIPATH_E_RBADVERSION | INFINIPATH_E_RHDR | \
+ INFINIPATH_E_RLONGPKTLEN | INFINIPATH_E_RSHORTPKTLEN | \
+ INFINIPATH_E_RMAXPKTLEN | INFINIPATH_E_RMINPKTLEN | \
+ INFINIPATH_E_RFORMATERR | INFINIPATH_E_RUNSUPVL | \
+ INFINIPATH_E_RUNEXPCHAR | INFINIPATH_E_REBP)
+
+/* These are all send-related errors which we want to count for stats */
+#define E_SUM_ERRS \
+ (INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SUNEXPERRPKTNUM | \
+ INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_SDROPPEDSMPPKT | \
+ INFINIPATH_E_SMAXPKTLEN | INFINIPATH_E_SUNSUPVL | \
+ INFINIPATH_E_SMINPKTLEN | INFINIPATH_E_SPKTLEN | \
+ INFINIPATH_E_INVALIDADDR)
+
+/*
+ * these are errors that can occur when the link changes state while
+ * a packet is being sent or received. This doesn't cover things
+ * like EBP or VCRC that can result from the sender's link changing
+ * state, so that we receive a "known bad" packet.
+ */
+#define E_SUM_LINK_PKTERRS \
+ (INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_SDROPPEDSMPPKT | \
+ INFINIPATH_E_SMINPKTLEN | INFINIPATH_E_SPKTLEN | \
+ INFINIPATH_E_RSHORTPKTLEN | INFINIPATH_E_RMINPKTLEN | \
+ INFINIPATH_E_RUNEXPCHAR)
+
+static u64 handle_e_sum_errs(struct ipath_devdata *dd, ipath_err_t errs)
+{
+ u64 ignore_this_time = 0;
+
+ ipath_disarm_senderrbufs(dd);
+ if ((errs & E_SUM_LINK_PKTERRS) &&
!(dd->ipath_flags & IPATH_LINKACTIVE)) {
/*
* This can happen when SMA is trying to bring the link
@@ -111,16 +124,90 @@ static u64 handle_e_sum_errs(struct ipath_devdata *dd, ipath_err_t errs)
* valid. We don't want to confuse people, so we just
* don't print them, except at debug
*/
- ipath_dbg("Ignoring pktsend errors %llx, because not "
- "yet active\n", (unsigned long long) errs);
- ignore_this_time = INFINIPATH_E_SDROPPEDDATAPKT |
- INFINIPATH_E_SDROPPEDSMPPKT |
- INFINIPATH_E_SMINPKTLEN;
+ ipath_dbg("Ignoring packet errors %llx, because link not "
+ "ACTIVE\n", (unsigned long long) errs);
+ ignore_this_time = errs & E_SUM_LINK_PKTERRS;
}
return ignore_this_time;
}
+/* generic hw error messages... */
+#define INFINIPATH_HWE_TXEMEMPARITYERR_MSG(a) \
+ { \
+ .mask = ( INFINIPATH_HWE_TXEMEMPARITYERR_##a << \
+ INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT ), \
+ .msg = "TXE " #a " Memory Parity" \
+ }
+#define INFINIPATH_HWE_RXEMEMPARITYERR_MSG(a) \
+ { \
+ .mask = ( INFINIPATH_HWE_RXEMEMPARITYERR_##a << \
+ INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT ), \
+ .msg = "RXE " #a " Memory Parity" \
+ }
+
+static const struct ipath_hwerror_msgs ipath_generic_hwerror_msgs[] = {
+ INFINIPATH_HWE_MSG(IBCBUSFRSPCPARITYERR, "IPATH2IB Parity"),
+ INFINIPATH_HWE_MSG(IBCBUSTOSPCPARITYERR, "IB2IPATH Parity"),
+
+ INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOBUF),
+ INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOPBC),
+ INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOLAUNCHFIFO),
+
+ INFINIPATH_HWE_RXEMEMPARITYERR_MSG(RCVBUF),
+ INFINIPATH_HWE_RXEMEMPARITYERR_MSG(LOOKUPQ),
+ INFINIPATH_HWE_RXEMEMPARITYERR_MSG(EAGERTID),
+ INFINIPATH_HWE_RXEMEMPARITYERR_MSG(EXPTID),
+ INFINIPATH_HWE_RXEMEMPARITYERR_MSG(FLAGBUF),
+ INFINIPATH_HWE_RXEMEMPARITYERR_MSG(DATAINFO),
+ INFINIPATH_HWE_RXEMEMPARITYERR_MSG(HDRINFO),
+};
+
+/**
+ * ipath_format_hwmsg - format a single hwerror message
+ * @msg: message buffer
+ * @msgl: length of message buffer
+ * @hwmsg: message to add to message buffer
+ */
+static void ipath_format_hwmsg(char *msg, size_t msgl, const char *hwmsg)
+{
+ strlcat(msg, "[", msgl);
+ strlcat(msg, hwmsg, msgl);
+ strlcat(msg, "]", msgl);
+}
+
+/**
+ * ipath_format_hwerrors - format hardware error messages for display
+ * @hwerrs: hardware errors bit vector
+ * @hwerrmsgs: hardware error descriptions
+ * @nhwerrmsgs: number of hwerrmsgs
+ * @msg: message buffer
+ * @msgl: message buffer length
+ */
+void ipath_format_hwerrors(u64 hwerrs,
+ const struct ipath_hwerror_msgs *hwerrmsgs,
+ size_t nhwerrmsgs,
+ char *msg, size_t msgl)
+{
+ int i;
+ const int glen =
+ sizeof(ipath_generic_hwerror_msgs) /
+ sizeof(ipath_generic_hwerror_msgs[0]);
+
+ for (i=0; i<glen; i++) {
+ if (hwerrs & ipath_generic_hwerror_msgs[i].mask) {
+ ipath_format_hwmsg(msg, msgl,
+ ipath_generic_hwerror_msgs[i].msg);
+ }
+ }
+
+ for (i=0; i<nhwerrmsgs; i++) {
+ if (hwerrs & hwerrmsgs[i].mask) {
+ ipath_format_hwmsg(msg, msgl, hwerrmsgs[i].msg);
+ }
+ }
+}
+
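
ipath_format_hwerrors() is table driven: it always walks the generic table above and then the chip-specific table the caller passes in (the IBA6120 hunk earlier passes ipath_6120_hwerror_msgs with the usual sizeof(arr)/sizeof(arr[0]) element count). Below is a stand-alone userspace sketch of the same idea, with strncat() standing in for the kernel's strlcat() and an invented error table.

    #include <stdio.h>
    #include <string.h>
    #include <stdint.h>

    struct hwerror_msg {
            uint64_t mask;
            const char *msg;
    };

    static const struct hwerror_msg chip_msgs[] = {
            { 1ull << 0, "PCIePLL0" },
            { 1ull << 1, "PCIePLL1" },
            { 1ull << 2, "SerDes PLL" },
    };

    /* Append "[msg]" for every table entry whose mask bit is set. */
    static void format_hwerrors(uint64_t hwerrs, const struct hwerror_msg *tbl,
                                size_t n, char *msg, size_t msgl)
    {
            size_t i;

            for (i = 0; i < n; i++) {
                    if (!(hwerrs & tbl[i].mask))
                            continue;
                    strncat(msg, "[", msgl - strlen(msg) - 1);
                    strncat(msg, tbl[i].msg, msgl - strlen(msg) - 1);
                    strncat(msg, "]", msgl - strlen(msg) - 1);
            }
    }

    int main(void)
    {
            char buf[128] = "";

            format_hwerrors((1ull << 0) | (1ull << 2), chip_msgs,
                            sizeof(chip_msgs) / sizeof(chip_msgs[0]),
                            buf, sizeof(buf));
            printf("%s\n", buf);    /* prints: [PCIePLL0][SerDes PLL] */
            return 0;
    }
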
/* return the strings for the most common link states */
static char *ib_linkstate(u32 linkstate)
{
@@ -156,7 +243,29 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd,
*/
val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
lstate = val & IPATH_IBSTATE_MASK;
- if (lstate == IPATH_IBSTATE_INIT || lstate == IPATH_IBSTATE_ARM ||
+
+ /*
+ * this is confusing enough when it happens that I want to always put it
+ * on the console and in the logs. If it was a requested state change,
+ * we'll have already cleared the flags, so we won't print this warning
+ */
+ if ((lstate != IPATH_IBSTATE_ARM && lstate != IPATH_IBSTATE_ACTIVE)
+ && (dd->ipath_flags & (IPATH_LINKARMED | IPATH_LINKACTIVE))) {
+ dev_info(&dd->pcidev->dev, "Link state changed from %s to %s\n",
+ (dd->ipath_flags & IPATH_LINKARMED) ? "ARM" : "ACTIVE",
+ ib_linkstate(lstate));
+ /*
+ * Flush all queued sends when link went to DOWN or INIT,
+ * to be sure that they don't block SMA and other MAD packets
+ */
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
+ INFINIPATH_S_ABORT);
+ ipath_disarm_piobufs(dd, dd->ipath_lastport_piobuf,
+ (unsigned)(dd->ipath_piobcnt2k +
+ dd->ipath_piobcnt4k) -
+ dd->ipath_lastport_piobuf);
+ }
+ else if (lstate == IPATH_IBSTATE_INIT || lstate == IPATH_IBSTATE_ARM ||
lstate == IPATH_IBSTATE_ACTIVE) {
/*
* only print at SMA if there is a change, debug if not
@@ -168,7 +277,7 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd,
ib_linkstate(lstate));
}
else
- ipath_cdbg(SMA, "Unit %u link state %s, last "
+ ipath_cdbg(VERBOSE, "Unit %u link state %s, last "
"was %s\n", dd->ipath_unit,
ib_linkstate(lstate),
ib_linkstate((unsigned)
@@ -180,7 +289,7 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd,
if (lstate == IPATH_IBSTATE_INIT ||
lstate == IPATH_IBSTATE_ARM ||
lstate == IPATH_IBSTATE_ACTIVE)
- ipath_cdbg(SMA, "Unit %u link state down"
+ ipath_cdbg(VERBOSE, "Unit %u link state down"
" (state 0x%x), from %s\n",
dd->ipath_unit,
(u32)val & IPATH_IBSTATE_MASK,
@@ -229,13 +338,14 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd,
| IPATH_LINKACTIVE |
IPATH_LINKARMED);
*dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
+ dd->ipath_lli_counter = 0;
if (!noprint) {
if (((dd->ipath_lastibcstat >>
INFINIPATH_IBCS_LINKSTATE_SHIFT) &
INFINIPATH_IBCS_LINKSTATE_MASK)
== INFINIPATH_IBCS_L_STATE_ACTIVE)
/* if from up to down be more vocal */
- ipath_cdbg(SMA,
+ ipath_cdbg(VERBOSE,
"Unit %u link now down (%s)\n",
dd->ipath_unit,
ipath_ibcstatus_str[ltstate]);
@@ -255,8 +365,6 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd,
*dd->ipath_statusp |=
IPATH_STATUS_IB_READY | IPATH_STATUS_IB_CONF;
dd->ipath_f_setextled(dd, lstate, ltstate);
-
- __ipath_layer_intr(dd, IPATH_LAYER_INT_IF_UP);
} else if ((val & IPATH_IBSTATE_MASK) == IPATH_IBSTATE_INIT) {
/*
* set INIT and DOWN. Down is checked by most of the other
@@ -350,7 +458,7 @@ static unsigned handle_frequent_errors(struct ipath_devdata *dd,
return supp_msgs;
}
-static void handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
+static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
{
char msg[512];
u64 ignore_this_time = 0;
@@ -372,13 +480,26 @@ static void handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
dd->ipath_f_handle_hwerrors(dd, msg, sizeof msg);
}
- if (!noprint && (errs & ~infinipath_e_bitsextant))
+ if (!noprint && (errs & ~dd->ipath_e_bitsextant))
ipath_dev_err(dd, "error interrupt with unknown errors "
"%llx set\n", (unsigned long long)
- (errs & ~infinipath_e_bitsextant));
+ (errs & ~dd->ipath_e_bitsextant));
if (errs & E_SUM_ERRS)
ignore_this_time = handle_e_sum_errs(dd, errs);
+ else if ((errs & E_SUM_LINK_PKTERRS) &&
+ !(dd->ipath_flags & IPATH_LINKACTIVE)) {
+ /*
+ * This can happen when SMA is trying to bring the link
+ * up, but the IB link changes state at the "wrong" time.
+ * The IB logic then complains that the packet isn't
+ * valid. We don't want to confuse people, so we just
+ * don't print them, except at debug
+ */
+ ipath_dbg("Ignoring packet errors %llx, because link not "
+ "ACTIVE\n", (unsigned long long) errs);
+ ignore_this_time = errs & E_SUM_LINK_PKTERRS;
+ }
if (supp_msgs == 250000) {
/*
@@ -397,7 +518,7 @@ static void handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) &
~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL))
ipath_dev_err(dd, "Disabling error(s) %llx because "
- "occuring too frequently (%s)\n",
+ "occurring too frequently (%s)\n",
(unsigned long long)
(dd->ipath_maskederrs &
~dd->ipath_ignorederrs), msg);
@@ -433,8 +554,16 @@ static void handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
~(INFINIPATH_E_HARDWARE |
INFINIPATH_E_IBSTATUSCHANGED);
}
+
+ /* likely due to cancel, so suppress */
+ if ((errs & (INFINIPATH_E_SPKTLEN | INFINIPATH_E_SPIOARMLAUNCH)) &&
+ dd->ipath_lastcancel > jiffies) {
+ ipath_dbg("Suppressed armlaunch/spktlen after error send cancel\n");
+ errs &= ~(INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SPKTLEN);
+ }
+
if (!errs)
- return;
+ return 0;
if (!noprint)
/*
@@ -484,7 +613,7 @@ static void handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
* don't report same point multiple times,
* except kernel
*/
- tl = (u32) * pd->port_rcvhdrtail_kvaddr;
+ tl = *(u64 *) pd->port_rcvhdrtail_kvaddr;
if (tl == dd->ipath_lastrcvhdrqtails[i])
continue;
hd = ipath_read_ureg32(dd, ur_rcvhdrhead,
@@ -493,10 +622,10 @@ static void handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
continue;
if (hd == (tl + 1) ||
(!hd && tl == dd->ipath_hdrqlast)) {
- dd->ipath_lastrcvhdrqtails[i] = tl;
- pd->port_hdrqfull++;
if (i == 0)
chkerrpkts = 1;
+ dd->ipath_lastrcvhdrqtails[i] = tl;
+ pd->port_hdrqfull++;
}
}
}
@@ -551,16 +680,14 @@ static void handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
if (!noprint && *msg)
ipath_dev_err(dd, "%s error\n", msg);
- if (dd->ipath_sma_state_wanted & dd->ipath_flags) {
- ipath_cdbg(VERBOSE, "sma wanted state %x, iflags now %x, "
- "waking\n", dd->ipath_sma_state_wanted,
+ if (dd->ipath_state_wanted & dd->ipath_flags) {
+ ipath_cdbg(VERBOSE, "driver wanted state %x, iflags now %x, "
+ "waking\n", dd->ipath_state_wanted,
dd->ipath_flags);
- wake_up_interruptible(&ipath_sma_state_wait);
+ wake_up_interruptible(&ipath_state_wait);
}
- if (chkerrpkts)
- /* process possible error packets in hdrq */
- ipath_kreceive(dd);
+ return chkerrpkts;
}
/* this is separate to allow for better optimization of ipath_intr() */
@@ -663,11 +790,7 @@ static void handle_layer_pioavail(struct ipath_devdata *dd)
{
int ret;
- ret = __ipath_layer_intr(dd, IPATH_LAYER_INT_SEND_CONTINUE);
- if (ret > 0)
- goto set;
-
- ret = __ipath_verbs_piobufavail(dd);
+ ret = ipath_ib_piobufavail(dd->verbs_dev);
if (ret > 0)
goto set;
@@ -678,32 +801,32 @@ set:
dd->ipath_sendctrl);
}
-static void handle_rcv(struct ipath_devdata *dd, u32 istat)
+/*
+ * Handle receive interrupts for user ports; this means a user
+ * process was waiting for a packet to arrive, and didn't want
+ * to poll
+ */
+static void handle_urcv(struct ipath_devdata *dd, u32 istat)
{
u64 portr;
int i;
int rcvdint = 0;
portr = ((istat >> INFINIPATH_I_RCVAVAIL_SHIFT) &
- infinipath_i_rcvavail_mask)
+ dd->ipath_i_rcvavail_mask)
| ((istat >> INFINIPATH_I_RCVURG_SHIFT) &
- infinipath_i_rcvurg_mask);
- for (i = 0; i < dd->ipath_cfgports; i++) {
+ dd->ipath_i_rcvurg_mask);
+ for (i = 1; i < dd->ipath_cfgports; i++) {
struct ipath_portdata *pd = dd->ipath_pd[i];
- if (portr & (1 << i) && pd &&
- pd->port_cnt) {
- if (i == 0)
- ipath_kreceive(dd);
- else if (test_bit(IPATH_PORT_WAITING_RCV,
- &pd->port_flag)) {
- int rcbit;
- clear_bit(IPATH_PORT_WAITING_RCV,
- &pd->port_flag);
- rcbit = i + INFINIPATH_R_INTRAVAIL_SHIFT;
- clear_bit(1UL << rcbit, &dd->ipath_rcvctrl);
- wake_up_interruptible(&pd->port_wait);
- rcvdint = 1;
- }
+ if (portr & (1 << i) && pd && pd->port_cnt &&
+ test_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag)) {
+ int rcbit;
+ clear_bit(IPATH_PORT_WAITING_RCV,
+ &pd->port_flag);
+ rcbit = i + INFINIPATH_R_INTRAVAIL_SHIFT;
+ clear_bit(1UL << rcbit, &dd->ipath_rcvctrl);
+ wake_up_interruptible(&pd->port_wait);
+ rcvdint = 1;
}
}
if (rcvdint) {
@@ -719,16 +842,19 @@ static void handle_rcv(struct ipath_devdata *dd, u32 istat)
irqreturn_t ipath_intr(int irq, void *data, struct pt_regs *regs)
{
struct ipath_devdata *dd = data;
- u32 istat;
+ u32 istat, chk0rcv = 0;
ipath_err_t estat = 0;
- static unsigned unexpected = 0;
irqreturn_t ret;
+ u32 oldhead, curtail;
+ static unsigned unexpected = 0;
+ static const u32 port0rbits = (1U<<INFINIPATH_I_RCVAVAIL_SHIFT) |
+ (1U<<INFINIPATH_I_RCVURG_SHIFT);
+
+ ipath_stats.sps_ints++;
- if(!(dd->ipath_flags & IPATH_PRESENT)) {
- /* this is mostly so we don't try to touch the chip while
- * it is being reset */
+ if (!(dd->ipath_flags & IPATH_PRESENT)) {
/*
- * This return value is perhaps odd, but we do not want the
+ * This return value is not great, but we do not want the
* interrupt core code to remove our interrupt handler
* because we don't appear to be handling an interrupt
* during a chip reset.
@@ -736,7 +862,51 @@ irqreturn_t ipath_intr(int irq, void *data, struct pt_regs *regs)
return IRQ_HANDLED;
}
+ /*
+ * this needs to be flags&initted, not statusp, so we keep
+ * taking interrupts even after link goes down, etc.
+ * Also, we *must* clear the interrupt at some point, or we won't
+ * take it again, which can be real bad for errors, etc...
+ */
+
+ if (!(dd->ipath_flags & IPATH_INITTED)) {
+ ipath_bad_intr(dd, &unexpected);
+ ret = IRQ_NONE;
+ goto bail;
+ }
+
+ /*
+ * We try to avoid reading the interrupt status register, since
+	 * that's a PIO read which stalls the processor for up to about
+	 * 0.25 usec. The idea is that if a port0 packet has arrived, we
+	 * blindly clear the port 0 receive interrupt bits, and nothing
+	 * else, then return. If other interrupts are pending, the chip
+	 * will re-interrupt us as soon as we write the intclear register.
+	 * Eventually (on the 2nd, 3rd or 4th pass) there will be no more
+	 * kernel packets to process, and we will then handle the other
+	 * interrupts. We clear the interrupts first so that we don't
+	 * lose an interrupt for packets that arrive while we are processing.
+ */
+ oldhead = dd->ipath_port0head;
+ curtail = (u32)le64_to_cpu(*dd->ipath_hdrqtailptr);
+ if (oldhead != curtail) {
+ if (dd->ipath_flags & IPATH_GPIO_INTR) {
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_clear,
+ (u64) (1 << IPATH_GPIO_PORT0_BIT));
+ istat = port0rbits | INFINIPATH_I_GPIO;
+ }
+ else
+ istat = port0rbits;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, istat);
+ ipath_kreceive(dd);
+ if (oldhead != dd->ipath_port0head) {
+ ipath_stats.sps_fastrcvint++;
+ goto done;
+ }
+ }
+
istat = ipath_read_kreg32(dd, dd->ipath_kregs->kr_intstatus);
+
if (unlikely(!istat)) {
ipath_stats.sps_nullintr++;
ret = IRQ_NONE; /* not our interrupt, or already handled */
@@ -749,31 +919,17 @@ irqreturn_t ipath_intr(int irq, void *data, struct pt_regs *regs)
goto bail;
}
- ipath_stats.sps_ints++;
-
- /*
- * this needs to be flags&initted, not statusp, so we keep
- * taking interrupts even after link goes down, etc.
- * Also, we *must* clear the interrupt at some point, or we won't
- * take it again, which can be real bad for errors, etc...
- */
-
- if (!(dd->ipath_flags & IPATH_INITTED)) {
- ipath_bad_intr(dd, &unexpected);
- ret = IRQ_NONE;
- goto bail;
- }
if (unexpected)
unexpected = 0;
- ipath_cdbg(VERBOSE, "intr stat=0x%x\n", istat);
-
- if (istat & ~infinipath_i_bitsextant)
+ if (unlikely(istat & ~dd->ipath_i_bitsextant))
ipath_dev_err(dd,
"interrupt with unknown interrupts %x set\n",
- istat & (u32) ~ infinipath_i_bitsextant);
+ istat & (u32) ~ dd->ipath_i_bitsextant);
+ else
+ ipath_cdbg(VERBOSE, "intr stat=0x%x\n", istat);
- if (istat & INFINIPATH_I_ERROR) {
+ if (unlikely(istat & INFINIPATH_I_ERROR)) {
ipath_stats.sps_errints++;
estat = ipath_read_kreg64(dd,
dd->ipath_kregs->kr_errorstatus);
@@ -788,43 +944,117 @@ irqreturn_t ipath_intr(int irq, void *data, struct pt_regs *regs)
ipath_dev_err(dd, "Read of error status failed "
"(all bits set); ignoring\n");
else
- handle_errors(dd, estat);
+ if (handle_errors(dd, estat))
+ /* force calling ipath_kreceive() */
+ chk0rcv = 1;
}
if (istat & INFINIPATH_I_GPIO) {
- if (unlikely(!(dd->ipath_flags & IPATH_GPIO_INTR))) {
- u32 gpiostatus;
- gpiostatus = ipath_read_kreg32(
- dd, dd->ipath_kregs->kr_gpio_status);
- ipath_dbg("Unexpected GPIO interrupt bits %x\n",
- gpiostatus);
- ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_clear,
- gpiostatus);
- }
- else {
- /* Clear GPIO status bit 2 */
- ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_clear,
- (u64) (1 << 2));
+ /*
+ * GPIO interrupts fall in two broad classes:
+ * GPIO_2 indicates (on some HT4xx boards) that a packet
+ * has arrived for Port 0. Checking for this
+ * is controlled by flag IPATH_GPIO_INTR.
+ * GPIO_3..5 on IBA6120 Rev2 chips indicate errors
+ * that we need to count. Checking for this
+ * is controlled by flag IPATH_GPIO_ERRINTRS.
+ */
+ u32 gpiostatus;
+ u32 to_clear = 0;
+
+ gpiostatus = ipath_read_kreg32(
+ dd, dd->ipath_kregs->kr_gpio_status);
+ /* First the error-counter case.
+ */
+ if ((gpiostatus & IPATH_GPIO_ERRINTR_MASK) &&
+ (dd->ipath_flags & IPATH_GPIO_ERRINTRS)) {
+ /* want to clear the bits we see asserted. */
+ to_clear |= (gpiostatus & IPATH_GPIO_ERRINTR_MASK);
/*
- * Packets are available in the port 0 rcv queue.
- * Eventually this needs to be generalized to check
- * IPATH_GPIO_INTR, and the specific GPIO bit, if
- * GPIO interrupts are used for anything else.
+ * Count appropriately, clear bits out of our copy,
+ * as they have been "handled".
*/
- ipath_kreceive(dd);
+ if (gpiostatus & (1 << IPATH_GPIO_RXUVL_BIT)) {
+ ipath_dbg("FlowCtl on UnsupVL\n");
+ dd->ipath_rxfc_unsupvl_errs++;
+ }
+ if (gpiostatus & (1 << IPATH_GPIO_OVRUN_BIT)) {
+ ipath_dbg("Overrun Threshold exceeded\n");
+ dd->ipath_overrun_thresh_errs++;
+ }
+ if (gpiostatus & (1 << IPATH_GPIO_LLI_BIT)) {
+ ipath_dbg("Local Link Integrity error\n");
+ dd->ipath_lli_errs++;
+ }
+ gpiostatus &= ~IPATH_GPIO_ERRINTR_MASK;
+ }
+ /* Now the Port0 Receive case */
+ if ((gpiostatus & (1 << IPATH_GPIO_PORT0_BIT)) &&
+ (dd->ipath_flags & IPATH_GPIO_INTR)) {
+ /*
+ * GPIO status bit 2 is set, and we expected it.
+			 * clear it and note that in chk0rcv.
+			 * This probably only happens if a Port0 pkt
+			 * arrives at _just_ the wrong time, and we
+			 * handle that by setting chk0rcv.
+ */
+ to_clear |= (1 << IPATH_GPIO_PORT0_BIT);
+ gpiostatus &= ~(1 << IPATH_GPIO_PORT0_BIT);
+ chk0rcv = 1;
+ }
+ if (unlikely(gpiostatus)) {
+ /*
+ * Some unexpected bits remain. If they could have
+ * caused the interrupt, complain and clear.
+ * MEA: this is almost certainly non-ideal.
+ * we should look into auto-disable of unexpected
+ * GPIO interrupts, possibly on a "three strikes"
+ * basis.
+ */
+ u32 mask;
+ mask = ipath_read_kreg32(
+ dd, dd->ipath_kregs->kr_gpio_mask);
+ if (mask & gpiostatus) {
+ ipath_dbg("Unexpected GPIO IRQ bits %x\n",
+ gpiostatus & mask);
+ to_clear |= (gpiostatus & mask);
+ }
+ }
+ if (to_clear) {
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_clear,
+ (u64) to_clear);
}
}
+ chk0rcv |= istat & port0rbits;
/*
- * clear the ones we will deal with on this round
- * We clear it early, mostly for receive interrupts, so we
- * know the chip will have seen this by the time we process
- * the queue, and will re-interrupt if necessary. The processor
- * itself won't take the interrupt again until we return.
+ * Clear the interrupt bits we found set, unless they are receive
+ * related, in which case we already cleared them above, and don't
+ * want to clear them again, because we might lose an interrupt.
+	 * Clear it early, so we know the chip will have seen this by
+ * the time we process the queue, and will re-interrupt if necessary.
+ * The processor itself won't take the interrupt again until we return.
*/
ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, istat);
+ /*
+ * handle port0 receive before checking for pio buffers available,
+ * since receives can overflow; piobuf waiters can afford a few
+	 * extra cycles, since they were waiting anyway, and users waiting
+	 * for a receive are handled at the bottom.
+ */
+ if (chk0rcv) {
+ ipath_kreceive(dd);
+ istat &= ~port0rbits;
+ }
+
+ if (istat & ((dd->ipath_i_rcvavail_mask <<
+ INFINIPATH_I_RCVAVAIL_SHIFT)
+ | (dd->ipath_i_rcvurg_mask <<
+ INFINIPATH_I_RCVURG_SHIFT)))
+ handle_urcv(dd, istat);
+
if (istat & INFINIPATH_I_SPIOBUFAVAIL) {
clear_bit(IPATH_S_PIOINTBUFAVAIL, &dd->ipath_sendctrl);
ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
@@ -836,17 +1066,7 @@ irqreturn_t ipath_intr(int irq, void *data, struct pt_regs *regs)
handle_layer_pioavail(dd);
}
- /*
- * we check for both transition from empty to non-empty, and urgent
- * packets (those with the interrupt bit set in the header)
- */
-
- if (istat & ((infinipath_i_rcvavail_mask <<
- INFINIPATH_I_RCVAVAIL_SHIFT)
- | (infinipath_i_rcvurg_mask <<
- INFINIPATH_I_RCVURG_SHIFT)))
- handle_rcv(dd, istat);
-
+done:
ret = IRQ_HANDLED;
bail:
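
The fast path added to ipath_intr() above comes down to comparing a cached head index against a tail word that the chip updates in host memory, and only reading the (slow) interrupt status register when that comparison shows more work to do. Below is a stand-alone sketch of that head/tail polling idiom only; ring size, entry handling, and wraparound details are invented and much simpler than the real receive queue.

    #include <stdint.h>
    #include <stdio.h>

    #define QENTRIES 64

    struct rcvq {
            volatile uint64_t tail;   /* advanced by the producer (chip DMA) */
            uint32_t head;            /* consumer's cached position */
    };

    /* Process everything between the cached head and the DMA'd tail. */
    static unsigned poll_rcvq(struct rcvq *q)
    {
            unsigned processed = 0;
            uint32_t tail = (uint32_t)q->tail;

            while (q->head != tail) {
                    /* process entry q->head here */
                    q->head = (q->head + 1) % QENTRIES;
                    processed++;
            }
            return processed;
    }

    int main(void)
    {
            struct rcvq q = { .tail = 5, .head = 0 };

            printf("processed %u entries\n", poll_rcvq(&q));
            return 0;
    }
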
diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h
index 5d92d57b6f54..d7540b71b451 100644
--- a/drivers/infiniband/hw/ipath/ipath_kernel.h
+++ b/drivers/infiniband/hw/ipath/ipath_kernel.h
@@ -1,6 +1,7 @@
#ifndef _IPATH_KERNEL_H
#define _IPATH_KERNEL_H
/*
+ * Copyright (c) 2006 QLogic, Inc. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -38,6 +39,8 @@
*/
#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
#include <asm/io.h>
#include "ipath_common.h"
@@ -61,9 +64,7 @@ struct ipath_portdata {
/* rcvhdrq base, needs mmap before useful */
void *port_rcvhdrq;
/* kernel virtual address where hdrqtail is updated */
- u64 *port_rcvhdrtail_kvaddr;
- /* page * used for uaddr */
- struct page *port_rcvhdrtail_pagep;
+ void *port_rcvhdrtail_kvaddr;
/*
* temp buffer for expected send setup, allocated at open, instead
* of each setup call
@@ -78,14 +79,10 @@ struct ipath_portdata {
dma_addr_t port_rcvegr_phys;
/* mmap of hdrq, must fit in 44 bits */
dma_addr_t port_rcvhdrq_phys;
+ dma_addr_t port_rcvhdrqtailaddr_phys;
/*
- * the actual user address that we ipath_mlock'ed, so we can
- * ipath_munlock it at close
- */
- unsigned long port_rcvhdrtail_uaddr;
- /*
- * number of opens on this instance (0 or 1; ignoring forks, dup,
- * etc. for now)
+ * number of opens (including slave subports) on this instance
+ * (ignoring forks, dup, etc. for now)
*/
int port_cnt;
/*
@@ -94,6 +91,10 @@ struct ipath_portdata {
*/
/* instead of calculating it */
unsigned port_port;
+ /* non-zero if port is being shared. */
+ u16 port_subport_cnt;
+ /* non-zero if port is being shared. */
+ u16 port_subport_id;
/* chip offset of PIO buffers for this port */
u32 port_piobufs;
/* how many alloc_pages() chunks in port_rcvegrbuf_pages */
@@ -126,6 +127,16 @@ struct ipath_portdata {
u16 port_pkeys[4];
/* so file ops can get at unit */
struct ipath_devdata *port_dd;
+ /* A page of memory for rcvhdrhead, rcvegrhead, rcvegrtail * N */
+ void *subport_uregbase;
+ /* An array of pages for the eager receive buffers * N */
+ void *subport_rcvegrbuf;
+ /* An array of pages for the eager header queue entries * N */
+ void *subport_rcvhdr_base;
+ /* The version of the library which opened this port */
+ u32 userversion;
+ /* Bitmask of active slaves */
+ u32 active_slaves;
};
struct sk_buff;
@@ -137,10 +148,9 @@ struct _ipath_layer {
void *l_arg;
};
-/* Verbs layer interface */
-struct _verbs_layer {
- void *l_arg;
- struct timer_list l_timer;
+struct ipath_skbinfo {
+ struct sk_buff *skb;
+ dma_addr_t phys;
};
struct ipath_devdata {
@@ -158,20 +168,14 @@ struct ipath_devdata {
/* base of memory alloced for ipath_kregbase, for free */
u64 *ipath_kregalloc;
/*
- * version of kregbase that doesn't have high bits set (for 32 bit
- * programs, so mmap64 44 bit works)
- */
- u64 __iomem *ipath_kregvirt;
- /*
* virtual address where port0 rcvhdrqtail updated for this unit.
* only written to by the chip, not the driver.
*/
volatile __le64 *ipath_hdrqtailptr;
- dma_addr_t ipath_dma_addr;
/* ipath_cfgports pointers */
struct ipath_portdata **ipath_pd;
/* sk_buffs used by port 0 eager receive queue */
- struct sk_buff **ipath_port0_skbs;
+ struct ipath_skbinfo *ipath_port0_skbinfo;
/* kvirt address of 1st 2k pio buffer */
void __iomem *ipath_pio2kbase;
/* kvirt address of 1st 4k pio buffer */
@@ -209,7 +213,8 @@ struct ipath_devdata {
void (*ipath_f_setextled)(struct ipath_devdata *, u64, u64);
/* fill out chip-specific fields */
int (*ipath_f_get_base_info)(struct ipath_portdata *, void *);
- struct _verbs_layer verbs_layer;
+ struct ipath_ibdev *verbs_dev;
+ struct timer_list verbs_timer;
/* total dwords sent (summed from counter) */
u64 ipath_sword;
/* total dwords rcvd (summed from counter) */
@@ -252,7 +257,7 @@ struct ipath_devdata {
u64 ipath_tidtemplate;
/* value to write to free TIDs */
u64 ipath_tidinvalid;
- /* PE-800 rcv interrupt setup */
+ /* IBA6120 rcv interrupt setup */
u64 ipath_rhdrhead_intr_off;
/* size of memory at ipath_kregbase */
@@ -261,8 +266,8 @@ struct ipath_devdata {
u32 ipath_pioavregs;
/* IPATH_POLL, etc. */
u32 ipath_flags;
- /* ipath_flags sma is waiting for */
- u32 ipath_sma_state_wanted;
+ /* ipath_flags driver is waiting for */
+ u32 ipath_state_wanted;
/* last buffer for user use, first buf for kernel use is this
* index. */
u32 ipath_lastport_piobuf;
@@ -322,10 +327,6 @@ struct ipath_devdata {
u32 ipath_pcibar0;
/* so we can rewrite it after a chip reset */
u32 ipath_pcibar1;
- /* sequential tries for SMA send and no bufs */
- u32 ipath_nosma_bufs;
- /* duration (seconds) ipath_nosma_bufs set */
- u32 ipath_nosma_secs;
/* HT/PCI Vendor ID (here for NodeInfo) */
u16 ipath_vendorid;
@@ -335,12 +336,16 @@ struct ipath_devdata {
u8 ipath_ht_slave_off;
/* for write combining settings */
unsigned long ipath_wc_cookie;
+ unsigned long ipath_wc_base;
+ unsigned long ipath_wc_len;
/* ref count for each pkey */
atomic_t ipath_pkeyrefs[4];
/* shadow copy of all exptids physaddr; used only by funcsim */
u64 *ipath_tidsimshadow;
/* shadow copy of struct page *'s for exp tid pages */
struct page **ipath_pageshadow;
+ /* shadow copy of dma handles for exp tid pages */
+ dma_addr_t *ipath_physshadow;
/* lock to workaround chip bug 9437 */
spinlock_t ipath_tid_lock;
@@ -354,13 +359,17 @@ struct ipath_devdata {
char *ipath_freezemsg;
/* pci access data structure */
struct pci_dev *pcidev;
- struct cdev *cdev;
- struct class_device *class_dev;
+ struct cdev *user_cdev;
+ struct cdev *diag_cdev;
+ struct class_device *user_class_dev;
+ struct class_device *diag_class_dev;
/* timer used to prevent stats overflow, error throttling, etc. */
struct timer_list ipath_stats_timer;
/* check for stale messages in rcv queue */
/* only allow one intr at a time. */
unsigned long ipath_rcv_pending;
+ void *ipath_dummy_hdrq; /* used after port close */
+ dma_addr_t ipath_dummy_hdrq_phys;
/*
* Shadow copies of registers; size indicates read access size.
@@ -418,6 +427,9 @@ struct ipath_devdata {
unsigned long ipath_rcvctrl;
/* shadow kr_sendctrl */
unsigned long ipath_sendctrl;
+ /* ports waiting for PIOavail intr */
+ unsigned long ipath_portpiowait;
+ unsigned long ipath_lastcancel; /* to not count armlaunch after cancel */
/* value we put in kr_rcvhdrcnt */
u32 ipath_rcvhdrcnt;
@@ -481,8 +493,6 @@ struct ipath_devdata {
u32 ipath_htwidth;
/* HT speed (200,400,800,1000) from HT config */
u32 ipath_htspeed;
- /* ports waiting for PIOavail intr */
- unsigned long ipath_portpiowait;
/*
* number of sequential ibcstatus change for polling active/quiet
* (i.e., link not coming up).
@@ -500,8 +510,11 @@ struct ipath_devdata {
u16 ipath_lid;
/* list of pkeys programmed; 0 if not set */
u16 ipath_pkeys[4];
- /* ASCII serial number, from flash */
- u8 ipath_serial[12];
+ /*
+ * ASCII serial number, from flash, large enough for original
+ * all digit strings, and longer QLogic serial number format
+ */
+ u8 ipath_serial[16];
/* human readable board version */
u8 ipath_boardversion[80];
/* chip major rev, from ipath_revision */
@@ -516,34 +529,64 @@ struct ipath_devdata {
u8 ipath_pci_cacheline;
/* LID mask control */
u8 ipath_lmc;
-};
+ /* Rx Polarity inversion (compensate for ~tx on partner) */
+ u8 ipath_rx_pol_inv;
+
+ /* local link integrity counter */
+ u32 ipath_lli_counter;
+ /* local link integrity errors */
+ u32 ipath_lli_errors;
+ /*
+ * Above counts only cases where _successive_ LocalLinkIntegrity
+ * errors were seen in the receive headers of kern-packets.
+ * Below are the three (monotonically increasing) counters
+ * maintained via GPIO interrupts on iba6120-rev2.
+ */
+ u32 ipath_rxfc_unsupvl_errs;
+ u32 ipath_overrun_thresh_errs;
+ u32 ipath_lli_errs;
-extern volatile __le64 *ipath_port0_rcvhdrtail;
-extern dma_addr_t ipath_port0_rcvhdrtail_dma;
+ /*
+ * Not all devices managed by a driver instance are the same
+ * type, so these fields must be per-device.
+ */
+ u64 ipath_i_bitsextant;
+ ipath_err_t ipath_e_bitsextant;
+ ipath_err_t ipath_hwe_bitsextant;
+
+ /*
+ * Below should be computable from number of ports,
+ * since they are never modified.
+ */
+ u32 ipath_i_rcvavail_mask;
+ u32 ipath_i_rcvurg_mask;
-#define IPATH_PORT0_RCVHDRTAIL_SIZE PAGE_SIZE
+ /*
+ * Register bits for selecting i2c direction and values, used for
+ * I2C serial flash.
+ */
+ u16 ipath_gpio_sda_num;
+ u16 ipath_gpio_scl_num;
+ u64 ipath_gpio_sda;
+ u64 ipath_gpio_scl;
+};
+/* Private data for file operations */
+struct ipath_filedata {
+ struct ipath_portdata *pd;
+ unsigned subport;
+ unsigned tidcursor;
+};
extern struct list_head ipath_dev_list;
extern spinlock_t ipath_devs_lock;
extern struct ipath_devdata *ipath_lookup(int unit);
-extern u16 ipath_layer_rcv_opcode;
-extern int __ipath_layer_intr(struct ipath_devdata *, u32);
-extern int ipath_layer_intr(struct ipath_devdata *, u32);
-extern int __ipath_layer_rcv(struct ipath_devdata *, void *,
- struct sk_buff *);
-extern int __ipath_layer_rcv_lid(struct ipath_devdata *, void *);
-extern int __ipath_verbs_piobufavail(struct ipath_devdata *);
-extern int __ipath_verbs_rcv(struct ipath_devdata *, void *, void *, u32);
-
-void ipath_layer_add(struct ipath_devdata *);
-void ipath_layer_del(struct ipath_devdata *);
-
int ipath_init_chip(struct ipath_devdata *, int);
int ipath_enable_wc(struct ipath_devdata *dd);
void ipath_disable_wc(struct ipath_devdata *dd);
int ipath_count_units(int *npresentp, int *nupp, u32 *maxportsp);
void ipath_shutdown_device(struct ipath_devdata *);
+void ipath_disarm_senderrbufs(struct ipath_devdata *);
struct file_operations;
int ipath_cdev_init(int minor, char *name, struct file_operations *fops,
@@ -551,14 +594,13 @@ int ipath_cdev_init(int minor, char *name, struct file_operations *fops,
void ipath_cdev_cleanup(struct cdev **cdevp,
struct class_device **class_devp);
-int ipath_diag_init(void);
-void ipath_diag_cleanup(void);
-void ipath_diag_bringup_link(struct ipath_devdata *);
+int ipath_diag_add(struct ipath_devdata *);
+void ipath_diag_remove(struct ipath_devdata *);
-extern wait_queue_head_t ipath_sma_state_wait;
+extern wait_queue_head_t ipath_state_wait;
int ipath_user_add(struct ipath_devdata *dd);
-void ipath_user_del(struct ipath_devdata *dd);
+void ipath_user_remove(struct ipath_devdata *dd);
struct sk_buff *ipath_alloc_skb(struct ipath_devdata *dd, gfp_t);
@@ -582,19 +624,25 @@ void ipath_disarm_piobufs(struct ipath_devdata *, unsigned first,
unsigned cnt);
int ipath_create_rcvhdrq(struct ipath_devdata *, struct ipath_portdata *);
-void ipath_free_pddata(struct ipath_devdata *, u32, int);
+void ipath_free_pddata(struct ipath_devdata *, struct ipath_portdata *);
int ipath_parse_ushort(const char *str, unsigned short *valp);
-int ipath_wait_linkstate(struct ipath_devdata *, u32, int);
-void ipath_set_ib_lstate(struct ipath_devdata *, int);
void ipath_kreceive(struct ipath_devdata *);
int ipath_setrcvhdrsize(struct ipath_devdata *, unsigned);
int ipath_reset_device(int);
void ipath_get_faststats(unsigned long);
+int ipath_set_linkstate(struct ipath_devdata *, u8);
+int ipath_set_mtu(struct ipath_devdata *, u16);
+int ipath_set_lid(struct ipath_devdata *, u32, u8);
+int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv);
/* for use in system calls, where we want to know device type, etc. */
-#define port_fp(fp) ((struct ipath_portdata *) (fp)->private_data)
+#define port_fp(fp) ((struct ipath_filedata *)(fp)->private_data)->pd
+#define subport_fp(fp) \
+ ((struct ipath_filedata *)(fp)->private_data)->subport
+#define tidcursor_fp(fp) \
+ ((struct ipath_filedata *)(fp)->private_data)->tidcursor
/*
* values for ipath_flags
@@ -634,6 +682,15 @@ void ipath_get_faststats(unsigned long);
/* can miss port0 rx interrupts */
#define IPATH_POLL_RX_INTR 0x40000
#define IPATH_DISABLED 0x80000 /* administratively disabled */
+ /* Use GPIO interrupts for new counters */
+#define IPATH_GPIO_ERRINTRS 0x100000
+
+/* Bits in GPIO for the added interrupts */
+#define IPATH_GPIO_PORT0_BIT 2
+#define IPATH_GPIO_RXUVL_BIT 3
+#define IPATH_GPIO_OVRUN_BIT 4
+#define IPATH_GPIO_LLI_BIT 5
+#define IPATH_GPIO_ERRINTR_MASK 0x38
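
The mask above covers exactly the three error bits just defined: bits 3, 4 and 5 give (1 << 3) | (1 << 4) | (1 << 5) = 0x38, deliberately excluding the port0 receive bit. A purely illustrative compile-time check of that relationship, not part of the patch:

static inline void gpio_errintr_mask_check(void)
{
	/* bits 3-5 only; port0 notification (bit 2) is handled separately */
	BUILD_BUG_ON(((1 << IPATH_GPIO_RXUVL_BIT) |
		      (1 << IPATH_GPIO_OVRUN_BIT) |
		      (1 << IPATH_GPIO_LLI_BIT)) != IPATH_GPIO_ERRINTR_MASK);
}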
/* portdata flag bit offsets */
/* waiting for a packet to arrive */
@@ -646,10 +703,8 @@ void ipath_free_data(struct ipath_portdata *dd);
int ipath_waitfor_mdio_cmdready(struct ipath_devdata *);
int ipath_waitfor_complete(struct ipath_devdata *, ipath_kreg, u64, u64 *);
u32 __iomem *ipath_getpiobuf(struct ipath_devdata *, u32 *);
-/* init PE-800-specific func */
-void ipath_init_pe800_funcs(struct ipath_devdata *);
-/* init HT-400-specific func */
-void ipath_init_ht400_funcs(struct ipath_devdata *);
+void ipath_init_iba6120_funcs(struct ipath_devdata *);
+void ipath_init_iba6110_funcs(struct ipath_devdata *);
void ipath_get_eeprom_info(struct ipath_devdata *);
u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg);
@@ -720,13 +775,8 @@ u64 ipath_read_kreg64_port(const struct ipath_devdata *, ipath_kreg,
* @port: port number
*
* Return the contents of a register that is virtualized to be per port.
- * Prints a debug message and returns -1 on errors (not distinguishable from
- * valid contents at runtime; we may add a separate error variable at some
- * point).
- *
- * This is normally not used by the kernel, but may be for debugging, and
- * has a different implementation than user mode, which is why it's not in
- * _common.h.
+ * Returns -1 on errors (not distinguishable from valid contents at
+ * runtime; we may add a separate error variable at some point).
*/
static inline u32 ipath_read_ureg32(const struct ipath_devdata *dd,
ipath_ureg regno, int port)
@@ -810,7 +860,7 @@ static inline u32 ipath_read_creg32(const struct ipath_devdata *dd,
struct device_driver;
-extern const char ipath_core_version[];
+extern const char ib_ipath_version[];
int ipath_driver_create_group(struct device_driver *);
void ipath_driver_remove_group(struct device_driver *);
@@ -819,12 +869,22 @@ int ipath_device_create_group(struct device *, struct ipath_devdata *);
void ipath_device_remove_group(struct device *, struct ipath_devdata *);
int ipath_expose_reset(struct device *);
+int ipath_diagpkt_add(void);
+void ipath_diagpkt_remove(void);
+
int ipath_init_ipathfs(void);
void ipath_exit_ipathfs(void);
int ipathfs_add_device(struct ipath_devdata *);
int ipathfs_remove_device(struct ipath_devdata *);
/*
+ * dma_addr wrappers - all 0's invalid for hw
+ */
+dma_addr_t ipath_map_page(struct pci_dev *, struct page *, unsigned long,
+ size_t, int);
+dma_addr_t ipath_map_single(struct pci_dev *, void *, size_t, int);
+
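
ipath_map_page() and ipath_map_single() exist because the chip treats an all-zero DMA address as invalid, so mappings handed to the hardware must be guaranteed non-zero. A minimal sketch of one way such a wrapper could behave, assuming a simple remap-on-zero policy (the in-tree wrappers may handle this differently):

static dma_addr_t map_single_sketch(struct pci_dev *hwdev, void *ptr,
				    size_t size, int direction)
{
	dma_addr_t addr = pci_map_single(hwdev, ptr, size, direction);

	if (addr == 0) {
		/* rare case: remap so the hardware never sees address 0 */
		pci_unmap_single(hwdev, addr, size, direction);
		addr = pci_map_single(hwdev, ptr, size, direction);
	}
	return addr;
}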
+/*
* Flush write combining store buffers (if present) and perform a write
* barrier.
*/
@@ -840,11 +900,12 @@ const char *ipath_get_unit_name(int unit);
extern struct mutex ipath_mutex;
-#define IPATH_DRV_NAME "ipath_core"
+#define IPATH_DRV_NAME "ib_ipath"
#define IPATH_MAJOR 233
-#define IPATH_SMA_MINOR 128
-#define IPATH_DIAG_MINOR 129
-#define IPATH_NMINORS 130
+#define IPATH_USER_MINOR_BASE 0
+#define IPATH_DIAGPKT_MINOR 127
+#define IPATH_DIAG_MINOR_BASE 129
+#define IPATH_NMINORS 255
#define ipath_dev_err(dd,fmt,...) \
do { \
@@ -880,4 +941,20 @@ extern struct mutex ipath_mutex;
#endif /* _IPATH_DEBUGGING */
+/*
+ * this is used for formatting hw error messages...
+ */
+struct ipath_hwerror_msgs {
+ u64 mask;
+ const char *msg;
+};
+
+#define INFINIPATH_HWE_MSG(a, b) { .mask = INFINIPATH_HWE_##a, .msg = b }
+
+/* in ipath_intr.c... */
+void ipath_format_hwerrors(u64 hwerrs,
+ const struct ipath_hwerror_msgs *hwerrmsgs,
+ size_t nhwerrmsgs,
+ char *msg, size_t lmsg);
+
#endif /* _IPATH_KERNEL_H */
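
The ipath_hwerror_msgs table and INFINIPATH_HWE_MSG() macro let each chip file describe its hardware-error bits declaratively, and ipath_format_hwerrors() in ipath_intr.c presumably walks such a table to build a readable message. A hedged sketch of that kind of walk, not the in-tree implementation:

static void format_hwerrors_sketch(u64 hwerrs,
				   const struct ipath_hwerror_msgs *tbl,
				   size_t n, char *msg, size_t lmsg)
{
	size_t i, len = 0;

	msg[0] = '\0';
	for (i = 0; i < n; i++) {
		if (!(hwerrs & tbl[i].mask))
			continue;
		/* append ", <msg>" for every error bit that is set */
		len += snprintf(msg + len, lmsg > len ? lmsg - len : 0,
				"%s%s", len ? ", " : "", tbl[i].msg);
	}
}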
diff --git a/drivers/infiniband/hw/ipath/ipath_keys.c b/drivers/infiniband/hw/ipath/ipath_keys.c
index 5ae8761f9dd2..9a6cbd05adcd 100644
--- a/drivers/infiniband/hw/ipath/ipath_keys.c
+++ b/drivers/infiniband/hw/ipath/ipath_keys.c
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2006 QLogic, Inc. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -33,6 +34,7 @@
#include <asm/io.h>
#include "ipath_verbs.h"
+#include "ipath_kernel.h"
/**
* ipath_alloc_lkey - allocate an lkey
@@ -59,7 +61,7 @@ int ipath_alloc_lkey(struct ipath_lkey_table *rkt, struct ipath_mregion *mr)
r = (r + 1) & (rkt->max - 1);
if (r == n) {
spin_unlock_irqrestore(&rkt->lock, flags);
- _VERBS_INFO("LKEY table full\n");
+ ipath_dbg(KERN_INFO "LKEY table full\n");
ret = 0;
goto bail;
}
@@ -116,10 +118,12 @@ void ipath_free_lkey(struct ipath_lkey_table *rkt, u32 lkey)
* Check the IB SGE for validity and initialize our internal version
* of it.
*/
-int ipath_lkey_ok(struct ipath_lkey_table *rkt, struct ipath_sge *isge,
+int ipath_lkey_ok(struct ipath_qp *qp, struct ipath_sge *isge,
struct ib_sge *sge, int acc)
{
+ struct ipath_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table;
struct ipath_mregion *mr;
+ unsigned n, m;
size_t off;
int ret;
@@ -137,7 +141,8 @@ int ipath_lkey_ok(struct ipath_lkey_table *rkt, struct ipath_sge *isge,
goto bail;
}
mr = rkt->table[(sge->lkey >> (32 - ib_ipath_lkey_table_size))];
- if (unlikely(mr == NULL || mr->lkey != sge->lkey)) {
+ if (unlikely(mr == NULL || mr->lkey != sge->lkey ||
+ qp->ibqp.pd != mr->pd)) {
ret = 0;
goto bail;
}
@@ -151,20 +156,22 @@ int ipath_lkey_ok(struct ipath_lkey_table *rkt, struct ipath_sge *isge,
}
off += mr->offset;
- isge->mr = mr;
- isge->m = 0;
- isge->n = 0;
- while (off >= mr->map[isge->m]->segs[isge->n].length) {
- off -= mr->map[isge->m]->segs[isge->n].length;
- isge->n++;
- if (isge->n >= IPATH_SEGSZ) {
- isge->m++;
- isge->n = 0;
+ m = 0;
+ n = 0;
+ while (off >= mr->map[m]->segs[n].length) {
+ off -= mr->map[m]->segs[n].length;
+ n++;
+ if (n >= IPATH_SEGSZ) {
+ m++;
+ n = 0;
}
}
- isge->vaddr = mr->map[isge->m]->segs[isge->n].vaddr + off;
- isge->length = mr->map[isge->m]->segs[isge->n].length - off;
+ isge->mr = mr;
+ isge->vaddr = mr->map[m]->segs[n].vaddr + off;
+ isge->length = mr->map[m]->segs[n].length - off;
isge->sge_length = sge->length;
+ isge->m = m;
+ isge->n = n;
ret = 1;
@@ -183,17 +190,35 @@ bail:
*
* Return 1 if successful, otherwise 0.
*/
-int ipath_rkey_ok(struct ipath_ibdev *dev, struct ipath_sge_state *ss,
+int ipath_rkey_ok(struct ipath_qp *qp, struct ipath_sge_state *ss,
u32 len, u64 vaddr, u32 rkey, int acc)
{
+ struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
struct ipath_lkey_table *rkt = &dev->lk_table;
struct ipath_sge *sge = &ss->sge;
struct ipath_mregion *mr;
+ unsigned n, m;
size_t off;
int ret;
+ /*
+ * We use RKEY == zero for physical addresses
+ * (see ipath_get_dma_mr).
+ */
+ if (rkey == 0) {
+ sge->mr = NULL;
+ sge->vaddr = phys_to_virt(vaddr);
+ sge->length = len;
+ sge->sge_length = len;
+ ss->sg_list = NULL;
+ ss->num_sge = 1;
+ ret = 1;
+ goto bail;
+ }
+
mr = rkt->table[(rkey >> (32 - ib_ipath_lkey_table_size))];
- if (unlikely(mr == NULL || mr->lkey != rkey)) {
+ if (unlikely(mr == NULL || mr->lkey != rkey ||
+ qp->ibqp.pd != mr->pd)) {
ret = 0;
goto bail;
}
@@ -206,20 +231,22 @@ int ipath_rkey_ok(struct ipath_ibdev *dev, struct ipath_sge_state *ss,
}
off += mr->offset;
- sge->mr = mr;
- sge->m = 0;
- sge->n = 0;
- while (off >= mr->map[sge->m]->segs[sge->n].length) {
- off -= mr->map[sge->m]->segs[sge->n].length;
- sge->n++;
- if (sge->n >= IPATH_SEGSZ) {
- sge->m++;
- sge->n = 0;
+ m = 0;
+ n = 0;
+ while (off >= mr->map[m]->segs[n].length) {
+ off -= mr->map[m]->segs[n].length;
+ n++;
+ if (n >= IPATH_SEGSZ) {
+ m++;
+ n = 0;
}
}
- sge->vaddr = mr->map[sge->m]->segs[sge->n].vaddr + off;
- sge->length = mr->map[sge->m]->segs[sge->n].length - off;
+ sge->mr = mr;
+ sge->vaddr = mr->map[m]->segs[n].vaddr + off;
+ sge->length = mr->map[m]->segs[n].length - off;
sge->sge_length = len;
+ sge->m = m;
+ sge->n = n;
ss->sg_list = NULL;
ss->num_sge = 1;
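
The rkey == 0 branch above is the kernel DMA-MR fast path: the caller-supplied 64-bit address is taken to be physical, so the SGE is filled in directly without consulting the lkey table. Condensed into a helper purely for illustration (field names follow the driver, but this function is not from the patch):

static void dma_mr_sge_sketch(struct ipath_sge *sge, u64 vaddr, u32 len)
{
	sge->mr = NULL;				/* no memory region backing it */
	sge->vaddr = phys_to_virt(vaddr);	/* physical -> kernel virtual */
	sge->length = len;
	sge->sge_length = len;
}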
diff --git a/drivers/infiniband/hw/ipath/ipath_layer.c b/drivers/infiniband/hw/ipath/ipath_layer.c
index 9ec4ac77b87f..e46aa4ed2a7e 100644
--- a/drivers/infiniband/hw/ipath/ipath_layer.c
+++ b/drivers/infiniband/hw/ipath/ipath_layer.c
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2006 QLogic, Inc. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -40,27 +41,21 @@
#include <asm/byteorder.h>
#include "ipath_kernel.h"
-#include "ips_common.h"
#include "ipath_layer.h"
+#include "ipath_verbs.h"
+#include "ipath_common.h"
/* Acquire before ipath_devs_lock. */
static DEFINE_MUTEX(ipath_layer_mutex);
-static int ipath_verbs_registered;
-
u16 ipath_layer_rcv_opcode;
static int (*layer_intr)(void *, u32);
static int (*layer_rcv)(void *, void *, struct sk_buff *);
static int (*layer_rcv_lid)(void *, void *);
-static int (*verbs_piobufavail)(void *);
-static void (*verbs_rcv)(void *, void *, void *, u32);
static void *(*layer_add_one)(int, struct ipath_devdata *);
static void (*layer_remove_one)(void *);
-static void *(*verbs_add_one)(int, struct ipath_devdata *);
-static void (*verbs_remove_one)(void *);
-static void (*verbs_timer_cb)(void *);
int __ipath_layer_intr(struct ipath_devdata *dd, u32 arg)
{
@@ -106,288 +101,16 @@ int __ipath_layer_rcv_lid(struct ipath_devdata *dd, void *hdr)
return ret;
}
-int __ipath_verbs_piobufavail(struct ipath_devdata *dd)
-{
- int ret = -ENODEV;
-
- if (dd->verbs_layer.l_arg && verbs_piobufavail)
- ret = verbs_piobufavail(dd->verbs_layer.l_arg);
-
- return ret;
-}
-
-int __ipath_verbs_rcv(struct ipath_devdata *dd, void *rc, void *ebuf,
- u32 tlen)
-{
- int ret = -ENODEV;
-
- if (dd->verbs_layer.l_arg && verbs_rcv) {
- verbs_rcv(dd->verbs_layer.l_arg, rc, ebuf, tlen);
- ret = 0;
- }
-
- return ret;
-}
-
-int ipath_layer_set_linkstate(struct ipath_devdata *dd, u8 newstate)
-{
- u32 lstate;
- int ret;
-
- switch (newstate) {
- case IPATH_IB_LINKDOWN:
- ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_POLL <<
- INFINIPATH_IBCC_LINKINITCMD_SHIFT);
- /* don't wait */
- ret = 0;
- goto bail;
-
- case IPATH_IB_LINKDOWN_SLEEP:
- ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_SLEEP <<
- INFINIPATH_IBCC_LINKINITCMD_SHIFT);
- /* don't wait */
- ret = 0;
- goto bail;
-
- case IPATH_IB_LINKDOWN_DISABLE:
- ipath_set_ib_lstate(dd,
- INFINIPATH_IBCC_LINKINITCMD_DISABLE <<
- INFINIPATH_IBCC_LINKINITCMD_SHIFT);
- /* don't wait */
- ret = 0;
- goto bail;
-
- case IPATH_IB_LINKINIT:
- if (dd->ipath_flags & IPATH_LINKINIT) {
- ret = 0;
- goto bail;
- }
- ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_INIT <<
- INFINIPATH_IBCC_LINKCMD_SHIFT);
- lstate = IPATH_LINKINIT;
- break;
-
- case IPATH_IB_LINKARM:
- if (dd->ipath_flags & IPATH_LINKARMED) {
- ret = 0;
- goto bail;
- }
- if (!(dd->ipath_flags &
- (IPATH_LINKINIT | IPATH_LINKACTIVE))) {
- ret = -EINVAL;
- goto bail;
- }
- ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ARMED <<
- INFINIPATH_IBCC_LINKCMD_SHIFT);
- /*
- * Since the port can transition to ACTIVE by receiving
- * a non VL 15 packet, wait for either state.
- */
- lstate = IPATH_LINKARMED | IPATH_LINKACTIVE;
- break;
-
- case IPATH_IB_LINKACTIVE:
- if (dd->ipath_flags & IPATH_LINKACTIVE) {
- ret = 0;
- goto bail;
- }
- if (!(dd->ipath_flags & IPATH_LINKARMED)) {
- ret = -EINVAL;
- goto bail;
- }
- ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ACTIVE <<
- INFINIPATH_IBCC_LINKCMD_SHIFT);
- lstate = IPATH_LINKACTIVE;
- break;
-
- default:
- ipath_dbg("Invalid linkstate 0x%x requested\n", newstate);
- ret = -EINVAL;
- goto bail;
- }
- ret = ipath_wait_linkstate(dd, lstate, 2000);
-
-bail:
- return ret;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_set_linkstate);
-
-/**
- * ipath_layer_set_mtu - set the MTU
- * @dd: the infinipath device
- * @arg: the new MTU
- *
- * we can handle "any" incoming size, the issue here is whether we
- * need to restrict our outgoing size. For now, we don't do any
- * sanity checking on this, and we don't deal with what happens to
- * programs that are already running when the size changes.
- * NOTE: changing the MTU will usually cause the IBC to go back to
- * link initialize (IPATH_IBSTATE_INIT) state...
- */
-int ipath_layer_set_mtu(struct ipath_devdata *dd, u16 arg)
-{
- u32 piosize;
- int changed = 0;
- int ret;
-
- /*
- * mtu is IB data payload max. It's the largest power of 2 less
- * than piosize (or even larger, since it only really controls the
- * largest we can receive; we can send the max of the mtu and
- * piosize). We check that it's one of the valid IB sizes.
- */
- if (arg != 256 && arg != 512 && arg != 1024 && arg != 2048 &&
- arg != 4096) {
- ipath_dbg("Trying to set invalid mtu %u, failing\n", arg);
- ret = -EINVAL;
- goto bail;
- }
- if (dd->ipath_ibmtu == arg) {
- ret = 0; /* same as current */
- goto bail;
- }
-
- piosize = dd->ipath_ibmaxlen;
- dd->ipath_ibmtu = arg;
-
- if (arg >= (piosize - IPATH_PIO_MAXIBHDR)) {
- /* Only if it's not the initial value (or reset to it) */
- if (piosize != dd->ipath_init_ibmaxlen) {
- dd->ipath_ibmaxlen = piosize;
- changed = 1;
- }
- } else if ((arg + IPATH_PIO_MAXIBHDR) != dd->ipath_ibmaxlen) {
- piosize = arg + IPATH_PIO_MAXIBHDR;
- ipath_cdbg(VERBOSE, "ibmaxlen was 0x%x, setting to 0x%x "
- "(mtu 0x%x)\n", dd->ipath_ibmaxlen, piosize,
- arg);
- dd->ipath_ibmaxlen = piosize;
- changed = 1;
- }
-
- if (changed) {
- /*
- * set the IBC maxpktlength to the size of our pio
- * buffers in words
- */
- u64 ibc = dd->ipath_ibcctrl;
- ibc &= ~(INFINIPATH_IBCC_MAXPKTLEN_MASK <<
- INFINIPATH_IBCC_MAXPKTLEN_SHIFT);
-
- piosize = piosize - 2 * sizeof(u32); /* ignore pbc */
- dd->ipath_ibmaxlen = piosize;
- piosize /= sizeof(u32); /* in words */
- /*
- * for ICRC, which we only send in diag test pkt mode, and
- * we don't need to worry about that for mtu
- */
- piosize += 1;
-
- ibc |= piosize << INFINIPATH_IBCC_MAXPKTLEN_SHIFT;
- dd->ipath_ibcctrl = ibc;
- ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
- dd->ipath_ibcctrl);
- dd->ipath_f_tidtemplate(dd);
- }
-
- ret = 0;
-
-bail:
- return ret;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_set_mtu);
-
-int ipath_set_sps_lid(struct ipath_devdata *dd, u32 arg, u8 lmc)
+void ipath_layer_lid_changed(struct ipath_devdata *dd)
{
- ipath_stats.sps_lid[dd->ipath_unit] = arg;
- dd->ipath_lid = arg;
- dd->ipath_lmc = lmc;
-
mutex_lock(&ipath_layer_mutex);
if (dd->ipath_layer.l_arg && layer_intr)
layer_intr(dd->ipath_layer.l_arg, IPATH_LAYER_INT_LID);
mutex_unlock(&ipath_layer_mutex);
-
- return 0;
}
-EXPORT_SYMBOL_GPL(ipath_set_sps_lid);
-
-int ipath_layer_set_guid(struct ipath_devdata *dd, __be64 guid)
-{
- /* XXX - need to inform anyone who cares this just happened. */
- dd->ipath_guid = guid;
- return 0;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_set_guid);
-
-__be64 ipath_layer_get_guid(struct ipath_devdata *dd)
-{
- return dd->ipath_guid;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_guid);
-
-u32 ipath_layer_get_nguid(struct ipath_devdata *dd)
-{
- return dd->ipath_nguid;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_nguid);
-
-int ipath_layer_query_device(struct ipath_devdata *dd, u32 * vendor,
- u32 * boardrev, u32 * majrev, u32 * minrev)
-{
- *vendor = dd->ipath_vendorid;
- *boardrev = dd->ipath_boardrev;
- *majrev = dd->ipath_majrev;
- *minrev = dd->ipath_minrev;
-
- return 0;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_query_device);
-
-u32 ipath_layer_get_flags(struct ipath_devdata *dd)
-{
- return dd->ipath_flags;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_flags);
-
-struct device *ipath_layer_get_device(struct ipath_devdata *dd)
-{
- return &dd->pcidev->dev;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_device);
-
-u16 ipath_layer_get_deviceid(struct ipath_devdata *dd)
-{
- return dd->ipath_deviceid;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_deviceid);
-
-u64 ipath_layer_get_lastibcstat(struct ipath_devdata *dd)
-{
- return dd->ipath_lastibcstat;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_lastibcstat);
-
-u32 ipath_layer_get_ibmtu(struct ipath_devdata *dd)
-{
- return dd->ipath_ibmtu;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_ibmtu);
-
void ipath_layer_add(struct ipath_devdata *dd)
{
mutex_lock(&ipath_layer_mutex);
@@ -396,14 +119,10 @@ void ipath_layer_add(struct ipath_devdata *dd)
dd->ipath_layer.l_arg =
layer_add_one(dd->ipath_unit, dd);
- if (verbs_add_one)
- dd->verbs_layer.l_arg =
- verbs_add_one(dd->ipath_unit, dd);
-
mutex_unlock(&ipath_layer_mutex);
}
-void ipath_layer_del(struct ipath_devdata *dd)
+void ipath_layer_remove(struct ipath_devdata *dd)
{
mutex_lock(&ipath_layer_mutex);
@@ -412,11 +131,6 @@ void ipath_layer_del(struct ipath_devdata *dd)
dd->ipath_layer.l_arg = NULL;
}
- if (dd->verbs_layer.l_arg && verbs_remove_one) {
- verbs_remove_one(dd->verbs_layer.l_arg);
- dd->verbs_layer.l_arg = NULL;
- }
-
mutex_unlock(&ipath_layer_mutex);
}
@@ -448,9 +162,6 @@ int ipath_layer_register(void *(*l_add)(int, struct ipath_devdata *),
if (dd->ipath_layer.l_arg)
continue;
- if (!(*dd->ipath_statusp & IPATH_STATUS_SMA))
- *dd->ipath_statusp |= IPATH_STATUS_OIB_SMA;
-
spin_unlock_irqrestore(&ipath_devs_lock, flags);
dd->ipath_layer.l_arg = l_add(dd->ipath_unit, dd);
spin_lock_irqsave(&ipath_devs_lock, flags);
@@ -494,107 +205,6 @@ void ipath_layer_unregister(void)
EXPORT_SYMBOL_GPL(ipath_layer_unregister);
-static void __ipath_verbs_timer(unsigned long arg)
-{
- struct ipath_devdata *dd = (struct ipath_devdata *) arg;
-
- /*
- * If port 0 receive packet interrupts are not available, or
- * can be missed, poll the receive queue
- */
- if (dd->ipath_flags & IPATH_POLL_RX_INTR)
- ipath_kreceive(dd);
-
- /* Handle verbs layer timeouts. */
- if (dd->verbs_layer.l_arg && verbs_timer_cb)
- verbs_timer_cb(dd->verbs_layer.l_arg);
-
- mod_timer(&dd->verbs_layer.l_timer, jiffies + 1);
-}
-
-/**
- * ipath_verbs_register - verbs layer registration
- * @l_piobufavail: callback for when PIO buffers become available
- * @l_rcv: callback for receiving a packet
- * @l_timer_cb: timer callback
- * @ipath_devdata: device data structure is put here
- */
-int ipath_verbs_register(void *(*l_add)(int, struct ipath_devdata *),
- void (*l_remove)(void *arg),
- int (*l_piobufavail) (void *arg),
- void (*l_rcv) (void *arg, void *rhdr,
- void *data, u32 tlen),
- void (*l_timer_cb) (void *arg))
-{
- struct ipath_devdata *dd, *tmp;
- unsigned long flags;
-
- mutex_lock(&ipath_layer_mutex);
-
- verbs_add_one = l_add;
- verbs_remove_one = l_remove;
- verbs_piobufavail = l_piobufavail;
- verbs_rcv = l_rcv;
- verbs_timer_cb = l_timer_cb;
-
- spin_lock_irqsave(&ipath_devs_lock, flags);
-
- list_for_each_entry_safe(dd, tmp, &ipath_dev_list, ipath_list) {
- if (!(dd->ipath_flags & IPATH_INITTED))
- continue;
-
- if (dd->verbs_layer.l_arg)
- continue;
-
- spin_unlock_irqrestore(&ipath_devs_lock, flags);
- dd->verbs_layer.l_arg = l_add(dd->ipath_unit, dd);
- spin_lock_irqsave(&ipath_devs_lock, flags);
- }
-
- spin_unlock_irqrestore(&ipath_devs_lock, flags);
- mutex_unlock(&ipath_layer_mutex);
-
- ipath_verbs_registered = 1;
-
- return 0;
-}
-
-EXPORT_SYMBOL_GPL(ipath_verbs_register);
-
-void ipath_verbs_unregister(void)
-{
- struct ipath_devdata *dd, *tmp;
- unsigned long flags;
-
- mutex_lock(&ipath_layer_mutex);
- spin_lock_irqsave(&ipath_devs_lock, flags);
-
- list_for_each_entry_safe(dd, tmp, &ipath_dev_list, ipath_list) {
- *dd->ipath_statusp &= ~IPATH_STATUS_OIB_SMA;
-
- if (dd->verbs_layer.l_arg && verbs_remove_one) {
- spin_unlock_irqrestore(&ipath_devs_lock, flags);
- verbs_remove_one(dd->verbs_layer.l_arg);
- spin_lock_irqsave(&ipath_devs_lock, flags);
- dd->verbs_layer.l_arg = NULL;
- }
- }
-
- spin_unlock_irqrestore(&ipath_devs_lock, flags);
-
- verbs_add_one = NULL;
- verbs_remove_one = NULL;
- verbs_piobufavail = NULL;
- verbs_rcv = NULL;
- verbs_timer_cb = NULL;
-
- ipath_verbs_registered = 0;
-
- mutex_unlock(&ipath_layer_mutex);
-}
-
-EXPORT_SYMBOL_GPL(ipath_verbs_unregister);
-
int ipath_layer_open(struct ipath_devdata *dd, u32 * pktmax)
{
int ret;
@@ -607,7 +217,7 @@ int ipath_layer_open(struct ipath_devdata *dd, u32 * pktmax)
goto bail;
}
- ret = ipath_setrcvhdrsize(dd, NUM_OF_EXTRA_WORDS_IN_HEADER_QUEUE);
+ ret = ipath_setrcvhdrsize(dd, IPATH_HEADER_QUEUE_WORDS);
if (ret < 0)
goto bail;
@@ -616,9 +226,9 @@ int ipath_layer_open(struct ipath_devdata *dd, u32 * pktmax)
if (*dd->ipath_statusp & IPATH_STATUS_IB_READY)
intval |= IPATH_LAYER_INT_IF_UP;
- if (ipath_stats.sps_lid[dd->ipath_unit])
+ if (dd->ipath_lid)
intval |= IPATH_LAYER_INT_LID;
- if (ipath_stats.sps_mlid[dd->ipath_unit])
+ if (dd->ipath_mlid)
intval |= IPATH_LAYER_INT_BCAST;
/*
* do this on open, in case low level is already up and
@@ -683,385 +293,6 @@ u16 ipath_layer_get_bcast(struct ipath_devdata *dd)
EXPORT_SYMBOL_GPL(ipath_layer_get_bcast);
-u32 ipath_layer_get_cr_errpkey(struct ipath_devdata *dd)
-{
- return ipath_read_creg32(dd, dd->ipath_cregs->cr_errpkey);
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_cr_errpkey);
-
-static void update_sge(struct ipath_sge_state *ss, u32 length)
-{
- struct ipath_sge *sge = &ss->sge;
-
- sge->vaddr += length;
- sge->length -= length;
- sge->sge_length -= length;
- if (sge->sge_length == 0) {
- if (--ss->num_sge)
- *sge = *ss->sg_list++;
- } else if (sge->length == 0 && sge->mr != NULL) {
- if (++sge->n >= IPATH_SEGSZ) {
- if (++sge->m >= sge->mr->mapsz)
- return;
- sge->n = 0;
- }
- sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr;
- sge->length = sge->mr->map[sge->m]->segs[sge->n].length;
- }
-}
-
-#ifdef __LITTLE_ENDIAN
-static inline u32 get_upper_bits(u32 data, u32 shift)
-{
- return data >> shift;
-}
-
-static inline u32 set_upper_bits(u32 data, u32 shift)
-{
- return data << shift;
-}
-
-static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
-{
- data <<= ((sizeof(u32) - n) * BITS_PER_BYTE);
- data >>= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
- return data;
-}
-#else
-static inline u32 get_upper_bits(u32 data, u32 shift)
-{
- return data << shift;
-}
-
-static inline u32 set_upper_bits(u32 data, u32 shift)
-{
- return data >> shift;
-}
-
-static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
-{
- data >>= ((sizeof(u32) - n) * BITS_PER_BYTE);
- data <<= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
- return data;
-}
-#endif
-
-static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss,
- u32 length)
-{
- u32 extra = 0;
- u32 data = 0;
- u32 last;
-
- while (1) {
- u32 len = ss->sge.length;
- u32 off;
-
- BUG_ON(len == 0);
- if (len > length)
- len = length;
- if (len > ss->sge.sge_length)
- len = ss->sge.sge_length;
- /* If the source address is not aligned, try to align it. */
- off = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1);
- if (off) {
- u32 *addr = (u32 *)((unsigned long)ss->sge.vaddr &
- ~(sizeof(u32) - 1));
- u32 v = get_upper_bits(*addr, off * BITS_PER_BYTE);
- u32 y;
-
- y = sizeof(u32) - off;
- if (len > y)
- len = y;
- if (len + extra >= sizeof(u32)) {
- data |= set_upper_bits(v, extra *
- BITS_PER_BYTE);
- len = sizeof(u32) - extra;
- if (len == length) {
- last = data;
- break;
- }
- __raw_writel(data, piobuf);
- piobuf++;
- extra = 0;
- data = 0;
- } else {
- /* Clear unused upper bytes */
- data |= clear_upper_bytes(v, len, extra);
- if (len == length) {
- last = data;
- break;
- }
- extra += len;
- }
- } else if (extra) {
- /* Source address is aligned. */
- u32 *addr = (u32 *) ss->sge.vaddr;
- int shift = extra * BITS_PER_BYTE;
- int ushift = 32 - shift;
- u32 l = len;
-
- while (l >= sizeof(u32)) {
- u32 v = *addr;
-
- data |= set_upper_bits(v, shift);
- __raw_writel(data, piobuf);
- data = get_upper_bits(v, ushift);
- piobuf++;
- addr++;
- l -= sizeof(u32);
- }
- /*
- * We still have 'extra' number of bytes leftover.
- */
- if (l) {
- u32 v = *addr;
-
- if (l + extra >= sizeof(u32)) {
- data |= set_upper_bits(v, shift);
- len -= l + extra - sizeof(u32);
- if (len == length) {
- last = data;
- break;
- }
- __raw_writel(data, piobuf);
- piobuf++;
- extra = 0;
- data = 0;
- } else {
- /* Clear unused upper bytes */
- data |= clear_upper_bytes(v, l,
- extra);
- if (len == length) {
- last = data;
- break;
- }
- extra += l;
- }
- } else if (len == length) {
- last = data;
- break;
- }
- } else if (len == length) {
- u32 w;
-
- /*
- * Need to round up for the last dword in the
- * packet.
- */
- w = (len + 3) >> 2;
- __iowrite32_copy(piobuf, ss->sge.vaddr, w - 1);
- piobuf += w - 1;
- last = ((u32 *) ss->sge.vaddr)[w - 1];
- break;
- } else {
- u32 w = len >> 2;
-
- __iowrite32_copy(piobuf, ss->sge.vaddr, w);
- piobuf += w;
-
- extra = len & (sizeof(u32) - 1);
- if (extra) {
- u32 v = ((u32 *) ss->sge.vaddr)[w];
-
- /* Clear unused upper bytes */
- data = clear_upper_bytes(v, extra, 0);
- }
- }
- update_sge(ss, len);
- length -= len;
- }
- /* Update address before sending packet. */
- update_sge(ss, length);
- /* must flush early everything before trigger word */
- ipath_flush_wc();
- __raw_writel(last, piobuf);
- /* be sure trigger word is written */
- ipath_flush_wc();
-}
-
-/**
- * ipath_verbs_send - send a packet from the verbs layer
- * @dd: the infinipath device
- * @hdrwords: the number of words in the header
- * @hdr: the packet header
- * @len: the length of the packet in bytes
- * @ss: the SGE to send
- *
- * This is like ipath_sma_send_pkt() in that we need to be able to send
- * packets after the chip is initialized (MADs) but also like
- * ipath_layer_send_hdr() since it's used by the verbs layer.
- */
-int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords,
- u32 *hdr, u32 len, struct ipath_sge_state *ss)
-{
- u32 __iomem *piobuf;
- u32 plen;
- int ret;
-
- /* +1 is for the qword padding of pbc */
- plen = hdrwords + ((len + 3) >> 2) + 1;
- if (unlikely((plen << 2) > dd->ipath_ibmaxlen)) {
- ipath_dbg("packet len 0x%x too long, failing\n", plen);
- ret = -EINVAL;
- goto bail;
- }
-
- /* Get a PIO buffer to use. */
- piobuf = ipath_getpiobuf(dd, NULL);
- if (unlikely(piobuf == NULL)) {
- ret = -EBUSY;
- goto bail;
- }
-
- /*
- * Write len to control qword, no flags.
- * We have to flush after the PBC for correctness on some cpus
- * or WC buffer can be written out of order.
- */
- writeq(plen, piobuf);
- ipath_flush_wc();
- piobuf += 2;
- if (len == 0) {
- /*
- * If there is just the header portion, must flush before
- * writing last word of header for correctness, and after
- * the last header word (trigger word).
- */
- __iowrite32_copy(piobuf, hdr, hdrwords - 1);
- ipath_flush_wc();
- __raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1);
- ipath_flush_wc();
- ret = 0;
- goto bail;
- }
-
- __iowrite32_copy(piobuf, hdr, hdrwords);
- piobuf += hdrwords;
-
- /* The common case is aligned and contained in one segment. */
- if (likely(ss->num_sge == 1 && len <= ss->sge.length &&
- !((unsigned long)ss->sge.vaddr & (sizeof(u32) - 1)))) {
- u32 w;
- u32 *addr = (u32 *) ss->sge.vaddr;
-
- /* Update address before sending packet. */
- update_sge(ss, len);
- /* Need to round up for the last dword in the packet. */
- w = (len + 3) >> 2;
- __iowrite32_copy(piobuf, addr, w - 1);
- /* must flush early everything before trigger word */
- ipath_flush_wc();
- __raw_writel(addr[w - 1], piobuf + w - 1);
- /* be sure trigger word is written */
- ipath_flush_wc();
- ret = 0;
- goto bail;
- }
- copy_io(piobuf, ss, len);
- ret = 0;
-
-bail:
- return ret;
-}
-
-EXPORT_SYMBOL_GPL(ipath_verbs_send);
-
-int ipath_layer_snapshot_counters(struct ipath_devdata *dd, u64 *swords,
- u64 *rwords, u64 *spkts, u64 *rpkts,
- u64 *xmit_wait)
-{
- int ret;
-
- if (!(dd->ipath_flags & IPATH_INITTED)) {
- /* no hardware, freeze, etc. */
- ipath_dbg("unit %u not usable\n", dd->ipath_unit);
- ret = -EINVAL;
- goto bail;
- }
- *swords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);
- *rwords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
- *spkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
- *rpkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
- *xmit_wait = ipath_snap_cntr(dd, dd->ipath_cregs->cr_sendstallcnt);
-
- ret = 0;
-
-bail:
- return ret;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_snapshot_counters);
-
-/**
- * ipath_layer_get_counters - get various chip counters
- * @dd: the infinipath device
- * @cntrs: counters are placed here
- *
- * Return the counters needed by recv_pma_get_portcounters().
- */
-int ipath_layer_get_counters(struct ipath_devdata *dd,
- struct ipath_layer_counters *cntrs)
-{
- int ret;
-
- if (!(dd->ipath_flags & IPATH_INITTED)) {
- /* no hardware, freeze, etc. */
- ipath_dbg("unit %u not usable\n", dd->ipath_unit);
- ret = -EINVAL;
- goto bail;
- }
- cntrs->symbol_error_counter =
- ipath_snap_cntr(dd, dd->ipath_cregs->cr_ibsymbolerrcnt);
- cntrs->link_error_recovery_counter =
- ipath_snap_cntr(dd, dd->ipath_cregs->cr_iblinkerrrecovcnt);
- cntrs->link_downed_counter =
- ipath_snap_cntr(dd, dd->ipath_cregs->cr_iblinkdowncnt);
- cntrs->port_rcv_errors =
- ipath_snap_cntr(dd, dd->ipath_cregs->cr_rxdroppktcnt) +
- ipath_snap_cntr(dd, dd->ipath_cregs->cr_rcvovflcnt) +
- ipath_snap_cntr(dd, dd->ipath_cregs->cr_portovflcnt) +
- ipath_snap_cntr(dd, dd->ipath_cregs->cr_errrcvflowctrlcnt) +
- ipath_snap_cntr(dd, dd->ipath_cregs->cr_err_rlencnt) +
- ipath_snap_cntr(dd, dd->ipath_cregs->cr_invalidrlencnt) +
- ipath_snap_cntr(dd, dd->ipath_cregs->cr_erricrccnt) +
- ipath_snap_cntr(dd, dd->ipath_cregs->cr_errvcrccnt) +
- ipath_snap_cntr(dd, dd->ipath_cregs->cr_errlpcrccnt) +
- ipath_snap_cntr(dd, dd->ipath_cregs->cr_errlinkcnt) +
- ipath_snap_cntr(dd, dd->ipath_cregs->cr_badformatcnt);
- cntrs->port_rcv_remphys_errors =
- ipath_snap_cntr(dd, dd->ipath_cregs->cr_rcvebpcnt);
- cntrs->port_xmit_discards =
- ipath_snap_cntr(dd, dd->ipath_cregs->cr_unsupvlcnt);
- cntrs->port_xmit_data =
- ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);
- cntrs->port_rcv_data =
- ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
- cntrs->port_xmit_packets =
- ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
- cntrs->port_rcv_packets =
- ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
-
- ret = 0;
-
-bail:
- return ret;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_counters);
-
-int ipath_layer_want_buffer(struct ipath_devdata *dd)
-{
- set_bit(IPATH_S_PIOINTBUFAVAIL, &dd->ipath_sendctrl);
- ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
- dd->ipath_sendctrl);
-
- return 0;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_want_buffer);
-
int ipath_layer_send_hdr(struct ipath_devdata *dd, struct ether_header *hdr)
{
int ret = 0;
@@ -1086,10 +317,10 @@ int ipath_layer_send_hdr(struct ipath_devdata *dd, struct ether_header *hdr)
}
vlsllnh = *((__be16 *) hdr);
- if (vlsllnh != htons(IPS_LRH_BTH)) {
+ if (vlsllnh != htons(IPATH_LRH_BTH)) {
ipath_dbg("Warning: lrh[0] wrong (%x, not %x); "
"not sending\n", be16_to_cpu(vlsllnh),
- IPS_LRH_BTH);
+ IPATH_LRH_BTH);
ret = -EINVAL;
}
if (ret)
@@ -1133,389 +364,3 @@ int ipath_layer_set_piointbufavail_int(struct ipath_devdata *dd)
}
EXPORT_SYMBOL_GPL(ipath_layer_set_piointbufavail_int);
-
-int ipath_layer_enable_timer(struct ipath_devdata *dd)
-{
- /*
- * HT-400 has a design flaw where the chip and kernel idea
- * of the tail register don't always agree, and therefore we won't
- * get an interrupt on the next packet received.
- * If the board supports per packet receive interrupts, use it.
- * Otherwise, the timer function periodically checks for packets
- * to cover this case.
- * Either way, the timer is needed for verbs layer related
- * processing.
- */
- if (dd->ipath_flags & IPATH_GPIO_INTR) {
- ipath_write_kreg(dd, dd->ipath_kregs->kr_debugportselect,
- 0x2074076542310ULL);
- /* Enable GPIO bit 2 interrupt */
- ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
- (u64) (1 << 2));
- }
-
- init_timer(&dd->verbs_layer.l_timer);
- dd->verbs_layer.l_timer.function = __ipath_verbs_timer;
- dd->verbs_layer.l_timer.data = (unsigned long)dd;
- dd->verbs_layer.l_timer.expires = jiffies + 1;
- add_timer(&dd->verbs_layer.l_timer);
-
- return 0;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_enable_timer);
-
-int ipath_layer_disable_timer(struct ipath_devdata *dd)
-{
- /* Disable GPIO bit 2 interrupt */
- if (dd->ipath_flags & IPATH_GPIO_INTR)
- ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask, 0);
-
- del_timer_sync(&dd->verbs_layer.l_timer);
-
- return 0;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_disable_timer);
-
-/**
- * ipath_layer_set_verbs_flags - set the verbs layer flags
- * @dd: the infinipath device
- * @flags: the flags to set
- */
-int ipath_layer_set_verbs_flags(struct ipath_devdata *dd, unsigned flags)
-{
- struct ipath_devdata *ss;
- unsigned long lflags;
-
- spin_lock_irqsave(&ipath_devs_lock, lflags);
-
- list_for_each_entry(ss, &ipath_dev_list, ipath_list) {
- if (!(ss->ipath_flags & IPATH_INITTED))
- continue;
- if ((flags & IPATH_VERBS_KERNEL_SMA) &&
- !(*ss->ipath_statusp & IPATH_STATUS_SMA))
- *ss->ipath_statusp |= IPATH_STATUS_OIB_SMA;
- else
- *ss->ipath_statusp &= ~IPATH_STATUS_OIB_SMA;
- }
-
- spin_unlock_irqrestore(&ipath_devs_lock, lflags);
-
- return 0;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_set_verbs_flags);
-
-/**
- * ipath_layer_get_npkeys - return the size of the PKEY table for port 0
- * @dd: the infinipath device
- */
-unsigned ipath_layer_get_npkeys(struct ipath_devdata *dd)
-{
- return ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys);
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_npkeys);
-
-/**
- * ipath_layer_get_pkey - return the indexed PKEY from the port 0 PKEY table
- * @dd: the infinipath device
- * @index: the PKEY index
- */
-unsigned ipath_layer_get_pkey(struct ipath_devdata *dd, unsigned index)
-{
- unsigned ret;
-
- if (index >= ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys))
- ret = 0;
- else
- ret = dd->ipath_pd[0]->port_pkeys[index];
-
- return ret;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_pkey);
-
-/**
- * ipath_layer_get_pkeys - return the PKEY table for port 0
- * @dd: the infinipath device
- * @pkeys: the pkey table is placed here
- */
-int ipath_layer_get_pkeys(struct ipath_devdata *dd, u16 * pkeys)
-{
- struct ipath_portdata *pd = dd->ipath_pd[0];
-
- memcpy(pkeys, pd->port_pkeys, sizeof(pd->port_pkeys));
-
- return 0;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_pkeys);
-
-/**
- * rm_pkey - decrement the reference count for the given PKEY
- * @dd: the infinipath device
- * @key: the PKEY index
- *
- * Return true if this was the last reference and the hardware table entry
- * needs to be changed.
- */
-static int rm_pkey(struct ipath_devdata *dd, u16 key)
-{
- int i;
- int ret;
-
- for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
- if (dd->ipath_pkeys[i] != key)
- continue;
- if (atomic_dec_and_test(&dd->ipath_pkeyrefs[i])) {
- dd->ipath_pkeys[i] = 0;
- ret = 1;
- goto bail;
- }
- break;
- }
-
- ret = 0;
-
-bail:
- return ret;
-}
-
-/**
- * add_pkey - add the given PKEY to the hardware table
- * @dd: the infinipath device
- * @key: the PKEY
- *
- * Return an error code if unable to add the entry, zero if no change,
- * or 1 if the hardware PKEY register needs to be updated.
- */
-static int add_pkey(struct ipath_devdata *dd, u16 key)
-{
- int i;
- u16 lkey = key & 0x7FFF;
- int any = 0;
- int ret;
-
- if (lkey == 0x7FFF) {
- ret = 0;
- goto bail;
- }
-
- /* Look for an empty slot or a matching PKEY. */
- for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
- if (!dd->ipath_pkeys[i]) {
- any++;
- continue;
- }
- /* If it matches exactly, try to increment the ref count */
- if (dd->ipath_pkeys[i] == key) {
- if (atomic_inc_return(&dd->ipath_pkeyrefs[i]) > 1) {
- ret = 0;
- goto bail;
- }
- /* Lost the race. Look for an empty slot below. */
- atomic_dec(&dd->ipath_pkeyrefs[i]);
- any++;
- }
- /*
- * It makes no sense to have both the limited and unlimited
- * PKEY set at the same time since the unlimited one will
- * disable the limited one.
- */
- if ((dd->ipath_pkeys[i] & 0x7FFF) == lkey) {
- ret = -EEXIST;
- goto bail;
- }
- }
- if (!any) {
- ret = -EBUSY;
- goto bail;
- }
- for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
- if (!dd->ipath_pkeys[i] &&
- atomic_inc_return(&dd->ipath_pkeyrefs[i]) == 1) {
- /* for ipathstats, etc. */
- ipath_stats.sps_pkeys[i] = lkey;
- dd->ipath_pkeys[i] = key;
- ret = 1;
- goto bail;
- }
- }
- ret = -EBUSY;
-
-bail:
- return ret;
-}
-
-/**
- * ipath_layer_set_pkeys - set the PKEY table for port 0
- * @dd: the infinipath device
- * @pkeys: the PKEY table
- */
-int ipath_layer_set_pkeys(struct ipath_devdata *dd, u16 * pkeys)
-{
- struct ipath_portdata *pd;
- int i;
- int changed = 0;
-
- pd = dd->ipath_pd[0];
-
- for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) {
- u16 key = pkeys[i];
- u16 okey = pd->port_pkeys[i];
-
- if (key == okey)
- continue;
- /*
- * The value of this PKEY table entry is changing.
- * Remove the old entry in the hardware's array of PKEYs.
- */
- if (okey & 0x7FFF)
- changed |= rm_pkey(dd, okey);
- if (key & 0x7FFF) {
- int ret = add_pkey(dd, key);
-
- if (ret < 0)
- key = 0;
- else
- changed |= ret;
- }
- pd->port_pkeys[i] = key;
- }
- if (changed) {
- u64 pkey;
-
- pkey = (u64) dd->ipath_pkeys[0] |
- ((u64) dd->ipath_pkeys[1] << 16) |
- ((u64) dd->ipath_pkeys[2] << 32) |
- ((u64) dd->ipath_pkeys[3] << 48);
- ipath_cdbg(VERBOSE, "p0 new pkey reg %llx\n",
- (unsigned long long) pkey);
- ipath_write_kreg(dd, dd->ipath_kregs->kr_partitionkey,
- pkey);
- }
- return 0;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_set_pkeys);
-
-/**
- * ipath_layer_get_linkdowndefaultstate - get the default linkdown state
- * @dd: the infinipath device
- *
- * Returns zero if the default is POLL, 1 if the default is SLEEP.
- */
-int ipath_layer_get_linkdowndefaultstate(struct ipath_devdata *dd)
-{
- return !!(dd->ipath_ibcctrl & INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE);
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_linkdowndefaultstate);
-
-/**
- * ipath_layer_set_linkdowndefaultstate - set the default linkdown state
- * @dd: the infinipath device
- * @sleep: the new state
- *
- * Note that this will only take effect when the link state changes.
- */
-int ipath_layer_set_linkdowndefaultstate(struct ipath_devdata *dd,
- int sleep)
-{
- if (sleep)
- dd->ipath_ibcctrl |= INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE;
- else
- dd->ipath_ibcctrl &= ~INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE;
- ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
- dd->ipath_ibcctrl);
- return 0;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_set_linkdowndefaultstate);
-
-int ipath_layer_get_phyerrthreshold(struct ipath_devdata *dd)
-{
- return (dd->ipath_ibcctrl >>
- INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) &
- INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_phyerrthreshold);
-
-/**
- * ipath_layer_set_phyerrthreshold - set the physical error threshold
- * @dd: the infinipath device
- * @n: the new threshold
- *
- * Note that this will only take effect when the link state changes.
- */
-int ipath_layer_set_phyerrthreshold(struct ipath_devdata *dd, unsigned n)
-{
- unsigned v;
-
- v = (dd->ipath_ibcctrl >> INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) &
- INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK;
- if (v != n) {
- dd->ipath_ibcctrl &=
- ~(INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK <<
- INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT);
- dd->ipath_ibcctrl |=
- (u64) n << INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT;
- ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
- dd->ipath_ibcctrl);
- }
- return 0;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_set_phyerrthreshold);
-
-int ipath_layer_get_overrunthreshold(struct ipath_devdata *dd)
-{
- return (dd->ipath_ibcctrl >>
- INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT) &
- INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_overrunthreshold);
-
-/**
- * ipath_layer_set_overrunthreshold - set the overrun threshold
- * @dd: the infinipath device
- * @n: the new threshold
- *
- * Note that this will only take effect when the link state changes.
- */
-int ipath_layer_set_overrunthreshold(struct ipath_devdata *dd, unsigned n)
-{
- unsigned v;
-
- v = (dd->ipath_ibcctrl >> INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT) &
- INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK;
- if (v != n) {
- dd->ipath_ibcctrl &=
- ~(INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK <<
- INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT);
- dd->ipath_ibcctrl |=
- (u64) n << INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT;
- ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
- dd->ipath_ibcctrl);
- }
- return 0;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_set_overrunthreshold);
-
-int ipath_layer_get_boardname(struct ipath_devdata *dd, char *name,
- size_t namelen)
-{
- return dd->ipath_f_get_boardname(dd, name, namelen);
-}
-EXPORT_SYMBOL_GPL(ipath_layer_get_boardname);
-
-u32 ipath_layer_get_rcvhdrentsize(struct ipath_devdata *dd)
-{
- return dd->ipath_rcvhdrentsize;
-}
-EXPORT_SYMBOL_GPL(ipath_layer_get_rcvhdrentsize);
diff --git a/drivers/infiniband/hw/ipath/ipath_layer.h b/drivers/infiniband/hw/ipath/ipath_layer.h
index 6fefd15bd2da..3854a4eae684 100644
--- a/drivers/infiniband/hw/ipath/ipath_layer.h
+++ b/drivers/infiniband/hw/ipath/ipath_layer.h
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2006 QLogic, Inc. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -39,71 +40,9 @@
*/
struct sk_buff;
-struct ipath_sge_state;
struct ipath_devdata;
struct ether_header;
-struct ipath_layer_counters {
- u64 symbol_error_counter;
- u64 link_error_recovery_counter;
- u64 link_downed_counter;
- u64 port_rcv_errors;
- u64 port_rcv_remphys_errors;
- u64 port_xmit_discards;
- u64 port_xmit_data;
- u64 port_rcv_data;
- u64 port_xmit_packets;
- u64 port_rcv_packets;
-};
-
-/*
- * A segment is a linear region of low physical memory.
- * XXX Maybe we should use phys addr here and kmap()/kunmap().
- * Used by the verbs layer.
- */
-struct ipath_seg {
- void *vaddr;
- size_t length;
-};
-
-/* The number of ipath_segs that fit in a page. */
-#define IPATH_SEGSZ (PAGE_SIZE / sizeof (struct ipath_seg))
-
-struct ipath_segarray {
- struct ipath_seg segs[IPATH_SEGSZ];
-};
-
-struct ipath_mregion {
- u64 user_base; /* User's address for this region */
- u64 iova; /* IB start address of this region */
- size_t length;
- u32 lkey;
- u32 offset; /* offset (bytes) to start of region */
- int access_flags;
- u32 max_segs; /* number of ipath_segs in all the arrays */
- u32 mapsz; /* size of the map array */
- struct ipath_segarray *map[0]; /* the segments */
-};
-
-/*
- * These keep track of the copy progress within a memory region.
- * Used by the verbs layer.
- */
-struct ipath_sge {
- struct ipath_mregion *mr;
- void *vaddr; /* current pointer into the segment */
- u32 sge_length; /* length of the SGE */
- u32 length; /* remaining length of the segment */
- u16 m; /* current index: mr->map[m] */
- u16 n; /* current index: mr->map[m]->segs[n] */
-};
-
-struct ipath_sge_state {
- struct ipath_sge *sg_list; /* next SGE to be used if any */
- struct ipath_sge sge; /* progress state for the current SGE */
- u8 num_sge;
-};
-
int ipath_layer_register(void *(*l_add)(int, struct ipath_devdata *),
void (*l_remove)(void *),
int (*l_intr)(void *, u32),
@@ -111,60 +50,14 @@ int ipath_layer_register(void *(*l_add)(int, struct ipath_devdata *),
struct sk_buff *),
u16 rcv_opcode,
int (*l_rcv_lid)(void *, void *));
-int ipath_verbs_register(void *(*l_add)(int, struct ipath_devdata *),
- void (*l_remove)(void *arg),
- int (*l_piobufavail)(void *arg),
- void (*l_rcv)(void *arg, void *rhdr,
- void *data, u32 tlen),
- void (*l_timer_cb)(void *arg));
void ipath_layer_unregister(void);
-void ipath_verbs_unregister(void);
int ipath_layer_open(struct ipath_devdata *, u32 * pktmax);
u16 ipath_layer_get_lid(struct ipath_devdata *dd);
int ipath_layer_get_mac(struct ipath_devdata *dd, u8 *);
u16 ipath_layer_get_bcast(struct ipath_devdata *dd);
-u32 ipath_layer_get_cr_errpkey(struct ipath_devdata *dd);
-int ipath_layer_set_linkstate(struct ipath_devdata *dd, u8 state);
-int ipath_layer_set_mtu(struct ipath_devdata *, u16);
-int ipath_set_sps_lid(struct ipath_devdata *, u32, u8);
int ipath_layer_send_hdr(struct ipath_devdata *dd,
struct ether_header *hdr);
-int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords,
- u32 * hdr, u32 len, struct ipath_sge_state *ss);
int ipath_layer_set_piointbufavail_int(struct ipath_devdata *dd);
-int ipath_layer_get_boardname(struct ipath_devdata *dd, char *name,
- size_t namelen);
-int ipath_layer_snapshot_counters(struct ipath_devdata *dd, u64 *swords,
- u64 *rwords, u64 *spkts, u64 *rpkts,
- u64 *xmit_wait);
-int ipath_layer_get_counters(struct ipath_devdata *dd,
- struct ipath_layer_counters *cntrs);
-int ipath_layer_want_buffer(struct ipath_devdata *dd);
-int ipath_layer_set_guid(struct ipath_devdata *, __be64 guid);
-__be64 ipath_layer_get_guid(struct ipath_devdata *);
-u32 ipath_layer_get_nguid(struct ipath_devdata *);
-int ipath_layer_query_device(struct ipath_devdata *, u32 * vendor,
- u32 * boardrev, u32 * majrev, u32 * minrev);
-u32 ipath_layer_get_flags(struct ipath_devdata *dd);
-struct device *ipath_layer_get_device(struct ipath_devdata *dd);
-u16 ipath_layer_get_deviceid(struct ipath_devdata *dd);
-u64 ipath_layer_get_lastibcstat(struct ipath_devdata *dd);
-u32 ipath_layer_get_ibmtu(struct ipath_devdata *dd);
-int ipath_layer_enable_timer(struct ipath_devdata *dd);
-int ipath_layer_disable_timer(struct ipath_devdata *dd);
-int ipath_layer_set_verbs_flags(struct ipath_devdata *dd, unsigned flags);
-unsigned ipath_layer_get_npkeys(struct ipath_devdata *dd);
-unsigned ipath_layer_get_pkey(struct ipath_devdata *dd, unsigned index);
-int ipath_layer_get_pkeys(struct ipath_devdata *dd, u16 *pkeys);
-int ipath_layer_set_pkeys(struct ipath_devdata *dd, u16 *pkeys);
-int ipath_layer_get_linkdowndefaultstate(struct ipath_devdata *dd);
-int ipath_layer_set_linkdowndefaultstate(struct ipath_devdata *dd,
- int sleep);
-int ipath_layer_get_phyerrthreshold(struct ipath_devdata *dd);
-int ipath_layer_set_phyerrthreshold(struct ipath_devdata *dd, unsigned n);
-int ipath_layer_get_overrunthreshold(struct ipath_devdata *dd);
-int ipath_layer_set_overrunthreshold(struct ipath_devdata *dd, unsigned n);
-u32 ipath_layer_get_rcvhdrentsize(struct ipath_devdata *dd);
/* ipath_ether interrupt values */
#define IPATH_LAYER_INT_IF_UP 0x2
@@ -173,9 +66,6 @@ u32 ipath_layer_get_rcvhdrentsize(struct ipath_devdata *dd);
#define IPATH_LAYER_INT_SEND_CONTINUE 0x10
#define IPATH_LAYER_INT_BCAST 0x40
-/* _verbs_layer.l_flags */
-#define IPATH_VERBS_KERNEL_SMA 0x1
-
extern unsigned ipath_debug; /* debugging bit mask */
#endif /* _IPATH_LAYER_H */
diff --git a/drivers/infiniband/hw/ipath/ipath_mad.c b/drivers/infiniband/hw/ipath/ipath_mad.c
index 1a9d0a2c33c3..25908b02fbe5 100644
--- a/drivers/infiniband/hw/ipath/ipath_mad.c
+++ b/drivers/infiniband/hw/ipath/ipath_mad.c
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2006 QLogic, Inc. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -34,7 +35,7 @@
#include "ipath_kernel.h"
#include "ipath_verbs.h"
-#include "ips_common.h"
+#include "ipath_common.h"
#define IB_SMP_UNSUP_VERSION __constant_htons(0x0004)
#define IB_SMP_UNSUP_METHOD __constant_htons(0x0008)
@@ -84,9 +85,10 @@ static int recv_subn_get_nodeinfo(struct ib_smp *smp,
{
struct nodeinfo *nip = (struct nodeinfo *)&smp->data;
struct ipath_devdata *dd = to_idev(ibdev)->dd;
- u32 vendor, boardid, majrev, minrev;
+ u32 vendor, majrev, minrev;
- if (smp->attr_mod)
+ /* GUID 0 is illegal */
+ if (smp->attr_mod || (dd->ipath_guid == 0))
smp->status |= IB_SMP_INVALID_FIELD;
nip->base_version = 1;
@@ -100,13 +102,15 @@ static int recv_subn_get_nodeinfo(struct ib_smp *smp,
nip->num_ports = ibdev->phys_port_cnt;
/* This is already in network order */
nip->sys_guid = to_idev(ibdev)->sys_image_guid;
- nip->node_guid = ipath_layer_get_guid(dd);
+ nip->node_guid = dd->ipath_guid;
nip->port_guid = nip->sys_guid;
- nip->partition_cap = cpu_to_be16(ipath_layer_get_npkeys(dd));
- nip->device_id = cpu_to_be16(ipath_layer_get_deviceid(dd));
- ipath_layer_query_device(dd, &vendor, &boardid, &majrev, &minrev);
+ nip->partition_cap = cpu_to_be16(ipath_get_npkeys(dd));
+ nip->device_id = cpu_to_be16(dd->ipath_deviceid);
+ majrev = dd->ipath_majrev;
+ minrev = dd->ipath_minrev;
nip->revision = cpu_to_be32((majrev << 16) | minrev);
nip->local_port_num = port;
+ vendor = dd->ipath_vendorid;
nip->vendor_id[0] = 0;
nip->vendor_id[1] = vendor >> 8;
nip->vendor_id[2] = vendor;
@@ -128,15 +132,96 @@ static int recv_subn_get_guidinfo(struct ib_smp *smp,
* We only support one GUID for now. If this changes, the
* portinfo.guid_cap field needs to be updated too.
*/
- if (startgx == 0)
- /* The first is a copy of the read-only HW GUID. */
- *p = ipath_layer_get_guid(to_idev(ibdev)->dd);
- else
+ if (startgx == 0) {
+ __be64 g = to_idev(ibdev)->dd->ipath_guid;
+ if (g == 0)
+ /* GUID 0 is illegal */
+ smp->status |= IB_SMP_INVALID_FIELD;
+ else
+ /* The first is a copy of the read-only HW GUID. */
+ *p = g;
+ } else
smp->status |= IB_SMP_INVALID_FIELD;
return reply(smp);
}
+
+static int get_overrunthreshold(struct ipath_devdata *dd)
+{
+ return (dd->ipath_ibcctrl >>
+ INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT) &
+ INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK;
+}
+
+/**
+ * set_overrunthreshold - set the overrun threshold
+ * @dd: the infinipath device
+ * @n: the new threshold
+ *
+ * Note that this will only take effect when the link state changes.
+ */
+static int set_overrunthreshold(struct ipath_devdata *dd, unsigned n)
+{
+ unsigned v;
+
+ v = (dd->ipath_ibcctrl >> INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT) &
+ INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK;
+ if (v != n) {
+ dd->ipath_ibcctrl &=
+ ~(INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK <<
+ INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT);
+ dd->ipath_ibcctrl |=
+ (u64) n << INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
+ dd->ipath_ibcctrl);
+ }
+ return 0;
+}
+
+static int get_phyerrthreshold(struct ipath_devdata *dd)
+{
+ return (dd->ipath_ibcctrl >>
+ INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) &
+ INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK;
+}
+
+/**
+ * set_phyerrthreshold - set the physical error threshold
+ * @dd: the infinipath device
+ * @n: the new threshold
+ *
+ * Note that this will only take effect when the link state changes.
+ */
+static int set_phyerrthreshold(struct ipath_devdata *dd, unsigned n)
+{
+ unsigned v;
+
+ v = (dd->ipath_ibcctrl >> INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) &
+ INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK;
+ if (v != n) {
+ dd->ipath_ibcctrl &=
+ ~(INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK <<
+ INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT);
+ dd->ipath_ibcctrl |=
+ (u64) n << INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
+ dd->ipath_ibcctrl);
+ }
+ return 0;
+}
+
+/**
+ * get_linkdowndefaultstate - get the default linkdown state
+ * @dd: the infinipath device
+ *
+ * Returns zero if the default is POLL, 1 if the default is SLEEP.
+ */
+static int get_linkdowndefaultstate(struct ipath_devdata *dd)
+{
+ return !!(dd->ipath_ibcctrl & INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE);
+}
+
static int recv_subn_get_portinfo(struct ib_smp *smp,
struct ib_device *ibdev, u8 port)
{
@@ -163,7 +248,7 @@ static int recv_subn_get_portinfo(struct ib_smp *smp,
(dev->mkeyprot_resv_lmc >> 6) == 0)
pip->mkey = dev->mkey;
pip->gid_prefix = dev->gid_prefix;
- lid = ipath_layer_get_lid(dev->dd);
+ lid = dev->dd->ipath_lid;
pip->lid = lid ? cpu_to_be16(lid) : IB_LID_PERMISSIVE;
pip->sm_lid = cpu_to_be16(dev->sm_lid);
pip->cap_mask = cpu_to_be32(dev->port_cap_flags);
@@ -174,14 +259,14 @@ static int recv_subn_get_portinfo(struct ib_smp *smp,
pip->link_width_supported = 3; /* 1x or 4x */
pip->link_width_active = 2; /* 4x */
pip->linkspeed_portstate = 0x10; /* 2.5Gbps */
- ibcstat = ipath_layer_get_lastibcstat(dev->dd);
+ ibcstat = dev->dd->ipath_lastibcstat;
pip->linkspeed_portstate |= ((ibcstat >> 4) & 0x3) + 1;
pip->portphysstate_linkdown =
(ipath_cvt_physportstate[ibcstat & 0xf] << 4) |
- (ipath_layer_get_linkdowndefaultstate(dev->dd) ? 1 : 2);
+ (get_linkdowndefaultstate(dev->dd) ? 1 : 2);
pip->mkeyprot_resv_lmc = dev->mkeyprot_resv_lmc;
pip->linkspeedactive_enabled = 0x11; /* 2.5Gbps, 2.5Gbps */
- switch (ipath_layer_get_ibmtu(dev->dd)) {
+ switch (dev->dd->ipath_ibmtu) {
case 4096:
mtu = IB_MTU_4096;
break;
@@ -214,8 +299,8 @@ static int recv_subn_get_portinfo(struct ib_smp *smp,
pip->mkey_violations = cpu_to_be16(dev->mkey_violations);
/* P_KeyViolations are counted by hardware. */
pip->pkey_violations =
- cpu_to_be16((ipath_layer_get_cr_errpkey(dev->dd) -
- dev->n_pkey_violations) & 0xFFFF);
+ cpu_to_be16((ipath_get_cr_errpkey(dev->dd) -
+ dev->z_pkey_violations) & 0xFFFF);
pip->qkey_violations = cpu_to_be16(dev->qkey_violations);
/* Only the hardware GUID is supported for now */
pip->guid_cap = 1;
@@ -223,8 +308,8 @@ static int recv_subn_get_portinfo(struct ib_smp *smp,
/* 32.768 usec. response time (guessing) */
pip->resv_resptimevalue = 3;
pip->localphyerrors_overrunerrors =
- (ipath_layer_get_phyerrthreshold(dev->dd) << 4) |
- ipath_layer_get_overrunthreshold(dev->dd);
+ (get_phyerrthreshold(dev->dd) << 4) |
+ get_overrunthreshold(dev->dd);
/* pip->max_credit_hint; */
/* pip->link_roundtrip_latency[3]; */
@@ -234,6 +319,20 @@ bail:
return ret;
}
+/**
+ * get_pkeys - return the PKEY table for port 0
+ * @dd: the infinipath device
+ * @pkeys: the pkey table is placed here
+ */
+static int get_pkeys(struct ipath_devdata *dd, u16 * pkeys)
+{
+ struct ipath_portdata *pd = dd->ipath_pd[0];
+
+ memcpy(pkeys, pd->port_pkeys, sizeof(pd->port_pkeys));
+
+ return 0;
+}
+
static int recv_subn_get_pkeytable(struct ib_smp *smp,
struct ib_device *ibdev)
{
@@ -246,9 +345,9 @@ static int recv_subn_get_pkeytable(struct ib_smp *smp,
memset(smp->data, 0, sizeof(smp->data));
if (startpx == 0) {
struct ipath_ibdev *dev = to_idev(ibdev);
- unsigned i, n = ipath_layer_get_npkeys(dev->dd);
+ unsigned i, n = ipath_get_npkeys(dev->dd);
- ipath_layer_get_pkeys(dev->dd, p);
+ get_pkeys(dev->dd, p);
for (i = 0; i < n; i++)
q[i] = cpu_to_be16(p[i]);
@@ -266,6 +365,24 @@ static int recv_subn_set_guidinfo(struct ib_smp *smp,
}
/**
+ * set_linkdowndefaultstate - set the default linkdown state
+ * @dd: the infinipath device
+ * @sleep: the new state
+ *
+ * Note that this will only take effect when the link state changes.
+ */
+static int set_linkdowndefaultstate(struct ipath_devdata *dd, int sleep)
+{
+ if (sleep)
+ dd->ipath_ibcctrl |= INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE;
+ else
+ dd->ipath_ibcctrl &= ~INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
+ dd->ipath_ibcctrl);
+ return 0;
+}
+
+/**
* recv_subn_set_portinfo - set port information
* @smp: the incoming SM packet
* @ibdev: the infiniband device
@@ -287,7 +404,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
u8 state;
u16 lstate;
u32 mtu;
- int ret;
+ int ret, ore;
if (be32_to_cpu(smp->attr_mod) > ibdev->phys_port_cnt)
goto err;
@@ -301,11 +418,11 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
dev->mkey_lease_period = be16_to_cpu(pip->mkey_lease_period);
lid = be16_to_cpu(pip->lid);
- if (lid != ipath_layer_get_lid(dev->dd)) {
+ if (lid != dev->dd->ipath_lid) {
/* Must be a valid unicast LID address. */
- if (lid == 0 || lid >= IPS_MULTICAST_LID_BASE)
+ if (lid == 0 || lid >= IPATH_MULTICAST_LID_BASE)
goto err;
- ipath_set_sps_lid(dev->dd, lid, pip->mkeyprot_resv_lmc & 7);
+ ipath_set_lid(dev->dd, lid, pip->mkeyprot_resv_lmc & 7);
event.event = IB_EVENT_LID_CHANGE;
ib_dispatch_event(&event);
}
@@ -313,7 +430,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
smlid = be16_to_cpu(pip->sm_lid);
if (smlid != dev->sm_lid) {
/* Must be a valid unicast LID address. */
- if (smlid == 0 || smlid >= IPS_MULTICAST_LID_BASE)
+ if (smlid == 0 || smlid >= IPATH_MULTICAST_LID_BASE)
goto err;
dev->sm_lid = smlid;
event.event = IB_EVENT_SM_CHANGE;
@@ -339,11 +456,11 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
case 0: /* NOP */
break;
case 1: /* SLEEP */
- if (ipath_layer_set_linkdowndefaultstate(dev->dd, 1))
+ if (set_linkdowndefaultstate(dev->dd, 1))
goto err;
break;
case 2: /* POLL */
- if (ipath_layer_set_linkdowndefaultstate(dev->dd, 0))
+ if (set_linkdowndefaultstate(dev->dd, 0))
goto err;
break;
default:
@@ -373,7 +490,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
/* XXX We have already partially updated our state! */
goto err;
}
- ipath_layer_set_mtu(dev->dd, mtu);
+ ipath_set_mtu(dev->dd, mtu);
dev->sm_sl = pip->neighbormtu_mastersmsl & 0xF;
@@ -389,20 +506,16 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
* later.
*/
if (pip->pkey_violations == 0)
- dev->n_pkey_violations =
- ipath_layer_get_cr_errpkey(dev->dd);
+ dev->z_pkey_violations = ipath_get_cr_errpkey(dev->dd);
if (pip->qkey_violations == 0)
dev->qkey_violations = 0;
- if (ipath_layer_set_phyerrthreshold(
- dev->dd,
- (pip->localphyerrors_overrunerrors >> 4) & 0xF))
+ ore = pip->localphyerrors_overrunerrors;
+ if (set_phyerrthreshold(dev->dd, (ore >> 4) & 0xF))
goto err;
- if (ipath_layer_set_overrunthreshold(
- dev->dd,
- (pip->localphyerrors_overrunerrors & 0xF)))
+ if (set_overrunthreshold(dev->dd, (ore & 0xF)))
goto err;
dev->subnet_timeout = pip->clientrereg_resv_subnetto & 0x1F;
@@ -420,7 +533,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
* is down or is being set to down.
*/
state = pip->linkspeed_portstate & 0xF;
- flags = ipath_layer_get_flags(dev->dd);
+ flags = dev->dd->ipath_flags;
lstate = (pip->portphysstate_linkdown >> 4) & 0xF;
if (lstate && !(state == IB_PORT_DOWN || state == IB_PORT_NOP))
goto err;
@@ -436,7 +549,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
/* FALLTHROUGH */
case IB_PORT_DOWN:
if (lstate == 0)
- if (ipath_layer_get_linkdowndefaultstate(dev->dd))
+ if (get_linkdowndefaultstate(dev->dd))
lstate = IPATH_IB_LINKDOWN_SLEEP;
else
lstate = IPATH_IB_LINKDOWN;
@@ -448,7 +561,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
lstate = IPATH_IB_LINKDOWN_DISABLE;
else
goto err;
- ipath_layer_set_linkstate(dev->dd, lstate);
+ ipath_set_linkstate(dev->dd, lstate);
if (flags & IPATH_LINKACTIVE) {
event.event = IB_EVENT_PORT_ERR;
ib_dispatch_event(&event);
@@ -457,7 +570,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
case IB_PORT_ARMED:
if (!(flags & (IPATH_LINKINIT | IPATH_LINKACTIVE)))
break;
- ipath_layer_set_linkstate(dev->dd, IPATH_IB_LINKARM);
+ ipath_set_linkstate(dev->dd, IPATH_IB_LINKARM);
if (flags & IPATH_LINKACTIVE) {
event.event = IB_EVENT_PORT_ERR;
ib_dispatch_event(&event);
@@ -466,7 +579,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
case IB_PORT_ACTIVE:
if (!(flags & IPATH_LINKARMED))
break;
- ipath_layer_set_linkstate(dev->dd, IPATH_IB_LINKACTIVE);
+ ipath_set_linkstate(dev->dd, IPATH_IB_LINKACTIVE);
event.event = IB_EVENT_PORT_ACTIVE;
ib_dispatch_event(&event);
break;
@@ -490,6 +603,152 @@ done:
return ret;
}
+/**
+ * rm_pkey - decrement the reference count for the given PKEY
+ * @dd: the infinipath device
+ * @key: the PKEY index
+ *
+ * Return true if this was the last reference and the hardware table entry
+ * needs to be changed.
+ */
+static int rm_pkey(struct ipath_devdata *dd, u16 key)
+{
+ int i;
+ int ret;
+
+ for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
+ if (dd->ipath_pkeys[i] != key)
+ continue;
+ if (atomic_dec_and_test(&dd->ipath_pkeyrefs[i])) {
+ dd->ipath_pkeys[i] = 0;
+ ret = 1;
+ goto bail;
+ }
+ break;
+ }
+
+ ret = 0;
+
+bail:
+ return ret;
+}
+
+/**
+ * add_pkey - add the given PKEY to the hardware table
+ * @dd: the infinipath device
+ * @key: the PKEY
+ *
+ * Return an error code if unable to add the entry, zero if no change,
+ * or 1 if the hardware PKEY register needs to be updated.
+ */
+static int add_pkey(struct ipath_devdata *dd, u16 key)
+{
+ int i;
+ u16 lkey = key & 0x7FFF;
+ int any = 0;
+ int ret;
+
+ if (lkey == 0x7FFF) {
+ ret = 0;
+ goto bail;
+ }
+
+ /* Look for an empty slot or a matching PKEY. */
+ for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
+ if (!dd->ipath_pkeys[i]) {
+ any++;
+ continue;
+ }
+ /* If it matches exactly, try to increment the ref count */
+ if (dd->ipath_pkeys[i] == key) {
+ if (atomic_inc_return(&dd->ipath_pkeyrefs[i]) > 1) {
+ ret = 0;
+ goto bail;
+ }
+ /* Lost the race. Look for an empty slot below. */
+ atomic_dec(&dd->ipath_pkeyrefs[i]);
+ any++;
+ }
+ /*
+ * It makes no sense to have both the limited and unlimited
+ * PKEY set at the same time since the unlimited one will
+ * disable the limited one.
+ */
+ if ((dd->ipath_pkeys[i] & 0x7FFF) == lkey) {
+ ret = -EEXIST;
+ goto bail;
+ }
+ }
+ if (!any) {
+ ret = -EBUSY;
+ goto bail;
+ }
+ for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
+ if (!dd->ipath_pkeys[i] &&
+ atomic_inc_return(&dd->ipath_pkeyrefs[i]) == 1) {
+ /* for ipathstats, etc. */
+ ipath_stats.sps_pkeys[i] = lkey;
+ dd->ipath_pkeys[i] = key;
+ ret = 1;
+ goto bail;
+ }
+ }
+ ret = -EBUSY;
+
+bail:
+ return ret;
+}
+
+/**
+ * set_pkeys - set the PKEY table for port 0
+ * @dd: the infinipath device
+ * @pkeys: the PKEY table
+ */
+static int set_pkeys(struct ipath_devdata *dd, u16 *pkeys)
+{
+ struct ipath_portdata *pd;
+ int i;
+ int changed = 0;
+
+ pd = dd->ipath_pd[0];
+
+ for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) {
+ u16 key = pkeys[i];
+ u16 okey = pd->port_pkeys[i];
+
+ if (key == okey)
+ continue;
+ /*
+ * The value of this PKEY table entry is changing.
+ * Remove the old entry in the hardware's array of PKEYs.
+ */
+ if (okey & 0x7FFF)
+ changed |= rm_pkey(dd, okey);
+ if (key & 0x7FFF) {
+ int ret = add_pkey(dd, key);
+
+ if (ret < 0)
+ key = 0;
+ else
+ changed |= ret;
+ }
+ pd->port_pkeys[i] = key;
+ }
+ if (changed) {
+ u64 pkey;
+
+ pkey = (u64) dd->ipath_pkeys[0] |
+ ((u64) dd->ipath_pkeys[1] << 16) |
+ ((u64) dd->ipath_pkeys[2] << 32) |
+ ((u64) dd->ipath_pkeys[3] << 48);
+ ipath_cdbg(VERBOSE, "p0 new pkey reg %llx\n",
+ (unsigned long long) pkey);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_partitionkey,
+ pkey);
+ }
+ return 0;
+}
+
static int recv_subn_set_pkeytable(struct ib_smp *smp,
struct ib_device *ibdev)
{
@@ -497,13 +756,12 @@ static int recv_subn_set_pkeytable(struct ib_smp *smp,
__be16 *p = (__be16 *) smp->data;
u16 *q = (u16 *) smp->data;
struct ipath_ibdev *dev = to_idev(ibdev);
- unsigned i, n = ipath_layer_get_npkeys(dev->dd);
+ unsigned i, n = ipath_get_npkeys(dev->dd);
for (i = 0; i < n; i++)
q[i] = be16_to_cpu(p[i]);
- if (startpx != 0 ||
- ipath_layer_set_pkeys(dev->dd, q) != 0)
+ if (startpx != 0 || set_pkeys(dev->dd, q) != 0)
smp->status |= IB_SMP_INVALID_FIELD;
return recv_subn_get_pkeytable(smp, ibdev);
@@ -610,6 +868,9 @@ struct ib_pma_portcounters {
#define IB_PMA_SEL_PORT_RCV_ERRORS __constant_htons(0x0008)
#define IB_PMA_SEL_PORT_RCV_REMPHYS_ERRORS __constant_htons(0x0010)
#define IB_PMA_SEL_PORT_XMIT_DISCARDS __constant_htons(0x0040)
+#define IB_PMA_SEL_LOCAL_LINK_INTEGRITY_ERRORS __constant_htons(0x0200)
+#define IB_PMA_SEL_EXCESSIVE_BUFFER_OVERRUNS __constant_htons(0x0400)
+#define IB_PMA_SEL_PORT_VL15_DROPPED __constant_htons(0x0800)
#define IB_PMA_SEL_PORT_XMIT_DATA __constant_htons(0x1000)
#define IB_PMA_SEL_PORT_RCV_DATA __constant_htons(0x2000)
#define IB_PMA_SEL_PORT_XMIT_PACKETS __constant_htons(0x4000)
@@ -838,24 +1099,28 @@ static int recv_pma_get_portcounters(struct ib_perf *pmp,
struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
pmp->data;
struct ipath_ibdev *dev = to_idev(ibdev);
- struct ipath_layer_counters cntrs;
+ struct ipath_verbs_counters cntrs;
u8 port_select = p->port_select;
- ipath_layer_get_counters(dev->dd, &cntrs);
+ ipath_get_counters(dev->dd, &cntrs);
/* Adjust counters for any resets done. */
- cntrs.symbol_error_counter -= dev->n_symbol_error_counter;
+ cntrs.symbol_error_counter -= dev->z_symbol_error_counter;
cntrs.link_error_recovery_counter -=
- dev->n_link_error_recovery_counter;
- cntrs.link_downed_counter -= dev->n_link_downed_counter;
+ dev->z_link_error_recovery_counter;
+ cntrs.link_downed_counter -= dev->z_link_downed_counter;
cntrs.port_rcv_errors += dev->rcv_errors;
- cntrs.port_rcv_errors -= dev->n_port_rcv_errors;
- cntrs.port_rcv_remphys_errors -= dev->n_port_rcv_remphys_errors;
- cntrs.port_xmit_discards -= dev->n_port_xmit_discards;
- cntrs.port_xmit_data -= dev->n_port_xmit_data;
- cntrs.port_rcv_data -= dev->n_port_rcv_data;
- cntrs.port_xmit_packets -= dev->n_port_xmit_packets;
- cntrs.port_rcv_packets -= dev->n_port_rcv_packets;
+ cntrs.port_rcv_errors -= dev->z_port_rcv_errors;
+ cntrs.port_rcv_remphys_errors -= dev->z_port_rcv_remphys_errors;
+ cntrs.port_xmit_discards -= dev->z_port_xmit_discards;
+ cntrs.port_xmit_data -= dev->z_port_xmit_data;
+ cntrs.port_rcv_data -= dev->z_port_rcv_data;
+ cntrs.port_xmit_packets -= dev->z_port_xmit_packets;
+ cntrs.port_rcv_packets -= dev->z_port_rcv_packets;
+ cntrs.local_link_integrity_errors -=
+ dev->z_local_link_integrity_errors;
+ cntrs.excessive_buffer_overrun_errors -=
+ dev->z_excessive_buffer_overrun_errors;
memset(pmp->data, 0, sizeof(pmp->data));
@@ -893,6 +1158,16 @@ static int recv_pma_get_portcounters(struct ib_perf *pmp,
else
p->port_xmit_discards =
cpu_to_be16((u16)cntrs.port_xmit_discards);
+ if (cntrs.local_link_integrity_errors > 0xFUL)
+ cntrs.local_link_integrity_errors = 0xFUL;
+ if (cntrs.excessive_buffer_overrun_errors > 0xFUL)
+ cntrs.excessive_buffer_overrun_errors = 0xFUL;
+ p->lli_ebor_errors = (cntrs.local_link_integrity_errors << 4) |
+ cntrs.excessive_buffer_overrun_errors;
+ if (dev->n_vl15_dropped > 0xFFFFUL)
+ p->vl15_dropped = __constant_cpu_to_be16(0xFFFF);
+ else
+ p->vl15_dropped = cpu_to_be16((u16)dev->n_vl15_dropped);
if (cntrs.port_xmit_data > 0xFFFFFFFFUL)
p->port_xmit_data = __constant_cpu_to_be32(0xFFFFFFFF);
else
@@ -924,14 +1199,14 @@ static int recv_pma_get_portcounters_ext(struct ib_perf *pmp,
u64 swords, rwords, spkts, rpkts, xwait;
u8 port_select = p->port_select;
- ipath_layer_snapshot_counters(dev->dd, &swords, &rwords, &spkts,
- &rpkts, &xwait);
+ ipath_snapshot_counters(dev->dd, &swords, &rwords, &spkts,
+ &rpkts, &xwait);
/* Adjust counters for any resets done. */
- swords -= dev->n_port_xmit_data;
- rwords -= dev->n_port_rcv_data;
- spkts -= dev->n_port_xmit_packets;
- rpkts -= dev->n_port_rcv_packets;
+ swords -= dev->z_port_xmit_data;
+ rwords -= dev->z_port_rcv_data;
+ spkts -= dev->z_port_xmit_packets;
+ rpkts -= dev->z_port_rcv_packets;
memset(pmp->data, 0, sizeof(pmp->data));
@@ -958,46 +1233,57 @@ static int recv_pma_set_portcounters(struct ib_perf *pmp,
struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
pmp->data;
struct ipath_ibdev *dev = to_idev(ibdev);
- struct ipath_layer_counters cntrs;
+ struct ipath_verbs_counters cntrs;
/*
* Since the HW doesn't support clearing counters, we save the
* current count and subtract it from future responses.
*/
- ipath_layer_get_counters(dev->dd, &cntrs);
+ ipath_get_counters(dev->dd, &cntrs);
if (p->counter_select & IB_PMA_SEL_SYMBOL_ERROR)
- dev->n_symbol_error_counter = cntrs.symbol_error_counter;
+ dev->z_symbol_error_counter = cntrs.symbol_error_counter;
if (p->counter_select & IB_PMA_SEL_LINK_ERROR_RECOVERY)
- dev->n_link_error_recovery_counter =
+ dev->z_link_error_recovery_counter =
cntrs.link_error_recovery_counter;
if (p->counter_select & IB_PMA_SEL_LINK_DOWNED)
- dev->n_link_downed_counter = cntrs.link_downed_counter;
+ dev->z_link_downed_counter = cntrs.link_downed_counter;
if (p->counter_select & IB_PMA_SEL_PORT_RCV_ERRORS)
- dev->n_port_rcv_errors =
+ dev->z_port_rcv_errors =
cntrs.port_rcv_errors + dev->rcv_errors;
if (p->counter_select & IB_PMA_SEL_PORT_RCV_REMPHYS_ERRORS)
- dev->n_port_rcv_remphys_errors =
+ dev->z_port_rcv_remphys_errors =
cntrs.port_rcv_remphys_errors;
if (p->counter_select & IB_PMA_SEL_PORT_XMIT_DISCARDS)
- dev->n_port_xmit_discards = cntrs.port_xmit_discards;
+ dev->z_port_xmit_discards = cntrs.port_xmit_discards;
+
+ if (p->counter_select & IB_PMA_SEL_LOCAL_LINK_INTEGRITY_ERRORS)
+ dev->z_local_link_integrity_errors =
+ cntrs.local_link_integrity_errors;
+
+ if (p->counter_select & IB_PMA_SEL_EXCESSIVE_BUFFER_OVERRUNS)
+ dev->z_excessive_buffer_overrun_errors =
+ cntrs.excessive_buffer_overrun_errors;
+
+ if (p->counter_select & IB_PMA_SEL_PORT_VL15_DROPPED)
+ dev->n_vl15_dropped = 0;
if (p->counter_select & IB_PMA_SEL_PORT_XMIT_DATA)
- dev->n_port_xmit_data = cntrs.port_xmit_data;
+ dev->z_port_xmit_data = cntrs.port_xmit_data;
if (p->counter_select & IB_PMA_SEL_PORT_RCV_DATA)
- dev->n_port_rcv_data = cntrs.port_rcv_data;
+ dev->z_port_rcv_data = cntrs.port_rcv_data;
if (p->counter_select & IB_PMA_SEL_PORT_XMIT_PACKETS)
- dev->n_port_xmit_packets = cntrs.port_xmit_packets;
+ dev->z_port_xmit_packets = cntrs.port_xmit_packets;
if (p->counter_select & IB_PMA_SEL_PORT_RCV_PACKETS)
- dev->n_port_rcv_packets = cntrs.port_rcv_packets;
+ dev->z_port_rcv_packets = cntrs.port_rcv_packets;
return recv_pma_get_portcounters(pmp, ibdev, port);
}
@@ -1010,20 +1296,20 @@ static int recv_pma_set_portcounters_ext(struct ib_perf *pmp,
struct ipath_ibdev *dev = to_idev(ibdev);
u64 swords, rwords, spkts, rpkts, xwait;
- ipath_layer_snapshot_counters(dev->dd, &swords, &rwords, &spkts,
- &rpkts, &xwait);
+ ipath_snapshot_counters(dev->dd, &swords, &rwords, &spkts,
+ &rpkts, &xwait);
if (p->counter_select & IB_PMA_SELX_PORT_XMIT_DATA)
- dev->n_port_xmit_data = swords;
+ dev->z_port_xmit_data = swords;
if (p->counter_select & IB_PMA_SELX_PORT_RCV_DATA)
- dev->n_port_rcv_data = rwords;
+ dev->z_port_rcv_data = rwords;
if (p->counter_select & IB_PMA_SELX_PORT_XMIT_PACKETS)
- dev->n_port_xmit_packets = spkts;
+ dev->z_port_xmit_packets = spkts;
if (p->counter_select & IB_PMA_SELX_PORT_RCV_PACKETS)
- dev->n_port_rcv_packets = rpkts;
+ dev->z_port_rcv_packets = rpkts;
if (p->counter_select & IB_PMA_SELX_PORT_UNI_XMIT_PACKETS)
dev->n_unicast_xmit = 0;
@@ -1272,32 +1558,8 @@ int ipath_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
struct ib_wc *in_wc, struct ib_grh *in_grh,
struct ib_mad *in_mad, struct ib_mad *out_mad)
{
- struct ipath_ibdev *dev = to_idev(ibdev);
int ret;
- /*
- * Snapshot current HW counters to "clear" them.
- * This should be done when the driver is loaded except that for
- * some reason we get a zillion errors when brining up the link.
- */
- if (dev->rcv_errors == 0) {
- struct ipath_layer_counters cntrs;
-
- ipath_layer_get_counters(to_idev(ibdev)->dd, &cntrs);
- dev->rcv_errors++;
- dev->n_symbol_error_counter = cntrs.symbol_error_counter;
- dev->n_link_error_recovery_counter =
- cntrs.link_error_recovery_counter;
- dev->n_link_downed_counter = cntrs.link_downed_counter;
- dev->n_port_rcv_errors = cntrs.port_rcv_errors + 1;
- dev->n_port_rcv_remphys_errors =
- cntrs.port_rcv_remphys_errors;
- dev->n_port_xmit_discards = cntrs.port_xmit_discards;
- dev->n_port_xmit_data = cntrs.port_xmit_data;
- dev->n_port_rcv_data = cntrs.port_rcv_data;
- dev->n_port_xmit_packets = cntrs.port_xmit_packets;
- dev->n_port_rcv_packets = cntrs.port_rcv_packets;
- }
switch (in_mad->mad_hdr.mgmt_class) {
case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
case IB_MGMT_CLASS_SUBN_LID_ROUTED:
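As an aside on the set_pkeys() hunk above: the driver folds the first four 16-bit entries of the port 0 PKEY table into the single 64-bit kr_partitionkey register by shifting each entry into its own 16-bit lane. The stand-alone C sketch below only restates that packing for clarity; it is illustrative, not part of the patch, and the pack_pkeys() helper name is made up here.

/* Illustrative sketch: pack four 16-bit PKEYs into one 64-bit register
 * value, mirroring the shift/OR sequence used by set_pkeys() above. */
#include <stdint.h>
#include <stdio.h>

static uint64_t pack_pkeys(const uint16_t pkeys[4])
{
	return (uint64_t) pkeys[0] |
	       ((uint64_t) pkeys[1] << 16) |
	       ((uint64_t) pkeys[2] << 32) |
	       ((uint64_t) pkeys[3] << 48);
}

int main(void)
{
	uint16_t pkeys[4] = { 0xffff, 0x8001, 0, 0 };

	printf("p0 new pkey reg %llx\n", (unsigned long long) pack_pkeys(pkeys));
	return 0;
}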
diff --git a/drivers/infiniband/hw/ipath/ipath_mmap.c b/drivers/infiniband/hw/ipath/ipath_mmap.c
new file mode 100644
index 000000000000..11b7378ff214
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_mmap.c
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <asm/pgtable.h>
+
+#include "ipath_verbs.h"
+
+/**
+ * ipath_release_mmap_info - free mmap info structure
+ * @ref: a pointer to the kref within struct ipath_mmap_info
+ */
+void ipath_release_mmap_info(struct kref *ref)
+{
+ struct ipath_mmap_info *ip =
+ container_of(ref, struct ipath_mmap_info, ref);
+
+ vfree(ip->obj);
+ kfree(ip);
+}
+
+/*
+ * open and close keep track of how many times the CQ is mapped,
+ * to avoid releasing it.
+ */
+static void ipath_vma_open(struct vm_area_struct *vma)
+{
+ struct ipath_mmap_info *ip = vma->vm_private_data;
+
+ kref_get(&ip->ref);
+ ip->mmap_cnt++;
+}
+
+static void ipath_vma_close(struct vm_area_struct *vma)
+{
+ struct ipath_mmap_info *ip = vma->vm_private_data;
+
+ ip->mmap_cnt--;
+ kref_put(&ip->ref, ipath_release_mmap_info);
+}
+
+static struct vm_operations_struct ipath_vm_ops = {
+ .open = ipath_vma_open,
+ .close = ipath_vma_close,
+};
+
+/**
+ * ipath_mmap - create a new mmap region
+ * @context: the IB user context of the process making the mmap() call
+ * @vma: the VMA to be initialized
+ * Return zero if the mmap is OK. Otherwise, return an errno.
+ */
+int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
+{
+ struct ipath_ibdev *dev = to_idev(context->device);
+ unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
+ unsigned long size = vma->vm_end - vma->vm_start;
+ struct ipath_mmap_info *ip, **pp;
+ int ret = -EINVAL;
+
+ /*
+ * Search the device's list of objects waiting for a mmap call.
+ * Normally, this list is very short since a call to create a
+ * CQ, QP, or SRQ is soon followed by a call to mmap().
+ */
+ spin_lock_irq(&dev->pending_lock);
+ for (pp = &dev->pending_mmaps; (ip = *pp); pp = &ip->next) {
+ /* Only the creator is allowed to mmap the object */
+ if (context != ip->context || (void *) offset != ip->obj)
+ continue;
+ /* Don't allow a mmap larger than the object. */
+ if (size > ip->size)
+ break;
+
+ *pp = ip->next;
+ spin_unlock_irq(&dev->pending_lock);
+
+ ret = remap_vmalloc_range(vma, ip->obj, 0);
+ if (ret)
+ goto done;
+ vma->vm_ops = &ipath_vm_ops;
+ vma->vm_private_data = ip;
+ ipath_vma_open(vma);
+ goto done;
+ }
+ spin_unlock_irq(&dev->pending_lock);
+done:
+ return ret;
+}
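For context on the new ipath_mmap() above: creating a CQ, QP, or SRQ hands the kernel virtual address of the queue back to user space as an opaque 64-bit mmap offset, and the user library (ipathverbs.so) is then expected to mmap() the device file at that offset so the code above can match it against the pending list and remap the vmalloc'ed buffer. The user-side sketch below is illustrative only — the fd plumbing through the verbs library is simplified and map_queue() is a hypothetical helper — and is not part of the patch.

/* Illustrative sketch: map a queue whose creation verb returned 'offset'. */
#include <stdint.h>
#include <sys/mman.h>

static void *map_queue(int dev_fd, uint64_t offset, size_t size)
{
	void *p = mmap(NULL, size, PROT_READ | PROT_WRITE,
		       MAP_SHARED, dev_fd, (off_t) offset);

	return p == MAP_FAILED ? NULL : p;
}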
diff --git a/drivers/infiniband/hw/ipath/ipath_mr.c b/drivers/infiniband/hw/ipath/ipath_mr.c
index 69ffec66d45d..a0673c1eef71 100644
--- a/drivers/infiniband/hw/ipath/ipath_mr.c
+++ b/drivers/infiniband/hw/ipath/ipath_mr.c
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2006 QLogic, Inc. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -35,6 +36,18 @@
#include "ipath_verbs.h"
+/* Fast memory region */
+struct ipath_fmr {
+ struct ib_fmr ibfmr;
+ u8 page_shift;
+ struct ipath_mregion mr; /* must be last */
+};
+
+static inline struct ipath_fmr *to_ifmr(struct ib_fmr *ibfmr)
+{
+ return container_of(ibfmr, struct ipath_fmr, ibfmr);
+}
+
/**
* ipath_get_dma_mr - get a DMA memory region
* @pd: protection domain for this memory region
@@ -125,6 +138,7 @@ struct ib_mr *ipath_reg_phys_mr(struct ib_pd *pd,
goto bail;
}
+ mr->mr.pd = pd;
mr->mr.user_base = *iova_start;
mr->mr.iova = *iova_start;
mr->mr.length = 0;
@@ -169,6 +183,11 @@ struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
int n, m, i;
struct ib_mr *ret;
+ if (region->length == 0) {
+ ret = ERR_PTR(-EINVAL);
+ goto bail;
+ }
+
n = 0;
list_for_each_entry(chunk, &region->chunk_list, list)
n += chunk->nents;
@@ -179,6 +198,7 @@ struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
goto bail;
}
+ mr->mr.pd = pd;
mr->mr.user_base = region->user_base;
mr->mr.iova = region->virt_base;
mr->mr.length = region->length;
@@ -271,6 +291,7 @@ struct ib_fmr *ipath_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
* Resources are allocated but no valid mapping (RKEY can't be
* used).
*/
+ fmr->mr.pd = pd;
fmr->mr.user_base = 0;
fmr->mr.iova = 0;
fmr->mr.length = 0;
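A small note on the to_ifmr() helper added above: the ib_fmr that the core layer sees is embedded inside the driver's ipath_fmr, and container_of() recovers the wrapper from the member pointer (the /* must be last */ note suggests the embedded mregion is variable-sized and so has to sit at the end). The user-space sketch below only restates the idiom with made-up type names; it is not part of the patch.

#include <stddef.h>
#include <stdio.h>

/* same pointer arithmetic as the kernel's container_of() */
#define container_of(ptr, type, member) \
	((type *) ((char *) (ptr) - offsetof(type, member)))

struct core_obj { int id; };				/* stands in for struct ib_fmr */
struct drv_obj { long priv; struct core_obj core; };	/* ipath_fmr-like wrapper */

static struct drv_obj *to_drv(struct core_obj *c)
{
	return container_of(c, struct drv_obj, core);
}

int main(void)
{
	struct drv_obj d = { .priv = 42, .core = { .id = 7 } };

	printf("%ld\n", to_drv(&d.core)->priv);	/* prints 42 */
	return 0;
}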
diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c
index 9f8855d970c8..46c1c89bf6ae 100644
--- a/drivers/infiniband/hw/ipath/ipath_qp.c
+++ b/drivers/infiniband/hw/ipath/ipath_qp.c
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2006 QLogic, Inc. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -34,7 +35,7 @@
#include <linux/vmalloc.h>
#include "ipath_verbs.h"
-#include "ips_common.h"
+#include "ipath_kernel.h"
#define BITS_PER_PAGE (PAGE_SIZE*BITS_PER_BYTE)
#define BITS_PER_PAGE_MASK (BITS_PER_PAGE-1)
@@ -43,19 +44,6 @@
#define find_next_offset(map, off) find_next_zero_bit((map)->page, \
BITS_PER_PAGE, off)
-#define TRANS_INVALID 0
-#define TRANS_ANY2RST 1
-#define TRANS_RST2INIT 2
-#define TRANS_INIT2INIT 3
-#define TRANS_INIT2RTR 4
-#define TRANS_RTR2RTS 5
-#define TRANS_RTS2RTS 6
-#define TRANS_SQERR2RTS 7
-#define TRANS_ANY2ERR 8
-#define TRANS_RTS2SQD 9 /* XXX Wait for expected ACKs & signal event */
-#define TRANS_SQD2SQD 10 /* error if not drained & parameter change */
-#define TRANS_SQD2RTS 11 /* error if not drained */
-
/*
* Convert the AETH credit code into the number of credits.
*/
@@ -286,7 +274,7 @@ void ipath_free_all_qps(struct ipath_qp_table *qpt)
free_qpn(qpt, qp->ibqp.qp_num);
if (!atomic_dec_and_test(&qp->refcount) ||
!ipath_destroy_qp(&qp->ibqp))
- _VERBS_INFO("QP memory leak!\n");
+ ipath_dbg(KERN_INFO "QP memory leak!\n");
qp = nqp;
}
}
@@ -332,10 +320,11 @@ static void ipath_reset_qp(struct ipath_qp *qp)
qp->remote_qpn = 0;
qp->qkey = 0;
qp->qp_access_flags = 0;
+ clear_bit(IPATH_S_BUSY, &qp->s_flags);
qp->s_hdrwords = 0;
qp->s_psn = 0;
qp->r_psn = 0;
- atomic_set(&qp->msn, 0);
+ qp->r_msn = 0;
if (qp->ibqp.qp_type == IB_QPT_RC) {
qp->s_state = IB_OPCODE_RC_SEND_LAST;
qp->r_state = IB_OPCODE_RC_SEND_LAST;
@@ -344,7 +333,9 @@ static void ipath_reset_qp(struct ipath_qp *qp)
qp->r_state = IB_OPCODE_UC_SEND_LAST;
}
qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
- qp->s_nak_state = 0;
+ qp->r_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
+ qp->r_nak_state = 0;
+ qp->r_wrid_valid = 0;
qp->s_rnr_timeout = 0;
qp->s_head = 0;
qp->s_tail = 0;
@@ -352,26 +343,30 @@ static void ipath_reset_qp(struct ipath_qp *qp)
qp->s_last = 0;
qp->s_ssn = 1;
qp->s_lsn = 0;
- qp->r_rq.head = 0;
- qp->r_rq.tail = 0;
+ qp->s_wait_credit = 0;
+ if (qp->r_rq.wq) {
+ qp->r_rq.wq->head = 0;
+ qp->r_rq.wq->tail = 0;
+ }
qp->r_reuse_sge = 0;
}
/**
* ipath_error_qp - put a QP into an error state
* @qp: the QP to put into an error state
+ * @err: the receive completion error to signal if a RWQE is active
*
* Flushes both send and receive work queues.
- * QP r_rq.lock and s_lock should be held.
+ * QP s_lock should be held and interrupts disabled.
*/
-static void ipath_error_qp(struct ipath_qp *qp)
+void ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
{
struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
struct ib_wc wc;
- _VERBS_INFO("QP%d/%d in error state\n",
- qp->ibqp.qp_num, qp->remote_qpn);
+ ipath_dbg(KERN_INFO "QP%d/%d in error state\n",
+ qp->ibqp.qp_num, qp->remote_qpn);
spin_lock(&dev->pending_lock);
/* XXX What if its already removed by the timeout code? */
@@ -381,7 +376,6 @@ static void ipath_error_qp(struct ipath_qp *qp)
list_del_init(&qp->piowait);
spin_unlock(&dev->pending_lock);
- wc.status = IB_WC_WR_FLUSH_ERR;
wc.vendor_err = 0;
wc.byte_len = 0;
wc.imm_data = 0;
@@ -393,6 +387,12 @@ static void ipath_error_qp(struct ipath_qp *qp)
wc.sl = 0;
wc.dlid_path_bits = 0;
wc.port_num = 0;
+ if (qp->r_wrid_valid) {
+ qp->r_wrid_valid = 0;
+ wc.status = err;
+ ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 1);
+ }
+ wc.status = IB_WC_WR_FLUSH_ERR;
while (qp->s_last != qp->s_head) {
struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
@@ -407,12 +407,31 @@ static void ipath_error_qp(struct ipath_qp *qp)
qp->s_hdrwords = 0;
qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
- wc.opcode = IB_WC_RECV;
- while (qp->r_rq.tail != qp->r_rq.head) {
- wc.wr_id = get_rwqe_ptr(&qp->r_rq, qp->r_rq.tail)->wr_id;
- if (++qp->r_rq.tail >= qp->r_rq.size)
- qp->r_rq.tail = 0;
- ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
+ if (qp->r_rq.wq) {
+ struct ipath_rwq *wq;
+ u32 head;
+ u32 tail;
+
+ spin_lock(&qp->r_rq.lock);
+
+ /* sanity check pointers before trusting them */
+ wq = qp->r_rq.wq;
+ head = wq->head;
+ if (head >= qp->r_rq.size)
+ head = 0;
+ tail = wq->tail;
+ if (tail >= qp->r_rq.size)
+ tail = 0;
+ wc.opcode = IB_WC_RECV;
+ while (tail != head) {
+ wc.wr_id = get_rwqe_ptr(&qp->r_rq, tail)->wr_id;
+ if (++tail >= qp->r_rq.size)
+ tail = 0;
+ ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
+ }
+ wq->tail = tail;
+
+ spin_unlock(&qp->r_rq.lock);
}
}
@@ -421,11 +440,12 @@ static void ipath_error_qp(struct ipath_qp *qp)
 * @ibqp: the queue pair whose attributes we're modifying
* @attr: the new attributes
* @attr_mask: the mask of attributes to modify
+ * @udata: user data for ipathverbs.so
*
* Returns 0 on success, otherwise returns an errno.
*/
int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
- int attr_mask)
+ int attr_mask, struct ib_udata *udata)
{
struct ipath_ibdev *dev = to_idev(ibqp->device);
struct ipath_qp *qp = to_iqp(ibqp);
@@ -433,8 +453,7 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
unsigned long flags;
int ret;
- spin_lock_irqsave(&qp->r_rq.lock, flags);
- spin_lock(&qp->s_lock);
+ spin_lock_irqsave(&qp->s_lock, flags);
cur_state = attr_mask & IB_QP_CUR_STATE ?
attr->cur_qp_state : qp->state;
@@ -444,26 +463,53 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
attr_mask))
goto inval;
- if (attr_mask & IB_QP_AV)
+ if (attr_mask & IB_QP_AV) {
if (attr->ah_attr.dlid == 0 ||
- attr->ah_attr.dlid >= IPS_MULTICAST_LID_BASE)
+ attr->ah_attr.dlid >= IPATH_MULTICAST_LID_BASE)
+ goto inval;
+
+ if ((attr->ah_attr.ah_flags & IB_AH_GRH) &&
+ (attr->ah_attr.grh.sgid_index > 1))
goto inval;
+ }
if (attr_mask & IB_QP_PKEY_INDEX)
- if (attr->pkey_index >= ipath_layer_get_npkeys(dev->dd))
+ if (attr->pkey_index >= ipath_get_npkeys(dev->dd))
goto inval;
if (attr_mask & IB_QP_MIN_RNR_TIMER)
if (attr->min_rnr_timer > 31)
goto inval;
+ if (attr_mask & IB_QP_PORT)
+ if (attr->port_num == 0 ||
+ attr->port_num > ibqp->device->phys_port_cnt)
+ goto inval;
+
+ if (attr_mask & IB_QP_PATH_MTU)
+ if (attr->path_mtu > IB_MTU_4096)
+ goto inval;
+
+ if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
+ if (attr->max_dest_rd_atomic > 1)
+ goto inval;
+
+ if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC)
+ if (attr->max_rd_atomic > 1)
+ goto inval;
+
+ if (attr_mask & IB_QP_PATH_MIG_STATE)
+ if (attr->path_mig_state != IB_MIG_MIGRATED &&
+ attr->path_mig_state != IB_MIG_REARM)
+ goto inval;
+
switch (new_state) {
case IB_QPS_RESET:
ipath_reset_qp(qp);
break;
case IB_QPS_ERR:
- ipath_error_qp(qp);
+ ipath_error_qp(qp, IB_WC_GENERAL_ERR);
break;
default:
@@ -478,7 +524,7 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
qp->remote_qpn = attr->dest_qp_num;
if (attr_mask & IB_QP_SQ_PSN) {
- qp->s_next_psn = attr->sq_psn;
+ qp->s_psn = qp->s_next_psn = attr->sq_psn;
qp->s_last_psn = qp->s_next_psn - 1;
}
@@ -505,34 +551,22 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
}
if (attr_mask & IB_QP_MIN_RNR_TIMER)
- qp->s_min_rnr_timer = attr->min_rnr_timer;
+ qp->r_min_rnr_timer = attr->min_rnr_timer;
+
+ if (attr_mask & IB_QP_TIMEOUT)
+ qp->timeout = attr->timeout;
if (attr_mask & IB_QP_QKEY)
qp->qkey = attr->qkey;
- if (attr_mask & IB_QP_PKEY_INDEX)
- qp->s_pkey_index = attr->pkey_index;
-
qp->state = new_state;
- spin_unlock(&qp->s_lock);
- spin_unlock_irqrestore(&qp->r_rq.lock, flags);
-
- /*
- * If QP1 changed to the RTS state, try to move to the link to INIT
- * even if it was ACTIVE so the SM will reinitialize the SMA's
- * state.
- */
- if (qp->ibqp.qp_num == 1 && new_state == IB_QPS_RTS) {
- struct ipath_ibdev *dev = to_idev(ibqp->device);
+ spin_unlock_irqrestore(&qp->s_lock, flags);
- ipath_layer_set_linkstate(dev->dd, IPATH_IB_LINKDOWN);
- }
ret = 0;
goto bail;
inval:
- spin_unlock(&qp->s_lock);
- spin_unlock_irqrestore(&qp->r_rq.lock, flags);
+ spin_unlock_irqrestore(&qp->s_lock, flags);
ret = -EINVAL;
bail:
@@ -554,7 +588,7 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
attr->dest_qp_num = qp->remote_qpn;
attr->qp_access_flags = qp->qp_access_flags;
attr->cap.max_send_wr = qp->s_size - 1;
- attr->cap.max_recv_wr = qp->r_rq.size - 1;
+ attr->cap.max_recv_wr = qp->ibqp.srq ? 0 : qp->r_rq.size - 1;
attr->cap.max_send_sge = qp->s_max_sge;
attr->cap.max_recv_sge = qp->r_rq.max_sge;
attr->cap.max_inline_data = 0;
@@ -566,9 +600,9 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
attr->sq_draining = 0;
attr->max_rd_atomic = 1;
attr->max_dest_rd_atomic = 1;
- attr->min_rnr_timer = qp->s_min_rnr_timer;
+ attr->min_rnr_timer = qp->r_min_rnr_timer;
attr->port_num = 1;
- attr->timeout = 0;
+ attr->timeout = qp->timeout;
attr->retry_cnt = qp->s_retry_cnt;
attr->rnr_retry = qp->s_rnr_retry;
attr->alt_port_num = 0;
@@ -580,9 +614,10 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
init_attr->recv_cq = qp->ibqp.recv_cq;
init_attr->srq = qp->ibqp.srq;
init_attr->cap = attr->cap;
- init_attr->sq_sig_type =
- (qp->s_flags & (1 << IPATH_S_SIGNAL_REQ_WR))
- ? IB_SIGNAL_REQ_WR : 0;
+ if (qp->s_flags & (1 << IPATH_S_SIGNAL_REQ_WR))
+ init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
+ else
+ init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
init_attr->qp_type = qp->ibqp.qp_type;
init_attr->port_num = 1;
return 0;
@@ -593,31 +628,37 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
* @qp: the queue pair to compute the AETH for
*
* Returns the AETH.
- *
- * The QP s_lock should be held.
*/
__be32 ipath_compute_aeth(struct ipath_qp *qp)
{
- u32 aeth = atomic_read(&qp->msn) & IPS_MSN_MASK;
+ u32 aeth = qp->r_msn & IPATH_MSN_MASK;
- if (qp->s_nak_state) {
- aeth |= qp->s_nak_state << IPS_AETH_CREDIT_SHIFT;
- } else if (qp->ibqp.srq) {
+ if (qp->ibqp.srq) {
/*
* Shared receive queues don't generate credits.
* Set the credit field to the invalid value.
*/
- aeth |= IPS_AETH_CREDIT_INVAL << IPS_AETH_CREDIT_SHIFT;
+ aeth |= IPATH_AETH_CREDIT_INVAL << IPATH_AETH_CREDIT_SHIFT;
} else {
u32 min, max, x;
u32 credits;
-
+ struct ipath_rwq *wq = qp->r_rq.wq;
+ u32 head;
+ u32 tail;
+
+ /* sanity check pointers before trusting them */
+ head = wq->head;
+ if (head >= qp->r_rq.size)
+ head = 0;
+ tail = wq->tail;
+ if (tail >= qp->r_rq.size)
+ tail = 0;
/*
* Compute the number of credits available (RWQEs).
* XXX Not holding the r_rq.lock here so there is a small
* chance that the pair of reads are not atomic.
*/
- credits = qp->r_rq.head - qp->r_rq.tail;
+ credits = head - tail;
if ((int)credits < 0)
credits += qp->r_rq.size;
/*
@@ -637,7 +678,7 @@ __be32 ipath_compute_aeth(struct ipath_qp *qp)
else
min = x;
}
- aeth |= x << IPS_AETH_CREDIT_SHIFT;
+ aeth |= x << IPATH_AETH_CREDIT_SHIFT;
}
return cpu_to_be32(aeth);
}
@@ -663,12 +704,22 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
size_t sz;
struct ib_qp *ret;
- if (init_attr->cap.max_send_sge > 255 ||
- init_attr->cap.max_recv_sge > 255) {
+ if (init_attr->cap.max_send_sge > ib_ipath_max_sges ||
+ init_attr->cap.max_recv_sge > ib_ipath_max_sges ||
+ init_attr->cap.max_send_wr > ib_ipath_max_qp_wrs ||
+ init_attr->cap.max_recv_wr > ib_ipath_max_qp_wrs) {
ret = ERR_PTR(-ENOMEM);
goto bail;
}
+ if (init_attr->cap.max_send_sge +
+ init_attr->cap.max_recv_sge +
+ init_attr->cap.max_send_wr +
+ init_attr->cap.max_recv_wr == 0) {
+ ret = ERR_PTR(-EINVAL);
+ goto bail;
+ }
+
switch (init_attr->qp_type) {
case IB_QPT_UC:
case IB_QPT_RC:
@@ -684,20 +735,38 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
case IB_QPT_UD:
case IB_QPT_SMI:
case IB_QPT_GSI:
- qp = kmalloc(sizeof(*qp), GFP_KERNEL);
+ sz = sizeof(*qp);
+ if (init_attr->srq) {
+ struct ipath_srq *srq = to_isrq(init_attr->srq);
+
+ sz += sizeof(*qp->r_sg_list) *
+ srq->rq.max_sge;
+ } else
+ sz += sizeof(*qp->r_sg_list) *
+ init_attr->cap.max_recv_sge;
+ qp = kmalloc(sz, GFP_KERNEL);
if (!qp) {
ret = ERR_PTR(-ENOMEM);
- goto bail;
+ goto bail_swq;
}
- qp->r_rq.size = init_attr->cap.max_recv_wr + 1;
- sz = sizeof(struct ipath_sge) *
- init_attr->cap.max_recv_sge +
- sizeof(struct ipath_rwqe);
- qp->r_rq.wq = vmalloc(qp->r_rq.size * sz);
- if (!qp->r_rq.wq) {
- kfree(qp);
- ret = ERR_PTR(-ENOMEM);
- goto bail;
+ if (init_attr->srq) {
+ sz = 0;
+ qp->r_rq.size = 0;
+ qp->r_rq.max_sge = 0;
+ qp->r_rq.wq = NULL;
+ init_attr->cap.max_recv_wr = 0;
+ init_attr->cap.max_recv_sge = 0;
+ } else {
+ qp->r_rq.size = init_attr->cap.max_recv_wr + 1;
+ qp->r_rq.max_sge = init_attr->cap.max_recv_sge;
+ sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) +
+ sizeof(struct ipath_rwqe);
+ qp->r_rq.wq = vmalloc_user(sizeof(struct ipath_rwq) +
+ qp->r_rq.size * sz);
+ if (!qp->r_rq.wq) {
+ ret = ERR_PTR(-ENOMEM);
+ goto bail_qp;
+ }
}
/*
@@ -708,9 +777,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
spin_lock_init(&qp->r_rq.lock);
atomic_set(&qp->refcount, 0);
init_waitqueue_head(&qp->wait);
- tasklet_init(&qp->s_task,
- init_attr->qp_type == IB_QPT_RC ?
- ipath_do_rc_send : ipath_do_uc_send,
+ tasklet_init(&qp->s_task, ipath_do_ruc_send,
(unsigned long)qp);
INIT_LIST_HEAD(&qp->piowait);
INIT_LIST_HEAD(&qp->timerwait);
@@ -718,25 +785,19 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
qp->s_wq = swq;
qp->s_size = init_attr->cap.max_send_wr + 1;
qp->s_max_sge = init_attr->cap.max_send_sge;
- qp->r_rq.max_sge = init_attr->cap.max_recv_sge;
- qp->s_flags = init_attr->sq_sig_type == IB_SIGNAL_REQ_WR ?
- 1 << IPATH_S_SIGNAL_REQ_WR : 0;
+ if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR)
+ qp->s_flags = 1 << IPATH_S_SIGNAL_REQ_WR;
+ else
+ qp->s_flags = 0;
dev = to_idev(ibpd->device);
err = ipath_alloc_qpn(&dev->qp_table, qp,
init_attr->qp_type);
if (err) {
- vfree(swq);
- vfree(qp->r_rq.wq);
- kfree(qp);
ret = ERR_PTR(err);
- goto bail;
+ goto bail_rwq;
}
+ qp->ip = NULL;
ipath_reset_qp(qp);
-
- /* Tell the core driver that the kernel SMA is present. */
- if (init_attr->qp_type == IB_QPT_SMI)
- ipath_layer_set_verbs_flags(dev->dd,
- IPATH_VERBS_KERNEL_SMA);
break;
default:
@@ -747,8 +808,63 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
init_attr->cap.max_inline_data = 0;
+ /*
+ * Return the address of the RWQ as the offset to mmap.
+ * See ipath_mmap() for details.
+ */
+ if (udata && udata->outlen >= sizeof(__u64)) {
+ struct ipath_mmap_info *ip;
+ __u64 offset = (__u64) qp->r_rq.wq;
+ int err;
+
+ err = ib_copy_to_udata(udata, &offset, sizeof(offset));
+ if (err) {
+ ret = ERR_PTR(err);
+ goto bail_rwq;
+ }
+
+ if (qp->r_rq.wq) {
+ /* Allocate info for ipath_mmap(). */
+ ip = kmalloc(sizeof(*ip), GFP_KERNEL);
+ if (!ip) {
+ ret = ERR_PTR(-ENOMEM);
+ goto bail_rwq;
+ }
+ qp->ip = ip;
+ ip->context = ibpd->uobject->context;
+ ip->obj = qp->r_rq.wq;
+ kref_init(&ip->ref);
+ ip->mmap_cnt = 0;
+ ip->size = PAGE_ALIGN(sizeof(struct ipath_rwq) +
+ qp->r_rq.size * sz);
+ spin_lock_irq(&dev->pending_lock);
+ ip->next = dev->pending_mmaps;
+ dev->pending_mmaps = ip;
+ spin_unlock_irq(&dev->pending_lock);
+ }
+ }
+
+ spin_lock(&dev->n_qps_lock);
+ if (dev->n_qps_allocated == ib_ipath_max_qps) {
+ spin_unlock(&dev->n_qps_lock);
+ ret = ERR_PTR(-ENOMEM);
+ goto bail_ip;
+ }
+
+ dev->n_qps_allocated++;
+ spin_unlock(&dev->n_qps_lock);
+
ret = &qp->ibqp;
+ goto bail;
+bail_ip:
+ kfree(qp->ip);
+bail_rwq:
+ vfree(qp->r_rq.wq);
+bail_qp:
+ kfree(qp);
+bail_swq:
+ vfree(swq);
bail:
return ret;
}
@@ -768,15 +884,12 @@ int ipath_destroy_qp(struct ib_qp *ibqp)
struct ipath_ibdev *dev = to_idev(ibqp->device);
unsigned long flags;
- /* Tell the core driver that the kernel SMA is gone. */
- if (qp->ibqp.qp_type == IB_QPT_SMI)
- ipath_layer_set_verbs_flags(dev->dd, 0);
-
- spin_lock_irqsave(&qp->r_rq.lock, flags);
- spin_lock(&qp->s_lock);
+ spin_lock_irqsave(&qp->s_lock, flags);
qp->state = IB_QPS_ERR;
- spin_unlock(&qp->s_lock);
- spin_unlock_irqrestore(&qp->r_rq.lock, flags);
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ spin_lock(&dev->n_qps_lock);
+ dev->n_qps_allocated--;
+ spin_unlock(&dev->n_qps_lock);
/* Stop the sending tasklet. */
tasklet_kill(&qp->s_task);
@@ -797,8 +910,11 @@ int ipath_destroy_qp(struct ib_qp *ibqp)
if (atomic_read(&qp->refcount) != 0)
ipath_free_qp(&dev->qp_table, qp);
+ if (qp->ip)
+ kref_put(&qp->ip->ref, ipath_release_mmap_info);
+ else
+ vfree(qp->r_rq.wq);
vfree(qp->s_wq);
- vfree(qp->r_rq.wq);
kfree(qp);
return 0;
}
@@ -850,8 +966,8 @@ void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc)
struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
- _VERBS_INFO("Send queue error on QP%d/%d: err: %d\n",
- qp->ibqp.qp_num, qp->remote_qpn, wc->status);
+ ipath_dbg(KERN_INFO "Send queue error on QP%d/%d: err: %d\n",
+ qp->ibqp.qp_num, qp->remote_qpn, wc->status);
spin_lock(&dev->pending_lock);
/* XXX What if its already removed by the timeout code? */
@@ -888,18 +1004,18 @@ void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc)
*/
void ipath_get_credit(struct ipath_qp *qp, u32 aeth)
{
- u32 credit = (aeth >> IPS_AETH_CREDIT_SHIFT) & IPS_AETH_CREDIT_MASK;
+ u32 credit = (aeth >> IPATH_AETH_CREDIT_SHIFT) & IPATH_AETH_CREDIT_MASK;
/*
* If the credit is invalid, we can send
* as many packets as we like. Otherwise, we have to
* honor the credit field.
*/
- if (credit == IPS_AETH_CREDIT_INVAL) {
+ if (credit == IPATH_AETH_CREDIT_INVAL)
qp->s_lsn = (u32) -1;
- } else if (qp->s_lsn != (u32) -1) {
+ else if (qp->s_lsn != (u32) -1) {
/* Compute new LSN (i.e., MSN + credit) */
- credit = (aeth + credit_table[credit]) & IPS_MSN_MASK;
+ credit = (aeth + credit_table[credit]) & IPATH_MSN_MASK;
if (ipath_cmp24(credit, qp->s_lsn) > 0)
qp->s_lsn = credit;
}
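The credit handling touched above (ipath_compute_aeth() and ipath_get_credit()) counts the available RWQEs from the receive queue head/tail indices and encodes that count, via a lookup-table index, into the AETH alongside the 24-bit MSN. The stand-alone sketch below is illustrative only: the shortened credit_table and the linear scan stand in for the driver's real table and binary search, and the constants simply follow the IB AETH layout (24-bit MSN, credit code in the syndrome bits).

/* Illustrative sketch of the AETH credit encoding used above. */
#include <stdint.h>

#define MSN_MASK	0x00FFFFFF
#define CREDIT_SHIFT	24

/* shortened stand-in for the driver's credit table: index -> credits */
static const uint16_t credit_table[] = { 0, 1, 2, 3, 4, 6, 8, 12, 16 };

uint32_t make_aeth(uint32_t msn, uint32_t head, uint32_t tail, uint32_t rq_size)
{
	uint32_t credits = head - tail;
	uint32_t x = 0;

	if ((int) credits < 0)		/* circular queue wrap-around */
		credits += rq_size;
	/* largest table index whose credit count does not exceed 'credits' */
	while (x + 1 < sizeof(credit_table) / sizeof(credit_table[0]) &&
	       credit_table[x + 1] <= credits)
		x++;
	return (msn & MSN_MASK) | (x << CREDIT_SHIFT);
}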
diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c
index 493b1821a934..a504cf67f272 100644
--- a/drivers/infiniband/hw/ipath/ipath_rc.c
+++ b/drivers/infiniband/hw/ipath/ipath_rc.c
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2006 QLogic, Inc. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -31,7 +32,7 @@
*/
#include "ipath_verbs.h"
-#include "ips_common.h"
+#include "ipath_kernel.h"
/* cut down ridiculously long IB macro names */
#define OP(x) IB_OPCODE_RC_##x
@@ -41,14 +42,14 @@
* @qp: the QP who's SGE we're restarting
* @wqe: the work queue to initialize the QP's SGE from
*
- * The QP s_lock should be held.
+ * The QP s_lock should be held and interrupts disabled.
*/
static void ipath_init_restart(struct ipath_qp *qp, struct ipath_swqe *wqe)
{
struct ipath_ibdev *dev;
u32 len;
- len = ((qp->s_psn - wqe->psn) & IPS_PSN_MASK) *
+ len = ((qp->s_psn - wqe->psn) & IPATH_PSN_MASK) *
ib_mtu_enum_to_int(qp->path_mtu);
qp->s_sge.sge = wqe->sg_list[0];
qp->s_sge.sg_list = wqe->sg_list + 1;
@@ -72,11 +73,10 @@ static void ipath_init_restart(struct ipath_qp *qp, struct ipath_swqe *wqe)
* Return bth0 if constructed; otherwise, return 0.
* Note the QP s_lock must be held.
*/
-static inline u32 ipath_make_rc_ack(struct ipath_qp *qp,
- struct ipath_other_headers *ohdr,
- u32 pmtu)
+u32 ipath_make_rc_ack(struct ipath_qp *qp,
+ struct ipath_other_headers *ohdr,
+ u32 pmtu)
{
- struct ipath_sge_state *ss;
u32 hwords;
u32 len;
u32 bth0;
@@ -90,13 +90,12 @@ static inline u32 ipath_make_rc_ack(struct ipath_qp *qp,
*/
switch (qp->s_ack_state) {
case OP(RDMA_READ_REQUEST):
- ss = &qp->s_rdma_sge;
+ qp->s_cur_sge = &qp->s_rdma_sge;
len = qp->s_rdma_len;
if (len > pmtu) {
len = pmtu;
qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST);
- }
- else
+ } else
qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY);
qp->s_rdma_len -= len;
bth0 = qp->s_ack_state << 24;
@@ -108,7 +107,7 @@ static inline u32 ipath_make_rc_ack(struct ipath_qp *qp,
qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE);
/* FALLTHROUGH */
case OP(RDMA_READ_RESPONSE_MIDDLE):
- ss = &qp->s_rdma_sge;
+ qp->s_cur_sge = &qp->s_rdma_sge;
len = qp->s_rdma_len;
if (len > pmtu)
len = pmtu;
@@ -127,41 +126,50 @@ static inline u32 ipath_make_rc_ack(struct ipath_qp *qp,
* We have to prevent new requests from changing
* the r_sge state while a ipath_verbs_send()
* is in progress.
- * Changing r_state allows the receiver
- * to continue processing new packets.
- * We do it here now instead of above so
- * that we are sure the packet was sent before
- * changing the state.
*/
- qp->r_state = OP(RDMA_READ_RESPONSE_LAST);
qp->s_ack_state = OP(ACKNOWLEDGE);
- return 0;
+ bth0 = 0;
+ goto bail;
case OP(COMPARE_SWAP):
case OP(FETCH_ADD):
- ss = NULL;
+ qp->s_cur_sge = NULL;
len = 0;
- qp->r_state = OP(SEND_LAST);
- qp->s_ack_state = OP(ACKNOWLEDGE);
- bth0 = IB_OPCODE_ATOMIC_ACKNOWLEDGE << 24;
+ /*
+ * Set the s_ack_state so the receive interrupt handler
+ * won't try to send an ACK (out of order) until this one
+ * is actually sent.
+ */
+ qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
+ bth0 = OP(ATOMIC_ACKNOWLEDGE) << 24;
ohdr->u.at.aeth = ipath_compute_aeth(qp);
- ohdr->u.at.atomic_ack_eth = cpu_to_be64(qp->s_ack_atomic);
+ ohdr->u.at.atomic_ack_eth = cpu_to_be64(qp->r_atomic_data);
hwords += sizeof(ohdr->u.at) / 4;
break;
default:
/* Send a regular ACK. */
- ss = NULL;
+ qp->s_cur_sge = NULL;
len = 0;
- qp->s_ack_state = OP(ACKNOWLEDGE);
- bth0 = qp->s_ack_state << 24;
- ohdr->u.aeth = ipath_compute_aeth(qp);
+ /*
+ * Set the s_ack_state so the receive interrupt handler
+ * won't try to send an ACK (out of order) until this one
+ * is actually sent.
+ */
+ qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
+ bth0 = OP(ACKNOWLEDGE) << 24;
+ if (qp->s_nak_state)
+ ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) |
+ (qp->s_nak_state <<
+ IPATH_AETH_CREDIT_SHIFT));
+ else
+ ohdr->u.aeth = ipath_compute_aeth(qp);
hwords++;
}
qp->s_hdrwords = hwords;
- qp->s_cur_sge = ss;
qp->s_cur_size = len;
+bail:
return bth0;
}
@@ -174,11 +182,11 @@ static inline u32 ipath_make_rc_ack(struct ipath_qp *qp,
* @bth2p: pointer to the BTH PSN word
*
* Return 1 if constructed; otherwise, return 0.
- * Note the QP s_lock must be held.
+ * Note the QP s_lock must be held and interrupts disabled.
*/
-static inline int ipath_make_rc_req(struct ipath_qp *qp,
- struct ipath_other_headers *ohdr,
- u32 pmtu, u32 *bth0p, u32 *bth2p)
+int ipath_make_rc_req(struct ipath_qp *qp,
+ struct ipath_other_headers *ohdr,
+ u32 pmtu, u32 *bth0p, u32 *bth2p)
{
struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
struct ipath_sge_state *ss;
@@ -193,6 +201,18 @@ static inline int ipath_make_rc_req(struct ipath_qp *qp,
qp->s_rnr_timeout)
goto done;
+ /* Limit the number of packets sent without an ACK. */
+ if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT) > 0) {
+ qp->s_wait_credit = 1;
+ dev->n_rc_stalls++;
+ spin_lock(&dev->pending_lock);
+ if (list_empty(&qp->timerwait))
+ list_add_tail(&qp->timerwait,
+ &dev->pending[dev->pending_index]);
+ spin_unlock(&dev->pending_lock);
+ goto done;
+ }
+
/* header size in 32-bit words LRH+BTH = (8+12)/4. */
hwords = 5;
bth0 = 0;
@@ -213,7 +233,7 @@ static inline int ipath_make_rc_req(struct ipath_qp *qp,
/* Check if send work queue is empty. */
if (qp->s_tail == qp->s_head)
goto done;
- qp->s_psn = wqe->psn = qp->s_next_psn;
+ wqe->psn = qp->s_next_psn;
newreq = 1;
}
/*
@@ -257,7 +277,7 @@ static inline int ipath_make_rc_req(struct ipath_qp *qp,
break;
case IB_WR_RDMA_WRITE:
- if (newreq)
+ if (newreq && qp->s_lsn != (u32) -1)
qp->s_lsn++;
/* FALLTHROUGH */
case IB_WR_RDMA_WRITE_WITH_IMM:
@@ -283,8 +303,7 @@ static inline int ipath_make_rc_req(struct ipath_qp *qp,
else {
qp->s_state =
OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
- /* Immediate data comes
- * after RETH */
+ /* Immediate data comes after RETH */
ohdr->u.rc.imm_data = wqe->wr.imm_data;
hwords += 1;
if (wqe->wr.send_flags & IB_SEND_SOLICITED)
@@ -304,7 +323,8 @@ static inline int ipath_make_rc_req(struct ipath_qp *qp,
qp->s_state = OP(RDMA_READ_REQUEST);
hwords += sizeof(ohdr->u.rc.reth) / 4;
if (newreq) {
- qp->s_lsn++;
+ if (qp->s_lsn != (u32) -1)
+ qp->s_lsn++;
/*
* Adjust s_next_psn to count the
* expected number of responses.
@@ -335,7 +355,8 @@ static inline int ipath_make_rc_req(struct ipath_qp *qp,
wqe->wr.wr.atomic.compare_add);
hwords += sizeof(struct ib_atomic_eth) / 4;
if (newreq) {
- qp->s_lsn++;
+ if (qp->s_lsn != (u32) -1)
+ qp->s_lsn++;
wqe->lpsn = wqe->psn;
}
if (++qp->s_cur == qp->s_size)
@@ -352,9 +373,14 @@ static inline int ipath_make_rc_req(struct ipath_qp *qp,
if (qp->s_tail >= qp->s_size)
qp->s_tail = 0;
}
- bth2 |= qp->s_psn++ & IPS_PSN_MASK;
+ bth2 |= qp->s_psn++ & IPATH_PSN_MASK;
if ((int)(qp->s_psn - qp->s_next_psn) > 0)
qp->s_next_psn = qp->s_psn;
+ /*
+ * Put the QP on the pending list so lost ACKs will cause
+ * a retry. More than one request can be pending so the
+ * QP may already be on the dev->pending list.
+ */
spin_lock(&dev->pending_lock);
if (list_empty(&qp->timerwait))
list_add_tail(&qp->timerwait,
@@ -364,8 +390,8 @@ static inline int ipath_make_rc_req(struct ipath_qp *qp,
case OP(RDMA_READ_RESPONSE_FIRST):
/*
- * This case can only happen if a send is restarted. See
- * ipath_restart_rc().
+ * This case can only happen if a send is restarted.
+ * See ipath_restart_rc().
*/
ipath_init_restart(qp, wqe);
/* FALLTHROUGH */
@@ -373,18 +399,12 @@ static inline int ipath_make_rc_req(struct ipath_qp *qp,
qp->s_state = OP(SEND_MIDDLE);
/* FALLTHROUGH */
case OP(SEND_MIDDLE):
- bth2 = qp->s_psn++ & IPS_PSN_MASK;
+ bth2 = qp->s_psn++ & IPATH_PSN_MASK;
if ((int)(qp->s_psn - qp->s_next_psn) > 0)
qp->s_next_psn = qp->s_psn;
ss = &qp->s_sge;
len = qp->s_len;
if (len > pmtu) {
- /*
- * Request an ACK every 1/2 MB to avoid retransmit
- * timeouts.
- */
- if (((wqe->length - len) % (512 * 1024)) == 0)
- bth2 |= 1 << 31;
len = pmtu;
break;
}
@@ -415,18 +435,12 @@ static inline int ipath_make_rc_req(struct ipath_qp *qp,
qp->s_state = OP(RDMA_WRITE_MIDDLE);
/* FALLTHROUGH */
case OP(RDMA_WRITE_MIDDLE):
- bth2 = qp->s_psn++ & IPS_PSN_MASK;
+ bth2 = qp->s_psn++ & IPATH_PSN_MASK;
if ((int)(qp->s_psn - qp->s_next_psn) > 0)
qp->s_next_psn = qp->s_psn;
ss = &qp->s_sge;
len = qp->s_len;
if (len > pmtu) {
- /*
- * Request an ACK every 1/2 MB to avoid retransmit
- * timeouts.
- */
- if (((wqe->length - len) % (512 * 1024)) == 0)
- bth2 |= 1 << 31;
len = pmtu;
break;
}
@@ -452,7 +466,7 @@ static inline int ipath_make_rc_req(struct ipath_qp *qp,
* See ipath_restart_rc().
*/
ipath_init_restart(qp, wqe);
- len = ((qp->s_psn - wqe->psn) & IPS_PSN_MASK) * pmtu;
+ len = ((qp->s_psn - wqe->psn) & IPATH_PSN_MASK) * pmtu;
ohdr->u.rc.reth.vaddr =
cpu_to_be64(wqe->wr.wr.rdma.remote_addr + len);
ohdr->u.rc.reth.rkey =
@@ -460,7 +474,7 @@ static inline int ipath_make_rc_req(struct ipath_qp *qp,
ohdr->u.rc.reth.length = cpu_to_be32(qp->s_len);
qp->s_state = OP(RDMA_READ_REQUEST);
hwords += sizeof(ohdr->u.rc.reth) / 4;
- bth2 = qp->s_psn++ & IPS_PSN_MASK;
+ bth2 = qp->s_psn++ & IPATH_PSN_MASK;
if ((int)(qp->s_psn - qp->s_next_psn) > 0)
qp->s_next_psn = qp->s_psn;
ss = NULL;
@@ -484,6 +498,8 @@ static inline int ipath_make_rc_req(struct ipath_qp *qp,
*/
goto done;
}
+ if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT - 1) >= 0)
+ bth2 |= 1 << 31; /* Request ACK. */
qp->s_len -= len;
qp->s_hdrwords = hwords;
qp->s_cur_sge = ss;
@@ -496,189 +512,169 @@ done:
return 0;
}
-static inline void ipath_make_rc_grh(struct ipath_qp *qp,
- struct ib_global_route *grh,
- u32 nwords)
-{
- struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
-
- /* GRH header size in 32-bit words. */
- qp->s_hdrwords += 10;
- qp->s_hdr.u.l.grh.version_tclass_flow =
- cpu_to_be32((6 << 28) |
- (grh->traffic_class << 20) |
- grh->flow_label);
- qp->s_hdr.u.l.grh.paylen =
- cpu_to_be16(((qp->s_hdrwords - 12) + nwords +
- SIZE_OF_CRC) << 2);
- /* next_hdr is defined by C8-7 in ch. 8.4.1 */
- qp->s_hdr.u.l.grh.next_hdr = 0x1B;
- qp->s_hdr.u.l.grh.hop_limit = grh->hop_limit;
- /* The SGID is 32-bit aligned. */
- qp->s_hdr.u.l.grh.sgid.global.subnet_prefix = dev->gid_prefix;
- qp->s_hdr.u.l.grh.sgid.global.interface_id =
- ipath_layer_get_guid(dev->dd);
- qp->s_hdr.u.l.grh.dgid = grh->dgid;
-}
-
/**
- * ipath_do_rc_send - perform a send on an RC QP
- * @data: contains a pointer to the QP
+ * send_rc_ack - Construct an ACK packet and send it
+ * @qp: a pointer to the QP
*
- * Process entries in the send work queue until credit or queue is
- * exhausted. Only allow one CPU to send a packet per QP (tasklet).
- * Otherwise, after we drop the QP s_lock, two threads could send
- * packets out of order.
+ * This is called from ipath_rc_rcv() and only uses the receive
+ * side QP state.
+ * Note that RDMA reads are handled in the send side QP state and tasklet.
*/
-void ipath_do_rc_send(unsigned long data)
+static void send_rc_ack(struct ipath_qp *qp)
{
- struct ipath_qp *qp = (struct ipath_qp *)data;
struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
- unsigned long flags;
u16 lrh0;
- u32 nwords;
- u32 extra_bytes;
u32 bth0;
- u32 bth2;
- u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
+ u32 hwords;
+ struct ipath_ib_header hdr;
struct ipath_other_headers *ohdr;
- if (test_and_set_bit(IPATH_S_BUSY, &qp->s_flags))
- goto bail;
-
- if (unlikely(qp->remote_ah_attr.dlid ==
- ipath_layer_get_lid(dev->dd))) {
- struct ib_wc wc;
-
- /*
- * Pass in an uninitialized ib_wc to be consistent with
- * other places where ipath_ruc_loopback() is called.
- */
- ipath_ruc_loopback(qp, &wc);
- goto clear;
- }
-
- ohdr = &qp->s_hdr.u.oth;
- if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
- ohdr = &qp->s_hdr.u.l.oth;
-
-again:
- /* Check for a constructed packet to be sent. */
- if (qp->s_hdrwords != 0) {
- /*
- * If no PIO bufs are available, return. An interrupt will
- * call ipath_ib_piobufavail() when one is available.
- */
- _VERBS_INFO("h %u %p\n", qp->s_hdrwords, &qp->s_hdr);
- _VERBS_INFO("d %u %p %u %p %u %u %u %u\n", qp->s_cur_size,
- qp->s_cur_sge->sg_list,
- qp->s_cur_sge->num_sge,
- qp->s_cur_sge->sge.vaddr,
- qp->s_cur_sge->sge.sge_length,
- qp->s_cur_sge->sge.length,
- qp->s_cur_sge->sge.m,
- qp->s_cur_sge->sge.n);
- if (ipath_verbs_send(dev->dd, qp->s_hdrwords,
- (u32 *) &qp->s_hdr, qp->s_cur_size,
- qp->s_cur_sge)) {
- ipath_no_bufs_available(qp, dev);
- goto bail;
- }
- dev->n_unicast_xmit++;
- /* Record that we sent the packet and s_hdr is empty. */
- qp->s_hdrwords = 0;
- }
-
- /*
- * The lock is needed to synchronize between setting
- * qp->s_ack_state, resend timer, and post_send().
- */
- spin_lock_irqsave(&qp->s_lock, flags);
-
- /* Sending responses has higher priority over sending requests. */
- if (qp->s_ack_state != OP(ACKNOWLEDGE) &&
- (bth0 = ipath_make_rc_ack(qp, ohdr, pmtu)) != 0)
- bth2 = qp->s_ack_psn++ & IPS_PSN_MASK;
- else if (!ipath_make_rc_req(qp, ohdr, pmtu, &bth0, &bth2))
- goto done;
-
- spin_unlock_irqrestore(&qp->s_lock, flags);
-
/* Construct the header. */
- extra_bytes = (4 - qp->s_cur_size) & 3;
- nwords = (qp->s_cur_size + extra_bytes) >> 2;
- lrh0 = IPS_LRH_BTH;
+ ohdr = &hdr.u.oth;
+ lrh0 = IPATH_LRH_BTH;
+ /* header size in 32-bit words LRH+BTH+AETH = (8+12+4)/4. */
+ hwords = 6;
if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
- ipath_make_rc_grh(qp, &qp->remote_ah_attr.grh, nwords);
- lrh0 = IPS_LRH_GRH;
+ hwords += ipath_make_grh(dev, &hdr.u.l.grh,
+ &qp->remote_ah_attr.grh,
+ hwords, 0);
+ ohdr = &hdr.u.l.oth;
+ lrh0 = IPATH_LRH_GRH;
}
+ /* read pkey_index w/o lock (its atomic) */
+ bth0 = ipath_get_pkey(dev->dd, qp->s_pkey_index);
+ if (qp->r_nak_state)
+ ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) |
+ (qp->r_nak_state <<
+ IPATH_AETH_CREDIT_SHIFT));
+ else
+ ohdr->u.aeth = ipath_compute_aeth(qp);
+ if (qp->r_ack_state >= OP(COMPARE_SWAP)) {
+ bth0 |= OP(ATOMIC_ACKNOWLEDGE) << 24;
+ ohdr->u.at.atomic_ack_eth = cpu_to_be64(qp->r_atomic_data);
+ hwords += sizeof(ohdr->u.at.atomic_ack_eth) / 4;
+ } else
+ bth0 |= OP(ACKNOWLEDGE) << 24;
lrh0 |= qp->remote_ah_attr.sl << 4;
- qp->s_hdr.lrh[0] = cpu_to_be16(lrh0);
- qp->s_hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
- qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords +
- SIZE_OF_CRC);
- qp->s_hdr.lrh[3] = cpu_to_be16(ipath_layer_get_lid(dev->dd));
- bth0 |= ipath_layer_get_pkey(dev->dd, qp->s_pkey_index);
- bth0 |= extra_bytes << 20;
+ hdr.lrh[0] = cpu_to_be16(lrh0);
+ hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
+ hdr.lrh[2] = cpu_to_be16(hwords + SIZE_OF_CRC);
+ hdr.lrh[3] = cpu_to_be16(dev->dd->ipath_lid);
ohdr->bth[0] = cpu_to_be32(bth0);
ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
- ohdr->bth[2] = cpu_to_be32(bth2);
+ ohdr->bth[2] = cpu_to_be32(qp->r_ack_psn & IPATH_PSN_MASK);
- /* Check for more work to do. */
- goto again;
+ /*
+ * If we can send the ACK, clear the ACK state.
+ */
+ if (ipath_verbs_send(dev->dd, hwords, (u32 *) &hdr, 0, NULL) == 0) {
+ qp->r_ack_state = OP(ACKNOWLEDGE);
+ dev->n_unicast_xmit++;
+ } else {
+ /*
+ * We are out of PIO buffers at the moment.
+ * Pass responsibility for sending the ACK to the
+ * send tasklet so that when a PIO buffer becomes
+ * available, the ACK is sent ahead of other outgoing
+ * packets.
+ */
+ dev->n_rc_qacks++;
+ spin_lock_irq(&qp->s_lock);
+ /* Don't coalesce if a RDMA read or atomic is pending. */
+ if (qp->s_ack_state == OP(ACKNOWLEDGE) ||
+ qp->s_ack_state < OP(RDMA_READ_REQUEST)) {
+ qp->s_ack_state = qp->r_ack_state;
+ qp->s_nak_state = qp->r_nak_state;
+ qp->s_ack_psn = qp->r_ack_psn;
+ qp->r_ack_state = OP(ACKNOWLEDGE);
+ }
+ spin_unlock_irq(&qp->s_lock);
-done:
- spin_unlock_irqrestore(&qp->s_lock, flags);
-clear:
- clear_bit(IPATH_S_BUSY, &qp->s_flags);
-bail:
- return;
+ /* Call ipath_do_rc_send() in another thread. */
+ tasklet_hi_schedule(&qp->s_task);
+ }
}
-static void send_rc_ack(struct ipath_qp *qp)
+/**
+ * reset_psn - reset the QP state to send starting from PSN
+ * @qp: the QP
+ * @psn: the packet sequence number to restart at
+ *
+ * This is called from ipath_rc_rcv() to process an incoming RC ACK
+ * for the given QP.
+ * Called at interrupt level with the QP s_lock held.
+ */
+static void reset_psn(struct ipath_qp *qp, u32 psn)
{
- struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
- u16 lrh0;
- u32 bth0;
- struct ipath_other_headers *ohdr;
+ u32 n = qp->s_last;
+ struct ipath_swqe *wqe = get_swqe_ptr(qp, n);
+ u32 opcode;
- /* Construct the header. */
- ohdr = &qp->s_hdr.u.oth;
- lrh0 = IPS_LRH_BTH;
- /* header size in 32-bit words LRH+BTH+AETH = (8+12+4)/4. */
- qp->s_hdrwords = 6;
- if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
- ipath_make_rc_grh(qp, &qp->remote_ah_attr.grh, 0);
- ohdr = &qp->s_hdr.u.l.oth;
- lrh0 = IPS_LRH_GRH;
+ qp->s_cur = n;
+
+ /*
+ * If we are starting the request from the beginning,
+ * let the normal send code handle initialization.
+ */
+ if (ipath_cmp24(psn, wqe->psn) <= 0) {
+ qp->s_state = OP(SEND_LAST);
+ goto done;
}
- bth0 = ipath_layer_get_pkey(dev->dd, qp->s_pkey_index);
- ohdr->u.aeth = ipath_compute_aeth(qp);
- if (qp->s_ack_state >= OP(COMPARE_SWAP)) {
- bth0 |= IB_OPCODE_ATOMIC_ACKNOWLEDGE << 24;
- ohdr->u.at.atomic_ack_eth = cpu_to_be64(qp->s_ack_atomic);
- qp->s_hdrwords += sizeof(ohdr->u.at.atomic_ack_eth) / 4;
+
+ /* Find the work request opcode corresponding to the given PSN. */
+ opcode = wqe->wr.opcode;
+ for (;;) {
+ int diff;
+
+ if (++n == qp->s_size)
+ n = 0;
+ if (n == qp->s_tail)
+ break;
+ wqe = get_swqe_ptr(qp, n);
+ diff = ipath_cmp24(psn, wqe->psn);
+ if (diff < 0)
+ break;
+ qp->s_cur = n;
+ /*
+ * If we are starting the request from the beginning,
+ * let the normal send code handle initialization.
+ */
+ if (diff == 0) {
+ qp->s_state = OP(SEND_LAST);
+ goto done;
+ }
+ opcode = wqe->wr.opcode;
}
- else
- bth0 |= OP(ACKNOWLEDGE) << 24;
- lrh0 |= qp->remote_ah_attr.sl << 4;
- qp->s_hdr.lrh[0] = cpu_to_be16(lrh0);
- qp->s_hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
- qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + SIZE_OF_CRC);
- qp->s_hdr.lrh[3] = cpu_to_be16(ipath_layer_get_lid(dev->dd));
- ohdr->bth[0] = cpu_to_be32(bth0);
- ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
- ohdr->bth[2] = cpu_to_be32(qp->s_ack_psn & IPS_PSN_MASK);
/*
- * If we can send the ACK, clear the ACK state.
+ * Set the state to restart in the middle of a request.
+ * Don't change the s_sge, s_cur_sge, or s_cur_size.
+ * See ipath_do_rc_send().
*/
- if (ipath_verbs_send(dev->dd, qp->s_hdrwords, (u32 *) &qp->s_hdr,
- 0, NULL) == 0) {
- qp->s_ack_state = OP(ACKNOWLEDGE);
- dev->n_rc_qacks++;
- dev->n_unicast_xmit++;
+ switch (opcode) {
+ case IB_WR_SEND:
+ case IB_WR_SEND_WITH_IMM:
+ qp->s_state = OP(RDMA_READ_RESPONSE_FIRST);
+ break;
+
+ case IB_WR_RDMA_WRITE:
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ qp->s_state = OP(RDMA_READ_RESPONSE_LAST);
+ break;
+
+ case IB_WR_RDMA_READ:
+ qp->s_state = OP(RDMA_READ_RESPONSE_MIDDLE);
+ break;
+
+ default:
+ /*
+ * This case shouldn't happen since it's only
+ * one PSN per request.
+ */
+ qp->s_state = OP(SEND_LAST);
}
+done:
+ qp->s_psn = psn;
}
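reset_psn() walks the send queue with ipath_cmp24(), which orders packet sequence numbers in a circular 24-bit space so that retransmission can resume at the right WQE. A self-contained sketch of such a comparison; psn_cmp24() is a hypothetical stand-in, not the driver's helper:

#include <stdint.h>

/* Compare two 24-bit packet sequence numbers in circular sequence space:
 * returns <0, 0 or >0 when a is before, equal to, or after b. */
static int psn_cmp24(uint32_t a, uint32_t b)
{
	int d = (int)((a - b) & 0xFFFFFF);	/* distance modulo 2^24 */

	if (d >= 0x800000)			/* sign-extend bit 23 */
		d -= 0x1000000;
	return d;
}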
/**
@@ -687,13 +683,12 @@ static void send_rc_ack(struct ipath_qp *qp)
* @psn: packet sequence number for the request
* @wc: the work completion request
*
- * The QP s_lock should be held.
+ * The QP s_lock should be held and interrupts disabled.
*/
void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc)
{
struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
struct ipath_ibdev *dev;
- u32 n;
/*
* If there are no requests pending, we are done.
@@ -735,62 +730,7 @@ void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc)
else
dev->n_rc_resends += (int)qp->s_psn - (int)psn;
- /*
- * If we are starting the request from the beginning, let the normal
- * send code handle initialization.
- */
- qp->s_cur = qp->s_last;
- if (ipath_cmp24(psn, wqe->psn) <= 0) {
- qp->s_state = OP(SEND_LAST);
- qp->s_psn = wqe->psn;
- } else {
- n = qp->s_cur;
- for (;;) {
- if (++n == qp->s_size)
- n = 0;
- if (n == qp->s_tail) {
- if (ipath_cmp24(psn, qp->s_next_psn) >= 0) {
- qp->s_cur = n;
- wqe = get_swqe_ptr(qp, n);
- }
- break;
- }
- wqe = get_swqe_ptr(qp, n);
- if (ipath_cmp24(psn, wqe->psn) < 0)
- break;
- qp->s_cur = n;
- }
- qp->s_psn = psn;
-
- /*
- * Reset the state to restart in the middle of a request.
- * Don't change the s_sge, s_cur_sge, or s_cur_size.
- * See ipath_do_rc_send().
- */
- switch (wqe->wr.opcode) {
- case IB_WR_SEND:
- case IB_WR_SEND_WITH_IMM:
- qp->s_state = OP(RDMA_READ_RESPONSE_FIRST);
- break;
-
- case IB_WR_RDMA_WRITE:
- case IB_WR_RDMA_WRITE_WITH_IMM:
- qp->s_state = OP(RDMA_READ_RESPONSE_LAST);
- break;
-
- case IB_WR_RDMA_READ:
- qp->s_state =
- OP(RDMA_READ_RESPONSE_MIDDLE);
- break;
-
- default:
- /*
- * This case shouldn't happen since its only
- * one PSN per req.
- */
- qp->s_state = OP(SEND_LAST);
- }
- }
+ reset_psn(qp, psn);
done:
tasklet_hi_schedule(&qp->s_task);
@@ -799,66 +739,13 @@ bail:
return;
}
-/**
- * reset_psn - reset the QP state to send starting from PSN
- * @qp: the QP
- * @psn: the packet sequence number to restart at
- *
- * This is called from ipath_rc_rcv() to process an incoming RC ACK
- * for the given QP.
- * Called at interrupt level with the QP s_lock held.
- */
-static void reset_psn(struct ipath_qp *qp, u32 psn)
+static inline void update_last_psn(struct ipath_qp *qp, u32 psn)
{
- struct ipath_swqe *wqe;
- u32 n;
-
- n = qp->s_cur;
- wqe = get_swqe_ptr(qp, n);
- for (;;) {
- if (++n == qp->s_size)
- n = 0;
- if (n == qp->s_tail) {
- if (ipath_cmp24(psn, qp->s_next_psn) >= 0) {
- qp->s_cur = n;
- wqe = get_swqe_ptr(qp, n);
- }
- break;
- }
- wqe = get_swqe_ptr(qp, n);
- if (ipath_cmp24(psn, wqe->psn) < 0)
- break;
- qp->s_cur = n;
- }
- qp->s_psn = psn;
-
- /*
- * Set the state to restart in the middle of a
- * request. Don't change the s_sge, s_cur_sge, or
- * s_cur_size. See ipath_do_rc_send().
- */
- switch (wqe->wr.opcode) {
- case IB_WR_SEND:
- case IB_WR_SEND_WITH_IMM:
- qp->s_state = OP(RDMA_READ_RESPONSE_FIRST);
- break;
-
- case IB_WR_RDMA_WRITE:
- case IB_WR_RDMA_WRITE_WITH_IMM:
- qp->s_state = OP(RDMA_READ_RESPONSE_LAST);
- break;
-
- case IB_WR_RDMA_READ:
- qp->s_state = OP(RDMA_READ_RESPONSE_MIDDLE);
- break;
-
- default:
- /*
- * This case shouldn't happen since its only
- * one PSN per req.
- */
- qp->s_state = OP(SEND_LAST);
+ if (qp->s_wait_credit) {
+ qp->s_wait_credit = 0;
+ tasklet_hi_schedule(&qp->s_task);
}
+ qp->s_last_psn = psn;
}
/**
@@ -867,9 +754,9 @@ static void reset_psn(struct ipath_qp *qp, u32 psn)
* @psn: the packet sequence number of the ACK
* @opcode: the opcode of the request that resulted in the ACK
*
- * This is called from ipath_rc_rcv() to process an incoming RC ACK
+ * This is called from ipath_rc_rcv_resp() to process an incoming RC ACK
* for the given QP.
- * Called at interrupt level with the QP s_lock held.
+ * Called at interrupt level with the QP s_lock held and interrupts disabled.
* Returns 1 if OK, 0 if current operation should be aborted (NAK).
*/
static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
@@ -929,7 +816,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
* The last valid PSN seen is the previous
* request's.
*/
- qp->s_last_psn = wqe->psn - 1;
+ update_last_psn(qp, wqe->psn - 1);
/* Retry this request. */
ipath_restart_rc(qp, wqe->psn, &wc);
/*
@@ -988,7 +875,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
ipath_get_credit(qp, aeth);
qp->s_rnr_retry = qp->s_rnr_retry_cnt;
qp->s_retry = qp->s_retry_cnt;
- qp->s_last_psn = psn;
+ update_last_psn(qp, psn);
ret = 1;
goto bail;
@@ -1006,35 +893,25 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
if (qp->s_last == qp->s_tail)
goto bail;
- /* The last valid PSN seen is the previous request's. */
- qp->s_last_psn = wqe->psn - 1;
+ /* The last valid PSN is the previous PSN. */
+ update_last_psn(qp, psn - 1);
dev->n_rc_resends += (int)qp->s_psn - (int)psn;
- /*
- * If we are starting the request from the beginning, let
- * the normal send code handle initialization.
- */
- qp->s_cur = qp->s_last;
- wqe = get_swqe_ptr(qp, qp->s_cur);
- if (ipath_cmp24(psn, wqe->psn) <= 0) {
- qp->s_state = OP(SEND_LAST);
- qp->s_psn = wqe->psn;
- } else
- reset_psn(qp, psn);
+ reset_psn(qp, psn);
qp->s_rnr_timeout =
- ib_ipath_rnr_table[(aeth >> IPS_AETH_CREDIT_SHIFT) &
- IPS_AETH_CREDIT_MASK];
+ ib_ipath_rnr_table[(aeth >> IPATH_AETH_CREDIT_SHIFT) &
+ IPATH_AETH_CREDIT_MASK];
ipath_insert_rnr_queue(qp);
goto bail;
case 3: /* NAK */
/* The last valid PSN seen is the previous request's. */
if (qp->s_last != qp->s_tail)
- qp->s_last_psn = wqe->psn - 1;
- switch ((aeth >> IPS_AETH_CREDIT_SHIFT) &
- IPS_AETH_CREDIT_MASK) {
+ update_last_psn(qp, wqe->psn - 1);
+ switch ((aeth >> IPATH_AETH_CREDIT_SHIFT) &
+ IPATH_AETH_CREDIT_MASK) {
case 0: /* PSN sequence error */
dev->n_seq_naks++;
/*
@@ -1182,32 +1059,33 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
goto ack_done;
}
rdma_read:
- if (unlikely(qp->s_state != OP(RDMA_READ_REQUEST)))
- goto ack_done;
- if (unlikely(tlen != (hdrsize + pmtu + 4)))
- goto ack_done;
- if (unlikely(pmtu >= qp->s_len))
- goto ack_done;
- /* We got a response so update the timeout. */
- if (unlikely(qp->s_last == qp->s_tail ||
- get_swqe_ptr(qp, qp->s_last)->wr.opcode !=
- IB_WR_RDMA_READ))
- goto ack_done;
- spin_lock(&dev->pending_lock);
- if (qp->s_rnr_timeout == 0 && !list_empty(&qp->timerwait))
- list_move_tail(&qp->timerwait,
- &dev->pending[dev->pending_index]);
- spin_unlock(&dev->pending_lock);
- /*
- * Update the RDMA receive state but do the copy w/o holding the
- * locks and blocking interrupts. XXX Yet another place that
- * affects relaxed RDMA order since we don't want s_sge modified.
- */
- qp->s_len -= pmtu;
- qp->s_last_psn = psn;
- spin_unlock_irqrestore(&qp->s_lock, flags);
- ipath_copy_sge(&qp->s_sge, data, pmtu);
- goto bail;
+ if (unlikely(qp->s_state != OP(RDMA_READ_REQUEST)))
+ goto ack_done;
+ if (unlikely(tlen != (hdrsize + pmtu + 4)))
+ goto ack_done;
+ if (unlikely(pmtu >= qp->s_len))
+ goto ack_done;
+ /* We got a response so update the timeout. */
+ if (unlikely(qp->s_last == qp->s_tail ||
+ get_swqe_ptr(qp, qp->s_last)->wr.opcode !=
+ IB_WR_RDMA_READ))
+ goto ack_done;
+ spin_lock(&dev->pending_lock);
+ if (qp->s_rnr_timeout == 0 && !list_empty(&qp->timerwait))
+ list_move_tail(&qp->timerwait,
+ &dev->pending[dev->pending_index]);
+ spin_unlock(&dev->pending_lock);
+ /*
+ * Update the RDMA receive state but do the copy w/o
+ * holding the locks and blocking interrupts.
+ * XXX Yet another place that affects relaxed RDMA order
+ * since we don't want s_sge modified.
+ */
+ qp->s_len -= pmtu;
+ update_last_psn(qp, psn);
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ ipath_copy_sge(&qp->s_sge, data, pmtu);
+ goto bail;
case OP(RDMA_READ_RESPONSE_LAST):
/* ACKs READ req. */
@@ -1230,18 +1108,12 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
* ICRC (4).
*/
if (unlikely(tlen <= (hdrsize + pad + 8))) {
- /*
- * XXX Need to generate an error CQ
- * entry.
- */
+ /* XXX Need to generate an error CQ entry. */
goto ack_done;
}
tlen -= hdrsize + pad + 8;
if (unlikely(tlen != qp->s_len)) {
- /*
- * XXX Need to generate an error CQ
- * entry.
- */
+ /* XXX Need to generate an error CQ entry. */
goto ack_done;
}
if (!header_in_data)
@@ -1254,9 +1126,12 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
if (do_rc_ack(qp, aeth, psn, OP(RDMA_READ_RESPONSE_LAST))) {
/*
 * Change the state so we continue
- * processing new requests.
+ * processing new requests and wake up the
+ * tasklet if there are posted sends.
*/
qp->s_state = OP(SEND_LAST);
+ if (qp->s_tail != qp->s_head)
+ tasklet_hi_schedule(&qp->s_task);
}
goto ack_done;
}
@@ -1302,18 +1177,16 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
* Don't queue the NAK if a RDMA read, atomic, or
* NAK is pending though.
*/
- spin_lock(&qp->s_lock);
- if ((qp->s_ack_state >= OP(RDMA_READ_REQUEST) &&
- qp->s_ack_state != IB_OPCODE_ACKNOWLEDGE) ||
- qp->s_nak_state != 0) {
- spin_unlock(&qp->s_lock);
+ if (qp->s_ack_state != OP(ACKNOWLEDGE) ||
+ qp->r_nak_state != 0)
goto done;
+ if (qp->r_ack_state < OP(COMPARE_SWAP)) {
+ qp->r_ack_state = OP(SEND_ONLY);
+ qp->r_nak_state = IB_NAK_PSN_ERROR;
+ /* Use the expected PSN. */
+ qp->r_ack_psn = qp->r_psn;
}
- qp->s_ack_state = OP(SEND_ONLY);
- qp->s_nak_state = IB_NAK_PSN_ERROR;
- /* Use the expected PSN. */
- qp->s_ack_psn = qp->r_psn;
- goto resched;
+ goto send_ack;
}
/*
@@ -1327,27 +1200,7 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
* send the earliest so that RDMA reads can be restarted at
* the requester's expected PSN.
*/
- spin_lock(&qp->s_lock);
- if (qp->s_ack_state != IB_OPCODE_ACKNOWLEDGE &&
- ipath_cmp24(psn, qp->s_ack_psn) >= 0) {
- if (qp->s_ack_state < IB_OPCODE_RDMA_READ_REQUEST)
- qp->s_ack_psn = psn;
- spin_unlock(&qp->s_lock);
- goto done;
- }
- switch (opcode) {
- case OP(RDMA_READ_REQUEST):
- /*
- * We have to be careful to not change s_rdma_sge
- * while ipath_do_rc_send() is using it and not
- * holding the s_lock.
- */
- if (qp->s_ack_state != OP(ACKNOWLEDGE) &&
- qp->s_ack_state >= IB_OPCODE_RDMA_READ_REQUEST) {
- spin_unlock(&qp->s_lock);
- dev->n_rdma_dup_busy++;
- goto done;
- }
+ if (opcode == OP(RDMA_READ_REQUEST)) {
/* RETH comes after BTH */
if (!header_in_data)
reth = &ohdr->u.rc.reth;
@@ -1355,6 +1208,22 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
reth = (struct ib_reth *)data;
data += sizeof(*reth);
}
+ /*
+ * If we receive a duplicate RDMA request, it means the
+ * requester saw a sequence error and needs to restart
+ * from an earlier point. We can abort the current
+ * RDMA read send in that case.
+ */
+ spin_lock_irq(&qp->s_lock);
+ if (qp->s_ack_state != OP(ACKNOWLEDGE) &&
+ (qp->s_hdrwords || ipath_cmp24(psn, qp->s_ack_psn) >= 0)) {
+ /*
+ * We are already sending earlier requested data.
+ * Don't abort it to send later out of sequence data.
+ */
+ spin_unlock_irq(&qp->s_lock);
+ goto done;
+ }
qp->s_rdma_len = be32_to_cpu(reth->length);
if (qp->s_rdma_len != 0) {
u32 rkey = be32_to_cpu(reth->rkey);
@@ -1365,11 +1234,13 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
* Address range must be a subset of the original
* request and start on pmtu boundaries.
*/
- ok = ipath_rkey_ok(dev, &qp->s_rdma_sge,
+ ok = ipath_rkey_ok(qp, &qp->s_rdma_sge,
qp->s_rdma_len, vaddr, rkey,
IB_ACCESS_REMOTE_READ);
- if (unlikely(!ok))
+ if (unlikely(!ok)) {
+ spin_unlock_irq(&qp->s_lock);
goto done;
+ }
} else {
qp->s_rdma_sge.sg_list = NULL;
qp->s_rdma_sge.num_sge = 0;
@@ -1378,31 +1249,58 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
qp->s_rdma_sge.sge.length = 0;
qp->s_rdma_sge.sge.sge_length = 0;
}
- break;
+ qp->s_ack_state = opcode;
+ qp->s_ack_psn = psn;
+ spin_unlock_irq(&qp->s_lock);
+ tasklet_hi_schedule(&qp->s_task);
+ goto send_ack;
+ }
+
+ /*
+ * A pending RDMA read will ACK anything before it so
+ * ignore earlier duplicate requests.
+ */
+ if (qp->s_ack_state != OP(ACKNOWLEDGE))
+ goto done;
+ /*
+ * If an ACK is pending, don't replace the pending ACK
+ * with an earlier one since the later one will ACK the earlier.
+ * Also, if we already have a pending atomic, send it.
+ */
+ if (qp->r_ack_state != OP(ACKNOWLEDGE) &&
+ (ipath_cmp24(psn, qp->r_ack_psn) <= 0 ||
+ qp->r_ack_state >= OP(COMPARE_SWAP)))
+ goto send_ack;
+ switch (opcode) {
case OP(COMPARE_SWAP):
case OP(FETCH_ADD):
/*
- * Check for the PSN of the last atomic operations
+ * Check for the PSN of the last atomic operation
* performed and resend the result if found.
*/
- if ((psn & IPS_PSN_MASK) != qp->r_atomic_psn) {
- spin_unlock(&qp->s_lock);
+ if ((psn & IPATH_PSN_MASK) != qp->r_atomic_psn)
goto done;
- }
- qp->s_ack_atomic = qp->r_atomic_data;
break;
}
- qp->s_ack_state = opcode;
- qp->s_nak_state = 0;
- qp->s_ack_psn = psn;
-resched:
+ qp->r_ack_state = opcode;
+ qp->r_nak_state = 0;
+ qp->r_ack_psn = psn;
+send_ack:
return 0;
done:
return 1;
}
+static void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err)
+{
+ spin_lock_irq(&qp->s_lock);
+ qp->state = IB_QPS_ERR;
+ ipath_error_qp(qp, err);
+ spin_unlock_irq(&qp->s_lock);
+}
+
/**
* ipath_rc_rcv - process an incoming RC packet
* @dev: the device this packet came in on
@@ -1424,13 +1322,16 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
u32 hdrsize;
u32 psn;
u32 pad;
- unsigned long flags;
struct ib_wc wc;
u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
int diff;
struct ib_reth *reth;
int header_in_data;
+ /* Validate the SLID. See Ch. 9.6.1.5 */
+ if (unlikely(be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid))
+ goto done;
+
/* Check for GRH */
if (!has_grh) {
ohdr = &hdr->u.oth;
@@ -1445,19 +1346,13 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
* the eager header buffer size to 56 bytes so the last 4
* bytes of the BTH header (PSN) is in the data buffer.
*/
- header_in_data =
- ipath_layer_get_rcvhdrentsize(dev->dd) == 16;
+ header_in_data = dev->dd->ipath_rcvhdrentsize == 16;
if (header_in_data) {
psn = be32_to_cpu(((__be32 *) data)[0]);
data += sizeof(__be32);
} else
psn = be32_to_cpu(ohdr->bth[2]);
}
- /*
- * The opcode is in the low byte when its in network order
- * (top byte when in host order).
- */
- opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
/*
* Process responses (ACKs) before anything else. Note that the
@@ -1465,22 +1360,21 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
* queue rather than the expected receive packet sequence number.
* In other words, this QP is the requester.
*/
+ opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) &&
opcode <= OP(ATOMIC_ACKNOWLEDGE)) {
ipath_rc_rcv_resp(dev, ohdr, data, tlen, qp, opcode, psn,
hdrsize, pmtu, header_in_data);
- goto bail;
+ goto done;
}
- spin_lock_irqsave(&qp->r_rq.lock, flags);
-
/* Compute 24 bits worth of difference. */
diff = ipath_cmp24(psn, qp->r_psn);
if (unlikely(diff)) {
if (ipath_rc_rcv_error(dev, ohdr, data, qp, opcode,
psn, diff, header_in_data))
goto done;
- goto resched;
+ goto send_ack;
}
/* Check for opcode sequence errors. */
@@ -1492,22 +1386,18 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
opcode == OP(SEND_LAST_WITH_IMMEDIATE))
break;
nack_inv:
- /*
- * A NAK will ACK earlier sends and RDMA writes. Don't queue the
- * NAK if a RDMA read, atomic, or NAK is pending though.
- */
- spin_lock(&qp->s_lock);
- if (qp->s_ack_state >= OP(RDMA_READ_REQUEST) &&
- qp->s_ack_state != IB_OPCODE_ACKNOWLEDGE) {
- spin_unlock(&qp->s_lock);
- goto done;
- }
- /* XXX Flush WQEs */
- qp->state = IB_QPS_ERR;
- qp->s_ack_state = OP(SEND_ONLY);
- qp->s_nak_state = IB_NAK_INVALID_REQUEST;
- qp->s_ack_psn = qp->r_psn;
- goto resched;
+ /*
+ * A NAK will ACK earlier sends and RDMA writes.
+ * Don't queue the NAK if an RDMA read, atomic, or NAK
+ * is pending though.
+ */
+ if (qp->r_ack_state >= OP(COMPARE_SWAP))
+ goto send_ack;
+ ipath_rc_error(qp, IB_WC_REM_INV_REQ_ERR);
+ qp->r_ack_state = OP(SEND_ONLY);
+ qp->r_nak_state = IB_NAK_INVALID_REQUEST;
+ qp->r_ack_psn = qp->r_psn;
+ goto send_ack;
case OP(RDMA_WRITE_FIRST):
case OP(RDMA_WRITE_MIDDLE):
@@ -1517,20 +1407,6 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
break;
goto nack_inv;
- case OP(RDMA_READ_REQUEST):
- case OP(COMPARE_SWAP):
- case OP(FETCH_ADD):
- /*
- * Drop all new requests until a response has been sent. A
- * new request then ACKs the RDMA response we sent. Relaxed
- * ordering would allow new requests to be processed but we
- * would need to keep a queue of rwqe's for all that are in
- * progress. Note that we can't RNR NAK this request since
- * the RDMA READ or atomic response is already queued to be
- * sent (unless we implement a response send queue).
- */
- goto done;
-
default:
if (opcode == OP(SEND_MIDDLE) ||
opcode == OP(SEND_LAST) ||
@@ -1539,6 +1415,11 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
opcode == OP(RDMA_WRITE_LAST) ||
opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
goto nack_inv;
+ /*
+ * Note that it is up to the requester to not send a new
+ * RDMA read or atomic operation before receiving an ACK
+ * for the previous operation.
+ */
break;
}
@@ -1555,17 +1436,12 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
* Don't queue the NAK if a RDMA read or atomic
* is pending though.
*/
- spin_lock(&qp->s_lock);
- if (qp->s_ack_state >=
- OP(RDMA_READ_REQUEST) &&
- qp->s_ack_state != IB_OPCODE_ACKNOWLEDGE) {
- spin_unlock(&qp->s_lock);
- goto done;
- }
- qp->s_ack_state = OP(SEND_ONLY);
- qp->s_nak_state = IB_RNR_NAK | qp->s_min_rnr_timer;
- qp->s_ack_psn = qp->r_psn;
- goto resched;
+ if (qp->r_ack_state >= OP(COMPARE_SWAP))
+ goto send_ack;
+ qp->r_ack_state = OP(SEND_ONLY);
+ qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer;
+ qp->r_ack_psn = qp->r_psn;
+ goto send_ack;
}
qp->r_rcv_len = 0;
/* FALLTHROUGH */
@@ -1622,10 +1498,10 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
if (unlikely(wc.byte_len > qp->r_len))
goto nack_inv;
ipath_copy_sge(&qp->r_sge, data, tlen);
- atomic_inc(&qp->msn);
- if (opcode == OP(RDMA_WRITE_LAST) ||
- opcode == OP(RDMA_WRITE_ONLY))
+ qp->r_msn++;
+ if (!qp->r_wrid_valid)
break;
+ qp->r_wrid_valid = 0;
wc.wr_id = qp->r_wr_id;
wc.status = IB_WC_SUCCESS;
wc.opcode = IB_WC_RECV;
@@ -1663,32 +1539,11 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
int ok;
/* Check rkey & NAK */
- ok = ipath_rkey_ok(dev, &qp->r_sge,
+ ok = ipath_rkey_ok(qp, &qp->r_sge,
qp->r_len, vaddr, rkey,
IB_ACCESS_REMOTE_WRITE);
- if (unlikely(!ok)) {
- nack_acc:
- /*
- * A NAK will ACK earlier sends and RDMA
- * writes. Don't queue the NAK if a RDMA
- * read, atomic, or NAK is pending though.
- */
- spin_lock(&qp->s_lock);
- if (qp->s_ack_state >=
- OP(RDMA_READ_REQUEST) &&
- qp->s_ack_state !=
- IB_OPCODE_ACKNOWLEDGE) {
- spin_unlock(&qp->s_lock);
- goto done;
- }
- /* XXX Flush WQEs */
- qp->state = IB_QPS_ERR;
- qp->s_ack_state = OP(RDMA_WRITE_ONLY);
- qp->s_nak_state =
- IB_NAK_REMOTE_ACCESS_ERROR;
- qp->s_ack_psn = qp->r_psn;
- goto resched;
- }
+ if (unlikely(!ok))
+ goto nack_acc;
} else {
qp->r_sge.sg_list = NULL;
qp->r_sge.sge.mr = NULL;
@@ -1715,12 +1570,10 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
reth = (struct ib_reth *)data;
data += sizeof(*reth);
}
- spin_lock(&qp->s_lock);
- if (qp->s_ack_state != OP(ACKNOWLEDGE) &&
- qp->s_ack_state >= IB_OPCODE_RDMA_READ_REQUEST) {
- spin_unlock(&qp->s_lock);
- goto done;
- }
+ if (unlikely(!(qp->qp_access_flags &
+ IB_ACCESS_REMOTE_READ)))
+ goto nack_acc;
+ spin_lock_irq(&qp->s_lock);
qp->s_rdma_len = be32_to_cpu(reth->length);
if (qp->s_rdma_len != 0) {
u32 rkey = be32_to_cpu(reth->rkey);
@@ -1728,11 +1581,11 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
int ok;
/* Check rkey & NAK */
- ok = ipath_rkey_ok(dev, &qp->s_rdma_sge,
+ ok = ipath_rkey_ok(qp, &qp->s_rdma_sge,
qp->s_rdma_len, vaddr, rkey,
IB_ACCESS_REMOTE_READ);
if (unlikely(!ok)) {
- spin_unlock(&qp->s_lock);
+ spin_unlock_irq(&qp->s_lock);
goto nack_acc;
}
/*
@@ -1749,21 +1602,25 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
qp->s_rdma_sge.sge.length = 0;
qp->s_rdma_sge.sge.sge_length = 0;
}
- if (unlikely(!(qp->qp_access_flags &
- IB_ACCESS_REMOTE_READ)))
- goto nack_acc;
/*
* We need to increment the MSN here instead of when we
* finish sending the result since a duplicate request would
* increment it more than once.
*/
- atomic_inc(&qp->msn);
+ qp->r_msn++;
+
qp->s_ack_state = opcode;
- qp->s_nak_state = 0;
qp->s_ack_psn = psn;
+ spin_unlock_irq(&qp->s_lock);
+
qp->r_psn++;
qp->r_state = opcode;
- goto rdmadone;
+ qp->r_nak_state = 0;
+
+ /* Call ipath_do_rc_send() in another thread. */
+ tasklet_hi_schedule(&qp->s_task);
+
+ goto done;
case OP(COMPARE_SWAP):
case OP(FETCH_ADD): {
@@ -1783,7 +1640,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
goto nack_inv;
rkey = be32_to_cpu(ateth->rkey);
/* Check rkey & NAK */
- if (unlikely(!ipath_rkey_ok(dev, &qp->r_sge,
+ if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge,
sizeof(u64), vaddr, rkey,
IB_ACCESS_REMOTE_ATOMIC)))
goto nack_acc;
@@ -1792,7 +1649,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
goto nack_acc;
/* Perform atomic OP and save result. */
sdata = be64_to_cpu(ateth->swap_data);
- spin_lock(&dev->pending_lock);
+ spin_lock_irq(&dev->pending_lock);
qp->r_atomic_data = *(u64 *) qp->r_sge.sge.vaddr;
if (opcode == OP(FETCH_ADD))
*(u64 *) qp->r_sge.sge.vaddr =
@@ -1800,9 +1657,9 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
else if (qp->r_atomic_data ==
be64_to_cpu(ateth->compare_data))
*(u64 *) qp->r_sge.sge.vaddr = sdata;
- spin_unlock(&dev->pending_lock);
- atomic_inc(&qp->msn);
- qp->r_atomic_psn = psn & IPS_PSN_MASK;
+ spin_unlock_irq(&dev->pending_lock);
+ qp->r_msn++;
+ qp->r_atomic_psn = psn & IPATH_PSN_MASK;
psn |= 1 << 31;
break;
}
@@ -1813,44 +1670,38 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
}
qp->r_psn++;
qp->r_state = opcode;
+ qp->r_nak_state = 0;
/* Send an ACK if requested or required. */
if (psn & (1 << 31)) {
/*
* Coalesce ACKs unless there is a RDMA READ or
* ATOMIC pending.
*/
- spin_lock(&qp->s_lock);
- if (qp->s_ack_state == OP(ACKNOWLEDGE) ||
- qp->s_ack_state < IB_OPCODE_RDMA_READ_REQUEST) {
- qp->s_ack_state = opcode;
- qp->s_nak_state = 0;
- qp->s_ack_psn = psn;
- qp->s_ack_atomic = qp->r_atomic_data;
- goto resched;
+ if (qp->r_ack_state < OP(COMPARE_SWAP)) {
+ qp->r_ack_state = opcode;
+ qp->r_ack_psn = psn;
}
- spin_unlock(&qp->s_lock);
+ goto send_ack;
}
-done:
- spin_unlock_irqrestore(&qp->r_rq.lock, flags);
- goto bail;
+ goto done;
-resched:
+nack_acc:
/*
- * Try to send ACK right away but not if ipath_do_rc_send() is
- * active.
+ * A NAK will ACK earlier sends and RDMA writes.
+ * Don't queue the NAK if an RDMA read, atomic, or NAK
+ * is pending though.
*/
- if (qp->s_hdrwords == 0 &&
- (qp->s_ack_state < IB_OPCODE_RDMA_READ_REQUEST ||
- qp->s_ack_state >= IB_OPCODE_COMPARE_SWAP))
+ if (qp->r_ack_state < OP(COMPARE_SWAP)) {
+ ipath_rc_error(qp, IB_WC_REM_ACCESS_ERR);
+ qp->r_ack_state = OP(RDMA_WRITE_ONLY);
+ qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
+ qp->r_ack_psn = qp->r_psn;
+ }
+send_ack:
+ /* Send ACK right away unless the send tasklet has a pending ACK. */
+ if (qp->s_ack_state == OP(ACKNOWLEDGE))
send_rc_ack(qp);
-rdmadone:
- spin_unlock(&qp->s_lock);
- spin_unlock_irqrestore(&qp->r_rq.lock, flags);
-
- /* Call ipath_do_rc_send() in another thread. */
- tasklet_hi_schedule(&qp->s_task);
-
-bail:
+done:
return;
}
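The COMPARE_SWAP/FETCH_ADD case above applies the atomic to the target buffer under dev->pending_lock and keeps the original value in r_atomic_data, so a duplicate request can be answered from r_atomic_psn without re-executing the operation. The core semantics, stripped of locking and header parsing (a sketch, not the driver's code):

#include <stdint.h>
#include <stdbool.h>

/* Fetch-add: add 'operand' to the target and return the prior value.
 * Compare-swap: store 'operand' only when the prior value equals 'compare',
 * and return the prior value either way (it becomes the atomic ACK data). */
static uint64_t rc_atomic_op(uint64_t *target, bool fetch_add,
			     uint64_t operand, uint64_t compare)
{
	uint64_t old = *target;

	if (fetch_add)
		*target = old + operand;
	else if (old == compare)
		*target = operand;
	return old;
}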
diff --git a/drivers/infiniband/hw/ipath/ipath_registers.h b/drivers/infiniband/hw/ipath/ipath_registers.h
index 402126eb79c9..dffc76016d3c 100644
--- a/drivers/infiniband/hw/ipath/ipath_registers.h
+++ b/drivers/infiniband/hw/ipath/ipath_registers.h
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2006 QLogic, Inc. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -35,8 +36,7 @@
/*
* This file should only be included by kernel source, and by the diags. It
- * defines the registers, and their contents, for the InfiniPath HT-400
- * chip.
+ * defines the registers, and their contents, for InfiniPath chips.
*/
/*
@@ -134,10 +134,24 @@
#define INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT 40
#define INFINIPATH_HWE_RXEMEMPARITYERR_MASK 0x7FULL
#define INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT 44
-#define INFINIPATH_HWE_RXDSYNCMEMPARITYERR 0x0000000400000000ULL
-#define INFINIPATH_HWE_MEMBISTFAILED 0x0040000000000000ULL
#define INFINIPATH_HWE_IBCBUSTOSPCPARITYERR 0x4000000000000000ULL
#define INFINIPATH_HWE_IBCBUSFRSPCPARITYERR 0x8000000000000000ULL
+/* txe mem parity errors (shift by INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) */
+#define INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF 0x1ULL
+#define INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC 0x2ULL
+#define INFINIPATH_HWE_TXEMEMPARITYERR_PIOLAUNCHFIFO 0x4ULL
+/* rxe mem parity errors (shift by INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) */
+#define INFINIPATH_HWE_RXEMEMPARITYERR_RCVBUF 0x01ULL
+#define INFINIPATH_HWE_RXEMEMPARITYERR_LOOKUPQ 0x02ULL
+#define INFINIPATH_HWE_RXEMEMPARITYERR_EAGERTID 0x04ULL
+#define INFINIPATH_HWE_RXEMEMPARITYERR_EXPTID 0x08ULL
+#define INFINIPATH_HWE_RXEMEMPARITYERR_FLAGBUF 0x10ULL
+#define INFINIPATH_HWE_RXEMEMPARITYERR_DATAINFO 0x20ULL
+#define INFINIPATH_HWE_RXEMEMPARITYERR_HDRINFO 0x40ULL
+/* waldo specific -- find the rest in ipath_iba6110.c */
+#define INFINIPATH_HWE_RXDSYNCMEMPARITYERR 0x0000000400000000ULL
+/* monty specific -- find the rest in ipath_iba6120.c */
+#define INFINIPATH_HWE_MEMBISTFAILED 0x0040000000000000ULL
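The sub-error values above are meant to be tested after shifting the raw hardware-error word right by the corresponding *_SHIFT and masking with the *_MASK definition. A small illustration using the RXE values defined in this hunk; the helper itself is hypothetical:

#include <stdint.h>

#define HWE_RXEMEMPARITYERR_SHIFT    44	/* same values as the definitions above */
#define HWE_RXEMEMPARITYERR_MASK     0x7FULL
#define HWE_RXEMEMPARITYERR_EAGERTID 0x04ULL

/* Nonzero when the eager-TID RXE memory parity sub-error is set in a raw
 * hardware-error status word. */
static int rxe_eagertid_parity_err(uint64_t hwerrs)
{
	uint64_t rxe = (hwerrs >> HWE_RXEMEMPARITYERR_SHIFT) &
		       HWE_RXEMEMPARITYERR_MASK;

	return (rxe & HWE_RXEMEMPARITYERR_EAGERTID) != 0;
}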
/* kr_hwdiagctrl bits */
#define INFINIPATH_DC_FORCETXEMEMPARITYERR_MASK 0xFULL
@@ -209,9 +223,9 @@
/* combination link status states that we use with some frequency */
#define IPATH_IBSTATE_MASK ((INFINIPATH_IBCS_LINKTRAININGSTATE_MASK \
- << INFINIPATH_IBCS_LINKSTATE_SHIFT) | \
+ << INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) | \
(INFINIPATH_IBCS_LINKSTATE_MASK \
- <<INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT))
+ <<INFINIPATH_IBCS_LINKSTATE_SHIFT))
#define IPATH_IBSTATE_INIT ((INFINIPATH_IBCS_L_STATE_INIT \
<< INFINIPATH_IBCS_LINKSTATE_SHIFT) | \
(INFINIPATH_IBCS_LT_STATE_LINKUP \
@@ -282,10 +296,12 @@
#define INFINIPATH_XGXS_RESET 0x7ULL
#define INFINIPATH_XGXS_MDIOADDR_MASK 0xfULL
#define INFINIPATH_XGXS_MDIOADDR_SHIFT 4
+#define INFINIPATH_XGXS_RX_POL_SHIFT 19
+#define INFINIPATH_XGXS_RX_POL_MASK 0xfULL
#define INFINIPATH_RT_ADDR_MASK 0xFFFFFFFFFFULL /* 40 bits valid */
-/* TID entries (memory), HT400-only */
+/* TID entries (memory), HT-only */
#define INFINIPATH_RT_VALID 0x8000000000000000ULL
#define INFINIPATH_RT_ADDR_SHIFT 0
#define INFINIPATH_RT_BUFSIZE_MASK 0x3FFF
@@ -300,6 +316,17 @@
typedef u64 ipath_err_t;
+/* The following change with the type of device, so
+ * need to be part of the ipath_devdata struct, or
+ * we could have problems plugging in devices of
+ * different types (e.g. one HT, one PCIE)
+ * in one system, to be managed by one driver.
+ * On the other hand, this file may also be included
+ * by other code, so leave the declarations here
+ * temporarily. Minor footprint issue if common-model
+ * linker used, none if C89+ linker used.
+ */
+
/* mask of defined bits for various registers */
extern u64 infinipath_i_bitsextant;
extern ipath_err_t infinipath_e_bitsextant, infinipath_hwe_bitsextant;
@@ -308,13 +335,6 @@ extern ipath_err_t infinipath_e_bitsextant, infinipath_hwe_bitsextant;
extern u32 infinipath_i_rcvavail_mask, infinipath_i_rcvurg_mask;
/*
- * register bits for selecting i2c direction and values, used for I2C serial
- * flash
- */
-extern u16 ipath_gpio_sda_num, ipath_gpio_scl_num;
-extern u64 ipath_gpio_sda, ipath_gpio_scl;
-
-/*
* These are the infinipath general register numbers (not offsets).
* The kernel registers are used directly, those beyond the kernel
* registers are calculated from one of the base registers. The use of
diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c
index d38f4f3cfd1d..f7530512045d 100644
--- a/drivers/infiniband/hw/ipath/ipath_ruc.c
+++ b/drivers/infiniband/hw/ipath/ipath_ruc.c
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2006 QLogic, Inc. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -31,6 +32,7 @@
*/
#include "ipath_verbs.h"
+#include "ipath_kernel.h"
/*
* Convert the AETH RNR timeout code into the number of milliseconds.
@@ -104,6 +106,52 @@ void ipath_insert_rnr_queue(struct ipath_qp *qp)
spin_unlock_irqrestore(&dev->pending_lock, flags);
}
+static int init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe)
+{
+ int user = to_ipd(qp->ibqp.pd)->user;
+ int i, j, ret;
+ struct ib_wc wc;
+
+ qp->r_len = 0;
+ for (i = j = 0; i < wqe->num_sge; i++) {
+ if (wqe->sg_list[i].length == 0)
+ continue;
+ /* Check LKEY */
+ if ((user && wqe->sg_list[i].lkey == 0) ||
+ !ipath_lkey_ok(qp, &qp->r_sg_list[j], &wqe->sg_list[i],
+ IB_ACCESS_LOCAL_WRITE))
+ goto bad_lkey;
+ qp->r_len += wqe->sg_list[i].length;
+ j++;
+ }
+ qp->r_sge.sge = qp->r_sg_list[0];
+ qp->r_sge.sg_list = qp->r_sg_list + 1;
+ qp->r_sge.num_sge = j;
+ ret = 1;
+ goto bail;
+
+bad_lkey:
+ wc.wr_id = wqe->wr_id;
+ wc.status = IB_WC_LOC_PROT_ERR;
+ wc.opcode = IB_WC_RECV;
+ wc.vendor_err = 0;
+ wc.byte_len = 0;
+ wc.imm_data = 0;
+ wc.qp_num = qp->ibqp.qp_num;
+ wc.src_qp = 0;
+ wc.wc_flags = 0;
+ wc.pkey_index = 0;
+ wc.slid = 0;
+ wc.sl = 0;
+ wc.dlid_path_bits = 0;
+ wc.port_num = 0;
+ /* Signal solicited completion event. */
+ ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
+ ret = 0;
+bail:
+ return ret;
+}
+
/**
* ipath_get_rwqe - copy the next RWQE into the QP's RWQE
* @qp: the QP
@@ -111,74 +159,77 @@ void ipath_insert_rnr_queue(struct ipath_qp *qp)
*
* Return 0 if no RWQE is available, otherwise return 1.
*
- * Called at interrupt level with the QP r_rq.lock held.
+ * Can be called from interrupt level.
*/
int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
{
+ unsigned long flags;
struct ipath_rq *rq;
+ struct ipath_rwq *wq;
struct ipath_srq *srq;
struct ipath_rwqe *wqe;
+ void (*handler)(struct ib_event *, void *);
+ u32 tail;
int ret;
- if (!qp->ibqp.srq) {
+ if (qp->ibqp.srq) {
+ srq = to_isrq(qp->ibqp.srq);
+ handler = srq->ibsrq.event_handler;
+ rq = &srq->rq;
+ } else {
+ srq = NULL;
+ handler = NULL;
rq = &qp->r_rq;
- if (unlikely(rq->tail == rq->head)) {
+ }
+
+ spin_lock_irqsave(&rq->lock, flags);
+ wq = rq->wq;
+ tail = wq->tail;
+ /* Validate tail before using it since it is user writable. */
+ if (tail >= rq->size)
+ tail = 0;
+ do {
+ if (unlikely(tail == wq->head)) {
+ spin_unlock_irqrestore(&rq->lock, flags);
ret = 0;
goto bail;
}
- wqe = get_rwqe_ptr(rq, rq->tail);
- qp->r_wr_id = wqe->wr_id;
- if (!wr_id_only) {
- qp->r_sge.sge = wqe->sg_list[0];
- qp->r_sge.sg_list = wqe->sg_list + 1;
- qp->r_sge.num_sge = wqe->num_sge;
- qp->r_len = wqe->length;
- }
- if (++rq->tail >= rq->size)
- rq->tail = 0;
- ret = 1;
- goto bail;
- }
-
- srq = to_isrq(qp->ibqp.srq);
- rq = &srq->rq;
- spin_lock(&rq->lock);
- if (unlikely(rq->tail == rq->head)) {
- spin_unlock(&rq->lock);
- ret = 0;
- goto bail;
- }
- wqe = get_rwqe_ptr(rq, rq->tail);
+ wqe = get_rwqe_ptr(rq, tail);
+ if (++tail >= rq->size)
+ tail = 0;
+ } while (!wr_id_only && !init_sge(qp, wqe));
qp->r_wr_id = wqe->wr_id;
- if (!wr_id_only) {
- qp->r_sge.sge = wqe->sg_list[0];
- qp->r_sge.sg_list = wqe->sg_list + 1;
- qp->r_sge.num_sge = wqe->num_sge;
- qp->r_len = wqe->length;
- }
- if (++rq->tail >= rq->size)
- rq->tail = 0;
- if (srq->ibsrq.event_handler) {
- struct ib_event ev;
+ wq->tail = tail;
+
+ ret = 1;
+ if (handler) {
u32 n;
- if (rq->head < rq->tail)
- n = rq->size + rq->head - rq->tail;
+ /*
+ * validate head pointer value and compute
+ * the number of remaining WQEs.
+ */
+ n = wq->head;
+ if (n >= rq->size)
+ n = 0;
+ if (n < tail)
+ n += rq->size - tail;
else
- n = rq->head - rq->tail;
+ n -= tail;
if (n < srq->limit) {
+ struct ib_event ev;
+
srq->limit = 0;
- spin_unlock(&rq->lock);
+ spin_unlock_irqrestore(&rq->lock, flags);
ev.device = qp->ibqp.device;
ev.element.srq = qp->ibqp.srq;
ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
- srq->ibsrq.event_handler(&ev,
- srq->ibsrq.srq_context);
- } else
- spin_unlock(&rq->lock);
- } else
- spin_unlock(&rq->lock);
- ret = 1;
+ handler(&ev, srq->ibsrq.srq_context);
+ goto bail;
+ }
+ }
+ spin_unlock_irqrestore(&rq->lock, flags);
+ qp->r_wrid_valid = 1;
bail:
return ret;
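The rewritten ipath_get_rwqe() treats the receive queue as a circular buffer whose head and tail indices are user-writable and therefore clamped to the queue size before use; the number of WQEs still queued is the circular distance from tail to head. A sketch of that count, assuming both indices are already in [0, size):

#include <stdint.h>

/* Circular distance from tail to head, i.e. the number of queued WQEs. */
static uint32_t rq_count(uint32_t head, uint32_t tail, uint32_t size)
{
	return (head >= tail) ? head - tail : head + size - tail;
}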
@@ -187,7 +238,6 @@ bail:
/**
 * ipath_ruc_loopback - handle UC and RC loopback requests
* @sqp: the loopback QP
- * @wc: the work completion entry
*
* This is called from ipath_do_uc_send() or ipath_do_rc_send() to
* forward a WQE addressed to the same HCA.
@@ -196,13 +246,14 @@ bail:
* receive interrupts since this is a connected protocol and all packets
* will pass through here.
*/
-void ipath_ruc_loopback(struct ipath_qp *sqp, struct ib_wc *wc)
+static void ipath_ruc_loopback(struct ipath_qp *sqp)
{
struct ipath_ibdev *dev = to_idev(sqp->ibqp.device);
struct ipath_qp *qp;
struct ipath_swqe *wqe;
struct ipath_sge *sge;
unsigned long flags;
+ struct ib_wc wc;
u64 sdata;
qp = ipath_lookup_qpn(&dev->qp_table, sqp->remote_qpn);
@@ -233,8 +284,8 @@ again:
wqe = get_swqe_ptr(sqp, sqp->s_last);
spin_unlock_irqrestore(&sqp->s_lock, flags);
- wc->wc_flags = 0;
- wc->imm_data = 0;
+ wc.wc_flags = 0;
+ wc.imm_data = 0;
sqp->s_sge.sge = wqe->sg_list[0];
sqp->s_sge.sg_list = wqe->sg_list + 1;
@@ -242,68 +293,63 @@ again:
sqp->s_len = wqe->length;
switch (wqe->wr.opcode) {
case IB_WR_SEND_WITH_IMM:
- wc->wc_flags = IB_WC_WITH_IMM;
- wc->imm_data = wqe->wr.imm_data;
+ wc.wc_flags = IB_WC_WITH_IMM;
+ wc.imm_data = wqe->wr.imm_data;
/* FALLTHROUGH */
case IB_WR_SEND:
- spin_lock_irqsave(&qp->r_rq.lock, flags);
if (!ipath_get_rwqe(qp, 0)) {
rnr_nak:
- spin_unlock_irqrestore(&qp->r_rq.lock, flags);
/* Handle RNR NAK */
if (qp->ibqp.qp_type == IB_QPT_UC)
goto send_comp;
if (sqp->s_rnr_retry == 0) {
- wc->status = IB_WC_RNR_RETRY_EXC_ERR;
+ wc.status = IB_WC_RNR_RETRY_EXC_ERR;
goto err;
}
if (sqp->s_rnr_retry_cnt < 7)
sqp->s_rnr_retry--;
dev->n_rnr_naks++;
sqp->s_rnr_timeout =
- ib_ipath_rnr_table[sqp->s_min_rnr_timer];
+ ib_ipath_rnr_table[sqp->r_min_rnr_timer];
ipath_insert_rnr_queue(sqp);
goto done;
}
- spin_unlock_irqrestore(&qp->r_rq.lock, flags);
break;
case IB_WR_RDMA_WRITE_WITH_IMM:
- wc->wc_flags = IB_WC_WITH_IMM;
- wc->imm_data = wqe->wr.imm_data;
- spin_lock_irqsave(&qp->r_rq.lock, flags);
+ wc.wc_flags = IB_WC_WITH_IMM;
+ wc.imm_data = wqe->wr.imm_data;
if (!ipath_get_rwqe(qp, 1))
goto rnr_nak;
- spin_unlock_irqrestore(&qp->r_rq.lock, flags);
/* FALLTHROUGH */
case IB_WR_RDMA_WRITE:
if (wqe->length == 0)
break;
- if (unlikely(!ipath_rkey_ok(dev, &qp->r_sge, wqe->length,
+ if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, wqe->length,
wqe->wr.wr.rdma.remote_addr,
wqe->wr.wr.rdma.rkey,
IB_ACCESS_REMOTE_WRITE))) {
acc_err:
- wc->status = IB_WC_REM_ACCESS_ERR;
+ wc.status = IB_WC_REM_ACCESS_ERR;
err:
- wc->wr_id = wqe->wr.wr_id;
- wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
- wc->vendor_err = 0;
- wc->byte_len = 0;
- wc->qp_num = sqp->ibqp.qp_num;
- wc->src_qp = sqp->remote_qpn;
- wc->pkey_index = 0;
- wc->slid = sqp->remote_ah_attr.dlid;
- wc->sl = sqp->remote_ah_attr.sl;
- wc->dlid_path_bits = 0;
- wc->port_num = 0;
- ipath_sqerror_qp(sqp, wc);
+ wc.wr_id = wqe->wr.wr_id;
+ wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
+ wc.vendor_err = 0;
+ wc.byte_len = 0;
+ wc.qp_num = sqp->ibqp.qp_num;
+ wc.src_qp = sqp->remote_qpn;
+ wc.pkey_index = 0;
+ wc.slid = sqp->remote_ah_attr.dlid;
+ wc.sl = sqp->remote_ah_attr.sl;
+ wc.dlid_path_bits = 0;
+ wc.port_num = 0;
+ ipath_sqerror_qp(sqp, &wc);
goto done;
}
break;
case IB_WR_RDMA_READ:
- if (unlikely(!ipath_rkey_ok(dev, &sqp->s_sge, wqe->length,
+ if (unlikely(!ipath_rkey_ok(qp, &sqp->s_sge, wqe->length,
wqe->wr.wr.rdma.remote_addr,
wqe->wr.wr.rdma.rkey,
IB_ACCESS_REMOTE_READ)))
@@ -318,7 +364,7 @@ again:
case IB_WR_ATOMIC_CMP_AND_SWP:
case IB_WR_ATOMIC_FETCH_AND_ADD:
- if (unlikely(!ipath_rkey_ok(dev, &qp->r_sge, sizeof(u64),
+ if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64),
wqe->wr.wr.rdma.remote_addr,
wqe->wr.wr.rdma.rkey,
IB_ACCESS_REMOTE_ATOMIC)))
@@ -373,22 +419,22 @@ again:
goto send_comp;
if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM)
- wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
+ wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
else
- wc->opcode = IB_WC_RECV;
- wc->wr_id = qp->r_wr_id;
- wc->status = IB_WC_SUCCESS;
- wc->vendor_err = 0;
- wc->byte_len = wqe->length;
- wc->qp_num = qp->ibqp.qp_num;
- wc->src_qp = qp->remote_qpn;
+ wc.opcode = IB_WC_RECV;
+ wc.wr_id = qp->r_wr_id;
+ wc.status = IB_WC_SUCCESS;
+ wc.vendor_err = 0;
+ wc.byte_len = wqe->length;
+ wc.qp_num = qp->ibqp.qp_num;
+ wc.src_qp = qp->remote_qpn;
/* XXX do we know which pkey matched? Only needed for GSI. */
- wc->pkey_index = 0;
- wc->slid = qp->remote_ah_attr.dlid;
- wc->sl = qp->remote_ah_attr.sl;
- wc->dlid_path_bits = 0;
+ wc.pkey_index = 0;
+ wc.slid = qp->remote_ah_attr.dlid;
+ wc.sl = qp->remote_ah_attr.sl;
+ wc.dlid_path_bits = 0;
/* Signal completion event if the solicited bit is set. */
- ipath_cq_enter(to_icq(qp->ibqp.recv_cq), wc,
+ ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
wqe->wr.send_flags & IB_SEND_SOLICITED);
send_comp:
@@ -396,19 +442,19 @@ send_comp:
if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &sqp->s_flags) ||
(wqe->wr.send_flags & IB_SEND_SIGNALED)) {
- wc->wr_id = wqe->wr.wr_id;
- wc->status = IB_WC_SUCCESS;
- wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
- wc->vendor_err = 0;
- wc->byte_len = wqe->length;
- wc->qp_num = sqp->ibqp.qp_num;
- wc->src_qp = 0;
- wc->pkey_index = 0;
- wc->slid = 0;
- wc->sl = 0;
- wc->dlid_path_bits = 0;
- wc->port_num = 0;
- ipath_cq_enter(to_icq(sqp->ibqp.send_cq), wc, 0);
+ wc.wr_id = wqe->wr.wr_id;
+ wc.status = IB_WC_SUCCESS;
+ wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
+ wc.vendor_err = 0;
+ wc.byte_len = wqe->length;
+ wc.qp_num = sqp->ibqp.qp_num;
+ wc.src_qp = 0;
+ wc.pkey_index = 0;
+ wc.slid = 0;
+ wc.sl = 0;
+ wc.dlid_path_bits = 0;
+ wc.port_num = 0;
+ ipath_cq_enter(to_icq(sqp->ibqp.send_cq), &wc, 0);
}
/* Update s_last now that we are finished with the SWQE */
@@ -423,6 +469,15 @@ done:
wake_up(&qp->wait);
}
+static int want_buffer(struct ipath_devdata *dd)
+{
+ set_bit(IPATH_S_PIOINTBUFAVAIL, &dd->ipath_sendctrl);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
+ dd->ipath_sendctrl);
+
+ return 0;
+}
+
/**
* ipath_no_bufs_available - tell the layer driver we need buffers
* @qp: the QP that caused the problem
@@ -439,7 +494,7 @@ void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev)
list_add_tail(&qp->piowait, &dev->piowait);
spin_unlock_irqrestore(&dev->pending_lock, flags);
/*
- * Note that as soon as ipath_layer_want_buffer() is called and
+ * Note that as soon as want_buffer() is called and
* possibly before it returns, ipath_ib_piobufavail()
* could be called. If we are still in the tasklet function,
* tasklet_hi_schedule() will not call us until the next time
@@ -449,16 +504,16 @@ void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev)
*/
clear_bit(IPATH_S_BUSY, &qp->s_flags);
tasklet_unlock(&qp->s_task);
- ipath_layer_want_buffer(dev->dd);
+ want_buffer(dev->dd);
dev->n_piowait++;
}
/**
- * ipath_post_rc_send - post RC and UC sends
+ * ipath_post_ruc_send - post RC and UC sends
* @qp: the QP to post on
* @wr: the work request to send
*/
-int ipath_post_rc_send(struct ipath_qp *qp, struct ib_send_wr *wr)
+int ipath_post_ruc_send(struct ipath_qp *qp, struct ib_send_wr *wr)
{
struct ipath_swqe *wqe;
unsigned long flags;
@@ -519,8 +574,7 @@ int ipath_post_rc_send(struct ipath_qp *qp, struct ib_send_wr *wr)
}
if (wr->sg_list[i].length == 0)
continue;
- if (!ipath_lkey_ok(&to_idev(qp->ibqp.device)->lk_table,
- &wqe->sg_list[j], &wr->sg_list[i],
+ if (!ipath_lkey_ok(qp, &wqe->sg_list[j], &wr->sg_list[i],
acc)) {
spin_unlock_irqrestore(&qp->s_lock, flags);
ret = -EINVAL;
@@ -533,13 +587,148 @@ int ipath_post_rc_send(struct ipath_qp *qp, struct ib_send_wr *wr)
qp->s_head = next;
spin_unlock_irqrestore(&qp->s_lock, flags);
- if (qp->ibqp.qp_type == IB_QPT_UC)
- ipath_do_uc_send((unsigned long) qp);
- else
- ipath_do_rc_send((unsigned long) qp);
+ ipath_do_ruc_send((unsigned long) qp);
ret = 0;
bail:
return ret;
}
+
+/**
+ * ipath_make_grh - construct a GRH header
+ * @dev: a pointer to the ipath device
+ * @hdr: a pointer to the GRH header being constructed
+ * @grh: the global route address to send to
+ * @hwords: the number of 32 bit words of header being sent
+ * @nwords: the number of 32 bit words of data being sent
+ *
+ * Return the size of the header in 32 bit words.
+ */
+u32 ipath_make_grh(struct ipath_ibdev *dev, struct ib_grh *hdr,
+ struct ib_global_route *grh, u32 hwords, u32 nwords)
+{
+ hdr->version_tclass_flow =
+ cpu_to_be32((6 << 28) |
+ (grh->traffic_class << 20) |
+ grh->flow_label);
+ hdr->paylen = cpu_to_be16((hwords - 2 + nwords + SIZE_OF_CRC) << 2);
+ /* next_hdr is defined by C8-7 in ch. 8.4.1 */
+ hdr->next_hdr = 0x1B;
+ hdr->hop_limit = grh->hop_limit;
+ /* The SGID is 32-bit aligned. */
+ hdr->sgid.global.subnet_prefix = dev->gid_prefix;
+ hdr->sgid.global.interface_id = dev->dd->ipath_guid;
+ hdr->dgid = grh->dgid;
+
+ /* GRH header size in 32-bit words. */
+ return sizeof(struct ib_grh) / sizeof(u32);
+}
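ipath_make_grh() packs the first GRH word from the IP version, traffic class and flow label, and derives PayLen from the header word count (which still includes the two LRH words), the payload word count and the ICRC. A standalone sketch of the arithmetic, assuming SIZE_OF_CRC is one 32-bit word:

#include <stdint.h>

#define CRC_WORDS 1u	/* assumed: the ICRC occupies one 32-bit word */

/* First GRH word: version 6 in bits 31-28, traffic class in bits 27-20,
 * flow label in bits 19-0 (host order; the driver byte-swaps afterwards). */
static uint32_t grh_word0(uint32_t traffic_class, uint32_t flow_label)
{
	return (6u << 28) | ((traffic_class & 0xFFu) << 20) |
	       (flow_label & 0xFFFFFu);
}

/* PayLen in bytes: everything after the GRH, i.e. the header words minus
 * the two LRH words, plus payload words and the ICRC, times four. */
static uint16_t grh_paylen(uint32_t hwords, uint32_t nwords)
{
	return (uint16_t)((hwords - 2 + nwords + CRC_WORDS) << 2);
}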
+
+/**
+ * ipath_do_ruc_send - perform a send on an RC or UC QP
+ * @data: contains a pointer to the QP
+ *
+ * Process entries in the send work queue until credit or queue is
+ * exhausted. Only allow one CPU to send a packet per QP (tasklet).
+ * Otherwise, after we drop the QP s_lock, two threads could send
+ * packets out of order.
+ */
+void ipath_do_ruc_send(unsigned long data)
+{
+ struct ipath_qp *qp = (struct ipath_qp *)data;
+ struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
+ unsigned long flags;
+ u16 lrh0;
+ u32 nwords;
+ u32 extra_bytes;
+ u32 bth0;
+ u32 bth2;
+ u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
+ struct ipath_other_headers *ohdr;
+
+ if (test_and_set_bit(IPATH_S_BUSY, &qp->s_flags))
+ goto bail;
+
+ if (unlikely(qp->remote_ah_attr.dlid == dev->dd->ipath_lid)) {
+ ipath_ruc_loopback(qp);
+ goto clear;
+ }
+
+ ohdr = &qp->s_hdr.u.oth;
+ if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
+ ohdr = &qp->s_hdr.u.l.oth;
+
+again:
+ /* Check for a constructed packet to be sent. */
+ if (qp->s_hdrwords != 0) {
+ /*
+ * If no PIO bufs are available, return. An interrupt will
+ * call ipath_ib_piobufavail() when one is available.
+ */
+ if (ipath_verbs_send(dev->dd, qp->s_hdrwords,
+ (u32 *) &qp->s_hdr, qp->s_cur_size,
+ qp->s_cur_sge)) {
+ ipath_no_bufs_available(qp, dev);
+ goto bail;
+ }
+ dev->n_unicast_xmit++;
+ /* Record that we sent the packet and s_hdr is empty. */
+ qp->s_hdrwords = 0;
+ }
+
+ /*
+ * The lock is needed to synchronize between setting
+ * qp->s_ack_state, resend timer, and post_send().
+ */
+ spin_lock_irqsave(&qp->s_lock, flags);
+
+ /* Sending responses has higher priority over sending requests. */
+ if (qp->s_ack_state != IB_OPCODE_RC_ACKNOWLEDGE &&
+ (bth0 = ipath_make_rc_ack(qp, ohdr, pmtu)) != 0)
+ bth2 = qp->s_ack_psn++ & IPATH_PSN_MASK;
+ else if (!((qp->ibqp.qp_type == IB_QPT_RC) ?
+ ipath_make_rc_req(qp, ohdr, pmtu, &bth0, &bth2) :
+ ipath_make_uc_req(qp, ohdr, pmtu, &bth0, &bth2))) {
+ /*
+ * Clear the busy bit before unlocking to avoid races with
+ * adding new work queue items and then failing to process
+ * them.
+ */
+ clear_bit(IPATH_S_BUSY, &qp->s_flags);
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ goto bail;
+ }
+
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+
+ /* Construct the header. */
+ extra_bytes = (4 - qp->s_cur_size) & 3;
+ nwords = (qp->s_cur_size + extra_bytes) >> 2;
+ lrh0 = IPATH_LRH_BTH;
+ if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
+ qp->s_hdrwords += ipath_make_grh(dev, &qp->s_hdr.u.l.grh,
+ &qp->remote_ah_attr.grh,
+ qp->s_hdrwords, nwords);
+ lrh0 = IPATH_LRH_GRH;
+ }
+ lrh0 |= qp->remote_ah_attr.sl << 4;
+ qp->s_hdr.lrh[0] = cpu_to_be16(lrh0);
+ qp->s_hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
+ qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords +
+ SIZE_OF_CRC);
+ qp->s_hdr.lrh[3] = cpu_to_be16(dev->dd->ipath_lid);
+ bth0 |= ipath_get_pkey(dev->dd, qp->s_pkey_index);
+ bth0 |= extra_bytes << 20;
+ ohdr->bth[0] = cpu_to_be32(bth0);
+ ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
+ ohdr->bth[2] = cpu_to_be32(bth2);
+
+ /* Check for more work to do. */
+ goto again;
+
+clear:
+ clear_bit(IPATH_S_BUSY, &qp->s_flags);
+bail:
+ return;
+}
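ipath_do_ruc_send() pads the payload to a 32-bit boundary, advertises the pad count in the BTH (bth0 |= extra_bytes << 20) and counts the padded payload in 32-bit words for the LRH length field. The pad/word arithmetic in isolation (a sketch):

#include <stdint.h>

/* Given a payload length in bytes, compute the BTH pad count and the
 * padded payload length in 32-bit words, mirroring the extra_bytes/nwords
 * math above. */
static void payload_words(uint32_t len, uint32_t *pad, uint32_t *nwords)
{
	*pad = (4 - len) & 3;		/* 0..3 bytes of padding */
	*nwords = (len + *pad) >> 2;	/* padded length in dwords */
}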
diff --git a/drivers/infiniband/hw/ipath/ipath_srq.c b/drivers/infiniband/hw/ipath/ipath_srq.c
index 01c4c6c56118..94033503400c 100644
--- a/drivers/infiniband/hw/ipath/ipath_srq.c
+++ b/drivers/infiniband/hw/ipath/ipath_srq.c
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2006 QLogic, Inc. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -47,66 +48,39 @@ int ipath_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
struct ib_recv_wr **bad_wr)
{
struct ipath_srq *srq = to_isrq(ibsrq);
- struct ipath_ibdev *dev = to_idev(ibsrq->device);
+ struct ipath_rwq *wq;
unsigned long flags;
int ret;
for (; wr; wr = wr->next) {
struct ipath_rwqe *wqe;
u32 next;
- int i, j;
+ int i;
- if (wr->num_sge > srq->rq.max_sge) {
+ if ((unsigned) wr->num_sge > srq->rq.max_sge) {
*bad_wr = wr;
ret = -ENOMEM;
goto bail;
}
spin_lock_irqsave(&srq->rq.lock, flags);
- next = srq->rq.head + 1;
+ wq = srq->rq.wq;
+ next = wq->head + 1;
if (next >= srq->rq.size)
next = 0;
- if (next == srq->rq.tail) {
+ if (next == wq->tail) {
spin_unlock_irqrestore(&srq->rq.lock, flags);
*bad_wr = wr;
ret = -ENOMEM;
goto bail;
}
- wqe = get_rwqe_ptr(&srq->rq, srq->rq.head);
+ wqe = get_rwqe_ptr(&srq->rq, wq->head);
wqe->wr_id = wr->wr_id;
- wqe->sg_list[0].mr = NULL;
- wqe->sg_list[0].vaddr = NULL;
- wqe->sg_list[0].length = 0;
- wqe->sg_list[0].sge_length = 0;
- wqe->length = 0;
- for (i = 0, j = 0; i < wr->num_sge; i++) {
- /* Check LKEY */
- if (to_ipd(srq->ibsrq.pd)->user &&
- wr->sg_list[i].lkey == 0) {
- spin_unlock_irqrestore(&srq->rq.lock,
- flags);
- *bad_wr = wr;
- ret = -EINVAL;
- goto bail;
- }
- if (wr->sg_list[i].length == 0)
- continue;
- if (!ipath_lkey_ok(&dev->lk_table,
- &wqe->sg_list[j],
- &wr->sg_list[i],
- IB_ACCESS_LOCAL_WRITE)) {
- spin_unlock_irqrestore(&srq->rq.lock,
- flags);
- *bad_wr = wr;
- ret = -EINVAL;
- goto bail;
- }
- wqe->length += wr->sg_list[i].length;
- j++;
- }
- wqe->num_sge = j;
- srq->rq.head = next;
+ wqe->num_sge = wr->num_sge;
+ for (i = 0; i < wr->num_sge; i++)
+ wqe->sg_list[i] = wr->sg_list[i];
+ wq->head = next;
spin_unlock_irqrestore(&srq->rq.lock, flags);
}
ret = 0;
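ipath_post_srq_receive() above only advances the head after checking that the next slot would not collide with the tail, so the ring always keeps one slot free and a full queue can be told apart from an empty one. The same check in isolation (a hypothetical helper, not the driver's):

#include <stdint.h>

/* Advance the head of a circular receive queue; returns -1 when the queue
 * is full, i.e. when the next head position would equal the tail. */
static int rq_post(uint32_t *head, uint32_t tail, uint32_t size)
{
	uint32_t next = *head + 1;

	if (next >= size)
		next = 0;
	if (next == tail)
		return -1;
	*head = next;
	return 0;
}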
@@ -125,46 +99,104 @@ struct ib_srq *ipath_create_srq(struct ib_pd *ibpd,
struct ib_srq_init_attr *srq_init_attr,
struct ib_udata *udata)
{
+ struct ipath_ibdev *dev = to_idev(ibpd->device);
struct ipath_srq *srq;
u32 sz;
struct ib_srq *ret;
- if (srq_init_attr->attr.max_sge < 1) {
+ if (srq_init_attr->attr.max_wr == 0) {
+ ret = ERR_PTR(-EINVAL);
+ goto done;
+ }
+
+ if ((srq_init_attr->attr.max_sge > ib_ipath_max_srq_sges) ||
+ (srq_init_attr->attr.max_wr > ib_ipath_max_srq_wrs)) {
ret = ERR_PTR(-EINVAL);
- goto bail;
+ goto done;
}
srq = kmalloc(sizeof(*srq), GFP_KERNEL);
if (!srq) {
ret = ERR_PTR(-ENOMEM);
- goto bail;
+ goto done;
}
/*
* Need to use vmalloc() if we want to support large #s of entries.
*/
srq->rq.size = srq_init_attr->attr.max_wr + 1;
- sz = sizeof(struct ipath_sge) * srq_init_attr->attr.max_sge +
+ srq->rq.max_sge = srq_init_attr->attr.max_sge;
+ sz = sizeof(struct ib_sge) * srq->rq.max_sge +
sizeof(struct ipath_rwqe);
- srq->rq.wq = vmalloc(srq->rq.size * sz);
+ srq->rq.wq = vmalloc_user(sizeof(struct ipath_rwq) + srq->rq.size * sz);
if (!srq->rq.wq) {
- kfree(srq);
ret = ERR_PTR(-ENOMEM);
- goto bail;
+ goto bail_srq;
}
/*
+ * Return the address of the RWQ as the offset to mmap.
+ * See ipath_mmap() for details.
+ */
+ if (udata && udata->outlen >= sizeof(__u64)) {
+ struct ipath_mmap_info *ip;
+ __u64 offset = (__u64) srq->rq.wq;
+ int err;
+
+ err = ib_copy_to_udata(udata, &offset, sizeof(offset));
+ if (err) {
+ ret = ERR_PTR(err);
+ goto bail_wq;
+ }
+
+ /* Allocate info for ipath_mmap(). */
+ ip = kmalloc(sizeof(*ip), GFP_KERNEL);
+ if (!ip) {
+ ret = ERR_PTR(-ENOMEM);
+ goto bail_wq;
+ }
+ srq->ip = ip;
+ ip->context = ibpd->uobject->context;
+ ip->obj = srq->rq.wq;
+ kref_init(&ip->ref);
+ ip->mmap_cnt = 0;
+ ip->size = PAGE_ALIGN(sizeof(struct ipath_rwq) +
+ srq->rq.size * sz);
+ spin_lock_irq(&dev->pending_lock);
+ ip->next = dev->pending_mmaps;
+ dev->pending_mmaps = ip;
+ spin_unlock_irq(&dev->pending_lock);
+ } else
+ srq->ip = NULL;
+
+ /*
* ib_create_srq() will initialize srq->ibsrq.
*/
spin_lock_init(&srq->rq.lock);
- srq->rq.head = 0;
- srq->rq.tail = 0;
- srq->rq.max_sge = srq_init_attr->attr.max_sge;
+ srq->rq.wq->head = 0;
+ srq->rq.wq->tail = 0;
srq->limit = srq_init_attr->attr.srq_limit;
+ spin_lock(&dev->n_srqs_lock);
+ if (dev->n_srqs_allocated == ib_ipath_max_srqs) {
+ spin_unlock(&dev->n_srqs_lock);
+ ret = ERR_PTR(-ENOMEM);
+ goto bail_wq;
+ }
+
+ dev->n_srqs_allocated++;
+ spin_unlock(&dev->n_srqs_lock);
+
ret = &srq->ibsrq;
+ goto done;
-bail:
+bail_wq:
+ vfree(srq->rq.wq);
+
+bail_srq:
+ kfree(srq);
+
+done:
return ret;
}
@@ -173,76 +205,130 @@ bail:
* @ibsrq: the SRQ to modify
* @attr: the new attributes of the SRQ
* @attr_mask: indicates which attributes to modify
+ * @udata: user data for ipathverbs.so
*/
int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
- enum ib_srq_attr_mask attr_mask)
+ enum ib_srq_attr_mask attr_mask,
+ struct ib_udata *udata)
{
struct ipath_srq *srq = to_isrq(ibsrq);
- unsigned long flags;
- int ret;
+ int ret = 0;
- if (attr_mask & IB_SRQ_LIMIT) {
- spin_lock_irqsave(&srq->rq.lock, flags);
- srq->limit = attr->srq_limit;
- spin_unlock_irqrestore(&srq->rq.lock, flags);
- }
if (attr_mask & IB_SRQ_MAX_WR) {
- u32 size = attr->max_wr + 1;
- struct ipath_rwqe *wq, *p;
- u32 n;
- u32 sz;
+ struct ipath_rwq *owq;
+ struct ipath_rwq *wq;
+ struct ipath_rwqe *p;
+ u32 sz, size, n, head, tail;
- if (attr->max_sge < srq->rq.max_sge) {
+ /* Check that the requested sizes are below the limits. */
+ if ((attr->max_wr > ib_ipath_max_srq_wrs) ||
+ ((attr_mask & IB_SRQ_LIMIT) ?
+ attr->srq_limit : srq->limit) > attr->max_wr) {
ret = -EINVAL;
goto bail;
}
sz = sizeof(struct ipath_rwqe) +
- attr->max_sge * sizeof(struct ipath_sge);
- wq = vmalloc(size * sz);
+ srq->rq.max_sge * sizeof(struct ib_sge);
+ size = attr->max_wr + 1;
+ wq = vmalloc_user(sizeof(struct ipath_rwq) + size * sz);
if (!wq) {
ret = -ENOMEM;
goto bail;
}
- spin_lock_irqsave(&srq->rq.lock, flags);
- if (srq->rq.head < srq->rq.tail)
- n = srq->rq.size + srq->rq.head - srq->rq.tail;
+ /*
+ * Return the address of the RWQ as the offset to mmap.
+ * See ipath_mmap() for details.
+ */
+ if (udata && udata->inlen >= sizeof(__u64)) {
+ __u64 offset_addr;
+ __u64 offset = (__u64) wq;
+
+ ret = ib_copy_from_udata(&offset_addr, udata,
+ sizeof(offset_addr));
+ if (ret) {
+ vfree(wq);
+ goto bail;
+ }
+ udata->outbuf = (void __user *) offset_addr;
+ ret = ib_copy_to_udata(udata, &offset,
+ sizeof(offset));
+ if (ret) {
+ vfree(wq);
+ goto bail;
+ }
+ }
+
+ spin_lock_irq(&srq->rq.lock);
+ /*
+ * validate head pointer value and compute
+ * the number of remaining WQEs.
+ */
+ owq = srq->rq.wq;
+ head = owq->head;
+ if (head >= srq->rq.size)
+ head = 0;
+ tail = owq->tail;
+ if (tail >= srq->rq.size)
+ tail = 0;
+ n = head;
+ if (n < tail)
+ n += srq->rq.size - tail;
else
- n = srq->rq.head - srq->rq.tail;
- if (size <= n || size <= srq->limit) {
- spin_unlock_irqrestore(&srq->rq.lock, flags);
+ n -= tail;
+ if (size <= n) {
+ spin_unlock_irq(&srq->rq.lock);
vfree(wq);
ret = -EINVAL;
goto bail;
}
n = 0;
- p = wq;
- while (srq->rq.tail != srq->rq.head) {
+ p = wq->wq;
+ while (tail != head) {
struct ipath_rwqe *wqe;
int i;
- wqe = get_rwqe_ptr(&srq->rq, srq->rq.tail);
+ wqe = get_rwqe_ptr(&srq->rq, tail);
p->wr_id = wqe->wr_id;
- p->length = wqe->length;
p->num_sge = wqe->num_sge;
for (i = 0; i < wqe->num_sge; i++)
p->sg_list[i] = wqe->sg_list[i];
n++;
p = (struct ipath_rwqe *)((char *) p + sz);
- if (++srq->rq.tail >= srq->rq.size)
- srq->rq.tail = 0;
+ if (++tail >= srq->rq.size)
+ tail = 0;
}
- vfree(srq->rq.wq);
srq->rq.wq = wq;
srq->rq.size = size;
- srq->rq.head = n;
- srq->rq.tail = 0;
- srq->rq.max_sge = attr->max_sge;
- spin_unlock_irqrestore(&srq->rq.lock, flags);
- }
+ wq->head = n;
+ wq->tail = 0;
+ if (attr_mask & IB_SRQ_LIMIT)
+ srq->limit = attr->srq_limit;
+ spin_unlock_irq(&srq->rq.lock);
- ret = 0;
+ vfree(owq);
+
+ if (srq->ip) {
+ struct ipath_mmap_info *ip = srq->ip;
+ struct ipath_ibdev *dev = to_idev(srq->ibsrq.device);
+
+ ip->obj = wq;
+ ip->size = PAGE_ALIGN(sizeof(struct ipath_rwq) +
+ size * sz);
+ spin_lock_irq(&dev->pending_lock);
+ ip->next = dev->pending_mmaps;
+ dev->pending_mmaps = ip;
+ spin_unlock_irq(&dev->pending_lock);
+ }
+ } else if (attr_mask & IB_SRQ_LIMIT) {
+ spin_lock_irq(&srq->rq.lock);
+ if (attr->srq_limit >= srq->rq.size)
+ ret = -EINVAL;
+ else
+ srq->limit = attr->srq_limit;
+ spin_unlock_irq(&srq->rq.lock);
+ }
bail:
return ret;
@@ -265,8 +351,15 @@ int ipath_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
int ipath_destroy_srq(struct ib_srq *ibsrq)
{
struct ipath_srq *srq = to_isrq(ibsrq);
+ struct ipath_ibdev *dev = to_idev(ibsrq->device);
- vfree(srq->rq.wq);
+ spin_lock(&dev->n_srqs_lock);
+ dev->n_srqs_allocated--;
+ spin_unlock(&dev->n_srqs_lock);
+ if (srq->ip)
+ kref_put(&srq->ip->ref, ipath_release_mmap_info);
+ else
+ vfree(srq->rq.wq);
kfree(srq);
return 0;
diff --git a/drivers/infiniband/hw/ipath/ipath_stats.c b/drivers/infiniband/hw/ipath/ipath_stats.c
index fe209137ee74..30a825928fcf 100644
--- a/drivers/infiniband/hw/ipath/ipath_stats.c
+++ b/drivers/infiniband/hw/ipath/ipath_stats.c
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2006 QLogic, Inc. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -185,7 +186,6 @@ static void ipath_qcheck(struct ipath_devdata *dd)
dd->ipath_port0head,
(unsigned long long)
ipath_stats.sps_port0pkts);
- ipath_kreceive(dd);
}
dd->ipath_lastport0rcv_cnt = ipath_stats.sps_port0pkts;
}
@@ -271,33 +271,6 @@ void ipath_get_faststats(unsigned long opaque)
}
}
- if (dd->ipath_nosma_bufs) {
- dd->ipath_nosma_secs += 5;
- if (dd->ipath_nosma_secs >= 30) {
- ipath_cdbg(SMA, "No SMA bufs avail %u seconds; "
- "cancelling pending sends\n",
- dd->ipath_nosma_secs);
- /*
- * issue an abort as well, in case we have a packet
- * stuck in launch fifo. This could corrupt an
- * outgoing user packet in the worst case,
- * but this is a pretty catastrophic, anyway.
- */
- ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
- INFINIPATH_S_ABORT);
- ipath_disarm_piobufs(dd, dd->ipath_lastport_piobuf,
- dd->ipath_piobcnt2k +
- dd->ipath_piobcnt4k -
- dd->ipath_lastport_piobuf);
- /* start again, if necessary */
- dd->ipath_nosma_secs = 0;
- } else
- ipath_cdbg(SMA, "No SMA bufs avail %u tries, "
- "after %u seconds\n",
- dd->ipath_nosma_bufs,
- dd->ipath_nosma_secs);
- }
-
done:
mod_timer(&dd->ipath_stats_timer, jiffies + HZ * 5);
}
diff --git a/drivers/infiniband/hw/ipath/ipath_sysfs.c b/drivers/infiniband/hw/ipath/ipath_sysfs.c
index f323791cc495..182de34f9f47 100644
--- a/drivers/infiniband/hw/ipath/ipath_sysfs.c
+++ b/drivers/infiniband/hw/ipath/ipath_sysfs.c
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2006 QLogic, Inc. All rights reserved.
* Copyright (c) 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -34,8 +35,7 @@
#include <linux/pci.h>
#include "ipath_kernel.h"
-#include "ips_common.h"
-#include "ipath_layer.h"
+#include "ipath_common.h"
/**
* ipath_parse_ushort - parse an unsigned short value in an arbitrary base
@@ -75,7 +75,7 @@ bail:
static ssize_t show_version(struct device_driver *dev, char *buf)
{
/* The string printed here is already newline-terminated. */
- return scnprintf(buf, PAGE_SIZE, "%s", ipath_core_version);
+ return scnprintf(buf, PAGE_SIZE, "%s", ib_ipath_version);
}
static ssize_t show_num_units(struct device_driver *dev, char *buf)
@@ -84,99 +84,6 @@ static ssize_t show_num_units(struct device_driver *dev, char *buf)
ipath_count_units(NULL, NULL, NULL));
}
-#define DRIVER_STAT(name, attr) \
- static ssize_t show_stat_##name(struct device_driver *dev, \
- char *buf) \
- { \
- return scnprintf( \
- buf, PAGE_SIZE, "%llu\n", \
- (unsigned long long) ipath_stats.sps_ ##attr); \
- } \
- static DRIVER_ATTR(name, S_IRUGO, show_stat_##name, NULL)
-
-DRIVER_STAT(intrs, ints);
-DRIVER_STAT(err_intrs, errints);
-DRIVER_STAT(errs, errs);
-DRIVER_STAT(pkt_errs, pkterrs);
-DRIVER_STAT(crc_errs, crcerrs);
-DRIVER_STAT(hw_errs, hwerrs);
-DRIVER_STAT(ib_link, iblink);
-DRIVER_STAT(port0_pkts, port0pkts);
-DRIVER_STAT(ether_spkts, ether_spkts);
-DRIVER_STAT(ether_rpkts, ether_rpkts);
-DRIVER_STAT(sma_spkts, sma_spkts);
-DRIVER_STAT(sma_rpkts, sma_rpkts);
-DRIVER_STAT(hdrq_full, hdrqfull);
-DRIVER_STAT(etid_full, etidfull);
-DRIVER_STAT(no_piobufs, nopiobufs);
-DRIVER_STAT(ports, ports);
-DRIVER_STAT(pkey0, pkeys[0]);
-DRIVER_STAT(pkey1, pkeys[1]);
-DRIVER_STAT(pkey2, pkeys[2]);
-DRIVER_STAT(pkey3, pkeys[3]);
-/* XXX fix the following when dynamic table of devices used */
-DRIVER_STAT(lid0, lid[0]);
-DRIVER_STAT(lid1, lid[1]);
-DRIVER_STAT(lid2, lid[2]);
-DRIVER_STAT(lid3, lid[3]);
-
-DRIVER_STAT(nports, nports);
-DRIVER_STAT(null_intr, nullintr);
-DRIVER_STAT(max_pkts_call, maxpkts_call);
-DRIVER_STAT(avg_pkts_call, avgpkts_call);
-DRIVER_STAT(page_locks, pagelocks);
-DRIVER_STAT(page_unlocks, pageunlocks);
-DRIVER_STAT(krdrops, krdrops);
-/* XXX fix the following when dynamic table of devices used */
-DRIVER_STAT(mlid0, mlid[0]);
-DRIVER_STAT(mlid1, mlid[1]);
-DRIVER_STAT(mlid2, mlid[2]);
-DRIVER_STAT(mlid3, mlid[3]);
-
-static struct attribute *driver_stat_attributes[] = {
- &driver_attr_intrs.attr,
- &driver_attr_err_intrs.attr,
- &driver_attr_errs.attr,
- &driver_attr_pkt_errs.attr,
- &driver_attr_crc_errs.attr,
- &driver_attr_hw_errs.attr,
- &driver_attr_ib_link.attr,
- &driver_attr_port0_pkts.attr,
- &driver_attr_ether_spkts.attr,
- &driver_attr_ether_rpkts.attr,
- &driver_attr_sma_spkts.attr,
- &driver_attr_sma_rpkts.attr,
- &driver_attr_hdrq_full.attr,
- &driver_attr_etid_full.attr,
- &driver_attr_no_piobufs.attr,
- &driver_attr_ports.attr,
- &driver_attr_pkey0.attr,
- &driver_attr_pkey1.attr,
- &driver_attr_pkey2.attr,
- &driver_attr_pkey3.attr,
- &driver_attr_lid0.attr,
- &driver_attr_lid1.attr,
- &driver_attr_lid2.attr,
- &driver_attr_lid3.attr,
- &driver_attr_nports.attr,
- &driver_attr_null_intr.attr,
- &driver_attr_max_pkts_call.attr,
- &driver_attr_avg_pkts_call.attr,
- &driver_attr_page_locks.attr,
- &driver_attr_page_unlocks.attr,
- &driver_attr_krdrops.attr,
- &driver_attr_mlid0.attr,
- &driver_attr_mlid1.attr,
- &driver_attr_mlid2.attr,
- &driver_attr_mlid3.attr,
- NULL
-};
-
-static struct attribute_group driver_stat_attr_group = {
- .name = "stats",
- .attrs = driver_stat_attributes
-};
-
static ssize_t show_status(struct device *dev,
struct device_attribute *attr,
char *buf)
@@ -200,8 +107,8 @@ static const char *ipath_status_str[] = {
"Initted",
"Disabled",
"Admin_Disabled",
- "OIB_SMA",
- "SMA",
+ "", /* This used to be the old "OIB_SMA" status. */
+ "", /* This used to be the old "SMA" status. */
"Present",
"IB_link_up",
"IB_configured",
@@ -272,23 +179,23 @@ static ssize_t store_lid(struct device *dev,
size_t count)
{
struct ipath_devdata *dd = dev_get_drvdata(dev);
- u16 lid;
+ u16 lid = 0;
int ret;
ret = ipath_parse_ushort(buf, &lid);
if (ret < 0)
goto invalid;
- if (lid == 0 || lid >= 0xc000) {
+ if (lid == 0 || lid >= IPATH_MULTICAST_LID_BASE) {
ret = -EINVAL;
goto invalid;
}
- ipath_set_sps_lid(dd, lid, 0);
+ ipath_set_lid(dd, lid, 0);
goto bail;
invalid:
- ipath_dev_err(dd, "attempt to set invalid LID\n");
+ ipath_dev_err(dd, "attempt to set invalid LID 0x%x\n", lid);
bail:
return ret;
}
@@ -313,14 +220,12 @@ static ssize_t store_mlid(struct device *dev,
int ret;
ret = ipath_parse_ushort(buf, &mlid);
- if (ret < 0)
+ if (ret < 0 || mlid < IPATH_MULTICAST_LID_BASE)
goto invalid;
unit = dd->ipath_unit;
dd->ipath_mlid = mlid;
- ipath_stats.sps_mlid[unit] = mlid;
- ipath_layer_intr(dd, IPATH_LAYER_INT_BCAST);
goto bail;
invalid:
@@ -352,7 +257,7 @@ static ssize_t store_guid(struct device *dev,
struct ipath_devdata *dd = dev_get_drvdata(dev);
ssize_t ret;
unsigned short guid[8];
- __be64 nguid;
+ __be64 new_guid;
u8 *ng;
int i;
@@ -361,7 +266,7 @@ static ssize_t store_guid(struct device *dev,
&guid[4], &guid[5], &guid[6], &guid[7]) != 8)
goto invalid;
- ng = (u8 *) &nguid;
+ ng = (u8 *) &new_guid;
for (i = 0; i < 8; i++) {
if (guid[i] > 0xff)
@@ -369,7 +274,10 @@ static ssize_t store_guid(struct device *dev,
ng[i] = guid[i];
}
- dd->ipath_guid = nguid;
+ if (new_guid == 0)
+ goto invalid;
+
+ dd->ipath_guid = new_guid;
dd->ipath_nguid = 1;
ret = strlen(buf);
@@ -392,6 +300,16 @@ static ssize_t show_nguid(struct device *dev,
return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_nguid);
}
+static ssize_t show_nports(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+
+ /* Return the number of user ports available. */
+ return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_cfgports - 1);
+}
+
static ssize_t show_serial(struct device *dev,
struct device_attribute *attr,
char *buf)
@@ -560,7 +478,7 @@ static ssize_t store_link_state(struct device *dev,
if (ret < 0)
goto invalid;
- r = ipath_layer_set_linkstate(dd, state);
+ r = ipath_set_linkstate(dd, state);
if (r < 0) {
ret = r;
goto bail;
@@ -595,7 +513,7 @@ static ssize_t store_mtu(struct device *dev,
if (ret < 0)
goto invalid;
- r = ipath_layer_set_mtu(dd, mtu);
+ r = ipath_set_mtu(dd, mtu);
if (r < 0)
ret = r;
@@ -656,6 +574,33 @@ bail:
return ret;
}
+static ssize_t store_rx_pol_inv(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+ int ret, r;
+ u16 val;
+
+ ret = ipath_parse_ushort(buf, &val);
+ if (ret < 0)
+ goto invalid;
+
+ r = ipath_set_rx_pol_inv(dd, val);
+ if (r < 0) {
+ ret = r;
+ goto bail;
+ }
+
+ goto bail;
+invalid:
+ ipath_dev_err(dd, "attempt to set invalid Rx Polarity invert\n");
+bail:
+ return ret;
+}
+
+
static DRIVER_ATTR(num_units, S_IRUGO, show_num_units, NULL);
static DRIVER_ATTR(version, S_IRUGO, show_version, NULL);
@@ -676,12 +621,14 @@ static DEVICE_ATTR(mlid, S_IWUSR | S_IRUGO, show_mlid, store_mlid);
static DEVICE_ATTR(mtu, S_IWUSR | S_IRUGO, show_mtu, store_mtu);
static DEVICE_ATTR(enabled, S_IWUSR | S_IRUGO, show_enabled, store_enabled);
static DEVICE_ATTR(nguid, S_IRUGO, show_nguid, NULL);
+static DEVICE_ATTR(nports, S_IRUGO, show_nports, NULL);
static DEVICE_ATTR(reset, S_IWUSR, NULL, store_reset);
static DEVICE_ATTR(serial, S_IRUGO, show_serial, NULL);
static DEVICE_ATTR(status, S_IRUGO, show_status, NULL);
static DEVICE_ATTR(status_str, S_IRUGO, show_status_str, NULL);
static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL);
static DEVICE_ATTR(unit, S_IRUGO, show_unit, NULL);
+static DEVICE_ATTR(rx_pol_inv, S_IWUSR, NULL, store_rx_pol_inv);
static struct attribute *dev_attributes[] = {
&dev_attr_guid.attr,
@@ -690,12 +637,14 @@ static struct attribute *dev_attributes[] = {
&dev_attr_mlid.attr,
&dev_attr_mtu.attr,
&dev_attr_nguid.attr,
+ &dev_attr_nports.attr,
&dev_attr_serial.attr,
&dev_attr_status.attr,
&dev_attr_status_str.attr,
&dev_attr_boardversion.attr,
&dev_attr_unit.attr,
&dev_attr_enabled.attr,
+ &dev_attr_rx_pol_inv.attr,
NULL
};
@@ -734,20 +683,12 @@ int ipath_driver_create_group(struct device_driver *drv)
int ret;
ret = sysfs_create_group(&drv->kobj, &driver_attr_group);
- if (ret)
- goto bail;
- ret = sysfs_create_group(&drv->kobj, &driver_stat_attr_group);
- if (ret)
- sysfs_remove_group(&drv->kobj, &driver_attr_group);
-
-bail:
return ret;
}
void ipath_driver_remove_group(struct device_driver *drv)
{
- sysfs_remove_group(&drv->kobj, &driver_stat_attr_group);
sysfs_remove_group(&drv->kobj, &driver_attr_group);
}
diff --git a/drivers/infiniband/hw/ipath/ipath_uc.c b/drivers/infiniband/hw/ipath/ipath_uc.c
index 0d6dbc0a541e..e636cfd67a82 100644
--- a/drivers/infiniband/hw/ipath/ipath_uc.c
+++ b/drivers/infiniband/hw/ipath/ipath_uc.c
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2006 QLogic, Inc. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -31,7 +32,7 @@
*/
#include "ipath_verbs.h"
-#include "ips_common.h"
+#include "ipath_kernel.h"
/* cut down ridiculously long IB macro names */
#define OP(x) IB_OPCODE_UC_##x
@@ -61,90 +62,40 @@ static void complete_last_send(struct ipath_qp *qp, struct ipath_swqe *wqe,
}
/**
- * ipath_do_uc_send - do a send on a UC queue
- * @data: contains a pointer to the QP to send on
+ * ipath_make_uc_req - construct a request packet (SEND, RDMA write)
+ * @qp: a pointer to the QP
+ * @ohdr: a pointer to the IB header being constructed
+ * @pmtu: the path MTU
+ * @bth0p: pointer to the BTH opcode word
+ * @bth2p: pointer to the BTH PSN word
*
- * Process entries in the send work queue until the queue is exhausted.
- * Only allow one CPU to send a packet per QP (tasklet).
- * Otherwise, after we drop the QP lock, two threads could send
- * packets out of order.
- * This is similar to ipath_do_rc_send() below except we don't have
- * timeouts or resends.
+ * Return 1 if constructed; otherwise, return 0.
+ * Note that the QP s_lock must be held and interrupts disabled.
*/
-void ipath_do_uc_send(unsigned long data)
+int ipath_make_uc_req(struct ipath_qp *qp,
+ struct ipath_other_headers *ohdr,
+ u32 pmtu, u32 *bth0p, u32 *bth2p)
{
- struct ipath_qp *qp = (struct ipath_qp *)data;
- struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
struct ipath_swqe *wqe;
- unsigned long flags;
- u16 lrh0;
u32 hwords;
- u32 nwords;
- u32 extra_bytes;
u32 bth0;
- u32 bth2;
- u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
u32 len;
- struct ipath_other_headers *ohdr;
struct ib_wc wc;
- if (test_and_set_bit(IPATH_S_BUSY, &qp->s_flags))
- goto bail;
-
- if (unlikely(qp->remote_ah_attr.dlid ==
- ipath_layer_get_lid(dev->dd))) {
- /* Pass in an uninitialized ib_wc to save stack space. */
- ipath_ruc_loopback(qp, &wc);
- clear_bit(IPATH_S_BUSY, &qp->s_flags);
- goto bail;
- }
-
- ohdr = &qp->s_hdr.u.oth;
- if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
- ohdr = &qp->s_hdr.u.l.oth;
-
-again:
- /* Check for a constructed packet to be sent. */
- if (qp->s_hdrwords != 0) {
- /*
- * If no PIO bufs are available, return.
- * An interrupt will call ipath_ib_piobufavail()
- * when one is available.
- */
- if (ipath_verbs_send(dev->dd, qp->s_hdrwords,
- (u32 *) &qp->s_hdr,
- qp->s_cur_size,
- qp->s_cur_sge)) {
- ipath_no_bufs_available(qp, dev);
- goto bail;
- }
- dev->n_unicast_xmit++;
- /* Record that we sent the packet and s_hdr is empty. */
- qp->s_hdrwords = 0;
- }
-
- lrh0 = IPS_LRH_BTH;
- /* header size in 32-bit words LRH+BTH = (8+12)/4. */
- hwords = 5;
-
- /*
- * The lock is needed to synchronize between
- * setting qp->s_ack_state and post_send().
- */
- spin_lock_irqsave(&qp->s_lock, flags);
-
if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK))
goto done;
- bth0 = ipath_layer_get_pkey(dev->dd, qp->s_pkey_index);
+ /* header size in 32-bit words LRH+BTH = (8+12)/4. */
+ hwords = 5;
+ bth0 = 0;
- /* Send a request. */
+ /* Get the next send request. */
wqe = get_swqe_ptr(qp, qp->s_last);
switch (qp->s_state) {
default:
/*
- * Signal the completion of the last send (if there is
- * one).
+ * Signal the completion of the last send
+ * (if there is one).
*/
if (qp->s_last != qp->s_tail)
complete_last_send(qp, wqe, &wc);
@@ -257,61 +208,16 @@ again:
}
break;
}
- bth2 = qp->s_next_psn++ & IPS_PSN_MASK;
qp->s_len -= len;
- bth0 |= qp->s_state << 24;
-
- spin_unlock_irqrestore(&qp->s_lock, flags);
-
- /* Construct the header. */
- extra_bytes = (4 - len) & 3;
- nwords = (len + extra_bytes) >> 2;
- if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
- /* Header size in 32-bit words. */
- hwords += 10;
- lrh0 = IPS_LRH_GRH;
- qp->s_hdr.u.l.grh.version_tclass_flow =
- cpu_to_be32((6 << 28) |
- (qp->remote_ah_attr.grh.traffic_class
- << 20) |
- qp->remote_ah_attr.grh.flow_label);
- qp->s_hdr.u.l.grh.paylen =
- cpu_to_be16(((hwords - 12) + nwords +
- SIZE_OF_CRC) << 2);
- /* next_hdr is defined by C8-7 in ch. 8.4.1 */
- qp->s_hdr.u.l.grh.next_hdr = 0x1B;
- qp->s_hdr.u.l.grh.hop_limit =
- qp->remote_ah_attr.grh.hop_limit;
- /* The SGID is 32-bit aligned. */
- qp->s_hdr.u.l.grh.sgid.global.subnet_prefix =
- dev->gid_prefix;
- qp->s_hdr.u.l.grh.sgid.global.interface_id =
- ipath_layer_get_guid(dev->dd);
- qp->s_hdr.u.l.grh.dgid = qp->remote_ah_attr.grh.dgid;
- }
qp->s_hdrwords = hwords;
qp->s_cur_sge = &qp->s_sge;
qp->s_cur_size = len;
- lrh0 |= qp->remote_ah_attr.sl << 4;
- qp->s_hdr.lrh[0] = cpu_to_be16(lrh0);
- /* DEST LID */
- qp->s_hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
- qp->s_hdr.lrh[2] = cpu_to_be16(hwords + nwords + SIZE_OF_CRC);
- qp->s_hdr.lrh[3] = cpu_to_be16(ipath_layer_get_lid(dev->dd));
- bth0 |= extra_bytes << 20;
- ohdr->bth[0] = cpu_to_be32(bth0);
- ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
- ohdr->bth[2] = cpu_to_be32(bth2);
-
- /* Check for more work to do. */
- goto again;
+ *bth0p = bth0 | (qp->s_state << 24);
+ *bth2p = qp->s_next_psn++ & IPATH_PSN_MASK;
+ return 1;
done:
- spin_unlock_irqrestore(&qp->s_lock, flags);
- clear_bit(IPATH_S_BUSY, &qp->s_flags);
-
-bail:
- return;
+ return 0;
}
/**
@@ -335,12 +241,15 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
u32 hdrsize;
u32 psn;
u32 pad;
- unsigned long flags;
struct ib_wc wc;
u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
struct ib_reth *reth;
int header_in_data;
+ /* Validate the SLID. See Ch. 9.6.1.5 */
+ if (unlikely(be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid))
+ goto done;
+
/* Check for GRH */
if (!has_grh) {
ohdr = &hdr->u.oth;
@@ -356,8 +265,7 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
* size to 56 bytes so the last 4 bytes of
* the BTH header (PSN) is in the data buffer.
*/
- header_in_data =
- ipath_layer_get_rcvhdrentsize(dev->dd) == 16;
+ header_in_data = dev->dd->ipath_rcvhdrentsize == 16;
if (header_in_data) {
psn = be32_to_cpu(((__be32 *) data)[0]);
data += sizeof(__be32);
@@ -373,8 +281,6 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
wc.imm_data = 0;
wc.wc_flags = 0;
- spin_lock_irqsave(&qp->r_rq.lock, flags);
-
/* Compare the PSN verses the expected PSN. */
if (unlikely(ipath_cmp24(psn, qp->r_psn) != 0)) {
/*
@@ -535,12 +441,13 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
if (qp->r_len != 0) {
u32 rkey = be32_to_cpu(reth->rkey);
u64 vaddr = be64_to_cpu(reth->vaddr);
+ int ok;
/* Check rkey */
- if (unlikely(!ipath_rkey_ok(
- dev, &qp->r_sge, qp->r_len,
- vaddr, rkey,
- IB_ACCESS_REMOTE_WRITE))) {
+ ok = ipath_rkey_ok(qp, &qp->r_sge, qp->r_len,
+ vaddr, rkey,
+ IB_ACCESS_REMOTE_WRITE);
+ if (unlikely(!ok)) {
dev->n_pkt_drops++;
goto done;
}
@@ -558,8 +465,7 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
}
if (opcode == OP(RDMA_WRITE_ONLY))
goto rdma_last;
- else if (opcode ==
- OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
+ else if (opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
goto rdma_last_imm;
/* FALLTHROUGH */
case OP(RDMA_WRITE_MIDDLE):
@@ -592,9 +498,9 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
dev->n_pkt_drops++;
goto done;
}
- if (qp->r_reuse_sge) {
+ if (qp->r_reuse_sge)
qp->r_reuse_sge = 0;
- } else if (!ipath_get_rwqe(qp, 1)) {
+ else if (!ipath_get_rwqe(qp, 1)) {
dev->n_pkt_drops++;
goto done;
}
@@ -631,15 +537,11 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
default:
/* Drop packet for unknown opcodes. */
- spin_unlock_irqrestore(&qp->r_rq.lock, flags);
dev->n_pkt_drops++;
- goto bail;
+ goto done;
}
qp->r_psn++;
qp->r_state = opcode;
done:
- spin_unlock_irqrestore(&qp->r_rq.lock, flags);
-
-bail:
return;
}
diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c
index e606daf83210..49f1102af8b3 100644
--- a/drivers/infiniband/hw/ipath/ipath_ud.c
+++ b/drivers/infiniband/hw/ipath/ipath_ud.c
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2006 QLogic, Inc. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -33,7 +34,52 @@
#include <rdma/ib_smi.h>
#include "ipath_verbs.h"
-#include "ips_common.h"
+#include "ipath_kernel.h"
+
+static int init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe,
+ u32 *lengthp, struct ipath_sge_state *ss)
+{
+ int user = to_ipd(qp->ibqp.pd)->user;
+ int i, j, ret;
+ struct ib_wc wc;
+
+ *lengthp = 0;
+ for (i = j = 0; i < wqe->num_sge; i++) {
+ if (wqe->sg_list[i].length == 0)
+ continue;
+ /* Check LKEY */
+ if ((user && wqe->sg_list[i].lkey == 0) ||
+ !ipath_lkey_ok(qp, j ? &ss->sg_list[j - 1] : &ss->sge,
+ &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE))
+ goto bad_lkey;
+ *lengthp += wqe->sg_list[i].length;
+ j++;
+ }
+ ss->num_sge = j;
+ ret = 1;
+ goto bail;
+
+bad_lkey:
+ wc.wr_id = wqe->wr_id;
+ wc.status = IB_WC_LOC_PROT_ERR;
+ wc.opcode = IB_WC_RECV;
+ wc.vendor_err = 0;
+ wc.byte_len = 0;
+ wc.imm_data = 0;
+ wc.qp_num = qp->ibqp.qp_num;
+ wc.src_qp = 0;
+ wc.wc_flags = 0;
+ wc.pkey_index = 0;
+ wc.slid = 0;
+ wc.sl = 0;
+ wc.dlid_path_bits = 0;
+ wc.port_num = 0;
+ /* Signal solicited completion event. */
+ ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
+ ret = 0;
+bail:
+ return ret;
+}
/**
* ipath_ud_loopback - handle send on loopback QPs
@@ -45,6 +91,8 @@
*
* This is called from ipath_post_ud_send() to forward a WQE addressed
* to the same HCA.
+ * Note that the receive interrupt handler may be calling ipath_ud_rcv()
+ * while this is being called.
*/
static void ipath_ud_loopback(struct ipath_qp *sqp,
struct ipath_sge_state *ss,
@@ -59,7 +107,11 @@ static void ipath_ud_loopback(struct ipath_qp *sqp,
struct ipath_srq *srq;
struct ipath_sge_state rsge;
struct ipath_sge *sge;
+ struct ipath_rwq *wq;
struct ipath_rwqe *wqe;
+ void (*handler)(struct ib_event *, void *);
+ u32 tail;
+ u32 rlen;
qp = ipath_lookup_qpn(&dev->qp_table, wr->wr.ud.remote_qpn);
if (!qp)
@@ -93,6 +145,13 @@ static void ipath_ud_loopback(struct ipath_qp *sqp,
wc->imm_data = 0;
}
+ if (wr->num_sge > 1) {
+ rsge.sg_list = kmalloc((wr->num_sge - 1) *
+ sizeof(struct ipath_sge),
+ GFP_ATOMIC);
+ } else
+ rsge.sg_list = NULL;
+
/*
* Get the next work request entry to find where to put the data.
* Note that it is safe to drop the lock after changing rq->tail
@@ -100,37 +159,52 @@ static void ipath_ud_loopback(struct ipath_qp *sqp,
*/
if (qp->ibqp.srq) {
srq = to_isrq(qp->ibqp.srq);
+ handler = srq->ibsrq.event_handler;
rq = &srq->rq;
} else {
srq = NULL;
+ handler = NULL;
rq = &qp->r_rq;
}
+
spin_lock_irqsave(&rq->lock, flags);
- if (rq->tail == rq->head) {
- spin_unlock_irqrestore(&rq->lock, flags);
- dev->n_pkt_drops++;
- goto done;
+ wq = rq->wq;
+ tail = wq->tail;
+ while (1) {
+ if (unlikely(tail == wq->head)) {
+ spin_unlock_irqrestore(&rq->lock, flags);
+ dev->n_pkt_drops++;
+ goto bail_sge;
+ }
+ wqe = get_rwqe_ptr(rq, tail);
+ if (++tail >= rq->size)
+ tail = 0;
+ if (init_sge(qp, wqe, &rlen, &rsge))
+ break;
+ wq->tail = tail;
}
/* Silently drop packets which are too big. */
- wqe = get_rwqe_ptr(rq, rq->tail);
- if (wc->byte_len > wqe->length) {
+ if (wc->byte_len > rlen) {
spin_unlock_irqrestore(&rq->lock, flags);
dev->n_pkt_drops++;
- goto done;
+ goto bail_sge;
}
+ wq->tail = tail;
wc->wr_id = wqe->wr_id;
- rsge.sge = wqe->sg_list[0];
- rsge.sg_list = wqe->sg_list + 1;
- rsge.num_sge = wqe->num_sge;
- if (++rq->tail >= rq->size)
- rq->tail = 0;
- if (srq && srq->ibsrq.event_handler) {
+ if (handler) {
u32 n;
- if (rq->head < rq->tail)
- n = rq->size + rq->head - rq->tail;
+ /*
+ * validate head pointer value and compute
+ * the number of remaining WQEs.
+ */
+ n = wq->head;
+ if (n >= rq->size)
+ n = 0;
+ if (n < tail)
+ n += rq->size - tail;
else
- n = rq->head - rq->tail;
+ n -= tail;
if (n < srq->limit) {
struct ib_event ev;
@@ -139,12 +213,12 @@ static void ipath_ud_loopback(struct ipath_qp *sqp,
ev.device = qp->ibqp.device;
ev.element.srq = qp->ibqp.srq;
ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
- srq->ibsrq.event_handler(&ev,
- srq->ibsrq.srq_context);
+ handler(&ev, srq->ibsrq.srq_context);
} else
spin_unlock_irqrestore(&rq->lock, flags);
} else
spin_unlock_irqrestore(&rq->lock, flags);
+
ah_attr = &to_iah(wr->wr.ud.ah)->attr;
if (ah_attr->ah_flags & IB_AH_GRH) {
ipath_copy_sge(&rsge, &ah_attr->grh, sizeof(struct ib_grh));
@@ -185,7 +259,7 @@ static void ipath_ud_loopback(struct ipath_qp *sqp,
wc->src_qp = sqp->ibqp.qp_num;
/* XXX do we know which pkey matched? Only needed for GSI. */
wc->pkey_index = 0;
- wc->slid = ipath_layer_get_lid(dev->dd) |
+ wc->slid = dev->dd->ipath_lid |
(ah_attr->src_path_bits &
((1 << (dev->mkeyprot_resv_lmc & 7)) - 1));
wc->sl = ah_attr->sl;
@@ -195,6 +269,8 @@ static void ipath_ud_loopback(struct ipath_qp *sqp,
ipath_cq_enter(to_icq(qp->ibqp.recv_cq), wc,
wr->send_flags & IB_SEND_SOLICITED);
+bail_sge:
+ kfree(rsge.sg_list);
done:
if (atomic_dec_and_test(&qp->refcount))
wake_up(&qp->wait);
@@ -265,7 +341,7 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr)
if (wr->sg_list[i].length == 0)
continue;
- if (!ipath_lkey_ok(&dev->lk_table, ss.num_sge ?
+ if (!ipath_lkey_ok(qp, ss.num_sge ?
sg_list + ss.num_sge - 1 : &ss.sge,
&wr->sg_list[i], 0)) {
ret = -EINVAL;
@@ -274,6 +350,11 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr)
len += wr->sg_list[i].length;
ss.num_sge++;
}
+ /* Check for invalid packet size. */
+ if (len > dev->dd->ipath_ibmtu) {
+ ret = -EINVAL;
+ goto bail;
+ }
extra_bytes = (4 - len) & 3;
nwords = (len + extra_bytes) >> 2;
@@ -283,8 +364,8 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr)
ret = -EINVAL;
goto bail;
}
- if (ah_attr->dlid >= IPS_MULTICAST_LID_BASE) {
- if (ah_attr->dlid != IPS_PERMISSIVE_LID)
+ if (ah_attr->dlid >= IPATH_MULTICAST_LID_BASE) {
+ if (ah_attr->dlid != IPATH_PERMISSIVE_LID)
dev->n_multicast_xmit++;
else
dev->n_unicast_xmit++;
@@ -292,7 +373,7 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr)
dev->n_unicast_xmit++;
lid = ah_attr->dlid &
~((1 << (dev->mkeyprot_resv_lmc & 7)) - 1);
- if (unlikely(lid == ipath_layer_get_lid(dev->dd))) {
+ if (unlikely(lid == dev->dd->ipath_lid)) {
/*
* Pass in an uninitialized ib_wc to save stack
* space.
@@ -304,7 +385,7 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr)
if (ah_attr->ah_flags & IB_AH_GRH) {
/* Header size in 32-bit words. */
hwords = 17;
- lrh0 = IPS_LRH_GRH;
+ lrh0 = IPATH_LRH_GRH;
ohdr = &qp->s_hdr.u.l.oth;
qp->s_hdr.u.l.grh.version_tclass_flow =
cpu_to_be32((6 << 28) |
@@ -321,7 +402,7 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr)
qp->s_hdr.u.l.grh.sgid.global.subnet_prefix =
dev->gid_prefix;
qp->s_hdr.u.l.grh.sgid.global.interface_id =
- ipath_layer_get_guid(dev->dd);
+ dev->dd->ipath_guid;
qp->s_hdr.u.l.grh.dgid = ah_attr->grh.dgid;
/*
* Don't worry about sending to locally attached multicast
@@ -330,7 +411,7 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr)
} else {
/* Header size in 32-bit words. */
hwords = 7;
- lrh0 = IPS_LRH_BTH;
+ lrh0 = IPATH_LRH_BTH;
ohdr = &qp->s_hdr.u.oth;
}
if (wr->opcode == IB_WR_SEND_WITH_IMM) {
@@ -351,7 +432,7 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr)
qp->s_hdr.lrh[0] = cpu_to_be16(lrh0);
qp->s_hdr.lrh[1] = cpu_to_be16(ah_attr->dlid); /* DEST LID */
qp->s_hdr.lrh[2] = cpu_to_be16(hwords + nwords + SIZE_OF_CRC);
- lid = ipath_layer_get_lid(dev->dd);
+ lid = dev->dd->ipath_lid;
if (lid) {
lid |= ah_attr->src_path_bits &
((1 << (dev->mkeyprot_resv_lmc & 7)) - 1);
@@ -361,18 +442,18 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr)
if (wr->send_flags & IB_SEND_SOLICITED)
bth0 |= 1 << 23;
bth0 |= extra_bytes << 20;
- bth0 |= qp->ibqp.qp_type == IB_QPT_SMI ? IPS_DEFAULT_P_KEY :
- ipath_layer_get_pkey(dev->dd, qp->s_pkey_index);
+ bth0 |= qp->ibqp.qp_type == IB_QPT_SMI ? IPATH_DEFAULT_P_KEY :
+ ipath_get_pkey(dev->dd, qp->s_pkey_index);
ohdr->bth[0] = cpu_to_be32(bth0);
/*
* Use the multicast QP if the destination LID is a multicast LID.
*/
- ohdr->bth[1] = ah_attr->dlid >= IPS_MULTICAST_LID_BASE &&
- ah_attr->dlid != IPS_PERMISSIVE_LID ?
- __constant_cpu_to_be32(IPS_MULTICAST_QPN) :
+ ohdr->bth[1] = ah_attr->dlid >= IPATH_MULTICAST_LID_BASE &&
+ ah_attr->dlid != IPATH_PERMISSIVE_LID ?
+ __constant_cpu_to_be32(IPATH_MULTICAST_QPN) :
cpu_to_be32(wr->wr.ud.remote_qpn);
/* XXX Could lose a PSN count but not worth locking */
- ohdr->bth[2] = cpu_to_be32(qp->s_next_psn++ & IPS_PSN_MASK);
+ ohdr->bth[2] = cpu_to_be32(qp->s_next_psn++ & IPATH_PSN_MASK);
/*
* Qkeys with the high order bit set mean use the
* qkey from the QP context instead of the WR (see 10.2.5).
@@ -427,13 +508,9 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
int opcode;
u32 hdrsize;
u32 pad;
- unsigned long flags;
struct ib_wc wc;
u32 qkey;
u32 src_qp;
- struct ipath_rq *rq;
- struct ipath_srq *srq;
- struct ipath_rwqe *wqe;
u16 dlid;
int header_in_data;
@@ -452,8 +529,7 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
* the eager header buffer size to 56 bytes so the last 12
* bytes of the IB header is in the data buffer.
*/
- header_in_data =
- ipath_layer_get_rcvhdrentsize(dev->dd) == 16;
+ header_in_data = dev->dd->ipath_rcvhdrentsize == 16;
if (header_in_data) {
qkey = be32_to_cpu(((__be32 *) data)[1]);
src_qp = be32_to_cpu(((__be32 *) data)[2]);
@@ -463,7 +539,7 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
src_qp = be32_to_cpu(ohdr->u.ud.deth[1]);
}
}
- src_qp &= IPS_QPN_MASK;
+ src_qp &= IPATH_QPN_MASK;
/*
* Check that the permissive LID is only used on QP0
@@ -541,56 +617,28 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
/*
* Get the next work request entry to find where to put the data.
- * Note that it is safe to drop the lock after changing rq->tail
- * since ipath_post_receive() won't fill the empty slot.
*/
- if (qp->ibqp.srq) {
- srq = to_isrq(qp->ibqp.srq);
- rq = &srq->rq;
- } else {
- srq = NULL;
- rq = &qp->r_rq;
- }
- spin_lock_irqsave(&rq->lock, flags);
- if (rq->tail == rq->head) {
- spin_unlock_irqrestore(&rq->lock, flags);
- dev->n_pkt_drops++;
+ if (qp->r_reuse_sge)
+ qp->r_reuse_sge = 0;
+ else if (!ipath_get_rwqe(qp, 0)) {
+ /*
+ * Count VL15 packets dropped due to no receive buffer.
+ * Otherwise, count them as buffer overruns since usually
+ * the HW will be able to receive packets even if there are
+ * no QPs with posted receive buffers.
+ */
+ if (qp->ibqp.qp_num == 0)
+ dev->n_vl15_dropped++;
+ else
+ dev->rcv_errors++;
goto bail;
}
/* Silently drop packets which are too big. */
- wqe = get_rwqe_ptr(rq, rq->tail);
- if (wc.byte_len > wqe->length) {
- spin_unlock_irqrestore(&rq->lock, flags);
+ if (wc.byte_len > qp->r_len) {
+ qp->r_reuse_sge = 1;
dev->n_pkt_drops++;
goto bail;
}
- wc.wr_id = wqe->wr_id;
- qp->r_sge.sge = wqe->sg_list[0];
- qp->r_sge.sg_list = wqe->sg_list + 1;
- qp->r_sge.num_sge = wqe->num_sge;
- if (++rq->tail >= rq->size)
- rq->tail = 0;
- if (srq && srq->ibsrq.event_handler) {
- u32 n;
-
- if (rq->head < rq->tail)
- n = rq->size + rq->head - rq->tail;
- else
- n = rq->head - rq->tail;
- if (n < srq->limit) {
- struct ib_event ev;
-
- srq->limit = 0;
- spin_unlock_irqrestore(&rq->lock, flags);
- ev.device = qp->ibqp.device;
- ev.element.srq = qp->ibqp.srq;
- ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
- srq->ibsrq.event_handler(&ev,
- srq->ibsrq.srq_context);
- } else
- spin_unlock_irqrestore(&rq->lock, flags);
- } else
- spin_unlock_irqrestore(&rq->lock, flags);
if (has_grh) {
ipath_copy_sge(&qp->r_sge, &hdr->u.l.grh,
sizeof(struct ib_grh));
@@ -599,6 +647,7 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
ipath_skip_sge(&qp->r_sge, sizeof(struct ib_grh));
ipath_copy_sge(&qp->r_sge, data,
wc.byte_len - sizeof(struct ib_grh));
+ wc.wr_id = qp->r_wr_id;
wc.status = IB_WC_SUCCESS;
wc.opcode = IB_WC_RECV;
wc.vendor_err = 0;
@@ -612,7 +661,7 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
/*
* Save the LMC lower bits if the destination LID is a unicast LID.
*/
- wc.dlid_path_bits = dlid >= IPS_MULTICAST_LID_BASE ? 0 :
+ wc.dlid_path_bits = dlid >= IPATH_MULTICAST_LID_BASE ? 0 :
dlid & ((1 << (dev->mkeyprot_resv_lmc & 7)) - 1);
/* Signal completion event if the solicited bit is set. */
ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
diff --git a/drivers/infiniband/hw/ipath/ipath_user_pages.c b/drivers/infiniband/hw/ipath/ipath_user_pages.c
index 2bb08afc86d0..413754b1d8a2 100644
--- a/drivers/infiniband/hw/ipath/ipath_user_pages.c
+++ b/drivers/infiniband/hw/ipath/ipath_user_pages.c
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2006 QLogic, Inc. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -57,17 +58,6 @@ static int __get_user_pages(unsigned long start_page, size_t num_pages,
size_t got;
int ret;
-#if 0
- /*
- * XXX - causes MPI programs to fail, haven't had time to check
- * yet
- */
- if (!capable(CAP_IPC_LOCK)) {
- ret = -EPERM;
- goto bail;
- }
-#endif
-
lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >>
PAGE_SHIFT;
@@ -100,6 +90,62 @@ bail:
}
/**
+ * ipath_map_page - a safety wrapper around pci_map_page()
+ *
+ * A dma_addr of all 0's is interpreted by the chip as "disabled".
+ * Unfortunately, it can also be a valid dma_addr returned on some
+ * architectures.
+ *
+ * The powerpc iommu assigns dma_addrs in ascending order, so we don't
+ * have to bother with retries or mapping a dummy page to insure we
+ * don't just get the same mapping again.
+ *
+ * I'm sure we won't be so lucky with other iommu's, so FIXME.
+ */
+dma_addr_t ipath_map_page(struct pci_dev *hwdev, struct page *page,
+ unsigned long offset, size_t size, int direction)
+{
+ dma_addr_t phys;
+
+ phys = pci_map_page(hwdev, page, offset, size, direction);
+
+ if (phys == 0) {
+ pci_unmap_page(hwdev, phys, size, direction);
+ phys = pci_map_page(hwdev, page, offset, size, direction);
+ /*
+ * FIXME: If we get 0 again, we should keep this page,
+ * map another, then free the 0 page.
+ */
+ }
+
+ return phys;
+}
+
+/**
+ * ipath_map_single - a safety wrapper around pci_map_single()
+ *
+ * Same idea as ipath_map_page().
+ */
+dma_addr_t ipath_map_single(struct pci_dev *hwdev, void *ptr, size_t size,
+ int direction)
+{
+ dma_addr_t phys;
+
+ phys = pci_map_single(hwdev, ptr, size, direction);
+
+ if (phys == 0) {
+ pci_unmap_single(hwdev, phys, size, direction);
+ phys = pci_map_single(hwdev, ptr, size, direction);
+ /*
+ * FIXME: If we get 0 again, we should keep this page,
+ * map another, then free the 0 page.
+ */
+ }
+
+ return phys;
+}
+
+/**
* ipath_get_user_pages - lock user pages into memory
* @start_page: the start page
* @num_pages: the number of pages
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c
index 28fdbdaa789d..42eaed88c281 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.c
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2006 QLogic, Inc. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -32,14 +33,12 @@
#include <rdma/ib_mad.h>
#include <rdma/ib_user_verbs.h>
+#include <linux/io.h>
#include <linux/utsname.h>
#include "ipath_kernel.h"
#include "ipath_verbs.h"
-#include "ips_common.h"
-
-/* Not static, because we don't want the compiler removing it */
-const char ipath_verbs_version[] = "ipath_verbs " IPATH_IDSTR;
+#include "ipath_common.h"
static unsigned int ib_ipath_qp_table_size = 251;
module_param_named(qp_table_size, ib_ipath_qp_table_size, uint, S_IRUGO);
@@ -51,13 +50,66 @@ module_param_named(lkey_table_size, ib_ipath_lkey_table_size, uint,
MODULE_PARM_DESC(lkey_table_size,
"LKEY table size in bits (2^n, 1 <= n <= 23)");
-unsigned int ib_ipath_debug; /* debug mask */
-module_param_named(debug, ib_ipath_debug, uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(debug, "Verbs debug mask");
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("PathScale <support@pathscale.com>");
-MODULE_DESCRIPTION("Pathscale InfiniPath driver");
+static unsigned int ib_ipath_max_pds = 0xFFFF;
+module_param_named(max_pds, ib_ipath_max_pds, uint, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(max_pds,
+ "Maximum number of protection domains to support");
+
+static unsigned int ib_ipath_max_ahs = 0xFFFF;
+module_param_named(max_ahs, ib_ipath_max_ahs, uint, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(max_ahs, "Maximum number of address handles to support");
+
+unsigned int ib_ipath_max_cqes = 0x2FFFF;
+module_param_named(max_cqes, ib_ipath_max_cqes, uint, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(max_cqes,
+ "Maximum number of completion queue entries to support");
+
+unsigned int ib_ipath_max_cqs = 0x1FFFF;
+module_param_named(max_cqs, ib_ipath_max_cqs, uint, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(max_cqs, "Maximum number of completion queues to support");
+
+unsigned int ib_ipath_max_qp_wrs = 0x3FFF;
+module_param_named(max_qp_wrs, ib_ipath_max_qp_wrs, uint,
+ S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(max_qp_wrs, "Maximum number of QP WRs to support");
+
+unsigned int ib_ipath_max_qps = 16384;
+module_param_named(max_qps, ib_ipath_max_qps, uint, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(max_qps, "Maximum number of QPs to support");
+
+unsigned int ib_ipath_max_sges = 0x60;
+module_param_named(max_sges, ib_ipath_max_sges, uint, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(max_sges, "Maximum number of SGEs to support");
+
+unsigned int ib_ipath_max_mcast_grps = 16384;
+module_param_named(max_mcast_grps, ib_ipath_max_mcast_grps, uint,
+ S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(max_mcast_grps,
+ "Maximum number of multicast groups to support");
+
+unsigned int ib_ipath_max_mcast_qp_attached = 16;
+module_param_named(max_mcast_qp_attached, ib_ipath_max_mcast_qp_attached,
+ uint, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(max_mcast_qp_attached,
+ "Maximum number of attached QPs to support");
+
+unsigned int ib_ipath_max_srqs = 1024;
+module_param_named(max_srqs, ib_ipath_max_srqs, uint, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(max_srqs, "Maximum number of SRQs to support");
+
+unsigned int ib_ipath_max_srq_sges = 128;
+module_param_named(max_srq_sges, ib_ipath_max_srq_sges,
+ uint, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(max_srq_sges, "Maximum number of SRQ SGEs to support");
+
+unsigned int ib_ipath_max_srq_wrs = 0x1FFFF;
+module_param_named(max_srq_wrs, ib_ipath_max_srq_wrs,
+ uint, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(max_srq_wrs, "Maximum number of SRQ WRs to support");
+
+static unsigned int ib_ipath_disable_sma;
+module_param_named(disable_sma, ib_ipath_disable_sma, uint, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(disable_sma, "Disable the SMA");
const int ib_ipath_state_ops[IB_QPS_ERR + 1] = {
[IB_QPS_RESET] = 0,
@@ -71,6 +123,16 @@ const int ib_ipath_state_ops[IB_QPS_ERR + 1] = {
[IB_QPS_ERR] = 0,
};
+struct ipath_ucontext {
+ struct ib_ucontext ibucontext;
+};
+
+static inline struct ipath_ucontext *to_iucontext(struct ib_ucontext
+ *ibucontext)
+{
+ return container_of(ibucontext, struct ipath_ucontext, ibucontext);
+}
+
/*
* Translate ib_wr_opcode into ib_wc_opcode.
*/
@@ -137,10 +199,6 @@ void ipath_skip_sge(struct ipath_sge_state *ss, u32 length)
{
struct ipath_sge *sge = &ss->sge;
- while (length > sge->sge_length) {
- length -= sge->sge_length;
- ss->sge = *ss->sg_list++;
- }
while (length) {
u32 len = sge->length;
@@ -193,7 +251,7 @@ static int ipath_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
switch (qp->ibqp.qp_type) {
case IB_QPT_UC:
case IB_QPT_RC:
- err = ipath_post_rc_send(qp, wr);
+ err = ipath_post_ruc_send(qp, wr);
break;
case IB_QPT_SMI:
@@ -227,11 +285,12 @@ static int ipath_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
struct ib_recv_wr **bad_wr)
{
struct ipath_qp *qp = to_iqp(ibqp);
+ struct ipath_rwq *wq = qp->r_rq.wq;
unsigned long flags;
int ret;
/* Check that state is OK to post receive. */
- if (!(ib_ipath_state_ops[qp->state] & IPATH_POST_RECV_OK)) {
+ if (!(ib_ipath_state_ops[qp->state] & IPATH_POST_RECV_OK) || !wq) {
*bad_wr = wr;
ret = -EINVAL;
goto bail;
@@ -240,59 +299,31 @@ static int ipath_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
for (; wr; wr = wr->next) {
struct ipath_rwqe *wqe;
u32 next;
- int i, j;
+ int i;
- if (wr->num_sge > qp->r_rq.max_sge) {
+ if ((unsigned) wr->num_sge > qp->r_rq.max_sge) {
*bad_wr = wr;
ret = -ENOMEM;
goto bail;
}
spin_lock_irqsave(&qp->r_rq.lock, flags);
- next = qp->r_rq.head + 1;
+ next = wq->head + 1;
if (next >= qp->r_rq.size)
next = 0;
- if (next == qp->r_rq.tail) {
+ if (next == wq->tail) {
spin_unlock_irqrestore(&qp->r_rq.lock, flags);
*bad_wr = wr;
ret = -ENOMEM;
goto bail;
}
- wqe = get_rwqe_ptr(&qp->r_rq, qp->r_rq.head);
+ wqe = get_rwqe_ptr(&qp->r_rq, wq->head);
wqe->wr_id = wr->wr_id;
- wqe->sg_list[0].mr = NULL;
- wqe->sg_list[0].vaddr = NULL;
- wqe->sg_list[0].length = 0;
- wqe->sg_list[0].sge_length = 0;
- wqe->length = 0;
- for (i = 0, j = 0; i < wr->num_sge; i++) {
- /* Check LKEY */
- if (to_ipd(qp->ibqp.pd)->user &&
- wr->sg_list[i].lkey == 0) {
- spin_unlock_irqrestore(&qp->r_rq.lock,
- flags);
- *bad_wr = wr;
- ret = -EINVAL;
- goto bail;
- }
- if (wr->sg_list[i].length == 0)
- continue;
- if (!ipath_lkey_ok(
- &to_idev(qp->ibqp.device)->lk_table,
- &wqe->sg_list[j], &wr->sg_list[i],
- IB_ACCESS_LOCAL_WRITE)) {
- spin_unlock_irqrestore(&qp->r_rq.lock,
- flags);
- *bad_wr = wr;
- ret = -EINVAL;
- goto bail;
- }
- wqe->length += wr->sg_list[i].length;
- j++;
- }
- wqe->num_sge = j;
- qp->r_rq.head = next;
+ wqe->num_sge = wr->num_sge;
+ for (i = 0; i < wr->num_sge; i++)
+ wqe->sg_list[i] = wr->sg_list[i];
+ wq->head = next;
spin_unlock_irqrestore(&qp->r_rq.lock, flags);
}
ret = 0;
@@ -327,6 +358,9 @@ static void ipath_qp_rcv(struct ipath_ibdev *dev,
switch (qp->ibqp.qp_type) {
case IB_QPT_SMI:
case IB_QPT_GSI:
+ if (ib_ipath_disable_sma)
+ break;
+ /* FALLTHROUGH */
case IB_QPT_UD:
ipath_ud_rcv(dev, hdr, has_grh, data, tlen, qp);
break;
@@ -345,7 +379,7 @@ static void ipath_qp_rcv(struct ipath_ibdev *dev,
}
/**
- * ipath_ib_rcv - process and incoming packet
+ * ipath_ib_rcv - process an incoming packet
* @arg: the device pointer
* @rhdr: the header of the packet
* @data: the packet data
@@ -354,9 +388,9 @@ static void ipath_qp_rcv(struct ipath_ibdev *dev,
* This is called from ipath_kreceive() to process an incoming packet at
* interrupt level. Tlen is the length of the header + data + CRC in bytes.
*/
-static void ipath_ib_rcv(void *arg, void *rhdr, void *data, u32 tlen)
+void ipath_ib_rcv(struct ipath_ibdev *dev, void *rhdr, void *data,
+ u32 tlen)
{
- struct ipath_ibdev *dev = (struct ipath_ibdev *) arg;
struct ipath_ib_header *hdr = rhdr;
struct ipath_other_headers *ohdr;
struct ipath_qp *qp;
@@ -375,9 +409,9 @@ static void ipath_ib_rcv(void *arg, void *rhdr, void *data, u32 tlen)
/* Check for a valid destination LID (see ch. 7.11.1). */
lid = be16_to_cpu(hdr->lrh[1]);
- if (lid < IPS_MULTICAST_LID_BASE) {
+ if (lid < IPATH_MULTICAST_LID_BASE) {
lid &= ~((1 << (dev->mkeyprot_resv_lmc & 7)) - 1);
- if (unlikely(lid != ipath_layer_get_lid(dev->dd))) {
+ if (unlikely(lid != dev->dd->ipath_lid)) {
dev->rcv_errors++;
goto bail;
}
@@ -385,9 +419,9 @@ static void ipath_ib_rcv(void *arg, void *rhdr, void *data, u32 tlen)
/* Check for GRH */
lnh = be16_to_cpu(hdr->lrh[0]) & 3;
- if (lnh == IPS_LRH_BTH)
+ if (lnh == IPATH_LRH_BTH)
ohdr = &hdr->u.oth;
- else if (lnh == IPS_LRH_GRH)
+ else if (lnh == IPATH_LRH_GRH)
ohdr = &hdr->u.l.oth;
else {
dev->rcv_errors++;
@@ -399,8 +433,8 @@ static void ipath_ib_rcv(void *arg, void *rhdr, void *data, u32 tlen)
dev->opstats[opcode].n_packets++;
/* Get the destination QP number. */
- qp_num = be32_to_cpu(ohdr->bth[1]) & IPS_QPN_MASK;
- if (qp_num == IPS_MULTICAST_QPN) {
+ qp_num = be32_to_cpu(ohdr->bth[1]) & IPATH_QPN_MASK;
+ if (qp_num == IPATH_MULTICAST_QPN) {
struct ipath_mcast *mcast;
struct ipath_mcast_qp *p;
@@ -411,7 +445,7 @@ static void ipath_ib_rcv(void *arg, void *rhdr, void *data, u32 tlen)
}
dev->n_multicast_rcv++;
list_for_each_entry_rcu(p, &mcast->qp_list, list)
- ipath_qp_rcv(dev, hdr, lnh == IPS_LRH_GRH, data,
+ ipath_qp_rcv(dev, hdr, lnh == IPATH_LRH_GRH, data,
tlen, p->qp);
/*
* Notify ipath_multicast_detach() if it is waiting for us
@@ -423,7 +457,7 @@ static void ipath_ib_rcv(void *arg, void *rhdr, void *data, u32 tlen)
qp = ipath_lookup_qpn(&dev->qp_table, qp_num);
if (qp) {
dev->n_unicast_rcv++;
- ipath_qp_rcv(dev, hdr, lnh == IPS_LRH_GRH, data,
+ ipath_qp_rcv(dev, hdr, lnh == IPATH_LRH_GRH, data,
tlen, qp);
/*
* Notify ipath_destroy_qp() if it is waiting
@@ -445,9 +479,8 @@ bail:;
* This is called from ipath_do_rcv_timer() at interrupt level to check for
* QPs which need retransmits and to collect performance numbers.
*/
-static void ipath_ib_timer(void *arg)
+void ipath_ib_timer(struct ipath_ibdev *dev)
{
- struct ipath_ibdev *dev = (struct ipath_ibdev *) arg;
struct ipath_qp *resend = NULL;
struct list_head *last;
struct ipath_qp *qp;
@@ -489,19 +522,19 @@ static void ipath_ib_timer(void *arg)
if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_STARTED &&
--dev->pma_sample_start == 0) {
dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_RUNNING;
- ipath_layer_snapshot_counters(dev->dd, &dev->ipath_sword,
- &dev->ipath_rword,
- &dev->ipath_spkts,
- &dev->ipath_rpkts,
- &dev->ipath_xmit_wait);
+ ipath_snapshot_counters(dev->dd, &dev->ipath_sword,
+ &dev->ipath_rword,
+ &dev->ipath_spkts,
+ &dev->ipath_rpkts,
+ &dev->ipath_xmit_wait);
}
if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_RUNNING) {
if (dev->pma_sample_interval == 0) {
u64 ta, tb, tc, td, te;
dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_DONE;
- ipath_layer_snapshot_counters(dev->dd, &ta, &tb,
- &tc, &td, &te);
+ ipath_snapshot_counters(dev->dd, &ta, &tb,
+ &tc, &td, &te);
dev->ipath_sword = ta - dev->ipath_sword;
dev->ipath_rword = tb - dev->ipath_rword;
@@ -531,6 +564,365 @@ static void ipath_ib_timer(void *arg)
}
}
+static void update_sge(struct ipath_sge_state *ss, u32 length)
+{
+ struct ipath_sge *sge = &ss->sge;
+
+ sge->vaddr += length;
+ sge->length -= length;
+ sge->sge_length -= length;
+ if (sge->sge_length == 0) {
+ if (--ss->num_sge)
+ *sge = *ss->sg_list++;
+ } else if (sge->length == 0 && sge->mr != NULL) {
+ if (++sge->n >= IPATH_SEGSZ) {
+ if (++sge->m >= sge->mr->mapsz)
+ return;
+ sge->n = 0;
+ }
+ sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr;
+ sge->length = sge->mr->map[sge->m]->segs[sge->n].length;
+ }
+}
+
+#ifdef __LITTLE_ENDIAN
+static inline u32 get_upper_bits(u32 data, u32 shift)
+{
+ return data >> shift;
+}
+
+static inline u32 set_upper_bits(u32 data, u32 shift)
+{
+ return data << shift;
+}
+
+static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
+{
+ data <<= ((sizeof(u32) - n) * BITS_PER_BYTE);
+ data >>= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
+ return data;
+}
+#else
+static inline u32 get_upper_bits(u32 data, u32 shift)
+{
+ return data << shift;
+}
+
+static inline u32 set_upper_bits(u32 data, u32 shift)
+{
+ return data >> shift;
+}
+
+static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
+{
+ data >>= ((sizeof(u32) - n) * BITS_PER_BYTE);
+ data <<= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
+ return data;
+}
+#endif
+
+static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss,
+ u32 length)
+{
+ u32 extra = 0;
+ u32 data = 0;
+ u32 last;
+
+ while (1) {
+ u32 len = ss->sge.length;
+ u32 off;
+
+ BUG_ON(len == 0);
+ if (len > length)
+ len = length;
+ if (len > ss->sge.sge_length)
+ len = ss->sge.sge_length;
+ /* If the source address is not aligned, try to align it. */
+ off = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1);
+ if (off) {
+ u32 *addr = (u32 *)((unsigned long)ss->sge.vaddr &
+ ~(sizeof(u32) - 1));
+ u32 v = get_upper_bits(*addr, off * BITS_PER_BYTE);
+ u32 y;
+
+ y = sizeof(u32) - off;
+ if (len > y)
+ len = y;
+ if (len + extra >= sizeof(u32)) {
+ data |= set_upper_bits(v, extra *
+ BITS_PER_BYTE);
+ len = sizeof(u32) - extra;
+ if (len == length) {
+ last = data;
+ break;
+ }
+ __raw_writel(data, piobuf);
+ piobuf++;
+ extra = 0;
+ data = 0;
+ } else {
+ /* Clear unused upper bytes */
+ data |= clear_upper_bytes(v, len, extra);
+ if (len == length) {
+ last = data;
+ break;
+ }
+ extra += len;
+ }
+ } else if (extra) {
+ /* Source address is aligned. */
+ u32 *addr = (u32 *) ss->sge.vaddr;
+ int shift = extra * BITS_PER_BYTE;
+ int ushift = 32 - shift;
+ u32 l = len;
+
+ while (l >= sizeof(u32)) {
+ u32 v = *addr;
+
+ data |= set_upper_bits(v, shift);
+ __raw_writel(data, piobuf);
+ data = get_upper_bits(v, ushift);
+ piobuf++;
+ addr++;
+ l -= sizeof(u32);
+ }
+ /*
+ * We still have 'extra' number of bytes leftover.
+ */
+ if (l) {
+ u32 v = *addr;
+
+ if (l + extra >= sizeof(u32)) {
+ data |= set_upper_bits(v, shift);
+ len -= l + extra - sizeof(u32);
+ if (len == length) {
+ last = data;
+ break;
+ }
+ __raw_writel(data, piobuf);
+ piobuf++;
+ extra = 0;
+ data = 0;
+ } else {
+ /* Clear unused upper bytes */
+ data |= clear_upper_bytes(v, l,
+ extra);
+ if (len == length) {
+ last = data;
+ break;
+ }
+ extra += l;
+ }
+ } else if (len == length) {
+ last = data;
+ break;
+ }
+ } else if (len == length) {
+ u32 w;
+
+ /*
+ * Need to round up for the last dword in the
+ * packet.
+ */
+ w = (len + 3) >> 2;
+ __iowrite32_copy(piobuf, ss->sge.vaddr, w - 1);
+ piobuf += w - 1;
+ last = ((u32 *) ss->sge.vaddr)[w - 1];
+ break;
+ } else {
+ u32 w = len >> 2;
+
+ __iowrite32_copy(piobuf, ss->sge.vaddr, w);
+ piobuf += w;
+
+ extra = len & (sizeof(u32) - 1);
+ if (extra) {
+ u32 v = ((u32 *) ss->sge.vaddr)[w];
+
+ /* Clear unused upper bytes */
+ data = clear_upper_bytes(v, extra, 0);
+ }
+ }
+ update_sge(ss, len);
+ length -= len;
+ }
+ /* Update address before sending packet. */
+ update_sge(ss, length);
+ /* must flush everything written before the trigger word */
+ ipath_flush_wc();
+ __raw_writel(last, piobuf);
+ /* be sure trigger word is written */
+ ipath_flush_wc();
+}
+
+/**
+ * ipath_verbs_send - send a packet
+ * @dd: the infinipath device
+ * @hdrwords: the number of words in the header
+ * @hdr: the packet header
+ * @len: the length of the packet in bytes
+ * @ss: the SGE to send
+ */
+int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords,
+ u32 *hdr, u32 len, struct ipath_sge_state *ss)
+{
+ u32 __iomem *piobuf;
+ u32 plen;
+ int ret;
+
+ /* +1 is for the qword padding of pbc */
+ plen = hdrwords + ((len + 3) >> 2) + 1;
+ if (unlikely((plen << 2) > dd->ipath_ibmaxlen)) {
+ ipath_dbg("packet len 0x%x too long, failing\n", plen);
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ /* Get a PIO buffer to use. */
+ piobuf = ipath_getpiobuf(dd, NULL);
+ if (unlikely(piobuf == NULL)) {
+ ret = -EBUSY;
+ goto bail;
+ }
+
+ /*
+ * Write len to control qword, no flags.
+ * We have to flush after the PBC for correctness on some cpus
+ * or WC buffer can be written out of order.
+ */
+ writeq(plen, piobuf);
+ ipath_flush_wc();
+ piobuf += 2;
+ if (len == 0) {
+ /*
+ * If there is just the header portion, we must flush before
+ * writing the last word of the header for correctness, and
+ * again after the last header word (the trigger word).
+ */
+ __iowrite32_copy(piobuf, hdr, hdrwords - 1);
+ ipath_flush_wc();
+ __raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1);
+ ipath_flush_wc();
+ ret = 0;
+ goto bail;
+ }
+
+ __iowrite32_copy(piobuf, hdr, hdrwords);
+ piobuf += hdrwords;
+
+ /* The common case is aligned and contained in one segment. */
+ if (likely(ss->num_sge == 1 && len <= ss->sge.length &&
+ !((unsigned long)ss->sge.vaddr & (sizeof(u32) - 1)))) {
+ u32 w;
+ u32 *addr = (u32 *) ss->sge.vaddr;
+
+ /* Update address before sending packet. */
+ update_sge(ss, len);
+ /* Need to round up for the last dword in the packet. */
+ w = (len + 3) >> 2;
+ __iowrite32_copy(piobuf, addr, w - 1);
+ /* must flush everything written before the trigger word */
+ ipath_flush_wc();
+ __raw_writel(addr[w - 1], piobuf + w - 1);
+ /* be sure trigger word is written */
+ ipath_flush_wc();
+ ret = 0;
+ goto bail;
+ }
+ copy_io(piobuf, ss, len);
+ ret = 0;
+
+bail:
+ return ret;
+}
+
+int ipath_snapshot_counters(struct ipath_devdata *dd, u64 *swords,
+ u64 *rwords, u64 *spkts, u64 *rpkts,
+ u64 *xmit_wait)
+{
+ int ret;
+
+ if (!(dd->ipath_flags & IPATH_INITTED)) {
+ /* no hardware, freeze, etc. */
+ ipath_dbg("unit %u not usable\n", dd->ipath_unit);
+ ret = -EINVAL;
+ goto bail;
+ }
+ *swords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);
+ *rwords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
+ *spkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
+ *rpkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
+ *xmit_wait = ipath_snap_cntr(dd, dd->ipath_cregs->cr_sendstallcnt);
+
+ ret = 0;
+
+bail:
+ return ret;
+}
+
+/**
+ * ipath_get_counters - get various chip counters
+ * @dd: the infinipath device
+ * @cntrs: counters are placed here
+ *
+ * Return the counters needed by recv_pma_get_portcounters().
+ */
+int ipath_get_counters(struct ipath_devdata *dd,
+ struct ipath_verbs_counters *cntrs)
+{
+ int ret;
+
+ if (!(dd->ipath_flags & IPATH_INITTED)) {
+ /* no hardware, freeze, etc. */
+ ipath_dbg("unit %u not usable\n", dd->ipath_unit);
+ ret = -EINVAL;
+ goto bail;
+ }
+ cntrs->symbol_error_counter =
+ ipath_snap_cntr(dd, dd->ipath_cregs->cr_ibsymbolerrcnt);
+ cntrs->link_error_recovery_counter =
+ ipath_snap_cntr(dd, dd->ipath_cregs->cr_iblinkerrrecovcnt);
+ /*
+ * The link downed counter counts when the other side downs the
+ * connection. We add in the number of times we downed the link
+ * due to local link integrity errors to compensate.
+ */
+ cntrs->link_downed_counter =
+ ipath_snap_cntr(dd, dd->ipath_cregs->cr_iblinkdowncnt);
+ cntrs->port_rcv_errors =
+ ipath_snap_cntr(dd, dd->ipath_cregs->cr_rxdroppktcnt) +
+ ipath_snap_cntr(dd, dd->ipath_cregs->cr_rcvovflcnt) +
+ ipath_snap_cntr(dd, dd->ipath_cregs->cr_portovflcnt) +
+ ipath_snap_cntr(dd, dd->ipath_cregs->cr_err_rlencnt) +
+ ipath_snap_cntr(dd, dd->ipath_cregs->cr_invalidrlencnt) +
+ ipath_snap_cntr(dd, dd->ipath_cregs->cr_erricrccnt) +
+ ipath_snap_cntr(dd, dd->ipath_cregs->cr_errvcrccnt) +
+ ipath_snap_cntr(dd, dd->ipath_cregs->cr_errlpcrccnt) +
+ ipath_snap_cntr(dd, dd->ipath_cregs->cr_badformatcnt) +
+ dd->ipath_rxfc_unsupvl_errs;
+ cntrs->port_rcv_remphys_errors =
+ ipath_snap_cntr(dd, dd->ipath_cregs->cr_rcvebpcnt);
+ cntrs->port_xmit_discards =
+ ipath_snap_cntr(dd, dd->ipath_cregs->cr_unsupvlcnt);
+ cntrs->port_xmit_data =
+ ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);
+ cntrs->port_rcv_data =
+ ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
+ cntrs->port_xmit_packets =
+ ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
+ cntrs->port_rcv_packets =
+ ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
+ cntrs->local_link_integrity_errors =
+ (dd->ipath_flags & IPATH_GPIO_ERRINTRS) ?
+ dd->ipath_lli_errs : dd->ipath_lli_errors;
+ cntrs->excessive_buffer_overrun_errors = dd->ipath_overrun_thresh_errs;
+
+ ret = 0;
+
+bail:
+ return ret;
+}
+
/**
* ipath_ib_piobufavail - callback when a PIO buffer is available
* @arg: the device pointer
@@ -541,9 +933,8 @@ static void ipath_ib_timer(void *arg)
* QPs waiting for buffers (for now, just do a tasklet_hi_schedule and
* return zero).
*/
-static int ipath_ib_piobufavail(void *arg)
+int ipath_ib_piobufavail(struct ipath_ibdev *dev)
{
- struct ipath_ibdev *dev = (struct ipath_ibdev *) arg;
struct ipath_qp *qp;
unsigned long flags;
@@ -567,40 +958,39 @@ static int ipath_query_device(struct ib_device *ibdev,
struct ib_device_attr *props)
{
struct ipath_ibdev *dev = to_idev(ibdev);
- u32 vendor, boardrev, majrev, minrev;
memset(props, 0, sizeof(*props));
props->device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR |
IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
IB_DEVICE_SYS_IMAGE_GUID;
- ipath_layer_query_device(dev->dd, &vendor, &boardrev,
- &majrev, &minrev);
- props->vendor_id = vendor;
- props->vendor_part_id = boardrev;
- props->hw_ver = boardrev << 16 | majrev << 8 | minrev;
+ props->page_size_cap = PAGE_SIZE;
+ props->vendor_id = dev->dd->ipath_vendorid;
+ props->vendor_part_id = dev->dd->ipath_deviceid;
+ props->hw_ver = dev->dd->ipath_pcirev;
props->sys_image_guid = dev->sys_image_guid;
props->max_mr_size = ~0ull;
- props->max_qp = 0xffff;
- props->max_qp_wr = 0xffff;
- props->max_sge = 255;
- props->max_cq = 0xffff;
- props->max_cqe = 0xffff;
- props->max_mr = 0xffff;
- props->max_pd = 0xffff;
+ props->max_qp = ib_ipath_max_qps;
+ props->max_qp_wr = ib_ipath_max_qp_wrs;
+ props->max_sge = ib_ipath_max_sges;
+ props->max_cq = ib_ipath_max_cqs;
+ props->max_ah = ib_ipath_max_ahs;
+ props->max_cqe = ib_ipath_max_cqes;
+ props->max_mr = dev->lk_table.max;
+ props->max_pd = ib_ipath_max_pds;
props->max_qp_rd_atom = 1;
props->max_qp_init_rd_atom = 1;
/* props->max_res_rd_atom */
- props->max_srq = 0xffff;
- props->max_srq_wr = 0xffff;
- props->max_srq_sge = 255;
+ props->max_srq = ib_ipath_max_srqs;
+ props->max_srq_wr = ib_ipath_max_srq_wrs;
+ props->max_srq_sge = ib_ipath_max_srq_sges;
/* props->local_ca_ack_delay */
props->atomic_cap = IB_ATOMIC_HCA;
- props->max_pkeys = ipath_layer_get_npkeys(dev->dd);
- props->max_mcast_grp = 0xffff;
- props->max_mcast_qp_attach = 0xffff;
+ props->max_pkeys = ipath_get_npkeys(dev->dd);
+ props->max_mcast_grp = ib_ipath_max_mcast_grps;
+ props->max_mcast_qp_attach = ib_ipath_max_mcast_qp_attached;
props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
props->max_mcast_grp;
@@ -623,12 +1013,17 @@ const u8 ipath_cvt_physportstate[16] = {
[INFINIPATH_IBCS_LT_STATE_RECOVERIDLE] = 6,
};
+u32 ipath_get_cr_errpkey(struct ipath_devdata *dd)
+{
+ return ipath_read_creg32(dd, dd->ipath_cregs->cr_errpkey);
+}
+
static int ipath_query_port(struct ib_device *ibdev,
u8 port, struct ib_port_attr *props)
{
struct ipath_ibdev *dev = to_idev(ibdev);
enum ib_mtu mtu;
- u16 lid = ipath_layer_get_lid(dev->dd);
+ u16 lid = dev->dd->ipath_lid;
u64 ibcstat;
memset(props, 0, sizeof(*props));
@@ -636,17 +1031,17 @@ static int ipath_query_port(struct ib_device *ibdev,
props->lmc = dev->mkeyprot_resv_lmc & 7;
props->sm_lid = dev->sm_lid;
props->sm_sl = dev->sm_sl;
- ibcstat = ipath_layer_get_lastibcstat(dev->dd);
+ ibcstat = dev->dd->ipath_lastibcstat;
props->state = ((ibcstat >> 4) & 0x3) + 1;
/* See phys_state_show() */
props->phys_state = ipath_cvt_physportstate[
- ipath_layer_get_lastibcstat(dev->dd) & 0xf];
+ dev->dd->ipath_lastibcstat & 0xf];
props->port_cap_flags = dev->port_cap_flags;
props->gid_tbl_len = 1;
- props->max_msg_sz = 4096;
- props->pkey_tbl_len = ipath_layer_get_npkeys(dev->dd);
- props->bad_pkey_cntr = ipath_layer_get_cr_errpkey(dev->dd) -
- dev->n_pkey_violations;
+ props->max_msg_sz = 0x80000000;
+ props->pkey_tbl_len = ipath_get_npkeys(dev->dd);
+ props->bad_pkey_cntr = ipath_get_cr_errpkey(dev->dd) -
+ dev->z_pkey_violations;
props->qkey_viol_cntr = dev->qkey_violations;
props->active_width = IB_WIDTH_4X;
/* See rate_show() */
@@ -655,7 +1050,7 @@ static int ipath_query_port(struct ib_device *ibdev,
props->init_type_reply = 0;
props->max_mtu = IB_MTU_4096;
- switch (ipath_layer_get_ibmtu(dev->dd)) {
+ switch (dev->dd->ipath_ibmtu) {
case 4096:
mtu = IB_MTU_4096;
break;
@@ -714,7 +1109,7 @@ static int ipath_modify_port(struct ib_device *ibdev,
dev->port_cap_flags |= props->set_port_cap_mask;
dev->port_cap_flags &= ~props->clr_port_cap_mask;
if (port_modify_mask & IB_PORT_SHUTDOWN)
- ipath_layer_set_linkstate(dev->dd, IPATH_IB_LINKDOWN);
+ ipath_set_linkstate(dev->dd, IPATH_IB_LINKDOWN);
if (port_modify_mask & IB_PORT_RESET_QKEY_CNTR)
dev->qkey_violations = 0;
return 0;
@@ -731,7 +1126,7 @@ static int ipath_query_gid(struct ib_device *ibdev, u8 port,
goto bail;
}
gid->global.subnet_prefix = dev->gid_prefix;
- gid->global.interface_id = ipath_layer_get_guid(dev->dd);
+ gid->global.interface_id = dev->dd->ipath_guid;
ret = 0;
@@ -743,15 +1138,34 @@ static struct ib_pd *ipath_alloc_pd(struct ib_device *ibdev,
struct ib_ucontext *context,
struct ib_udata *udata)
{
+ struct ipath_ibdev *dev = to_idev(ibdev);
struct ipath_pd *pd;
struct ib_pd *ret;
+ /*
+ * This is actually totally arbitrary. Some correctness tests
+ * assume there's a maximum number of PDs that can be allocated.
+ * We don't actually have this limit, but we fail the test if
+ * we allow allocations of more than we report for this value.
+ */
+
pd = kmalloc(sizeof *pd, GFP_KERNEL);
if (!pd) {
ret = ERR_PTR(-ENOMEM);
goto bail;
}
+ spin_lock(&dev->n_pds_lock);
+ if (dev->n_pds_allocated == ib_ipath_max_pds) {
+ spin_unlock(&dev->n_pds_lock);
+ kfree(pd);
+ ret = ERR_PTR(-ENOMEM);
+ goto bail;
+ }
+
+ dev->n_pds_allocated++;
+ spin_unlock(&dev->n_pds_lock);
+
/* ib_alloc_pd() will initialize pd->ibpd. */
pd->user = udata != NULL;
@@ -764,6 +1178,11 @@ bail:
static int ipath_dealloc_pd(struct ib_pd *ibpd)
{
struct ipath_pd *pd = to_ipd(ibpd);
+ struct ipath_ibdev *dev = to_idev(ibpd->device);
+
+ spin_lock(&dev->n_pds_lock);
+ dev->n_pds_allocated--;
+ spin_unlock(&dev->n_pds_lock);
kfree(pd);
@@ -782,21 +1201,45 @@ static struct ib_ah *ipath_create_ah(struct ib_pd *pd,
{
struct ipath_ah *ah;
struct ib_ah *ret;
+ struct ipath_ibdev *dev = to_idev(pd->device);
+ unsigned long flags;
/* A multicast address requires a GRH (see ch. 8.4.1). */
- if (ah_attr->dlid >= IPS_MULTICAST_LID_BASE &&
- ah_attr->dlid != IPS_PERMISSIVE_LID &&
+ if (ah_attr->dlid >= IPATH_MULTICAST_LID_BASE &&
+ ah_attr->dlid != IPATH_PERMISSIVE_LID &&
!(ah_attr->ah_flags & IB_AH_GRH)) {
ret = ERR_PTR(-EINVAL);
goto bail;
}
+ if (ah_attr->dlid == 0) {
+ ret = ERR_PTR(-EINVAL);
+ goto bail;
+ }
+
+ if (ah_attr->port_num < 1 ||
+ ah_attr->port_num > pd->device->phys_port_cnt) {
+ ret = ERR_PTR(-EINVAL);
+ goto bail;
+ }
+
ah = kmalloc(sizeof *ah, GFP_ATOMIC);
if (!ah) {
ret = ERR_PTR(-ENOMEM);
goto bail;
}
+ spin_lock_irqsave(&dev->n_ahs_lock, flags);
+ if (dev->n_ahs_allocated == ib_ipath_max_ahs) {
+ spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
+ kfree(ah);
+ ret = ERR_PTR(-ENOMEM);
+ goto bail;
+ }
+
+ dev->n_ahs_allocated++;
+ spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
+
/* ib_create_ah() will initialize ah->ibah. */
ah->attr = *ah_attr;
@@ -814,7 +1257,13 @@ bail:
*/
static int ipath_destroy_ah(struct ib_ah *ibah)
{
+ struct ipath_ibdev *dev = to_idev(ibah->device);
struct ipath_ah *ah = to_iah(ibah);
+ unsigned long flags;
+
+ spin_lock_irqsave(&dev->n_ahs_lock, flags);
+ dev->n_ahs_allocated--;
+ spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
kfree(ah);
@@ -830,25 +1279,50 @@ static int ipath_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
return 0;
}
+/**
+ * ipath_get_npkeys - return the size of the PKEY table for port 0
+ * @dd: the infinipath device
+ */
+unsigned ipath_get_npkeys(struct ipath_devdata *dd)
+{
+ return ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys);
+}
+
+/**
+ * ipath_get_pkey - return the indexed PKEY from the port 0 PKEY table
+ * @dd: the infinipath device
+ * @index: the PKEY index
+ */
+unsigned ipath_get_pkey(struct ipath_devdata *dd, unsigned index)
+{
+ unsigned ret;
+
+ if (index >= ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys))
+ ret = 0;
+ else
+ ret = dd->ipath_pd[0]->port_pkeys[index];
+
+ return ret;
+}
+
static int ipath_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
u16 *pkey)
{
struct ipath_ibdev *dev = to_idev(ibdev);
int ret;
- if (index >= ipath_layer_get_npkeys(dev->dd)) {
+ if (index >= ipath_get_npkeys(dev->dd)) {
ret = -EINVAL;
goto bail;
}
- *pkey = ipath_layer_get_pkey(dev->dd, index);
+ *pkey = ipath_get_pkey(dev->dd, index);
ret = 0;
bail:
return ret;
}
-
/**
 * ipath_alloc_ucontext - allocate a ucontext
* @ibdev: the infiniband device
@@ -881,25 +1355,102 @@ static int ipath_dealloc_ucontext(struct ib_ucontext *context)
static int ipath_verbs_register_sysfs(struct ib_device *dev);
+static void __verbs_timer(unsigned long arg)
+{
+ struct ipath_devdata *dd = (struct ipath_devdata *) arg;
+
+ /*
+ * If port 0 receive packet interrupts are not available, or
+ * can be missed, poll the receive queue
+ */
+ if (dd->ipath_flags & IPATH_POLL_RX_INTR)
+ ipath_kreceive(dd);
+
+ /* Handle verbs layer timeouts. */
+ ipath_ib_timer(dd->verbs_dev);
+
+ mod_timer(&dd->verbs_timer, jiffies + 1);
+}
+
+static int enable_timer(struct ipath_devdata *dd)
+{
+ /*
+ * Early chips had a design flaw where the chip and kernel idea
+ * of the tail register don't always agree, and therefore we won't
+ * get an interrupt on the next packet received.
+ * If the board supports per packet receive interrupts, use it.
+ * Otherwise, the timer function periodically checks for packets
+ * to cover this case.
+ * Either way, the timer is needed for verbs layer related
+ * processing.
+ */
+ if (dd->ipath_flags & IPATH_GPIO_INTR) {
+ u64 val;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_debugportselect,
+ 0x2074076542310ULL);
+ /* Enable GPIO bit 2 interrupt */
+ val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_gpio_mask);
+ val |= (u64) (1 << IPATH_GPIO_PORT0_BIT);
+ ipath_write_kreg( dd, dd->ipath_kregs->kr_gpio_mask, val);
+ }
+
+ init_timer(&dd->verbs_timer);
+ dd->verbs_timer.function = __verbs_timer;
+ dd->verbs_timer.data = (unsigned long)dd;
+ dd->verbs_timer.expires = jiffies + 1;
+ add_timer(&dd->verbs_timer);
+
+ return 0;
+}
+
+static int disable_timer(struct ipath_devdata *dd)
+{
+ /* Disable GPIO bit 2 interrupt */
+ if (dd->ipath_flags & IPATH_GPIO_INTR) {
+ u64 val;
+ /* Disable GPIO bit 2 interrupt */
+ val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_gpio_mask);
+ val &= ~((u64) (1 << IPATH_GPIO_PORT0_BIT));
+ ipath_write_kreg( dd, dd->ipath_kregs->kr_gpio_mask, val);
+ /*
+ * We might want to undo changes to debugportselect,
+ * but how?
+ */
+ }
+
+ del_timer_sync(&dd->verbs_timer);
+
+ return 0;
+}
+
/**
* ipath_register_ib_device - register our device with the infiniband core
- * @unit: the device number to register
* @dd: the device data structure
* Return the allocated ipath_ibdev pointer or NULL on error.
*/
-static void *ipath_register_ib_device(int unit, struct ipath_devdata *dd)
+int ipath_register_ib_device(struct ipath_devdata *dd)
{
+ struct ipath_verbs_counters cntrs;
struct ipath_ibdev *idev;
struct ib_device *dev;
int ret;
idev = (struct ipath_ibdev *)ib_alloc_device(sizeof *idev);
- if (idev == NULL)
+ if (idev == NULL) {
+ ret = -ENOMEM;
goto bail;
+ }
dev = &idev->ibdev;
/* Only need to initialize non-zero fields. */
+ spin_lock_init(&idev->n_pds_lock);
+ spin_lock_init(&idev->n_ahs_lock);
+ spin_lock_init(&idev->n_cqs_lock);
+ spin_lock_init(&idev->n_qps_lock);
+ spin_lock_init(&idev->n_srqs_lock);
+ spin_lock_init(&idev->n_mcast_grps_lock);
+
spin_lock_init(&idev->qp_table.lock);
spin_lock_init(&idev->lk_table.lock);
idev->sm_lid = __constant_be16_to_cpu(IB_LID_PERMISSIVE);
@@ -939,20 +1490,39 @@ static void *ipath_register_ib_device(int unit, struct ipath_devdata *dd)
idev->pma_counter_select[5] = IB_PMA_PORT_XMIT_WAIT;
idev->link_width_enabled = 3; /* 1x or 4x */
+ /* Snapshot current HW counters to "clear" them. */
+ ipath_get_counters(dd, &cntrs);
+ idev->z_symbol_error_counter = cntrs.symbol_error_counter;
+ idev->z_link_error_recovery_counter =
+ cntrs.link_error_recovery_counter;
+ idev->z_link_downed_counter = cntrs.link_downed_counter;
+ idev->z_port_rcv_errors = cntrs.port_rcv_errors;
+ idev->z_port_rcv_remphys_errors =
+ cntrs.port_rcv_remphys_errors;
+ idev->z_port_xmit_discards = cntrs.port_xmit_discards;
+ idev->z_port_xmit_data = cntrs.port_xmit_data;
+ idev->z_port_rcv_data = cntrs.port_rcv_data;
+ idev->z_port_xmit_packets = cntrs.port_xmit_packets;
+ idev->z_port_rcv_packets = cntrs.port_rcv_packets;
+ idev->z_local_link_integrity_errors =
+ cntrs.local_link_integrity_errors;
+ idev->z_excessive_buffer_overrun_errors =
+ cntrs.excessive_buffer_overrun_errors;
+
/*
* The system image GUID is supposed to be the same for all
* IB HCAs in a single system but since there can be other
* device types in the system, we can't be sure this is unique.
*/
if (!sys_image_guid)
- sys_image_guid = ipath_layer_get_guid(dd);
+ sys_image_guid = dd->ipath_guid;
idev->sys_image_guid = sys_image_guid;
- idev->ib_unit = unit;
+ idev->ib_unit = dd->ipath_unit;
idev->dd = dd;
strlcpy(dev->name, "ipath%d", IB_DEVICE_NAME_MAX);
dev->owner = THIS_MODULE;
- dev->node_guid = ipath_layer_get_guid(dd);
+ dev->node_guid = dd->ipath_guid;
dev->uverbs_abi_ver = IPATH_UVERBS_ABI_VERSION;
dev->uverbs_cmd_mask =
(1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
@@ -984,9 +1554,9 @@ static void *ipath_register_ib_device(int unit, struct ipath_devdata *dd)
(1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) |
(1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV);
- dev->node_type = IB_NODE_CA;
+ dev->node_type = RDMA_NODE_IB_CA;
dev->phys_port_cnt = 1;
- dev->dma_device = ipath_layer_get_device(dd);
+ dev->dma_device = &dd->pcidev->dev;
dev->class_dev.dev = dev->dma_device;
dev->query_device = ipath_query_device;
dev->modify_device = ipath_modify_device;
@@ -1028,9 +1598,10 @@ static void *ipath_register_ib_device(int unit, struct ipath_devdata *dd)
dev->attach_mcast = ipath_multicast_attach;
dev->detach_mcast = ipath_multicast_detach;
dev->process_mad = ipath_process_mad;
+ dev->mmap = ipath_mmap;
snprintf(dev->node_desc, sizeof(dev->node_desc),
- IPATH_IDSTR " %s kernel_SMA", system_utsname.nodename);
+ IPATH_IDSTR " %s", system_utsname.nodename);
ret = ib_register_device(dev);
if (ret)
@@ -1039,7 +1610,7 @@ static void *ipath_register_ib_device(int unit, struct ipath_devdata *dd)
if (ipath_verbs_register_sysfs(dev))
goto err_class;
- ipath_layer_enable_timer(dd);
+ enable_timer(dd);
goto bail;
@@ -1051,37 +1622,32 @@ err_lk:
kfree(idev->qp_table.table);
err_qp:
ib_dealloc_device(dev);
- _VERBS_ERROR("ib_ipath%d cannot register verbs (%d)!\n",
- unit, -ret);
+ ipath_dev_err(dd, "cannot register verbs: %d!\n", -ret);
idev = NULL;
bail:
- return idev;
+ dd->verbs_dev = idev;
+ return ret;
}
-static void ipath_unregister_ib_device(void *arg)
+void ipath_unregister_ib_device(struct ipath_ibdev *dev)
{
- struct ipath_ibdev *dev = (struct ipath_ibdev *) arg;
struct ib_device *ibdev = &dev->ibdev;
- ipath_layer_disable_timer(dev->dd);
+ disable_timer(dev->dd);
ib_unregister_device(ibdev);
if (!list_empty(&dev->pending[0]) ||
!list_empty(&dev->pending[1]) ||
!list_empty(&dev->pending[2]))
- _VERBS_ERROR("ipath%d pending list not empty!\n",
- dev->ib_unit);
+ ipath_dev_err(dev->dd, "pending list not empty!\n");
if (!list_empty(&dev->piowait))
- _VERBS_ERROR("ipath%d piowait list not empty!\n",
- dev->ib_unit);
+ ipath_dev_err(dev->dd, "piowait list not empty!\n");
if (!list_empty(&dev->rnrwait))
- _VERBS_ERROR("ipath%d rnrwait list not empty!\n",
- dev->ib_unit);
+ ipath_dev_err(dev->dd, "rnrwait list not empty!\n");
if (!ipath_mcast_tree_empty())
- _VERBS_ERROR("ipath%d multicast table memory leak!\n",
- dev->ib_unit);
+ ipath_dev_err(dev->dd, "multicast table memory leak!\n");
/*
* Note that ipath_unregister_ib_device() can be called before all
* the QPs are destroyed!
@@ -1092,28 +1658,12 @@ static void ipath_unregister_ib_device(void *arg)
ib_dealloc_device(ibdev);
}
-static int __init ipath_verbs_init(void)
-{
- return ipath_verbs_register(ipath_register_ib_device,
- ipath_unregister_ib_device,
- ipath_ib_piobufavail, ipath_ib_rcv,
- ipath_ib_timer);
-}
-
-static void __exit ipath_verbs_cleanup(void)
-{
- ipath_verbs_unregister();
-}
-
static ssize_t show_rev(struct class_device *cdev, char *buf)
{
struct ipath_ibdev *dev =
container_of(cdev, struct ipath_ibdev, ibdev.class_dev);
- int vendor, boardrev, majrev, minrev;
- ipath_layer_query_device(dev->dd, &vendor, &boardrev,
- &majrev, &minrev);
- return sprintf(buf, "%d.%d\n", majrev, minrev);
+ return sprintf(buf, "%x\n", dev->dd->ipath_pcirev);
}
static ssize_t show_hca(struct class_device *cdev, char *buf)
@@ -1122,7 +1672,7 @@ static ssize_t show_hca(struct class_device *cdev, char *buf)
container_of(cdev, struct ipath_ibdev, ibdev.class_dev);
int ret;
- ret = ipath_layer_get_boardname(dev->dd, buf, 128);
+ ret = dev->dd->ipath_f_get_boardname(dev->dd, buf, 128);
if (ret < 0)
goto bail;
strcat(buf, "\n");
@@ -1149,6 +1699,7 @@ static ssize_t show_stats(struct class_device *cdev, char *buf)
"RC OTH NAKs %d\n"
"RC timeouts %d\n"
"RC RDMA dup %d\n"
+ "RC stalls %d\n"
"piobuf wait %d\n"
"no piobuf %d\n"
"PKT drops %d\n"
@@ -1156,7 +1707,7 @@ static ssize_t show_stats(struct class_device *cdev, char *buf)
dev->n_rc_resends, dev->n_rc_qacks, dev->n_rc_acks,
dev->n_seq_naks, dev->n_rdma_seq, dev->n_rnr_naks,
dev->n_other_naks, dev->n_timeouts,
- dev->n_rdma_dup_busy, dev->n_piowait,
+ dev->n_rdma_dup_busy, dev->n_rc_stalls, dev->n_piowait,
dev->n_no_piobuf, dev->n_pkt_drops, dev->n_wqe_errs);
for (i = 0; i < ARRAY_SIZE(dev->opstats); i++) {
const struct ipath_opcode_stats *si = &dev->opstats[i];
@@ -1199,6 +1750,3 @@ static int ipath_verbs_register_sysfs(struct ib_device *dev)
bail:
return ret;
}
-
-module_init(ipath_verbs_init);
-module_exit(ipath_verbs_cleanup);
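Several hunks above introduce per-device caps (n_pds_allocated, n_ahs_allocated, and friends) that are checked under a lock before an object is handed out, so the driver never exceeds the limits it advertises in ipath_query_device(). A hedged, userspace-flavoured sketch of that pattern follows; the pthread mutex stands in for the kernel spinlock and all names are illustrative, not driver code:

/* Sketch of the capped-allocation pattern: a per-device counter guarded
 * by a lock, with the allocation failing once the configured limit is
 * reached, mirroring the n_*_allocated checks added by this patch. */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct dev_limits {
	pthread_mutex_t lock;
	unsigned allocated;
	unsigned max;
};

static void *alloc_capped(struct dev_limits *lim, size_t objsize)
{
	void *obj = malloc(objsize);

	if (!obj)
		return NULL;

	pthread_mutex_lock(&lim->lock);
	if (lim->allocated == lim->max) {
		pthread_mutex_unlock(&lim->lock);
		free(obj);             /* over the advertised limit */
		return NULL;
	}
	lim->allocated++;
	pthread_mutex_unlock(&lim->lock);
	return obj;
}

int main(void)
{
	struct dev_limits pds = { PTHREAD_MUTEX_INITIALIZER, 0, 2 };

	for (int i = 0; i < 3; i++)
		printf("alloc %d: %s\n", i,
		       alloc_capped(&pds, 64) ? "ok" : "limit reached");
	return 0;
}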
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h
index 4f8d59300e9b..8039f6e5f0c8 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.h
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.h
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2006 QLogic, Inc. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -37,10 +38,10 @@
#include <linux/spinlock.h>
#include <linux/kernel.h>
#include <linux/interrupt.h>
+#include <linux/kref.h>
#include <rdma/ib_pack.h>
#include "ipath_layer.h"
-#include "verbs_debug.h"
#define QPN_MAX (1 << 24)
#define QPNMAP_ENTRIES (QPN_MAX / PAGE_SIZE / BITS_PER_BYTE)
@@ -49,7 +50,7 @@
* Increment this value if any changes that break userspace ABI
* compatibility are made.
*/
-#define IPATH_UVERBS_ABI_VERSION 1
+#define IPATH_UVERBS_ABI_VERSION 2
/*
* Define an ib_cq_notify value that is not valid so we know when CQ
@@ -148,19 +149,7 @@ struct ipath_mcast {
struct list_head qp_list;
wait_queue_head_t wait;
atomic_t refcount;
-};
-
-/* Memory region */
-struct ipath_mr {
- struct ib_mr ibmr;
- struct ipath_mregion mr; /* must be last */
-};
-
-/* Fast memory region */
-struct ipath_fmr {
- struct ib_fmr ibfmr;
- u8 page_shift;
- struct ipath_mregion mr; /* must be last */
+ int n_attached;
};
/* Protection domain */
@@ -176,58 +165,90 @@ struct ipath_ah {
};
/*
- * Quick description of our CQ/QP locking scheme:
- *
- * We have one global lock that protects dev->cq/qp_table. Each
- * struct ipath_cq/qp also has its own lock. An individual qp lock
- * may be taken inside of an individual cq lock. Both cqs attached to
- * a qp may be locked, with the send cq locked first. No other
- * nesting should be done.
- *
- * Each struct ipath_cq/qp also has an atomic_t ref count. The
- * pointer from the cq/qp_table to the struct counts as one reference.
- * This reference also is good for access through the consumer API, so
- * modifying the CQ/QP etc doesn't need to take another reference.
- * Access because of a completion being polled does need a reference.
- *
- * Finally, each struct ipath_cq/qp has a wait_queue_head_t for the
- * destroy function to sleep on.
- *
- * This means that access from the consumer API requires nothing but
- * taking the struct's lock.
- *
- * Access because of a completion event should go as follows:
- * - lock cq/qp_table and look up struct
- * - increment ref count in struct
- * - drop cq/qp_table lock
- * - lock struct, do your thing, and unlock struct
- * - decrement ref count; if zero, wake up waiters
- *
- * To destroy a CQ/QP, we can do the following:
- * - lock cq/qp_table, remove pointer, unlock cq/qp_table lock
- * - decrement ref count
- * - wait_event until ref count is zero
- *
- * It is the consumer's responsibilty to make sure that no QP
- * operations (WQE posting or state modification) are pending when the
- * QP is destroyed. Also, the consumer must make sure that calls to
- * qp_modify are serialized.
- *
- * Possible optimizations (wait for profile data to see if/where we
- * have locks bouncing between CPUs):
- * - split cq/qp table lock into n separate (cache-aligned) locks,
- * indexed (say) by the page in the table
+ * This structure is used by ipath_mmap() to validate an offset
+ * when an mmap() request is made. The vm_area_struct then uses
+ * this as its vm_private_data.
+ */
+struct ipath_mmap_info {
+ struct ipath_mmap_info *next;
+ struct ib_ucontext *context;
+ void *obj;
+ struct kref ref;
+ unsigned size;
+ unsigned mmap_cnt;
+};
+
+/*
+ * This structure is used to contain the head pointer, tail pointer,
+ * and completion queue entries as a single memory allocation so
+ * it can be mmap'ed into user space.
*/
+struct ipath_cq_wc {
+ u32 head; /* index of next entry to fill */
+ u32 tail; /* index of next ib_poll_cq() entry */
+ struct ib_wc queue[1]; /* this is actually size ibcq.cqe + 1 */
+};
+/*
+ * The completion queue structure.
+ */
struct ipath_cq {
struct ib_cq ibcq;
struct tasklet_struct comptask;
spinlock_t lock;
u8 notify;
u8 triggered;
- u32 head; /* new records added to the head */
- u32 tail; /* poll_cq() reads from here. */
- struct ib_wc *queue; /* this is actually ibcq.cqe + 1 */
+ struct ipath_cq_wc *queue;
+ struct ipath_mmap_info *ip;
+};
+
+/*
+ * A segment is a linear region of low physical memory.
+ * XXX Maybe we should use phys addr here and kmap()/kunmap().
+ * Used by the verbs layer.
+ */
+struct ipath_seg {
+ void *vaddr;
+ size_t length;
+};
+
+/* The number of ipath_segs that fit in a page. */
+#define IPATH_SEGSZ (PAGE_SIZE / sizeof (struct ipath_seg))
+
+struct ipath_segarray {
+ struct ipath_seg segs[IPATH_SEGSZ];
+};
+
+struct ipath_mregion {
+ struct ib_pd *pd; /* shares refcnt of ibmr.pd */
+ u64 user_base; /* User's address for this region */
+ u64 iova; /* IB start address of this region */
+ size_t length;
+ u32 lkey;
+ u32 offset; /* offset (bytes) to start of region */
+ int access_flags;
+ u32 max_segs; /* number of ipath_segs in all the arrays */
+ u32 mapsz; /* size of the map array */
+ struct ipath_segarray *map[0]; /* the segments */
+};
+
+/*
+ * These keep track of the copy progress within a memory region.
+ * Used by the verbs layer.
+ */
+struct ipath_sge {
+ struct ipath_mregion *mr;
+ void *vaddr; /* current pointer into the segment */
+ u32 sge_length; /* length of the SGE */
+ u32 length; /* remaining length of the segment */
+ u16 m; /* current index: mr->map[m] */
+ u16 n; /* current index: mr->map[m]->segs[n] */
+};
+
+/* Memory region */
+struct ipath_mr {
+ struct ib_mr ibmr;
+ struct ipath_mregion mr; /* must be last */
};
/*
@@ -246,32 +267,50 @@ struct ipath_swqe {
/*
* Receive work request queue entry.
- * The size of the sg_list is determined when the QP is created and stored
- * in qp->r_max_sge.
+ * The size of the sg_list is determined when the QP (or SRQ) is created
+ * and stored in qp->r_rq.max_sge (or srq->rq.max_sge).
*/
struct ipath_rwqe {
u64 wr_id;
- u32 length; /* total length of data in sg_list */
u8 num_sge;
- struct ipath_sge sg_list[0];
+ struct ib_sge sg_list[0];
};
-struct ipath_rq {
- spinlock_t lock;
+/*
+ * This structure is used to contain the head pointer, tail pointer,
+ * and receive work queue entries as a single memory allocation so
+ * it can be mmap'ed into user space.
+ * Note that the wq array elements are variable size so you can't
+ * just index into the array to get the N'th element;
+ * use get_rwqe_ptr() instead.
+ */
+struct ipath_rwq {
u32 head; /* new work requests posted to the head */
u32 tail; /* receives pull requests from here. */
+ struct ipath_rwqe wq[0];
+};
+
+struct ipath_rq {
+ struct ipath_rwq *wq;
+ spinlock_t lock;
u32 size; /* size of RWQE array */
u8 max_sge;
- struct ipath_rwqe *wq; /* RWQE array */
};
struct ipath_srq {
struct ib_srq ibsrq;
struct ipath_rq rq;
+ struct ipath_mmap_info *ip;
/* send signal when number of RWQEs < limit */
u32 limit;
};
+struct ipath_sge_state {
+ struct ipath_sge *sg_list; /* next SGE to be used if any */
+ struct ipath_sge sge; /* progress state for the current SGE */
+ u8 num_sge;
+};
+
/*
* Variables prefixed with s_ are for the requester (sender).
* Variables prefixed with r_ are for the responder (receiver).
@@ -291,6 +330,7 @@ struct ipath_qp {
atomic_t refcount;
wait_queue_head_t wait;
struct tasklet_struct s_task;
+ struct ipath_mmap_info *ip;
struct ipath_sge_state *s_cur_sge;
struct ipath_sge_state s_sge; /* current send request data */
/* current RDMA read send data */
@@ -305,32 +345,37 @@ struct ipath_qp {
u32 s_next_psn; /* PSN for next request */
u32 s_last_psn; /* last response PSN processed */
u32 s_psn; /* current packet sequence number */
+ u32 s_ack_psn; /* PSN for RDMA_READ */
u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */
- u32 s_ack_psn; /* PSN for next ACK or RDMA_READ */
- u64 s_ack_atomic; /* data for atomic ACK */
+ u32 r_ack_psn; /* PSN for next ACK or atomic ACK */
u64 r_wr_id; /* ID for current receive WQE */
u64 r_atomic_data; /* data for last atomic op */
u32 r_atomic_psn; /* PSN of last atomic op */
u32 r_len; /* total length of r_sge */
u32 r_rcv_len; /* receive data len processed */
u32 r_psn; /* expected rcv packet sequence number */
+ u32 r_msn; /* message sequence number */
u8 state; /* QP state */
u8 s_state; /* opcode of last packet sent */
u8 s_ack_state; /* opcode of packet to ACK */
u8 s_nak_state; /* non-zero if NAK is pending */
u8 r_state; /* opcode of last packet received */
+ u8 r_ack_state; /* opcode of packet to ACK */
+ u8 r_nak_state; /* non-zero if NAK is pending */
+ u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */
u8 r_reuse_sge; /* for UC receive errors */
u8 r_sge_inx; /* current index into sg_list */
- u8 s_max_sge; /* size of s_wq->sg_list */
+ u8 r_wrid_valid; /* r_wrid set but CQ entry not yet made */
u8 qp_access_flags;
+ u8 s_max_sge; /* size of s_wq->sg_list */
u8 s_retry_cnt; /* number of times to retry */
u8 s_rnr_retry_cnt;
- u8 s_min_rnr_timer;
u8 s_retry; /* requester retry counter */
u8 s_rnr_retry; /* requester RNR retry counter */
+ u8 s_wait_credit; /* limit number of unacked packets sent */
u8 s_pkey_index; /* PKEY index to use */
+ u8 timeout; /* Timeout for this QP */
enum ib_mtu path_mtu;
- atomic_t msn; /* message sequence number */
u32 remote_qpn;
u32 qkey; /* QKEY for this QP (for UD or RD) */
u32 s_size; /* send work queue size */
@@ -341,7 +386,8 @@ struct ipath_qp {
u32 s_ssn; /* SSN of tail entry */
u32 s_lsn; /* limit sequence number (credit) */
struct ipath_swqe *s_wq; /* send work queue */
- struct ipath_rq r_rq; /* receive work queue */
+ struct ipath_rq r_rq; /* receive work queue */
+ struct ipath_sge r_sg_list[0]; /* verified SGEs */
};
/*
@@ -350,6 +396,8 @@ struct ipath_qp {
#define IPATH_S_BUSY 0
#define IPATH_S_SIGNAL_REQ_WR 1
+#define IPATH_PSN_CREDIT 2048
+
/*
* Since struct ipath_swqe is not a fixed size, we can't simply index into
* struct ipath_qp.s_wq. This function does the array index computation.
@@ -365,15 +413,15 @@ static inline struct ipath_swqe *get_swqe_ptr(struct ipath_qp *qp,
/*
* Since struct ipath_rwqe is not a fixed size, we can't simply index into
- * struct ipath_rq.wq. This function does the array index computation.
+ * struct ipath_rwq.wq. This function does the array index computation.
*/
static inline struct ipath_rwqe *get_rwqe_ptr(struct ipath_rq *rq,
unsigned n)
{
return (struct ipath_rwqe *)
- ((char *) rq->wq +
+ ((char *) rq->wq->wq +
(sizeof(struct ipath_rwqe) +
- rq->max_sge * sizeof(struct ipath_sge)) * n);
+ rq->max_sge * sizeof(struct ib_sge)) * n);
}
/*
@@ -413,6 +461,7 @@ struct ipath_ibdev {
struct ib_device ibdev;
struct list_head dev_list;
struct ipath_devdata *dd;
+ struct ipath_mmap_info *pending_mmaps;
int ib_unit; /* This is the device number */
u16 sm_lid; /* in host order */
u8 sm_sl;
@@ -431,6 +480,20 @@ struct ipath_ibdev {
__be64 sys_image_guid; /* in network order */
__be64 gid_prefix; /* in network order */
__be64 mkey;
+
+ u32 n_pds_allocated; /* number of PDs allocated for device */
+ spinlock_t n_pds_lock;
+ u32 n_ahs_allocated; /* number of AHs allocated for device */
+ spinlock_t n_ahs_lock;
+ u32 n_cqs_allocated; /* number of CQs allocated for device */
+ spinlock_t n_cqs_lock;
+ u32 n_qps_allocated; /* number of QPs allocated for device */
+ spinlock_t n_qps_lock;
+ u32 n_srqs_allocated; /* number of SRQs allocated for device */
+ spinlock_t n_srqs_lock;
+ u32 n_mcast_grps_allocated; /* number of mcast groups allocated */
+ spinlock_t n_mcast_grps_lock;
+
u64 ipath_sword; /* total dwords sent (sample result) */
u64 ipath_rword; /* total dwords received (sample result) */
u64 ipath_spkts; /* total packets sent (sample result) */
@@ -442,17 +505,19 @@ struct ipath_ibdev {
u64 n_unicast_rcv; /* total unicast packets received */
u64 n_multicast_xmit; /* total multicast packets sent */
u64 n_multicast_rcv; /* total multicast packets received */
- u64 n_symbol_error_counter; /* starting count for PMA */
- u64 n_link_error_recovery_counter; /* starting count for PMA */
- u64 n_link_downed_counter; /* starting count for PMA */
- u64 n_port_rcv_errors; /* starting count for PMA */
- u64 n_port_rcv_remphys_errors; /* starting count for PMA */
- u64 n_port_xmit_discards; /* starting count for PMA */
- u64 n_port_xmit_data; /* starting count for PMA */
- u64 n_port_rcv_data; /* starting count for PMA */
- u64 n_port_xmit_packets; /* starting count for PMA */
- u64 n_port_rcv_packets; /* starting count for PMA */
- u32 n_pkey_violations; /* starting count for PMA */
+ u64 z_symbol_error_counter; /* starting count for PMA */
+ u64 z_link_error_recovery_counter; /* starting count for PMA */
+ u64 z_link_downed_counter; /* starting count for PMA */
+ u64 z_port_rcv_errors; /* starting count for PMA */
+ u64 z_port_rcv_remphys_errors; /* starting count for PMA */
+ u64 z_port_xmit_discards; /* starting count for PMA */
+ u64 z_port_xmit_data; /* starting count for PMA */
+ u64 z_port_rcv_data; /* starting count for PMA */
+ u64 z_port_xmit_packets; /* starting count for PMA */
+ u64 z_port_rcv_packets; /* starting count for PMA */
+ u32 z_pkey_violations; /* starting count for PMA */
+ u32 z_local_link_integrity_errors; /* starting count for PMA */
+ u32 z_excessive_buffer_overrun_errors; /* starting count for PMA */
u32 n_rc_resends;
u32 n_rc_acks;
u32 n_rc_qacks;
@@ -461,7 +526,9 @@ struct ipath_ibdev {
u32 n_rnr_naks;
u32 n_other_naks;
u32 n_timeouts;
+ u32 n_rc_stalls;
u32 n_pkt_drops;
+ u32 n_vl15_dropped;
u32 n_wqe_errs;
u32 n_rdma_dup_busy;
u32 n_piowait;
@@ -482,8 +549,19 @@ struct ipath_ibdev {
struct ipath_opcode_stats opstats[128];
};
-struct ipath_ucontext {
- struct ib_ucontext ibucontext;
+struct ipath_verbs_counters {
+ u64 symbol_error_counter;
+ u64 link_error_recovery_counter;
+ u64 link_downed_counter;
+ u64 port_rcv_errors;
+ u64 port_rcv_remphys_errors;
+ u64 port_xmit_discards;
+ u64 port_xmit_data;
+ u64 port_rcv_data;
+ u64 port_xmit_packets;
+ u64 port_rcv_packets;
+ u32 local_link_integrity_errors;
+ u32 excessive_buffer_overrun_errors;
};
static inline struct ipath_mr *to_imr(struct ib_mr *ibmr)
@@ -491,11 +569,6 @@ static inline struct ipath_mr *to_imr(struct ib_mr *ibmr)
return container_of(ibmr, struct ipath_mr, ibmr);
}
-static inline struct ipath_fmr *to_ifmr(struct ib_fmr *ibfmr)
-{
- return container_of(ibfmr, struct ipath_fmr, ibfmr);
-}
-
static inline struct ipath_pd *to_ipd(struct ib_pd *ibpd)
{
return container_of(ibpd, struct ipath_pd, ibpd);
@@ -533,12 +606,6 @@ int ipath_process_mad(struct ib_device *ibdev,
struct ib_grh *in_grh,
struct ib_mad *in_mad, struct ib_mad *out_mad);
-static inline struct ipath_ucontext *to_iucontext(struct ib_ucontext
- *ibucontext)
-{
- return container_of(ibucontext, struct ipath_ucontext, ibucontext);
-}
-
/*
* Compare the lower 24 bits of the two values.
* Returns an integer <, ==, or > than zero.
@@ -550,6 +617,13 @@ static inline int ipath_cmp24(u32 a, u32 b)
struct ipath_mcast *ipath_mcast_find(union ib_gid *mgid);
+int ipath_snapshot_counters(struct ipath_devdata *dd, u64 *swords,
+ u64 *rwords, u64 *spkts, u64 *rpkts,
+ u64 *xmit_wait);
+
+int ipath_get_counters(struct ipath_devdata *dd,
+ struct ipath_verbs_counters *cntrs);
+
int ipath_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
int ipath_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
@@ -566,8 +640,10 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
int ipath_destroy_qp(struct ib_qp *ibqp);
+void ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err);
+
int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
- int attr_mask);
+ int attr_mask, struct ib_udata *udata);
int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int attr_mask, struct ib_qp_init_attr *init_attr);
@@ -580,23 +656,16 @@ void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc);
void ipath_get_credit(struct ipath_qp *qp, u32 aeth);
-void ipath_do_rc_send(unsigned long data);
-
-void ipath_do_uc_send(unsigned long data);
+int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords,
+ u32 *hdr, u32 len, struct ipath_sge_state *ss);
void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int sig);
-int ipath_rkey_ok(struct ipath_ibdev *dev, struct ipath_sge_state *ss,
- u32 len, u64 vaddr, u32 rkey, int acc);
-
-int ipath_lkey_ok(struct ipath_lkey_table *rkt, struct ipath_sge *isge,
- struct ib_sge *sge, int acc);
-
void ipath_copy_sge(struct ipath_sge_state *ss, void *data, u32 length);
void ipath_skip_sge(struct ipath_sge_state *ss, u32 length);
-int ipath_post_rc_send(struct ipath_qp *qp, struct ib_send_wr *wr);
+int ipath_post_ruc_send(struct ipath_qp *qp, struct ib_send_wr *wr);
void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
int has_grh, void *data, u32 tlen, struct ipath_qp *qp);
@@ -616,10 +685,10 @@ int ipath_alloc_lkey(struct ipath_lkey_table *rkt,
void ipath_free_lkey(struct ipath_lkey_table *rkt, u32 lkey);
-int ipath_lkey_ok(struct ipath_lkey_table *rkt, struct ipath_sge *isge,
+int ipath_lkey_ok(struct ipath_qp *qp, struct ipath_sge *isge,
struct ib_sge *sge, int acc);
-int ipath_rkey_ok(struct ipath_ibdev *dev, struct ipath_sge_state *ss,
+int ipath_rkey_ok(struct ipath_qp *qp, struct ipath_sge_state *ss,
u32 len, u64 vaddr, u32 rkey, int acc);
int ipath_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
@@ -630,7 +699,8 @@ struct ib_srq *ipath_create_srq(struct ib_pd *ibpd,
struct ib_udata *udata);
int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
- enum ib_srq_attr_mask attr_mask);
+ enum ib_srq_attr_mask attr_mask,
+ struct ib_udata *udata);
int ipath_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr);
@@ -672,13 +742,45 @@ int ipath_unmap_fmr(struct list_head *fmr_list);
int ipath_dealloc_fmr(struct ib_fmr *ibfmr);
+void ipath_release_mmap_info(struct kref *ref);
+
+int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
+
void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev);
void ipath_insert_rnr_queue(struct ipath_qp *qp);
int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only);
-void ipath_ruc_loopback(struct ipath_qp *sqp, struct ib_wc *wc);
+u32 ipath_make_grh(struct ipath_ibdev *dev, struct ib_grh *hdr,
+ struct ib_global_route *grh, u32 hwords, u32 nwords);
+
+void ipath_do_ruc_send(unsigned long data);
+
+u32 ipath_make_rc_ack(struct ipath_qp *qp, struct ipath_other_headers *ohdr,
+ u32 pmtu);
+
+int ipath_make_rc_req(struct ipath_qp *qp, struct ipath_other_headers *ohdr,
+ u32 pmtu, u32 *bth0p, u32 *bth2p);
+
+int ipath_make_uc_req(struct ipath_qp *qp, struct ipath_other_headers *ohdr,
+ u32 pmtu, u32 *bth0p, u32 *bth2p);
+
+int ipath_register_ib_device(struct ipath_devdata *);
+
+void ipath_unregister_ib_device(struct ipath_ibdev *);
+
+void ipath_ib_rcv(struct ipath_ibdev *, void *, void *, u32);
+
+int ipath_ib_piobufavail(struct ipath_ibdev *);
+
+void ipath_ib_timer(struct ipath_ibdev *);
+
+unsigned ipath_get_npkeys(struct ipath_devdata *);
+
+u32 ipath_get_cr_errpkey(struct ipath_devdata *);
+
+unsigned ipath_get_pkey(struct ipath_devdata *, unsigned);
extern const enum ib_wc_opcode ib_ipath_wc_opcode[];
@@ -688,6 +790,26 @@ extern const int ib_ipath_state_ops[];
extern unsigned int ib_ipath_lkey_table_size;
+extern unsigned int ib_ipath_max_cqes;
+
+extern unsigned int ib_ipath_max_cqs;
+
+extern unsigned int ib_ipath_max_qp_wrs;
+
+extern unsigned int ib_ipath_max_qps;
+
+extern unsigned int ib_ipath_max_sges;
+
+extern unsigned int ib_ipath_max_mcast_grps;
+
+extern unsigned int ib_ipath_max_mcast_qp_attached;
+
+extern unsigned int ib_ipath_max_srqs;
+
+extern unsigned int ib_ipath_max_srq_sges;
+
+extern unsigned int ib_ipath_max_srq_wrs;
+
extern const u32 ib_ipath_rnr_table[];
#endif /* IPATH_VERBS_H */
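The new ipath_cq_wc and ipath_rwq layouts above put the head index, tail index and the entry array into a single allocation precisely so the whole queue can be mmap()ed into user space and polled without a system call (hence the ABI bump to version 2). Below is a small illustrative sketch of how such a head/tail ring is typically produced and consumed, assuming a single producer and a single consumer; it is not the driver's or any user library's actual code:

/* Illustrative single-producer/single-consumer ring in the style of
 * struct ipath_cq_wc: head is where the producer writes next, tail is
 * where the consumer reads next; the ring is empty when head == tail. */
#include <stdio.h>

#define RING_ENTRIES 8                 /* plays the role of ibcq.cqe + 1 */

struct wc_ring {
	unsigned head;                 /* next entry the producer fills */
	unsigned tail;                 /* next entry the consumer reads */
	int queue[RING_ENTRIES];
};

static int ring_post(struct wc_ring *r, int v)
{
	unsigned next = (r->head + 1) % RING_ENTRIES;

	if (next == r->tail)
		return -1;             /* full: would overwrite unread data */
	r->queue[r->head] = v;
	r->head = next;
	return 0;
}

static int ring_poll(struct wc_ring *r, int *v)
{
	if (r->tail == r->head)
		return 0;              /* empty */
	*v = r->queue[r->tail];
	r->tail = (r->tail + 1) % RING_ENTRIES;
	return 1;
}

int main(void)
{
	struct wc_ring ring = { 0, 0, { 0 } };
	int v;

	ring_post(&ring, 42);
	while (ring_poll(&ring, &v))
		printf("completion: %d\n", v);
	return 0;
}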
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c
index 10b31d2c4f20..085e28b939ec 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2006 QLogic, Inc. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -92,6 +93,7 @@ static struct ipath_mcast *ipath_mcast_alloc(union ib_gid *mgid)
INIT_LIST_HEAD(&mcast->qp_list);
init_waitqueue_head(&mcast->wait);
atomic_set(&mcast->refcount, 0);
+ mcast->n_attached = 0;
bail:
return mcast;
@@ -157,7 +159,8 @@ bail:
* the table but the QP was added. Return ESRCH if the QP was already
* attached and neither structure was added.
*/
-static int ipath_mcast_add(struct ipath_mcast *mcast,
+static int ipath_mcast_add(struct ipath_ibdev *dev,
+ struct ipath_mcast *mcast,
struct ipath_mcast_qp *mqp)
{
struct rb_node **n = &mcast_tree.rb_node;
@@ -188,34 +191,52 @@ static int ipath_mcast_add(struct ipath_mcast *mcast,
/* Search the QP list to see if this is already there. */
list_for_each_entry_rcu(p, &tmcast->qp_list, list) {
if (p->qp == mqp->qp) {
- spin_unlock_irqrestore(&mcast_lock, flags);
ret = ESRCH;
goto bail;
}
}
+ if (tmcast->n_attached == ib_ipath_max_mcast_qp_attached) {
+ ret = ENOMEM;
+ goto bail;
+ }
+
+ tmcast->n_attached++;
+
list_add_tail_rcu(&mqp->list, &tmcast->qp_list);
- spin_unlock_irqrestore(&mcast_lock, flags);
ret = EEXIST;
goto bail;
}
+ spin_lock(&dev->n_mcast_grps_lock);
+ if (dev->n_mcast_grps_allocated == ib_ipath_max_mcast_grps) {
+ spin_unlock(&dev->n_mcast_grps_lock);
+ ret = ENOMEM;
+ goto bail;
+ }
+
+ dev->n_mcast_grps_allocated++;
+ spin_unlock(&dev->n_mcast_grps_lock);
+
+ mcast->n_attached++;
+
list_add_tail_rcu(&mqp->list, &mcast->qp_list);
atomic_inc(&mcast->refcount);
rb_link_node(&mcast->rb_node, pn, n);
rb_insert_color(&mcast->rb_node, &mcast_tree);
- spin_unlock_irqrestore(&mcast_lock, flags);
-
ret = 0;
bail:
+ spin_unlock_irqrestore(&mcast_lock, flags);
+
return ret;
}
int ipath_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
struct ipath_qp *qp = to_iqp(ibqp);
+ struct ipath_ibdev *dev = to_idev(ibqp->device);
struct ipath_mcast *mcast;
struct ipath_mcast_qp *mqp;
int ret;
@@ -235,7 +256,7 @@ int ipath_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
ret = -ENOMEM;
goto bail;
}
- switch (ipath_mcast_add(mcast, mqp)) {
+ switch (ipath_mcast_add(dev, mcast, mqp)) {
case ESRCH:
/* Neither was used: can't attach the same QP twice. */
ipath_mcast_qp_free(mqp);
@@ -245,6 +266,12 @@ int ipath_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
case EEXIST: /* The mcast wasn't used */
ipath_mcast_free(mcast);
break;
+ case ENOMEM:
+ /* Exceeded the maximum number of mcast groups. */
+ ipath_mcast_qp_free(mqp);
+ ipath_mcast_free(mcast);
+ ret = -ENOMEM;
+ goto bail;
default:
break;
}
@@ -258,6 +285,7 @@ bail:
int ipath_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
struct ipath_qp *qp = to_iqp(ibqp);
+ struct ipath_ibdev *dev = to_idev(ibqp->device);
struct ipath_mcast *mcast = NULL;
struct ipath_mcast_qp *p, *tmp;
struct rb_node *n;
@@ -272,7 +300,7 @@ int ipath_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
while (1) {
if (n == NULL) {
spin_unlock_irqrestore(&mcast_lock, flags);
- ret = 0;
+ ret = -EINVAL;
goto bail;
}
@@ -296,6 +324,7 @@ int ipath_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
* link until we are sure there are no list walkers.
*/
list_del_rcu(&p->list);
+ mcast->n_attached--;
/* If this was the last attached QP, remove the GID too. */
if (list_empty(&mcast->qp_list)) {
@@ -319,6 +348,9 @@ int ipath_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
atomic_dec(&mcast->refcount);
wait_event(mcast->wait, !atomic_read(&mcast->refcount));
ipath_mcast_free(mcast);
+ spin_lock(&dev->n_mcast_grps_lock);
+ dev->n_mcast_grps_allocated--;
+ spin_unlock(&dev->n_mcast_grps_lock);
}
ret = 0;
diff --git a/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c b/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c
new file mode 100644
index 000000000000..0095bb70f34e
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * This file is conditionally built on PowerPC only. Otherwise weak symbol
+ * versions of the functions exported from here are used.
+ */
+
+#include "ipath_kernel.h"
+
+/**
+ * ipath_enable_wc - enable write combining for MMIO writes to the device
+ * @dd: infinipath device
+ *
+ * Nothing to do on PowerPC, so just return without error.
+ */
+int ipath_enable_wc(struct ipath_devdata *dd)
+{
+ return 0;
+}
+
+/**
+ * ipath_unordered_wc - indicate whether write combining is unordered
+ *
+ * Because our performance depends on our ability to do write
+ * combining mmio writes in the most efficient way, we need to
+ * know if we are on a processor that may reorder stores when
+ * write combining.
+ */
+int ipath_unordered_wc(void)
+{
+ return 1;
+}
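The comment at the top of the new ipath_wc_ppc64.c says that weak symbol versions of these functions are used when the file is not built. A minimal sketch of that GCC weak-symbol override pattern follows; it is a single standalone file for brevity (in practice the strong, architecture-specific definition would live in a separate object such as this one), so treat it as illustration rather than the driver's build setup:

/* Minimal sketch of the GCC weak-symbol pattern: a generic default is
 * marked weak, and an architecture-specific object file may provide a
 * strong definition that replaces it at link time. */
#include <stdio.h>

/* Generic default: assume ordered write combining unless overridden. */
int __attribute__((weak)) ipath_unordered_wc(void)
{
	return 0;
}

int main(void)
{
	/* Linking in a PowerPC-style override returning 1 would change this. */
	printf("unordered write combining: %d\n", ipath_unordered_wc());
	return 0;
}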
diff --git a/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c b/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c
index adc5322f15c1..04696e62da87 100644
--- a/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c
+++ b/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2006 QLogic, Inc. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -122,6 +123,8 @@ int ipath_enable_wc(struct ipath_devdata *dd)
ipath_cdbg(VERBOSE, "Set mtrr for chip to WC, "
"cookie is %d\n", cookie);
dd->ipath_wc_cookie = cookie;
+ dd->ipath_wc_base = (unsigned long) pioaddr;
+ dd->ipath_wc_len = (unsigned long) piolen;
}
}
@@ -135,9 +138,16 @@ int ipath_enable_wc(struct ipath_devdata *dd)
void ipath_disable_wc(struct ipath_devdata *dd)
{
if (dd->ipath_wc_cookie) {
+ int r;
ipath_cdbg(VERBOSE, "undoing WCCOMB on pio buffers\n");
- mtrr_del(dd->ipath_wc_cookie, 0, 0);
- dd->ipath_wc_cookie = 0;
+ r = mtrr_del(dd->ipath_wc_cookie, dd->ipath_wc_base,
+ dd->ipath_wc_len);
+ if (r < 0)
+ dev_info(&dd->pcidev->dev,
+ "mtrr_del(%lx, %lx, %lx) failed: %d\n",
+ dd->ipath_wc_cookie, dd->ipath_wc_base,
+ dd->ipath_wc_len, r);
+ dd->ipath_wc_cookie = 0; /* even on failure */
}
}
diff --git a/drivers/infiniband/hw/ipath/ips_common.h b/drivers/infiniband/hw/ipath/ips_common.h
deleted file mode 100644
index ab7cbbbfd03a..000000000000
--- a/drivers/infiniband/hw/ipath/ips_common.h
+++ /dev/null
@@ -1,263 +0,0 @@
-#ifndef IPS_COMMON_H
-#define IPS_COMMON_H
-/*
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "ipath_common.h"
-
-struct ipath_header {
- /*
- * Version - 4 bits, Port - 4 bits, TID - 10 bits and Offset -
- * 14 bits before ECO change ~28 Dec 03. After that, Vers 4,
- * Port 3, TID 11, offset 14.
- */
- __le32 ver_port_tid_offset;
- __le16 chksum;
- __le16 pkt_flags;
-};
-
-struct ips_message_header {
- __be16 lrh[4];
- __be32 bth[3];
- /* fields below this point are in host byte order */
- struct ipath_header iph;
- __u8 sub_opcode;
- __u8 flags;
- __u16 src_rank;
- /* 24 bits. The upper 8 bit is available for other use */
- union {
- struct {
- unsigned ack_seq_num:24;
- unsigned port:4;
- unsigned unused:4;
- };
- __u32 ack_seq_num_org;
- };
- __u8 expected_tid_session_id;
- __u8 tinylen; /* to aid MPI */
- union {
- __u16 tag; /* to aid MPI */
- __u16 mqhdr; /* for PSM MQ */
- };
- union {
- __u32 mpi[4]; /* to aid MPI */
- __u32 data[4];
- __u64 mq[2]; /* for PSM MQ */
- struct {
- __u16 mtu;
- __u8 major_ver;
- __u8 minor_ver;
- __u32 not_used; //free
- __u32 run_id;
- __u32 client_ver;
- };
- };
-};
-
-struct ether_header {
- __be16 lrh[4];
- __be32 bth[3];
- struct ipath_header iph;
- __u8 sub_opcode;
- __u8 cmd;
- __be16 lid;
- __u16 mac[3];
- __u8 frag_num;
- __u8 seq_num;
- __le32 len;
- /* MUST be of word size due to PIO write requirements */
- __le32 csum;
- __le16 csum_offset;
- __le16 flags;
- __u16 first_2_bytes;
- __u8 unused[2]; /* currently unused */
-};
-
-/*
- * The PIO buffer used for sending infinipath messages must only be written
- * in 32-bit words, all the data must be written, and no writes can occur
- * after the last word is written (which transfers "ownership" of the buffer
- * to the chip and triggers the message to be sent).
- * Since the Linux sk_buff structure can be recursive, non-aligned, and
- * any number of bytes in each segment, we use the following structure
- * to keep information about the overall state of the copy operation.
- * This is used to save the information needed to store the checksum
- * in the right place before sending the last word to the hardware and
- * to buffer the last 0-3 bytes of non-word sized segments.
- */
-struct copy_data_s {
- struct ether_header *hdr;
- /* addr of PIO buf to write csum to */
- __u32 __iomem *csum_pio;
- __u32 __iomem *to; /* addr of PIO buf to write data to */
- __u32 device; /* which device to allocate PIO bufs from */
- __s32 error; /* set if there is an error. */
- __s32 extra; /* amount of data saved in u.buf below */
- __u32 len; /* total length to send in bytes */
- __u32 flen; /* frament length in words */
- __u32 csum; /* partial IP checksum */
- __u32 pos; /* position for partial checksum */
- __u32 offset; /* offset to where data currently starts */
- __s32 checksum_calc; /* set to 1 when csum has been calculated */
- struct sk_buff *skb;
- union {
- __u32 w;
- __u8 buf[4];
- } u;
-};
-
-/* IB - LRH header consts */
-#define IPS_LRH_GRH 0x0003 /* 1. word of IB LRH - next header: GRH */
-#define IPS_LRH_BTH 0x0002 /* 1. word of IB LRH - next header: BTH */
-
-#define IPS_OFFSET 0
-
-/*
- * defines the cut-off point between the header queue and eager/expected
- * TID queue
- */
-#define NUM_OF_EXTRA_WORDS_IN_HEADER_QUEUE \
- ((sizeof(struct ips_message_header) - \
- offsetof(struct ips_message_header, iph)) >> 2)
-
-/* OpCodes */
-#define OPCODE_IPS 0xC0
-#define OPCODE_ITH4X 0xC1
-
-/* OpCode 30 is use by stand-alone test programs */
-#define OPCODE_RAW_DATA 0xDE
-/* last OpCode (31) is reserved for test */
-#define OPCODE_TEST 0xDF
-
-/* sub OpCodes - ips */
-#define OPCODE_SEQ_DATA 0x01
-#define OPCODE_SEQ_CTRL 0x02
-
-#define OPCODE_SEQ_MQ_DATA 0x03
-#define OPCODE_SEQ_MQ_CTRL 0x04
-
-#define OPCODE_ACK 0x10
-#define OPCODE_NAK 0x11
-
-#define OPCODE_ERR_CHK 0x20
-#define OPCODE_ERR_CHK_PLS 0x21
-
-#define OPCODE_STARTUP 0x30
-#define OPCODE_STARTUP_ACK 0x31
-#define OPCODE_STARTUP_NAK 0x32
-
-#define OPCODE_STARTUP_EXT 0x34
-#define OPCODE_STARTUP_ACK_EXT 0x35
-#define OPCODE_STARTUP_NAK_EXT 0x36
-
-#define OPCODE_TIDS_RELEASE 0x40
-#define OPCODE_TIDS_RELEASE_CONFIRM 0x41
-
-#define OPCODE_CLOSE 0x50
-#define OPCODE_CLOSE_ACK 0x51
-/*
- * like OPCODE_CLOSE, but no complaint if other side has already closed.
- * Used when doing abort(), MPI_Abort(), etc.
- */
-#define OPCODE_ABORT 0x52
-
-/* sub OpCodes - ith4x */
-#define OPCODE_ENCAP 0x81
-#define OPCODE_LID_ARP 0x82
-
-/* Receive Header Queue: receive type (from infinipath) */
-#define RCVHQ_RCV_TYPE_EXPECTED 0
-#define RCVHQ_RCV_TYPE_EAGER 1
-#define RCVHQ_RCV_TYPE_NON_KD 2
-#define RCVHQ_RCV_TYPE_ERROR 3
-
-/* misc. */
-#define SIZE_OF_CRC 1
-
-#define EAGER_TID_ID INFINIPATH_I_TID_MASK
-
-#define IPS_DEFAULT_P_KEY 0xFFFF
-
-#define IPS_PERMISSIVE_LID 0xFFFF
-#define IPS_MULTICAST_LID_BASE 0xC000
-
-#define IPS_AETH_CREDIT_SHIFT 24
-#define IPS_AETH_CREDIT_MASK 0x1F
-#define IPS_AETH_CREDIT_INVAL 0x1F
-
-#define IPS_PSN_MASK 0xFFFFFF
-#define IPS_MSN_MASK 0xFFFFFF
-#define IPS_QPN_MASK 0xFFFFFF
-#define IPS_MULTICAST_QPN 0xFFFFFF
-
-/* functions for extracting fields from rcvhdrq entries */
-static inline __u32 ips_get_hdr_err_flags(const __le32 * rbuf)
-{
- return __le32_to_cpu(rbuf[1]);
-}
-
-static inline __u32 ips_get_index(const __le32 * rbuf)
-{
- return (__le32_to_cpu(rbuf[0]) >> INFINIPATH_RHF_EGRINDEX_SHIFT)
- & INFINIPATH_RHF_EGRINDEX_MASK;
-}
-
-static inline __u32 ips_get_rcv_type(const __le32 * rbuf)
-{
- return (__le32_to_cpu(rbuf[0]) >> INFINIPATH_RHF_RCVTYPE_SHIFT)
- & INFINIPATH_RHF_RCVTYPE_MASK;
-}
-
-static inline __u32 ips_get_length_in_bytes(const __le32 * rbuf)
-{
- return ((__le32_to_cpu(rbuf[0]) >> INFINIPATH_RHF_LENGTH_SHIFT)
- & INFINIPATH_RHF_LENGTH_MASK) << 2;
-}
-
-static inline void *ips_get_first_protocol_header(const __u32 * rbuf)
-{
- return (void *)&rbuf[2];
-}
-
-static inline struct ips_message_header *ips_get_ips_header(const __u32 *
- rbuf)
-{
- return (struct ips_message_header *)&rbuf[2];
-}
-
-static inline __u32 ips_get_ipath_ver(__le32 hdrword)
-{
- return (__le32_to_cpu(hdrword) >> INFINIPATH_I_VERS_SHIFT)
- & INFINIPATH_I_VERS_MASK;
-}
-
-#endif /* IPS_COMMON_H */
diff --git a/drivers/infiniband/hw/ipath/verbs_debug.h b/drivers/infiniband/hw/ipath/verbs_debug.h
deleted file mode 100644
index 40d693cf3f94..000000000000
--- a/drivers/infiniband/hw/ipath/verbs_debug.h
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef _VERBS_DEBUG_H
-#define _VERBS_DEBUG_H
-
-/*
- * This file contains tracing code for the ib_ipath kernel module.
- */
-#ifndef _VERBS_DEBUGGING /* tracing enabled or not */
-#define _VERBS_DEBUGGING 1
-#endif
-
-extern unsigned ib_ipath_debug;
-
-#define _VERBS_ERROR(fmt,...) \
- do { \
- printk(KERN_ERR "%s: " fmt, "ib_ipath", ##__VA_ARGS__); \
- } while(0)
-
-#define _VERBS_UNIT_ERROR(unit,fmt,...) \
- do { \
- printk(KERN_ERR "%s: " fmt, "ib_ipath", ##__VA_ARGS__); \
- } while(0)
-
-#if _VERBS_DEBUGGING
-
-/*
- * Mask values for debugging. The scheme allows us to compile out any
- * of the debug tracing stuff, and if compiled in, to enable or
- * disable dynamically.
- * This can be set at modprobe time also:
- * modprobe ib_path ib_ipath_debug=3
- */
-
-#define __VERBS_INFO 0x1 /* generic low verbosity stuff */
-#define __VERBS_DBG 0x2 /* generic debug */
-#define __VERBS_VDBG 0x4 /* verbose debug */
-#define __VERBS_SMADBG 0x8000 /* sma packet debug */
-
-#define _VERBS_INFO(fmt,...) \
- do { \
- if (unlikely(ib_ipath_debug&__VERBS_INFO)) \
- printk(KERN_INFO "%s: " fmt,"ib_ipath", \
- ##__VA_ARGS__); \
- } while(0)
-
-#define _VERBS_DBG(fmt,...) \
- do { \
- if (unlikely(ib_ipath_debug&__VERBS_DBG)) \
- printk(KERN_DEBUG "%s: " fmt, __func__, \
- ##__VA_ARGS__); \
- } while(0)
-
-#define _VERBS_VDBG(fmt,...) \
- do { \
- if (unlikely(ib_ipath_debug&__VERBS_VDBG)) \
- printk(KERN_DEBUG "%s: " fmt, __func__, \
- ##__VA_ARGS__); \
- } while(0)
-
-#define _VERBS_SMADBG(fmt,...) \
- do { \
- if (unlikely(ib_ipath_debug&__VERBS_SMADBG)) \
- printk(KERN_DEBUG "%s: " fmt, __func__, \
- ##__VA_ARGS__); \
- } while(0)
-
-#else /* ! _VERBS_DEBUGGING */
-
-#define _VERBS_INFO(fmt,...)
-#define _VERBS_DBG(fmt,...)
-#define _VERBS_VDBG(fmt,...)
-#define _VERBS_SMADBG(fmt,...)
-
-#endif /* _VERBS_DEBUGGING */
-
-#endif /* _VERBS_DEBUG_H */