diff options
65 files changed, 2771 insertions, 1369 deletions
diff --git a/Documentation/DocBook/kernel-api.tmpl b/Documentation/DocBook/kernel-api.tmpl index fd2ef4d29b6d..a0af560ed740 100644 --- a/Documentation/DocBook/kernel-api.tmpl +++ b/Documentation/DocBook/kernel-api.tmpl @@ -408,6 +408,10 @@ X!Edrivers/pnp/system.c !Edrivers/pnp/manager.c !Edrivers/pnp/support.c </sect1> + <sect1><title>Userspace IO devices</title> +!Edrivers/uio/uio.c +!Iinclude/linux/uio_driver.h + </sect1> </chapter> <chapter id="blkdev"> diff --git a/Documentation/DocBook/uio-howto.tmpl b/Documentation/DocBook/uio-howto.tmpl new file mode 100644 index 000000000000..e3bb29a8d8dd --- /dev/null +++ b/Documentation/DocBook/uio-howto.tmpl @@ -0,0 +1,611 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN" +"http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd" []> + +<book id="index"> +<bookinfo> +<title>The Userspace I/O HOWTO</title> + +<author> + <firstname>Hans-Jürgen</firstname> + <surname>Koch</surname> + <authorblurb><para>Linux developer, Linutronix</para></authorblurb> + <affiliation> + <orgname> + <ulink url="http://www.linutronix.de">Linutronix</ulink> + </orgname> + + <address> + <email>hjk@linutronix.de</email> + </address> + </affiliation> +</author> + +<pubdate>2006-12-11</pubdate> + +<abstract> + <para>This HOWTO describes concept and usage of Linux kernel's + Userspace I/O system.</para> +</abstract> + +<revhistory> + <revision> + <revnumber>0.3</revnumber> + <date>2007-04-29</date> + <authorinitials>hjk</authorinitials> + <revremark>Added section about userspace drivers.</revremark> + </revision> + <revision> + <revnumber>0.2</revnumber> + <date>2007-02-13</date> + <authorinitials>hjk</authorinitials> + <revremark>Update after multiple mappings were added.</revremark> + </revision> + <revision> + <revnumber>0.1</revnumber> + <date>2006-12-11</date> + <authorinitials>hjk</authorinitials> + <revremark>First draft.</revremark> + </revision> +</revhistory> +</bookinfo> + +<chapter id="aboutthisdoc"> +<?dbhtml filename="about.html"?> +<title>About this document</title> + +<sect1 id="copyright"> +<?dbhtml filename="copyright.html"?> +<title>Copyright and License</title> +<para> + Copyright (c) 2006 by Hans-Jürgen Koch.</para> +<para> +This documentation is Free Software licensed under the terms of the +GPL version 2. +</para> +</sect1> + +<sect1 id="translations"> +<?dbhtml filename="translations.html"?> +<title>Translations</title> + +<para>If you know of any translations for this document, or you are +interested in translating it, please email me +<email>hjk@linutronix.de</email>. +</para> +</sect1> + +<sect1 id="preface"> +<title>Preface</title> + <para> + For many types of devices, creating a Linux kernel driver is + overkill. All that is really needed is some way to handle an + interrupt and provide access to the memory space of the + device. The logic of controlling the device does not + necessarily have to be within the kernel, as the device does + not need to take advantage of any of other resources that the + kernel provides. One such common class of devices that are + like this are for industrial I/O cards. + </para> + <para> + To address this situation, the userspace I/O system (UIO) was + designed. For typical industrial I/O cards, only a very small + kernel module is needed. The main part of the driver will run in + user space. This simplifies development and reduces the risk of + serious bugs within a kernel module. + </para> +</sect1> + +<sect1 id="thanks"> +<title>Acknowledgments</title> + <para>I'd like to thank Thomas Gleixner and Benedikt Spranger of + Linutronix, who have not only written most of the UIO code, but also + helped greatly writing this HOWTO by giving me all kinds of background + information.</para> +</sect1> + +<sect1 id="feedback"> +<title>Feedback</title> + <para>Find something wrong with this document? (Or perhaps something + right?) I would love to hear from you. Please email me at + <email>hjk@linutronix.de</email>.</para> +</sect1> +</chapter> + +<chapter id="about"> +<?dbhtml filename="about.html"?> +<title>About UIO</title> + +<para>If you use UIO for your card's driver, here's what you get:</para> + +<itemizedlist> +<listitem> + <para>only one small kernel module to write and maintain.</para> +</listitem> +<listitem> + <para>develop the main part of your driver in user space, + with all the tools and libraries you're used to.</para> +</listitem> +<listitem> + <para>bugs in your driver won't crash the kernel.</para> +</listitem> +<listitem> + <para>updates of your driver can take place without recompiling + the kernel.</para> +</listitem> +<listitem> + <para>if you need to keep some parts of your driver closed source, + you can do so without violating the GPL license on the kernel.</para> +</listitem> +</itemizedlist> + +<sect1 id="how_uio_works"> +<title>How UIO works</title> + <para> + Each UIO device is accessed through a device file and several + sysfs attribute files. The device file will be called + <filename>/dev/uio0</filename> for the first device, and + <filename>/dev/uio1</filename>, <filename>/dev/uio2</filename> + and so on for subsequent devices. + </para> + + <para><filename>/dev/uioX</filename> is used to access the + address space of the card. Just use + <function>mmap()</function> to access registers or RAM + locations of your card. + </para> + + <para> + Interrupts are handled by reading from + <filename>/dev/uioX</filename>. A blocking + <function>read()</function> from + <filename>/dev/uioX</filename> will return as soon as an + interrupt occurs. You can also use + <function>select()</function> on + <filename>/dev/uioX</filename> to wait for an interrupt. The + integer value read from <filename>/dev/uioX</filename> + represents the total interrupt count. You can use this number + to figure out if you missed some interrupts. + </para> + + <para> + To handle interrupts properly, your custom kernel module can + provide its own interrupt handler. It will automatically be + called by the built-in handler. + </para> + + <para> + For cards that don't generate interrupts but need to be + polled, there is the possibility to set up a timer that + triggers the interrupt handler at configurable time intervals. + See <filename>drivers/uio/uio_dummy.c</filename> for an + example of this technique. + </para> + + <para> + Each driver provides attributes that are used to read or write + variables. These attributes are accessible through sysfs + files. A custom kernel driver module can add its own + attributes to the device owned by the uio driver, but not added + to the UIO device itself at this time. This might change in the + future if it would be found to be useful. + </para> + + <para> + The following standard attributes are provided by the UIO + framework: + </para> +<itemizedlist> +<listitem> + <para> + <filename>name</filename>: The name of your device. It is + recommended to use the name of your kernel module for this. + </para> +</listitem> +<listitem> + <para> + <filename>version</filename>: A version string defined by your + driver. This allows the user space part of your driver to deal + with different versions of the kernel module. + </para> +</listitem> +<listitem> + <para> + <filename>event</filename>: The total number of interrupts + handled by the driver since the last time the device node was + read. + </para> +</listitem> +</itemizedlist> +<para> + These attributes appear under the + <filename>/sys/class/uio/uioX</filename> directory. Please + note that this directory might be a symlink, and not a real + directory. Any userspace code that accesses it must be able + to handle this. +</para> +<para> + Each UIO device can make one or more memory regions available for + memory mapping. This is necessary because some industrial I/O cards + require access to more than one PCI memory region in a driver. +</para> +<para> + Each mapping has its own directory in sysfs, the first mapping + appears as <filename>/sys/class/uio/uioX/maps/map0/</filename>. + Subsequent mappings create directories <filename>map1/</filename>, + <filename>map2/</filename>, and so on. These directories will only + appear if the size of the mapping is not 0. +</para> +<para> + Each <filename>mapX/</filename> directory contains two read-only files + that show start address and size of the memory: +</para> +<itemizedlist> +<listitem> + <para> + <filename>addr</filename>: The address of memory that can be mapped. + </para> +</listitem> +<listitem> + <para> + <filename>size</filename>: The size, in bytes, of the memory + pointed to by addr. + </para> +</listitem> +</itemizedlist> + +<para> + From userspace, the different mappings are distinguished by adjusting + the <varname>offset</varname> parameter of the + <function>mmap()</function> call. To map the memory of mapping N, you + have to use N times the page size as your offset: +</para> +<programlisting format="linespecific"> +offset = N * getpagesize(); +</programlisting> + +</sect1> +</chapter> + +<chapter id="using-uio_dummy" xreflabel="Using uio_dummy"> +<?dbhtml filename="using-uio_dummy.html"?> +<title>Using uio_dummy</title> + <para> + Well, there is no real use for uio_dummy. Its only purpose is + to test most parts of the UIO system (everything except + hardware interrupts), and to serve as an example for the + kernel module that you will have to write yourself. + </para> + +<sect1 id="what_uio_dummy_does"> +<title>What uio_dummy does</title> + <para> + The kernel module <filename>uio_dummy.ko</filename> creates a + device that uses a timer to generate periodic interrupts. The + interrupt handler does nothing but increment a counter. The + driver adds two custom attributes, <varname>count</varname> + and <varname>freq</varname>, that appear under + <filename>/sys/devices/platform/uio_dummy/</filename>. + </para> + + <para> + The attribute <varname>count</varname> can be read and + written. The associated file + <filename>/sys/devices/platform/uio_dummy/count</filename> + appears as a normal text file and contains the total number of + timer interrupts. If you look at it (e.g. using + <function>cat</function>), you'll notice it is slowly counting + up. + </para> + + <para> + The attribute <varname>freq</varname> can be read and written. + The content of + <filename>/sys/devices/platform/uio_dummy/freq</filename> + represents the number of system timer ticks between two timer + interrupts. The default value of <varname>freq</varname> is + the value of the kernel variable <varname>HZ</varname>, which + gives you an interval of one second. Lower values will + increase the frequency. Try the following: + </para> +<programlisting format="linespecific"> +cd /sys/devices/platform/uio_dummy/ +echo 100 > freq +</programlisting> + <para> + Use <function>cat count</function> to see how the interrupt + frequency changes. + </para> +</sect1> +</chapter> + +<chapter id="custom_kernel_module" xreflabel="Writing your own kernel module"> +<?dbhtml filename="custom_kernel_module.html"?> +<title>Writing your own kernel module</title> + <para> + Please have a look at <filename>uio_dummy.c</filename> as an + example. The following paragraphs explain the different + sections of this file. + </para> + +<sect1 id="uio_info"> +<title>struct uio_info</title> + <para> + This structure tells the framework the details of your driver, + Some of the members are required, others are optional. + </para> + +<itemizedlist> +<listitem><para> +<varname>char *name</varname>: Required. The name of your driver as +it will appear in sysfs. I recommend using the name of your module for this. +</para></listitem> + +<listitem><para> +<varname>char *version</varname>: Required. This string appears in +<filename>/sys/class/uio/uioX/version</filename>. +</para></listitem> + +<listitem><para> +<varname>struct uio_mem mem[ MAX_UIO_MAPS ]</varname>: Required if you +have memory that can be mapped with <function>mmap()</function>. For each +mapping you need to fill one of the <varname>uio_mem</varname> structures. +See the description below for details. +</para></listitem> + +<listitem><para> +<varname>long irq</varname>: Required. If your hardware generates an +interrupt, it's your modules task to determine the irq number during +initialization. If you don't have a hardware generated interrupt but +want to trigger the interrupt handler in some other way, set +<varname>irq</varname> to <varname>UIO_IRQ_CUSTOM</varname>. The +uio_dummy module does this as it triggers the event mechanism in a timer +routine. If you had no interrupt at all, you could set +<varname>irq</varname> to <varname>UIO_IRQ_NONE</varname>, though this +rarely makes sense. +</para></listitem> + +<listitem><para> +<varname>unsigned long irq_flags</varname>: Required if you've set +<varname>irq</varname> to a hardware interrupt number. The flags given +here will be used in the call to <function>request_irq()</function>. +</para></listitem> + +<listitem><para> +<varname>int (*mmap)(struct uio_info *info, struct vm_area_struct +*vma)</varname>: Optional. If you need a special +<function>mmap()</function> function, you can set it here. If this +pointer is not NULL, your <function>mmap()</function> will be called +instead of the built-in one. +</para></listitem> + +<listitem><para> +<varname>int (*open)(struct uio_info *info, struct inode *inode) +</varname>: Optional. You might want to have your own +<function>open()</function>, e.g. to enable interrupts only when your +device is actually used. +</para></listitem> + +<listitem><para> +<varname>int (*release)(struct uio_info *info, struct inode *inode) +</varname>: Optional. If you define your own +<function>open()</function>, you will probably also want a custom +<function>release()</function> function. +</para></listitem> +</itemizedlist> + +<para> +Usually, your device will have one or more memory regions that can be mapped +to user space. For each region, you have to set up a +<varname>struct uio_mem</varname> in the <varname>mem[]</varname> array. +Here's a description of the fields of <varname>struct uio_mem</varname>: +</para> + +<itemizedlist> +<listitem><para> +<varname>int memtype</varname>: Required if the mapping is used. Set this to +<varname>UIO_MEM_PHYS</varname> if you you have physical memory on your +card to be mapped. Use <varname>UIO_MEM_LOGICAL</varname> for logical +memory (e.g. allocated with <function>kmalloc()</function>). There's also +<varname>UIO_MEM_VIRTUAL</varname> for virtual memory. +</para></listitem> + +<listitem><para> +<varname>unsigned long addr</varname>: Required if the mapping is used. +Fill in the address of your memory block. This address is the one that +appears in sysfs. +</para></listitem> + +<listitem><para> +<varname>unsigned long size</varname>: Fill in the size of the +memory block that <varname>addr</varname> points to. If <varname>size</varname> +is zero, the mapping is considered unused. Note that you +<emphasis>must</emphasis> initialize <varname>size</varname> with zero for +all unused mappings. +</para></listitem> + +<listitem><para> +<varname>void *internal_addr</varname>: If you have to access this memory +region from within your kernel module, you will want to map it internally by +using something like <function>ioremap()</function>. Addresses +returned by this function cannot be mapped to user space, so you must not +store it in <varname>addr</varname>. Use <varname>internal_addr</varname> +instead to remember such an address. +</para></listitem> +</itemizedlist> + +<para> +Please do not touch the <varname>kobj</varname> element of +<varname>struct uio_mem</varname>! It is used by the UIO framework +to set up sysfs files for this mapping. Simply leave it alone. +</para> +</sect1> + +<sect1 id="adding_irq_handler"> +<title>Adding an interrupt handler</title> + <para> + What you need to do in your interrupt handler depends on your + hardware and on how you want to handle it. You should try to + keep the amount of code in your kernel interrupt handler low. + If your hardware requires no action that you + <emphasis>have</emphasis> to perform after each interrupt, + then your handler can be empty.</para> <para>If, on the other + hand, your hardware <emphasis>needs</emphasis> some action to + be performed after each interrupt, then you + <emphasis>must</emphasis> do it in your kernel module. Note + that you cannot rely on the userspace part of your driver. Your + userspace program can terminate at any time, possibly leaving + your hardware in a state where proper interrupt handling is + still required. + </para> + + <para> + There might also be applications where you want to read data + from your hardware at each interrupt and buffer it in a piece + of kernel memory you've allocated for that purpose. With this + technique you could avoid loss of data if your userspace + program misses an interrupt. + </para> + + <para> + A note on shared interrupts: Your driver should support + interrupt sharing whenever this is possible. It is possible if + and only if your driver can detect whether your hardware has + triggered the interrupt or not. This is usually done by looking + at an interrupt status register. If your driver sees that the + IRQ bit is actually set, it will perform its actions, and the + handler returns IRQ_HANDLED. If the driver detects that it was + not your hardware that caused the interrupt, it will do nothing + and return IRQ_NONE, allowing the kernel to call the next + possible interrupt handler. + </para> + + <para> + If you decide not to support shared interrupts, your card + won't work in computers with no free interrupts. As this + frequently happens on the PC platform, you can save yourself a + lot of trouble by supporting interrupt sharing. + </para> +</sect1> + +</chapter> + +<chapter id="userspace_driver" xreflabel="Writing a driver in user space"> +<?dbhtml filename="userspace_driver.html"?> +<title>Writing a driver in userspace</title> + <para> + Once you have a working kernel module for your hardware, you can + write the userspace part of your driver. You don't need any special + libraries, your driver can be written in any reasonable language, + you can use floating point numbers and so on. In short, you can + use all the tools and libraries you'd normally use for writing a + userspace application. + </para> + +<sect1 id="getting_uio_information"> +<title>Getting information about your UIO device</title> + <para> + Information about all UIO devices is available in sysfs. The + first thing you should do in your driver is check + <varname>name</varname> and <varname>version</varname> to + make sure your talking to the right device and that its kernel + driver has the version you expect. + </para> + <para> + You should also make sure that the memory mapping you need + exists and has the size you expect. + </para> + <para> + There is a tool called <varname>lsuio</varname> that lists + UIO devices and their attributes. It is available here: + </para> + <para> + <ulink url="http://www.osadl.org/projects/downloads/UIO/user/"> + http://www.osadl.org/projects/downloads/UIO/user/</ulink> + </para> + <para> + With <varname>lsuio</varname> you can quickly check if your + kernel module is loaded and which attributes it exports. + Have a look at the manpage for details. + </para> + <para> + The source code of <varname>lsuio</varname> can serve as an + example for getting information about an UIO device. + The file <filename>uio_helper.c</filename> contains a lot of + functions you could use in your userspace driver code. + </para> +</sect1> + +<sect1 id="mmap_device_memory"> +<title>mmap() device memory</title> + <para> + After you made sure you've got the right device with the + memory mappings you need, all you have to do is to call + <function>mmap()</function> to map the device's memory + to userspace. + </para> + <para> + The parameter <varname>offset</varname> of the + <function>mmap()</function> call has a special meaning + for UIO devices: It is used to select which mapping of + your device you want to map. To map the memory of + mapping N, you have to use N times the page size as + your offset: + </para> +<programlisting format="linespecific"> + offset = N * getpagesize(); +</programlisting> + <para> + N starts from zero, so if you've got only one memory + range to map, set <varname>offset = 0</varname>. + A drawback of this technique is that memory is always + mapped beginning with its start address. + </para> +</sect1> + +<sect1 id="wait_for_interrupts"> +<title>Waiting for interrupts</title> + <para> + After you successfully mapped your devices memory, you + can access it like an ordinary array. Usually, you will + perform some initialization. After that, your hardware + starts working and will generate an interrupt as soon + as it's finished, has some data available, or needs your + attention because an error occured. + </para> + <para> + <filename>/dev/uioX</filename> is a read-only file. A + <function>read()</function> will always block until an + interrupt occurs. There is only one legal value for the + <varname>count</varname> parameter of + <function>read()</function>, and that is the size of a + signed 32 bit integer (4). Any other value for + <varname>count</varname> causes <function>read()</function> + to fail. The signed 32 bit integer read is the interrupt + count of your device. If the value is one more than the value + you read the last time, everything is OK. If the difference + is greater than one, you missed interrupts. + </para> + <para> + You can also use <function>select()</function> on + <filename>/dev/uioX</filename>. + </para> +</sect1> + +</chapter> + +<appendix id="app1"> +<title>Further information</title> +<itemizedlist> + <listitem><para> + <ulink url="http://www.osadl.org"> + OSADL homepage.</ulink> + </para></listitem> + <listitem><para> + <ulink url="http://www.linutronix.de"> + Linutronix homepage.</ulink> + </para></listitem> +</itemizedlist> +</appendix> + +</book> diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 1b5c70758a1a..66c8b4b165c1 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -308,3 +308,13 @@ Why: The arch/powerpc tree is the merged architecture for ppc32 and ppc64 Who: linuxppc-dev@ozlabs.org --------------------------- + +What: mthca driver's MSI support +When: January 2008 +Files: drivers/infiniband/hw/mthca/*.[ch] +Why: All mthca hardware also supports MSI-X, which provides + strictly more functionality than MSI. So there is no point in + having both MSI-X and MSI support in the driver. +Who: Roland Dreier <rolandd@cisco.com> + +--------------------------- diff --git a/drivers/Kconfig b/drivers/Kconfig index 7916f4b86d23..ae01d86070bb 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -84,4 +84,5 @@ source "drivers/auxdisplay/Kconfig" source "drivers/kvm/Kconfig" +source "drivers/uio/Kconfig" endmenu diff --git a/drivers/Makefile b/drivers/Makefile index 6d9d7fab77f5..c34c8efff609 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -40,6 +40,7 @@ obj-$(CONFIG_ATA) += ata/ obj-$(CONFIG_FUSION) += message/ obj-$(CONFIG_FIREWIRE) += firewire/ obj-$(CONFIG_IEEE1394) += ieee1394/ +obj-$(CONFIG_UIO) += uio/ obj-y += cdrom/ obj-y += auxdisplay/ obj-$(CONFIG_MTD) += mtd/ diff --git a/drivers/firewire/fw-ohci.c b/drivers/firewire/fw-ohci.c index 41476abc0693..db703758db98 100644 --- a/drivers/firewire/fw-ohci.c +++ b/drivers/firewire/fw-ohci.c @@ -224,6 +224,7 @@ ohci_update_phy_reg(struct fw_card *card, int addr, u32 val, old; reg_write(ohci, OHCI1394_PhyControl, OHCI1394_PhyControl_Read(addr)); + flush_writes(ohci); msleep(2); val = reg_read(ohci, OHCI1394_PhyControl); if ((val & OHCI1394_PhyControl_ReadDone) == 0) { @@ -586,7 +587,7 @@ static void context_stop(struct context *ctx) break; fw_notify("context_stop: still active (0x%08x)\n", reg); - msleep(1); + mdelay(1); } } diff --git a/drivers/firewire/fw-sbp2.c b/drivers/firewire/fw-sbp2.c index 7c53be0387fb..fc984474162c 100644 --- a/drivers/firewire/fw-sbp2.c +++ b/drivers/firewire/fw-sbp2.c @@ -840,7 +840,6 @@ complete_command_orb(struct sbp2_orb *base_orb, struct sbp2_status *status) container_of(base_orb, struct sbp2_command_orb, base); struct fw_unit *unit = orb->unit; struct fw_device *device = fw_device(unit->device.parent); - struct scatterlist *sg; int result; if (status != NULL) { @@ -876,11 +875,10 @@ complete_command_orb(struct sbp2_orb *base_orb, struct sbp2_status *status) dma_unmap_single(device->card->device, orb->base.request_bus, sizeof(orb->request), DMA_TO_DEVICE); - if (orb->cmd->use_sg > 0) { - sg = (struct scatterlist *)orb->cmd->request_buffer; - dma_unmap_sg(device->card->device, sg, orb->cmd->use_sg, + if (scsi_sg_count(orb->cmd) > 0) + dma_unmap_sg(device->card->device, scsi_sglist(orb->cmd), + scsi_sg_count(orb->cmd), orb->cmd->sc_data_direction); - } if (orb->page_table_bus != 0) dma_unmap_single(device->card->device, orb->page_table_bus, @@ -901,8 +899,8 @@ static int sbp2_command_orb_map_scatterlist(struct sbp2_command_orb *orb) int sg_len, l, i, j, count; dma_addr_t sg_addr; - sg = (struct scatterlist *)orb->cmd->request_buffer; - count = dma_map_sg(device->card->device, sg, orb->cmd->use_sg, + sg = scsi_sglist(orb->cmd); + count = dma_map_sg(device->card->device, sg, scsi_sg_count(orb->cmd), orb->cmd->sc_data_direction); if (count == 0) goto fail; @@ -971,7 +969,7 @@ static int sbp2_command_orb_map_scatterlist(struct sbp2_command_orb *orb) return 0; fail_page_table: - dma_unmap_sg(device->card->device, sg, orb->cmd->use_sg, + dma_unmap_sg(device->card->device, sg, scsi_sg_count(orb->cmd), orb->cmd->sc_data_direction); fail: return -ENOMEM; @@ -1031,7 +1029,7 @@ static int sbp2_scsi_queuecommand(struct scsi_cmnd *cmd, scsi_done_fn_t done) orb->request.misc |= COMMAND_ORB_DIRECTION(SBP2_DIRECTION_TO_MEDIA); - if (cmd->use_sg && sbp2_command_orb_map_scatterlist(orb) < 0) + if (scsi_sg_count(cmd) && sbp2_command_orb_map_scatterlist(orb) < 0) goto fail_mapping; fw_memcpy_to_be32(&orb->request, &orb->request, sizeof(orb->request)); diff --git a/drivers/firewire/fw-transaction.c b/drivers/firewire/fw-transaction.c index 80d0121463d0..3ce8e2fbe15f 100644 --- a/drivers/firewire/fw-transaction.c +++ b/drivers/firewire/fw-transaction.c @@ -605,8 +605,10 @@ fw_send_response(struct fw_card *card, struct fw_request *request, int rcode) * check is sufficient to ensure we don't send response to * broadcast packets or posted writes. */ - if (request->ack != ACK_PENDING) + if (request->ack != ACK_PENDING) { + kfree(request); return; + } if (rcode == RCODE_COMPLETE) fw_fill_response(&request->response, request->request_header, @@ -628,11 +630,6 @@ fw_core_handle_request(struct fw_card *card, struct fw_packet *p) unsigned long flags; int tcode, destination, source; - if (p->payload_length > 2048) { - /* FIXME: send error response. */ - return; - } - if (p->ack != ACK_PENDING && p->ack != ACK_COMPLETE) return; diff --git a/drivers/firewire/fw-transaction.h b/drivers/firewire/fw-transaction.h index 5abed193f4a6..5ceaccd10564 100644 --- a/drivers/firewire/fw-transaction.h +++ b/drivers/firewire/fw-transaction.h @@ -123,6 +123,10 @@ typedef void (*fw_transaction_callback_t)(struct fw_card *card, int rcode, size_t length, void *callback_data); +/* + * Important note: The callback must guarantee that either fw_send_response() + * or kfree() is called on the @request. + */ typedef void (*fw_address_callback_t)(struct fw_card *card, struct fw_request *request, int tcode, int destination, int source, diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 9820c67ba47d..4df269f5d9ac 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -3374,7 +3374,7 @@ int ib_cm_init_qp_attr(struct ib_cm_id *cm_id, } EXPORT_SYMBOL(ib_cm_init_qp_attr); -void cm_get_ack_delay(struct cm_device *cm_dev) +static void cm_get_ack_delay(struct cm_device *cm_dev) { struct ib_device_attr attr; diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 23af7a032a03..9ffb9987450a 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -573,7 +573,7 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr, break; case RDMA_TRANSPORT_IWARP: if (!id_priv->cm_id.iw) { - qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE; + qp_attr->qp_access_flags = 0; *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS; } else ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr, diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c index 3b41dc0c39dd..5dc68cd5621b 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c @@ -1914,6 +1914,7 @@ int iwch_create_listen(struct iw_cm_id *cm_id, int backlog) fail3: cxgb3_free_stid(ep->com.tdev, ep->stid); fail2: + cm_id->rem_ref(cm_id); put_ep(&ep->com); fail1: out: diff --git a/drivers/infiniband/hw/ehca/ehca_av.c b/drivers/infiniband/hw/ehca/ehca_av.c index 3cd6bf3402d1..e53a97af1260 100644 --- a/drivers/infiniband/hw/ehca/ehca_av.c +++ b/drivers/infiniband/hw/ehca/ehca_av.c @@ -79,7 +79,7 @@ struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) av->av.ipd = (ah_mult > 0) ? ((ehca_mult - 1) / ah_mult) : 0; } else - av->av.ipd = ehca_static_rate; + av->av.ipd = ehca_static_rate; av->av.lnh = ah_attr->ah_flags; av->av.grh.word_0 = EHCA_BMASK_SET(GRH_IPVERSION_MASK, 6); diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h index daf823ea1ace..043e4fb23fb0 100644 --- a/drivers/infiniband/hw/ehca/ehca_classes.h +++ b/drivers/infiniband/hw/ehca/ehca_classes.h @@ -204,11 +204,11 @@ struct ehca_mr { spinlock_t mrlock; enum ehca_mr_flag flags; - u32 num_pages; /* number of MR pages */ - u32 num_4k; /* number of 4k "page" portions to form MR */ + u32 num_kpages; /* number of kernel pages */ + u32 num_hwpages; /* number of hw pages to form MR */ int acl; /* ACL (stored here for usage in reregister) */ u64 *start; /* virtual start address (stored here for */ - /* usage in reregister) */ + /* usage in reregister) */ u64 size; /* size (stored here for usage in reregister) */ u32 fmr_page_size; /* page size for FMR */ u32 fmr_max_pages; /* max pages for FMR */ @@ -217,9 +217,6 @@ struct ehca_mr { /* fw specific data */ struct ipz_mrmw_handle ipz_mr_handle; /* MR handle for h-calls */ struct h_galpas galpas; - /* data for userspace bridge */ - u32 nr_of_pages; - void *pagearray; }; struct ehca_mw { @@ -241,26 +238,29 @@ enum ehca_mr_pgi_type { struct ehca_mr_pginfo { enum ehca_mr_pgi_type type; - u64 num_pages; - u64 page_cnt; - u64 num_4k; /* number of 4k "page" portions */ - u64 page_4k_cnt; /* counter for 4k "page" portions */ - u64 next_4k; /* next 4k "page" portion in buffer/chunk/listelem */ - - /* type EHCA_MR_PGI_PHYS section */ - int num_phys_buf; - struct ib_phys_buf *phys_buf_array; - u64 next_buf; - - /* type EHCA_MR_PGI_USER section */ - struct ib_umem *region; - struct ib_umem_chunk *next_chunk; - u64 next_nmap; - - /* type EHCA_MR_PGI_FMR section */ - u64 *page_list; - u64 next_listelem; - /* next_4k also used within EHCA_MR_PGI_FMR */ + u64 num_kpages; + u64 kpage_cnt; + u64 num_hwpages; /* number of hw pages */ + u64 hwpage_cnt; /* counter for hw pages */ + u64 next_hwpage; /* next hw page in buffer/chunk/listelem */ + + union { + struct { /* type EHCA_MR_PGI_PHYS section */ + int num_phys_buf; + struct ib_phys_buf *phys_buf_array; + u64 next_buf; + } phy; + struct { /* type EHCA_MR_PGI_USER section */ + struct ib_umem *region; + struct ib_umem_chunk *next_chunk; + u64 next_nmap; + } usr; + struct { /* type EHCA_MR_PGI_FMR section */ + u64 fmr_pgsize; + u64 *page_list; + u64 next_listelem; + } fmr; + } u; }; /* output parameters for MR/FMR hipz calls */ @@ -391,6 +391,6 @@ struct ehca_alloc_qp_parms { int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp); int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int qp_num); -struct ehca_qp* ehca_cq_get_qp(struct ehca_cq *cq, int qp_num); +struct ehca_qp *ehca_cq_get_qp(struct ehca_cq *cq, int qp_num); #endif diff --git a/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h b/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h index fb3df5c271e7..1798e6466bd0 100644 --- a/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h +++ b/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h @@ -154,83 +154,83 @@ struct hcp_modify_qp_control_block { u32 reserved_70_127[58]; /* 70 */ }; -#define MQPCB_MASK_QKEY EHCA_BMASK_IBM(0,0) -#define MQPCB_MASK_SEND_PSN EHCA_BMASK_IBM(2,2) -#define MQPCB_MASK_RECEIVE_PSN EHCA_BMASK_IBM(3,3) -#define MQPCB_MASK_PRIM_PHYS_PORT EHCA_BMASK_IBM(4,4) -#define MQPCB_PRIM_PHYS_PORT EHCA_BMASK_IBM(24,31) -#define MQPCB_MASK_ALT_PHYS_PORT EHCA_BMASK_IBM(5,5) -#define MQPCB_MASK_PRIM_P_KEY_IDX EHCA_BMASK_IBM(6,6) -#define MQPCB_PRIM_P_KEY_IDX EHCA_BMASK_IBM(24,31) -#define MQPCB_MASK_ALT_P_KEY_IDX EHCA_BMASK_IBM(7,7) -#define MQPCB_MASK_RDMA_ATOMIC_CTRL EHCA_BMASK_IBM(8,8) -#define MQPCB_MASK_QP_STATE EHCA_BMASK_IBM(9,9) -#define MQPCB_QP_STATE EHCA_BMASK_IBM(24,31) -#define MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES EHCA_BMASK_IBM(11,11) -#define MQPCB_MASK_PATH_MIGRATION_STATE EHCA_BMASK_IBM(12,12) -#define MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP EHCA_BMASK_IBM(13,13) -#define MQPCB_MASK_DEST_QP_NR EHCA_BMASK_IBM(14,14) -#define MQPCB_MASK_MIN_RNR_NAK_TIMER_FIELD EHCA_BMASK_IBM(15,15) -#define MQPCB_MASK_SERVICE_LEVEL EHCA_BMASK_IBM(16,16) -#define MQPCB_MASK_SEND_GRH_FLAG EHCA_BMASK_IBM(17,17) -#define MQPCB_MASK_RETRY_COUNT EHCA_BMASK_IBM(18,18) -#define MQPCB_MASK_TIMEOUT EHCA_BMASK_IBM(19,19) -#define MQPCB_MASK_PATH_MTU EHCA_BMASK_IBM(20,20) -#define MQPCB_PATH_MTU EHCA_BMASK_IBM(24,31) -#define MQPCB_MASK_MAX_STATIC_RATE EHCA_BMASK_IBM(21,21) -#define MQPCB_MAX_STATIC_RATE EHCA_BMASK_IBM(24,31) -#define MQPCB_MASK_DLID EHCA_BMASK_IBM(22,22) -#define MQPCB_DLID EHCA_BMASK_IBM(16,31) -#define MQPCB_MASK_RNR_RETRY_COUNT EHCA_BMASK_IBM(23,23) -#define MQPCB_RNR_RETRY_COUNT EHCA_BMASK_IBM(29,31) -#define MQPCB_MASK_SOURCE_PATH_BITS EHCA_BMASK_IBM(24,24) -#define MQPCB_SOURCE_PATH_BITS EHCA_BMASK_IBM(25,31) -#define MQPCB_MASK_TRAFFIC_CLASS EHCA_BMASK_IBM(25,25) -#define MQPCB_TRAFFIC_CLASS EHCA_BMASK_IBM(24,31) -#define MQPCB_MASK_HOP_LIMIT EHCA_BMASK_IBM(26,26) -#define MQPCB_HOP_LIMIT EHCA_BMASK_IBM(24,31) -#define MQPCB_MASK_SOURCE_GID_IDX EHCA_BMASK_IBM(27,27) -#define MQPCB_SOURCE_GID_IDX EHCA_BMASK_IBM(24,31) -#define MQPCB_MASK_FLOW_LABEL EHCA_BMASK_IBM(28,28) -#define MQPCB_FLOW_LABEL EHCA_BMASK_IBM(12,31) -#define MQPCB_MASK_DEST_GID EHCA_BMASK_IBM(30,30) -#define MQPCB_MASK_SERVICE_LEVEL_AL EHCA_BMASK_IBM(31,31) -#define MQPCB_SERVICE_LEVEL_AL EHCA_BMASK_IBM(28,31) -#define MQPCB_MASK_SEND_GRH_FLAG_AL EHCA_BMASK_IBM(32,32) -#define MQPCB_SEND_GRH_FLAG_AL EHCA_BMASK_IBM(31,31) -#define MQPCB_MASK_RETRY_COUNT_AL EHCA_BMASK_IBM(33,33) -#define MQPCB_RETRY_COUNT_AL EHCA_BMASK_IBM(29,31) -#define MQPCB_MASK_TIMEOUT_AL EHCA_BMASK_IBM(34,34) -#define MQPCB_TIMEOUT_AL EHCA_BMASK_IBM(27,31) -#define MQPCB_MASK_MAX_STATIC_RATE_AL EHCA_BMASK_IBM(35,35) -#define MQPCB_MAX_STATIC_RATE_AL EHCA_BMASK_IBM(24,31) -#define MQPCB_MASK_DLID_AL EHCA_BMASK_IBM(36,36) -#define MQPCB_DLID_AL EHCA_BMASK_IBM(16,31) -#define MQPCB_MASK_RNR_RETRY_COUNT_AL EHCA_BMASK_IBM(37,37) -#define MQPCB_RNR_RETRY_COUNT_AL EHCA_BMASK_IBM(29,31) -#define MQPCB_MASK_SOURCE_PATH_BITS_AL EHCA_BMASK_IBM(38,38) -#define MQPCB_SOURCE_PATH_BITS_AL EHCA_BMASK_IBM(25,31) -#define MQPCB_MASK_TRAFFIC_CLASS_AL EHCA_BMASK_IBM(39,39) -#define MQPCB_TRAFFIC_CLASS_AL EHCA_BMASK_IBM(24,31) -#define MQPCB_MASK_HOP_LIMIT_AL EHCA_BMASK_IBM(40,40) -#define MQPCB_HOP_LIMIT_AL EHCA_BMASK_IBM(24,31) -#define MQPCB_MASK_SOURCE_GID_IDX_AL EHCA_BMASK_IBM(41,41) -#define MQPCB_SOURCE_GID_IDX_AL EHCA_BMASK_IBM(24,31) -#define MQPCB_MASK_FLOW_LABEL_AL EHCA_BMASK_IBM(42,42) -#define MQPCB_FLOW_LABEL_AL EHCA_BMASK_IBM(12,31) -#define MQPCB_MASK_DEST_GID_AL EHCA_BMASK_IBM(44,44) -#define MQPCB_MASK_MAX_NR_OUTST_SEND_WR EHCA_BMASK_IBM(45,45) -#define MQPCB_MAX_NR_OUTST_SEND_WR EHCA_BMASK_IBM(16,31) -#define MQPCB_MASK_MAX_NR_OUTST_RECV_WR EHCA_BMASK_IBM(46,46) -#define MQPCB_MAX_NR_OUTST_RECV_WR EHCA_BMASK_IBM(16,31) -#define MQPCB_MASK_DISABLE_ETE_CREDIT_CHECK EHCA_BMASK_IBM(47,47) -#define MQPCB_DISABLE_ETE_CREDIT_CHECK EHCA_BMASK_IBM(31,31) -#define MQPCB_QP_NUMBER EHCA_BMASK_IBM(8,31) -#define MQPCB_MASK_QP_ENABLE EHCA_BMASK_IBM(48,48) -#define MQPCB_QP_ENABLE EHCA_BMASK_IBM(31,31) -#define MQPCB_MASK_CURR_SRQ_LIMIT EHCA_BMASK_IBM(49,49) -#define MQPCB_CURR_SRQ_LIMIT EHCA_BMASK_IBM(16,31) -#define MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG EHCA_BMASK_IBM(50,50) -#define MQPCB_MASK_SHARED_RQ_HNDL EHCA_BMASK_IBM(51,51) +#define MQPCB_MASK_QKEY EHCA_BMASK_IBM( 0, 0) +#define MQPCB_MASK_SEND_PSN EHCA_BMASK_IBM( 2, 2) +#define MQPCB_MASK_RECEIVE_PSN EHCA_BMASK_IBM( 3, 3) +#define MQPCB_MASK_PRIM_PHYS_PORT EHCA_BMASK_IBM( 4, 4) +#define MQPCB_PRIM_PHYS_PORT EHCA_BMASK_IBM(24, 31) +#define MQPCB_MASK_ALT_PHYS_PORT EHCA_BMASK_IBM( 5, 5) +#define MQPCB_MASK_PRIM_P_KEY_IDX EHCA_BMASK_IBM( 6, 6) +#define MQPCB_PRIM_P_KEY_IDX EHCA_BMASK_IBM(24, 31) +#define MQPCB_MASK_ALT_P_KEY_IDX EHCA_BMASK_IBM( 7, 7) +#define MQPCB_MASK_RDMA_ATOMIC_CTRL EHCA_BMASK_IBM( 8, 8) +#define MQPCB_MASK_QP_STATE EHCA_BMASK_IBM( 9, 9) +#define MQPCB_QP_STATE EHCA_BMASK_IBM(24, 31) +#define MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES EHCA_BMASK_IBM(11, 11) +#define MQPCB_MASK_PATH_MIGRATION_STATE EHCA_BMASK_IBM(12, 12) +#define MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP EHCA_BMASK_IBM(13, 13) +#define MQPCB_MASK_DEST_QP_NR EHCA_BMASK_IBM(14, 14) +#define MQPCB_MASK_MIN_RNR_NAK_TIMER_FIELD EHCA_BMASK_IBM(15, 15) +#define MQPCB_MASK_SERVICE_LEVEL EHCA_BMASK_IBM(16, 16) +#define MQPCB_MASK_SEND_GRH_FLAG EHCA_BMASK_IBM(17, 17) +#define MQPCB_MASK_RETRY_COUNT EHCA_BMASK_IBM(18, 18) +#define MQPCB_MASK_TIMEOUT EHCA_BMASK_IBM(19, 19) +#define MQPCB_MASK_PATH_MTU EHCA_BMASK_IBM(20, 20) +#define MQPCB_PATH_MTU EHCA_BMASK_IBM(24, 31) +#define MQPCB_MASK_MAX_STATIC_RATE EHCA_BMASK_IBM(21, 21) +#define MQPCB_MAX_STATIC_RATE EHCA_BMASK_IBM(24, 31) +#define MQPCB_MASK_DLID EHCA_BMASK_IBM(22, 22) +#define MQPCB_DLID EHCA_BMASK_IBM(16, 31) +#define MQPCB_MASK_RNR_RETRY_COUNT EHCA_BMASK_IBM(23, 23) +#define MQPCB_RNR_RETRY_COUNT EHCA_BMASK_IBM(29, 31) +#define MQPCB_MASK_SOURCE_PATH_BITS EHCA_BMASK_IBM(24, 24) +#define MQPCB_SOURCE_PATH_BITS EHCA_BMASK_IBM(25, 31) +#define MQPCB_MASK_TRAFFIC_CLASS EHCA_BMASK_IBM(25, 25) +#define MQPCB_TRAFFIC_CLASS EHCA_BMASK_IBM(24, 31) +#define MQPCB_MASK_HOP_LIMIT EHCA_BMASK_IBM(26, 26) +#define MQPCB_HOP_LIMIT EHCA_BMASK_IBM(24, 31) +#define MQPCB_MASK_SOURCE_GID_IDX EHCA_BMASK_IBM(27, 27) +#define MQPCB_SOURCE_GID_IDX EHCA_BMASK_IBM(24, 31) +#define MQPCB_MASK_FLOW_LABEL EHCA_BMASK_IBM(28, 28) +#define MQPCB_FLOW_LABEL EHCA_BMASK_IBM(12, 31) +#define MQPCB_MASK_DEST_GID EHCA_BMASK_IBM(30, 30) +#define MQPCB_MASK_SERVICE_LEVEL_AL EHCA_BMASK_IBM(31, 31) +#define MQPCB_SERVICE_LEVEL_AL EHCA_BMASK_IBM(28, 31) +#define MQPCB_MASK_SEND_GRH_FLAG_AL EHCA_BMASK_IBM(32, 32) +#define MQPCB_SEND_GRH_FLAG_AL EHCA_BMASK_IBM(31, 31) +#define MQPCB_MASK_RETRY_COUNT_AL EHCA_BMASK_IBM(33, 33) +#define MQPCB_RETRY_COUNT_AL EHCA_BMASK_IBM(29, 31) +#define MQPCB_MASK_TIMEOUT_AL EHCA_BMASK_IBM(34, 34) +#define MQPCB_TIMEOUT_AL EHCA_BMASK_IBM(27, 31) +#define MQPCB_MASK_MAX_STATIC_RATE_AL EHCA_BMASK_IBM(35, 35) +#define MQPCB_MAX_STATIC_RATE_AL EHCA_BMASK_IBM(24, 31) +#define MQPCB_MASK_DLID_AL EHCA_BMASK_IBM(36, 36) +#define MQPCB_DLID_AL EHCA_BMASK_IBM(16, 31) +#define MQPCB_MASK_RNR_RETRY_COUNT_AL EHCA_BMASK_IBM(37, 37) +#define MQPCB_RNR_RETRY_COUNT_AL EHCA_BMASK_IBM(29, 31) +#define MQPCB_MASK_SOURCE_PATH_BITS_AL EHCA_BMASK_IBM(38, 38) +#define MQPCB_SOURCE_PATH_BITS_AL EHCA_BMASK_IBM(25, 31) +#define MQPCB_MASK_TRAFFIC_CLASS_AL EHCA_BMASK_IBM(39, 39) +#define MQPCB_TRAFFIC_CLASS_AL EHCA_BMASK_IBM(24, 31) +#define MQPCB_MASK_HOP_LIMIT_AL EHCA_BMASK_IBM(40, 40) +#define MQPCB_HOP_LIMIT_AL EHCA_BMASK_IBM(24, 31) +#define MQPCB_MASK_SOURCE_GID_IDX_AL EHCA_BMASK_IBM(41, 41) +#define MQPCB_SOURCE_GID_IDX_AL EHCA_BMASK_IBM(24, 31) +#define MQPCB_MASK_FLOW_LABEL_AL EHCA_BMASK_IBM(42, 42) +#define MQPCB_FLOW_LABEL_AL EHCA_BMASK_IBM(12, 31) +#define MQPCB_MASK_DEST_GID_AL EHCA_BMASK_IBM(44, 44) +#define MQPCB_MASK_MAX_NR_OUTST_SEND_WR EHCA_BMASK_IBM(45, 45) +#define MQPCB_MAX_NR_OUTST_SEND_WR EHCA_BMASK_IBM(16, 31) +#define MQPCB_MASK_MAX_NR_OUTST_RECV_WR EHCA_BMASK_IBM(46, 46) +#define MQPCB_MAX_NR_OUTST_RECV_WR EHCA_BMASK_IBM(16, 31) +#define MQPCB_MASK_DISABLE_ETE_CREDIT_CHECK EHCA_BMASK_IBM(47, 47) +#define MQPCB_DISABLE_ETE_CREDIT_CHECK EHCA_BMASK_IBM(31, 31) +#define MQPCB_QP_NUMBER EHCA_BMASK_IBM( 8, 31) +#define MQPCB_MASK_QP_ENABLE EHCA_BMASK_IBM(48, 48) +#define MQPCB_QP_ENABLE EHCA_BMASK_IBM(31, 31) +#define MQPCB_MASK_CURR_SRQ_LIMIT EHCA_BMASK_IBM(49, 49) +#define MQPCB_CURR_SRQ_LIMIT EHCA_BMASK_IBM(16, 31) +#define MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG EHCA_BMASK_IBM(50, 50) +#define MQPCB_MASK_SHARED_RQ_HNDL EHCA_BMASK_IBM(51, 51) #endif /* __EHCA_CLASSES_PSERIES_H__ */ diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c index 01d4a148bd71..9e87883b561a 100644 --- a/drivers/infiniband/hw/ehca/ehca_cq.c +++ b/drivers/infiniband/hw/ehca/ehca_cq.c @@ -97,7 +97,7 @@ int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int real_qp_num) return ret; } -struct ehca_qp* ehca_cq_get_qp(struct ehca_cq *cq, int real_qp_num) +struct ehca_qp *ehca_cq_get_qp(struct ehca_cq *cq, int real_qp_num) { struct ehca_qp *ret = NULL; unsigned int key = real_qp_num & (QP_HASHTAB_LEN-1); diff --git a/drivers/infiniband/hw/ehca/ehca_eq.c b/drivers/infiniband/hw/ehca/ehca_eq.c index 4961eb88827c..4825975f88cf 100644 --- a/drivers/infiniband/hw/ehca/ehca_eq.c +++ b/drivers/infiniband/hw/ehca/ehca_eq.c @@ -96,7 +96,8 @@ int ehca_create_eq(struct ehca_shca *shca, for (i = 0; i < nr_pages; i++) { u64 rpage; - if (!(vpage = ipz_qpageit_get_inc(&eq->ipz_queue))) { + vpage = ipz_qpageit_get_inc(&eq->ipz_queue); + if (!vpage) { ret = H_RESOURCE; goto create_eq_exit2; } diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/infiniband/hw/ehca/ehca_hca.c index bbd3c6a5822f..fc19ef9fd963 100644 --- a/drivers/infiniband/hw/ehca/ehca_hca.c +++ b/drivers/infiniband/hw/ehca/ehca_hca.c @@ -127,6 +127,7 @@ int ehca_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props) { int ret = 0; + u64 h_ret; struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, ib_device); struct hipz_query_port *rblock; @@ -137,7 +138,8 @@ int ehca_query_port(struct ib_device *ibdev, return -ENOMEM; } - if (hipz_h_query_port(shca->ipz_hca_handle, port, rblock) != H_SUCCESS) { + h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock); + if (h_ret != H_SUCCESS) { ehca_err(&shca->ib_device, "Can't query port properties"); ret = -EINVAL; goto query_port1; @@ -197,6 +199,7 @@ int ehca_query_sma_attr(struct ehca_shca *shca, u8 port, struct ehca_sma_attr *attr) { int ret = 0; + u64 h_ret; struct hipz_query_port *rblock; rblock = ehca_alloc_fw_ctrlblock(GFP_ATOMIC); @@ -205,7 +208,8 @@ int ehca_query_sma_attr(struct ehca_shca *shca, return -ENOMEM; } - if (hipz_h_query_port(shca->ipz_hca_handle, port, rblock) != H_SUCCESS) { + h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock); + if (h_ret != H_SUCCESS) { ehca_err(&shca->ib_device, "Can't query port properties"); ret = -EINVAL; goto query_sma_attr1; @@ -230,9 +234,11 @@ query_sma_attr1: int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) { int ret = 0; - struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, ib_device); + u64 h_ret; + struct ehca_shca *shca; struct hipz_query_port *rblock; + shca = container_of(ibdev, struct ehca_shca, ib_device); if (index > 16) { ehca_err(&shca->ib_device, "Invalid index: %x.", index); return -EINVAL; @@ -244,7 +250,8 @@ int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) return -ENOMEM; } - if (hipz_h_query_port(shca->ipz_hca_handle, port, rblock) != H_SUCCESS) { + h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock); + if (h_ret != H_SUCCESS) { ehca_err(&shca->ib_device, "Can't query port properties"); ret = -EINVAL; goto query_pkey1; @@ -262,6 +269,7 @@ int ehca_query_gid(struct ib_device *ibdev, u8 port, int index, union ib_gid *gid) { int ret = 0; + u64 h_ret; struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, ib_device); struct hipz_query_port *rblock; @@ -277,7 +285,8 @@ int ehca_query_gid(struct ib_device *ibdev, u8 port, return -ENOMEM; } - if (hipz_h_query_port(shca->ipz_hca_handle, port, rblock) != H_SUCCESS) { + h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock); + if (h_ret != H_SUCCESS) { ehca_err(&shca->ib_device, "Can't query port properties"); ret = -EINVAL; goto query_gid1; @@ -302,11 +311,12 @@ int ehca_modify_port(struct ib_device *ibdev, struct ib_port_modify *props) { int ret = 0; - struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, ib_device); + struct ehca_shca *shca; struct hipz_query_port *rblock; u32 cap; u64 hret; + shca = container_of(ibdev, struct ehca_shca, ib_device); if ((props->set_port_cap_mask | props->clr_port_cap_mask) & ~allowed_port_caps) { ehca_err(&shca->ib_device, "Non-changeable bits set in masks " @@ -325,7 +335,8 @@ int ehca_modify_port(struct ib_device *ibdev, goto modify_port1; } - if (hipz_h_query_port(shca->ipz_hca_handle, port, rblock) != H_SUCCESS) { + hret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock); + if (hret != H_SUCCESS) { ehca_err(&shca->ib_device, "Can't query port properties"); ret = -EINVAL; goto modify_port2; @@ -337,7 +348,8 @@ int ehca_modify_port(struct ib_device *ibdev, hret = hipz_h_modify_port(shca->ipz_hca_handle, port, cap, props->init_type, port_modify_mask); if (hret != H_SUCCESS) { - ehca_err(&shca->ib_device, "Modify port failed hret=%lx", hret); + ehca_err(&shca->ib_device, "Modify port failed hret=%lx", + hret); ret = -EINVAL; } diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c index 96eba3830754..4fb01fcb63ae 100644 --- a/drivers/infiniband/hw/ehca/ehca_irq.c +++ b/drivers/infiniband/hw/ehca/ehca_irq.c @@ -49,26 +49,26 @@ #include "hipz_fns.h" #include "ipz_pt_fn.h" -#define EQE_COMPLETION_EVENT EHCA_BMASK_IBM(1,1) -#define EQE_CQ_QP_NUMBER EHCA_BMASK_IBM(8,31) -#define EQE_EE_IDENTIFIER EHCA_BMASK_IBM(2,7) -#define EQE_CQ_NUMBER EHCA_BMASK_IBM(8,31) -#define EQE_QP_NUMBER EHCA_BMASK_IBM(8,31) -#define EQE_QP_TOKEN EHCA_BMASK_IBM(32,63) -#define EQE_CQ_TOKEN EHCA_BMASK_IBM(32,63) - -#define NEQE_COMPLETION_EVENT EHCA_BMASK_IBM(1,1) -#define NEQE_EVENT_CODE EHCA_BMASK_IBM(2,7) -#define NEQE_PORT_NUMBER EHCA_BMASK_IBM(8,15) -#define NEQE_PORT_AVAILABILITY EHCA_BMASK_IBM(16,16) -#define NEQE_DISRUPTIVE EHCA_BMASK_IBM(16,16) - -#define ERROR_DATA_LENGTH EHCA_BMASK_IBM(52,63) -#define ERROR_DATA_TYPE EHCA_BMASK_IBM(0,7) +#define EQE_COMPLETION_EVENT EHCA_BMASK_IBM( 1, 1) +#define EQE_CQ_QP_NUMBER EHCA_BMASK_IBM( 8, 31) +#define EQE_EE_IDENTIFIER EHCA_BMASK_IBM( 2, 7) +#define EQE_CQ_NUMBER EHCA_BMASK_IBM( 8, 31) +#define EQE_QP_NUMBER EHCA_BMASK_IBM( 8, 31) +#define EQE_QP_TOKEN EHCA_BMASK_IBM(32, 63) +#define EQE_CQ_TOKEN EHCA_BMASK_IBM(32, 63) + +#define NEQE_COMPLETION_EVENT EHCA_BMASK_IBM( 1, 1) +#define NEQE_EVENT_CODE EHCA_BMASK_IBM( 2, 7) +#define NEQE_PORT_NUMBER EHCA_BMASK_IBM( 8, 15) +#define NEQE_PORT_AVAILABILITY EHCA_BMASK_IBM(16, 16) +#define NEQE_DISRUPTIVE EHCA_BMASK_IBM(16, 16) + +#define ERROR_DATA_LENGTH EHCA_BMASK_IBM(52, 63) +#define ERROR_DATA_TYPE EHCA_BMASK_IBM( 0, 7) static void queue_comp_task(struct ehca_cq *__cq); -static struct ehca_comp_pool* pool; +static struct ehca_comp_pool *pool; #ifdef CONFIG_HOTPLUG_CPU static struct notifier_block comp_pool_callback_nb; #endif @@ -85,8 +85,8 @@ static inline void comp_event_callback(struct ehca_cq *cq) return; } -static void print_error_data(struct ehca_shca * shca, void* data, - u64* rblock, int length) +static void print_error_data(struct ehca_shca *shca, void *data, + u64 *rblock, int length) { u64 type = EHCA_BMASK_GET(ERROR_DATA_TYPE, rblock[2]); u64 resource = rblock[1]; @@ -94,7 +94,7 @@ static void print_error_data(struct ehca_shca * shca, void* data, switch (type) { case 0x1: /* Queue Pair */ { - struct ehca_qp *qp = (struct ehca_qp*)data; + struct ehca_qp *qp = (struct ehca_qp *)data; /* only print error data if AER is set */ if (rblock[6] == 0) @@ -107,7 +107,7 @@ static void print_error_data(struct ehca_shca * shca, void* data, } case 0x4: /* Completion Queue */ { - struct ehca_cq *cq = (struct ehca_cq*)data; + struct ehca_cq *cq = (struct ehca_cq *)data; ehca_err(&shca->ib_device, "CQ 0x%x (resource=%lx) has errors.", @@ -572,7 +572,7 @@ void ehca_tasklet_eq(unsigned long data) ehca_process_eq((struct ehca_shca*)data, 1); } -static inline int find_next_online_cpu(struct ehca_comp_pool* pool) +static inline int find_next_online_cpu(struct ehca_comp_pool *pool) { int cpu; unsigned long flags; @@ -636,7 +636,7 @@ static void queue_comp_task(struct ehca_cq *__cq) __queue_comp_task(__cq, cct); } -static void run_comp_task(struct ehca_cpu_comp_task* cct) +static void run_comp_task(struct ehca_cpu_comp_task *cct) { struct ehca_cq *cq; unsigned long flags; @@ -666,12 +666,12 @@ static void run_comp_task(struct ehca_cpu_comp_task* cct) static int comp_task(void *__cct) { - struct ehca_cpu_comp_task* cct = __cct; + struct ehca_cpu_comp_task *cct = __cct; int cql_empty; DECLARE_WAITQUEUE(wait, current); set_current_state(TASK_INTERRUPTIBLE); - while(!kthread_should_stop()) { + while (!kthread_should_stop()) { add_wait_queue(&cct->wait_queue, &wait); spin_lock_irq(&cct->task_lock); @@ -745,7 +745,7 @@ static void take_over_work(struct ehca_comp_pool *pool, list_splice_init(&cct->cq_list, &list); - while(!list_empty(&list)) { + while (!list_empty(&list)) { cq = list_entry(cct->cq_list.next, struct ehca_cq, entry); list_del(&cq->entry); @@ -768,7 +768,7 @@ static int comp_pool_callback(struct notifier_block *nfb, case CPU_UP_PREPARE: case CPU_UP_PREPARE_FROZEN: ehca_gen_dbg("CPU: %x (CPU_PREPARE)", cpu); - if(!create_comp_task(pool, cpu)) { + if (!create_comp_task(pool, cpu)) { ehca_gen_err("Can't create comp_task for cpu: %x", cpu); return NOTIFY_BAD; } @@ -838,7 +838,7 @@ int ehca_create_comp_pool(void) #ifdef CONFIG_HOTPLUG_CPU comp_pool_callback_nb.notifier_call = comp_pool_callback; - comp_pool_callback_nb.priority =0; + comp_pool_callback_nb.priority = 0; register_cpu_notifier(&comp_pool_callback_nb); #endif diff --git a/drivers/infiniband/hw/ehca/ehca_iverbs.h b/drivers/infiniband/hw/ehca/ehca_iverbs.h index 77aeca6a2c2f..dce503bb7d6b 100644 --- a/drivers/infiniband/hw/ehca/ehca_iverbs.h +++ b/drivers/infiniband/hw/ehca/ehca_iverbs.h @@ -81,8 +81,9 @@ struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd, int num_phys_buf, int mr_access_flags, u64 *iova_start); -struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt, - int mr_access_flags, struct ib_udata *udata); +struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, + u64 virt, int mr_access_flags, + struct ib_udata *udata); int ehca_rereg_phys_mr(struct ib_mr *mr, int mr_rereg_mask, @@ -192,7 +193,7 @@ void ehca_poll_eqs(unsigned long data); void *ehca_alloc_fw_ctrlblock(gfp_t flags); void ehca_free_fw_ctrlblock(void *ptr); #else -#define ehca_alloc_fw_ctrlblock(flags) ((void *) get_zeroed_page(flags)) +#define ehca_alloc_fw_ctrlblock(flags) ((void *)get_zeroed_page(flags)) #define ehca_free_fw_ctrlblock(ptr) free_page((unsigned long)(ptr)) #endif diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c index 28ba2dd24216..36377c6db3d4 100644 --- a/drivers/infiniband/hw/ehca/ehca_main.c +++ b/drivers/infiniband/hw/ehca/ehca_main.c @@ -107,7 +107,7 @@ static DEFINE_SPINLOCK(shca_list_lock); static struct timer_list poll_eqs_timer; #ifdef CONFIG_PPC_64K_PAGES -static struct kmem_cache *ctblk_cache = NULL; +static struct kmem_cache *ctblk_cache; void *ehca_alloc_fw_ctrlblock(gfp_t flags) { @@ -200,8 +200,8 @@ static void ehca_destroy_slab_caches(void) #endif } -#define EHCA_HCAAVER EHCA_BMASK_IBM(32,39) -#define EHCA_REVID EHCA_BMASK_IBM(40,63) +#define EHCA_HCAAVER EHCA_BMASK_IBM(32, 39) +#define EHCA_REVID EHCA_BMASK_IBM(40, 63) static struct cap_descr { u64 mask; @@ -263,22 +263,27 @@ int ehca_sense_attributes(struct ehca_shca *shca) ehca_gen_dbg(" ... hardware version=%x:%x", hcaaver, revid); - if ((hcaaver == 1) && (revid == 0)) - shca->hw_level = 0x11; - else if ((hcaaver == 1) && (revid == 1)) - shca->hw_level = 0x12; - else if ((hcaaver == 1) && (revid == 2)) - shca->hw_level = 0x13; - else if ((hcaaver == 2) && (revid == 0)) - shca->hw_level = 0x21; - else if ((hcaaver == 2) && (revid == 0x10)) - shca->hw_level = 0x22; - else { + if (hcaaver == 1) { + if (revid <= 3) + shca->hw_level = 0x10 | (revid + 1); + else + shca->hw_level = 0x14; + } else if (hcaaver == 2) { + if (revid == 0) + shca->hw_level = 0x21; + else if (revid == 0x10) + shca->hw_level = 0x22; + else if (revid == 0x20 || revid == 0x21) + shca->hw_level = 0x23; + } + + if (!shca->hw_level) { ehca_gen_warn("unknown hardware version" " - assuming default level"); shca->hw_level = 0x22; } - } + } else + shca->hw_level = ehca_hw_level; ehca_gen_dbg(" ... hardware level=%x", shca->hw_level); shca->sport[0].rate = IB_RATE_30_GBPS; @@ -290,7 +295,7 @@ int ehca_sense_attributes(struct ehca_shca *shca) if (EHCA_BMASK_GET(hca_cap_descr[i].mask, shca->hca_cap)) ehca_gen_dbg(" %s", hca_cap_descr[i].descr); - port = (struct hipz_query_port *) rblock; + port = (struct hipz_query_port *)rblock; h_ret = hipz_h_query_port(shca->ipz_hca_handle, 1, port); if (h_ret != H_SUCCESS) { ehca_gen_err("Cannot query port properties. h_ret=%lx", @@ -439,7 +444,7 @@ static int ehca_create_aqp1(struct ehca_shca *shca, u32 port) return -EPERM; } - ibcq = ib_create_cq(&shca->ib_device, NULL, NULL, (void*)(-1), 10, 0); + ibcq = ib_create_cq(&shca->ib_device, NULL, NULL, (void *)(-1), 10, 0); if (IS_ERR(ibcq)) { ehca_err(&shca->ib_device, "Cannot create AQP1 CQ."); return PTR_ERR(ibcq); @@ -666,7 +671,7 @@ static int __devinit ehca_probe(struct ibmebus_dev *dev, } /* create internal protection domain */ - ibpd = ehca_alloc_pd(&shca->ib_device, (void*)(-1), NULL); + ibpd = ehca_alloc_pd(&shca->ib_device, (void *)(-1), NULL); if (IS_ERR(ibpd)) { ehca_err(&shca->ib_device, "Cannot create internal PD."); ret = PTR_ERR(ibpd); @@ -863,18 +868,21 @@ int __init ehca_module_init(void) printk(KERN_INFO "eHCA Infiniband Device Driver " "(Rel.: SVNEHCA_0023)\n"); - if ((ret = ehca_create_comp_pool())) { + ret = ehca_create_comp_pool(); + if (ret) { ehca_gen_err("Cannot create comp pool."); return ret; } - if ((ret = ehca_create_slab_caches())) { + ret = ehca_create_slab_caches(); + if (ret) { ehca_gen_err("Cannot create SLAB caches"); ret = -ENOMEM; goto module_init1; } - if ((ret = ibmebus_register_driver(&ehca_driver))) { + ret = ibmebus_register_driver(&ehca_driver); + if (ret) { ehca_gen_err("Cannot register eHCA device driver"); ret = -EINVAL; goto module_init2; diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c index add79bd44e39..6262c5462d50 100644 --- a/drivers/infiniband/hw/ehca/ehca_mrmw.c +++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c @@ -48,6 +48,11 @@ #include "hcp_if.h" #include "hipz_hw.h" +#define NUM_CHUNKS(length, chunk_size) \ + (((length) + (chunk_size - 1)) / (chunk_size)) +/* max number of rpages (per hcall register_rpages) */ +#define MAX_RPAGES 512 + static struct kmem_cache *mr_cache; static struct kmem_cache *mw_cache; @@ -56,9 +61,9 @@ static struct ehca_mr *ehca_mr_new(void) struct ehca_mr *me; me = kmem_cache_zalloc(mr_cache, GFP_KERNEL); - if (me) { + if (me) spin_lock_init(&me->mrlock); - } else + else ehca_gen_err("alloc failed"); return me; @@ -74,9 +79,9 @@ static struct ehca_mw *ehca_mw_new(void) struct ehca_mw *me; me = kmem_cache_zalloc(mw_cache, GFP_KERNEL); - if (me) { + if (me) spin_lock_init(&me->mwlock); - } else + else ehca_gen_err("alloc failed"); return me; @@ -106,11 +111,12 @@ struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags) goto get_dma_mr_exit0; } - ret = ehca_reg_maxmr(shca, e_maxmr, (u64*)KERNELBASE, + ret = ehca_reg_maxmr(shca, e_maxmr, (u64 *)KERNELBASE, mr_access_flags, e_pd, &e_maxmr->ib.ib_mr.lkey, &e_maxmr->ib.ib_mr.rkey); if (ret) { + ehca_mr_delete(e_maxmr); ib_mr = ERR_PTR(ret); goto get_dma_mr_exit0; } @@ -144,9 +150,6 @@ struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd, struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd); u64 size; - struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0}; - u32 num_pages_mr; - u32 num_pages_4k; /* 4k portion "pages" */ if ((num_phys_buf <= 0) || !phys_buf_array) { ehca_err(pd->device, "bad input values: num_phys_buf=%x " @@ -190,12 +193,6 @@ struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd, goto reg_phys_mr_exit0; } - /* determine number of MR pages */ - num_pages_mr = ((((u64)iova_start % PAGE_SIZE) + size + - PAGE_SIZE - 1) / PAGE_SIZE); - num_pages_4k = ((((u64)iova_start % EHCA_PAGESIZE) + size + - EHCA_PAGESIZE - 1) / EHCA_PAGESIZE); - /* register MR on HCA */ if (ehca_mr_is_maxmr(size, iova_start)) { e_mr->flags |= EHCA_MR_FLAG_MAXMR; @@ -207,13 +204,22 @@ struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd, goto reg_phys_mr_exit1; } } else { - pginfo.type = EHCA_MR_PGI_PHYS; - pginfo.num_pages = num_pages_mr; - pginfo.num_4k = num_pages_4k; - pginfo.num_phys_buf = num_phys_buf; - pginfo.phys_buf_array = phys_buf_array; - pginfo.next_4k = (((u64)iova_start & ~PAGE_MASK) / - EHCA_PAGESIZE); + struct ehca_mr_pginfo pginfo; + u32 num_kpages; + u32 num_hwpages; + + num_kpages = NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size, + PAGE_SIZE); + num_hwpages = NUM_CHUNKS(((u64)iova_start % EHCA_PAGESIZE) + + size, EHCA_PAGESIZE); + memset(&pginfo, 0, sizeof(pginfo)); + pginfo.type = EHCA_MR_PGI_PHYS; + pginfo.num_kpages = num_kpages; + pginfo.num_hwpages = num_hwpages; + pginfo.u.phy.num_phys_buf = num_phys_buf; + pginfo.u.phy.phys_buf_array = phys_buf_array; + pginfo.next_hwpage = (((u64)iova_start & ~PAGE_MASK) / + EHCA_PAGESIZE); ret = ehca_reg_mr(shca, e_mr, iova_start, size, mr_access_flags, e_pd, &pginfo, &e_mr->ib.ib_mr.lkey, @@ -240,18 +246,19 @@ reg_phys_mr_exit0: /*----------------------------------------------------------------------*/ -struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt, - int mr_access_flags, struct ib_udata *udata) +struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, + u64 virt, int mr_access_flags, + struct ib_udata *udata) { struct ib_mr *ib_mr; struct ehca_mr *e_mr; struct ehca_shca *shca = container_of(pd->device, struct ehca_shca, ib_device); struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd); - struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0}; + struct ehca_mr_pginfo pginfo; int ret; - u32 num_pages_mr; - u32 num_pages_4k; /* 4k portion "pages" */ + u32 num_kpages; + u32 num_hwpages; if (!pd) { ehca_gen_err("bad pd=%p", pd); @@ -289,7 +296,7 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt e_mr->umem = ib_umem_get(pd->uobject->context, start, length, mr_access_flags); if (IS_ERR(e_mr->umem)) { - ib_mr = (void *) e_mr->umem; + ib_mr = (void *)e_mr->umem; goto reg_user_mr_exit1; } @@ -301,23 +308,24 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt } /* determine number of MR pages */ - num_pages_mr = (((virt % PAGE_SIZE) + length + PAGE_SIZE - 1) / - PAGE_SIZE); - num_pages_4k = (((virt % EHCA_PAGESIZE) + length + EHCA_PAGESIZE - 1) / - EHCA_PAGESIZE); + num_kpages = NUM_CHUNKS((virt % PAGE_SIZE) + length, PAGE_SIZE); + num_hwpages = NUM_CHUNKS((virt % EHCA_PAGESIZE) + length, + EHCA_PAGESIZE); /* register MR on HCA */ - pginfo.type = EHCA_MR_PGI_USER; - pginfo.num_pages = num_pages_mr; - pginfo.num_4k = num_pages_4k; - pginfo.region = e_mr->umem; - pginfo.next_4k = e_mr->umem->offset / EHCA_PAGESIZE; - pginfo.next_chunk = list_prepare_entry(pginfo.next_chunk, - (&e_mr->umem->chunk_list), - list); - - ret = ehca_reg_mr(shca, e_mr, (u64*) virt, length, mr_access_flags, e_pd, - &pginfo, &e_mr->ib.ib_mr.lkey, &e_mr->ib.ib_mr.rkey); + memset(&pginfo, 0, sizeof(pginfo)); + pginfo.type = EHCA_MR_PGI_USER; + pginfo.num_kpages = num_kpages; + pginfo.num_hwpages = num_hwpages; + pginfo.u.usr.region = e_mr->umem; + pginfo.next_hwpage = e_mr->umem->offset / EHCA_PAGESIZE; + pginfo.u.usr.next_chunk = list_prepare_entry(pginfo.u.usr.next_chunk, + (&e_mr->umem->chunk_list), + list); + + ret = ehca_reg_mr(shca, e_mr, (u64 *)virt, length, mr_access_flags, + e_pd, &pginfo, &e_mr->ib.ib_mr.lkey, + &e_mr->ib.ib_mr.rkey); if (ret) { ib_mr = ERR_PTR(ret); goto reg_user_mr_exit2; @@ -360,9 +368,9 @@ int ehca_rereg_phys_mr(struct ib_mr *mr, struct ehca_pd *new_pd; u32 tmp_lkey, tmp_rkey; unsigned long sl_flags; - u32 num_pages_mr = 0; - u32 num_pages_4k = 0; /* 4k portion "pages" */ - struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0}; + u32 num_kpages = 0; + u32 num_hwpages = 0; + struct ehca_mr_pginfo pginfo; u32 cur_pid = current->tgid; if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context && @@ -414,7 +422,7 @@ int ehca_rereg_phys_mr(struct ib_mr *mr, goto rereg_phys_mr_exit0; } if (!phys_buf_array || num_phys_buf <= 0) { - ehca_err(mr->device, "bad input values: mr_rereg_mask=%x" + ehca_err(mr->device, "bad input values mr_rereg_mask=%x" " phys_buf_array=%p num_phys_buf=%x", mr_rereg_mask, phys_buf_array, num_phys_buf); ret = -EINVAL; @@ -438,10 +446,10 @@ int ehca_rereg_phys_mr(struct ib_mr *mr, /* set requested values dependent on rereg request */ spin_lock_irqsave(&e_mr->mrlock, sl_flags); - new_start = e_mr->start; /* new == old address */ - new_size = e_mr->size; /* new == old length */ - new_acl = e_mr->acl; /* new == old access control */ - new_pd = container_of(mr->pd,struct ehca_pd,ib_pd); /*new == old PD*/ + new_start = e_mr->start; + new_size = e_mr->size; + new_acl = e_mr->acl; + new_pd = container_of(mr->pd, struct ehca_pd, ib_pd); if (mr_rereg_mask & IB_MR_REREG_TRANS) { new_start = iova_start; /* change address */ @@ -458,17 +466,18 @@ int ehca_rereg_phys_mr(struct ib_mr *mr, ret = -EINVAL; goto rereg_phys_mr_exit1; } - num_pages_mr = ((((u64)new_start % PAGE_SIZE) + new_size + - PAGE_SIZE - 1) / PAGE_SIZE); - num_pages_4k = ((((u64)new_start % EHCA_PAGESIZE) + new_size + - EHCA_PAGESIZE - 1) / EHCA_PAGESIZE); - pginfo.type = EHCA_MR_PGI_PHYS; - pginfo.num_pages = num_pages_mr; - pginfo.num_4k = num_pages_4k; - pginfo.num_phys_buf = num_phys_buf; - pginfo.phys_buf_array = phys_buf_array; - pginfo.next_4k = (((u64)iova_start & ~PAGE_MASK) / - EHCA_PAGESIZE); + num_kpages = NUM_CHUNKS(((u64)new_start % PAGE_SIZE) + + new_size, PAGE_SIZE); + num_hwpages = NUM_CHUNKS(((u64)new_start % EHCA_PAGESIZE) + + new_size, EHCA_PAGESIZE); + memset(&pginfo, 0, sizeof(pginfo)); + pginfo.type = EHCA_MR_PGI_PHYS; + pginfo.num_kpages = num_kpages; + pginfo.num_hwpages = num_hwpages; + pginfo.u.phy.num_phys_buf = num_phys_buf; + pginfo.u.phy.phys_buf_array = phys_buf_array; + pginfo.next_hwpage = (((u64)iova_start & ~PAGE_MASK) / + EHCA_PAGESIZE); } if (mr_rereg_mask & IB_MR_REREG_ACCESS) new_acl = mr_access_flags; @@ -510,7 +519,7 @@ int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr) struct ehca_pd *my_pd = container_of(mr->pd, struct ehca_pd, ib_pd); u32 cur_pid = current->tgid; unsigned long sl_flags; - struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0}; + struct ehca_mr_hipzout_parms hipzout; if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context && (my_pd->ownpid != cur_pid)) { @@ -536,14 +545,14 @@ int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr) "hca_hndl=%lx mr_hndl=%lx lkey=%x", h_ret, mr, shca->ipz_hca_handle.handle, e_mr->ipz_mr_handle.handle, mr->lkey); - ret = ehca_mrmw_map_hrc_query_mr(h_ret); + ret = ehca2ib_return_code(h_ret); goto query_mr_exit1; } - mr_attr->pd = mr->pd; + mr_attr->pd = mr->pd; mr_attr->device_virt_addr = hipzout.vaddr; - mr_attr->size = hipzout.len; - mr_attr->lkey = hipzout.lkey; - mr_attr->rkey = hipzout.rkey; + mr_attr->size = hipzout.len; + mr_attr->lkey = hipzout.lkey; + mr_attr->rkey = hipzout.rkey; ehca_mrmw_reverse_map_acl(&hipzout.acl, &mr_attr->mr_access_flags); query_mr_exit1: @@ -596,7 +605,7 @@ int ehca_dereg_mr(struct ib_mr *mr) "e_mr=%p hca_hndl=%lx mr_hndl=%lx mr->lkey=%x", h_ret, shca, e_mr, shca->ipz_hca_handle.handle, e_mr->ipz_mr_handle.handle, mr->lkey); - ret = ehca_mrmw_map_hrc_free_mr(h_ret); + ret = ehca2ib_return_code(h_ret); goto dereg_mr_exit0; } @@ -622,7 +631,7 @@ struct ib_mw *ehca_alloc_mw(struct ib_pd *pd) struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd); struct ehca_shca *shca = container_of(pd->device, struct ehca_shca, ib_device); - struct ehca_mw_hipzout_parms hipzout = {{0},0}; + struct ehca_mw_hipzout_parms hipzout; e_mw = ehca_mw_new(); if (!e_mw) { @@ -636,7 +645,7 @@ struct ib_mw *ehca_alloc_mw(struct ib_pd *pd) ehca_err(pd->device, "hipz_mw_allocate failed, h_ret=%lx " "shca=%p hca_hndl=%lx mw=%p", h_ret, shca, shca->ipz_hca_handle.handle, e_mw); - ib_mw = ERR_PTR(ehca_mrmw_map_hrc_alloc(h_ret)); + ib_mw = ERR_PTR(ehca2ib_return_code(h_ret)); goto alloc_mw_exit1; } /* successful MW allocation */ @@ -679,7 +688,7 @@ int ehca_dealloc_mw(struct ib_mw *mw) "mw=%p rkey=%x hca_hndl=%lx mw_hndl=%lx", h_ret, shca, mw, mw->rkey, shca->ipz_hca_handle.handle, e_mw->ipz_mw_handle.handle); - return ehca_mrmw_map_hrc_free_mw(h_ret); + return ehca2ib_return_code(h_ret); } /* successful deallocation */ ehca_mw_delete(e_mw); @@ -699,7 +708,7 @@ struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd, struct ehca_mr *e_fmr; int ret; u32 tmp_lkey, tmp_rkey; - struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0}; + struct ehca_mr_pginfo pginfo; /* check other parameters */ if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) && @@ -745,6 +754,7 @@ struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd, e_fmr->flags |= EHCA_MR_FLAG_FMR; /* register MR on HCA */ + memset(&pginfo, 0, sizeof(pginfo)); ret = ehca_reg_mr(shca, e_fmr, NULL, fmr_attr->max_pages * (1 << fmr_attr->page_shift), mr_access_flags, e_pd, &pginfo, @@ -783,7 +793,7 @@ int ehca_map_phys_fmr(struct ib_fmr *fmr, container_of(fmr->device, struct ehca_shca, ib_device); struct ehca_mr *e_fmr = container_of(fmr, struct ehca_mr, ib.ib_fmr); struct ehca_pd *e_pd = container_of(fmr->pd, struct ehca_pd, ib_pd); - struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0}; + struct ehca_mr_pginfo pginfo; u32 tmp_lkey, tmp_rkey; if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) { @@ -809,14 +819,16 @@ int ehca_map_phys_fmr(struct ib_fmr *fmr, fmr, e_fmr->fmr_map_cnt, e_fmr->fmr_max_maps); } - pginfo.type = EHCA_MR_PGI_FMR; - pginfo.num_pages = list_len; - pginfo.num_4k = list_len * (e_fmr->fmr_page_size / EHCA_PAGESIZE); - pginfo.page_list = page_list; - pginfo.next_4k = ((iova & (e_fmr->fmr_page_size-1)) / - EHCA_PAGESIZE); + memset(&pginfo, 0, sizeof(pginfo)); + pginfo.type = EHCA_MR_PGI_FMR; + pginfo.num_kpages = list_len; + pginfo.num_hwpages = list_len * (e_fmr->fmr_page_size / EHCA_PAGESIZE); + pginfo.u.fmr.page_list = page_list; + pginfo.next_hwpage = ((iova & (e_fmr->fmr_page_size-1)) / + EHCA_PAGESIZE); + pginfo.u.fmr.fmr_pgsize = e_fmr->fmr_page_size; - ret = ehca_rereg_mr(shca, e_fmr, (u64*)iova, + ret = ehca_rereg_mr(shca, e_fmr, (u64 *)iova, list_len * e_fmr->fmr_page_size, e_fmr->acl, e_pd, &pginfo, &tmp_lkey, &tmp_rkey); if (ret) @@ -831,8 +843,7 @@ int ehca_map_phys_fmr(struct ib_fmr *fmr, map_phys_fmr_exit0: if (ret) ehca_err(fmr->device, "ret=%x fmr=%p page_list=%p list_len=%x " - "iova=%lx", - ret, fmr, page_list, list_len, iova); + "iova=%lx", ret, fmr, page_list, list_len, iova); return ret; } /* end ehca_map_phys_fmr() */ @@ -922,7 +933,7 @@ int ehca_dealloc_fmr(struct ib_fmr *fmr) "hca_hndl=%lx fmr_hndl=%lx fmr->lkey=%x", h_ret, e_fmr, shca->ipz_hca_handle.handle, e_fmr->ipz_mr_handle.handle, fmr->lkey); - ret = ehca_mrmw_map_hrc_free_mr(h_ret); + ret = ehca2ib_return_code(h_ret); goto free_fmr_exit0; } /* successful deregistration */ @@ -950,12 +961,12 @@ int ehca_reg_mr(struct ehca_shca *shca, int ret; u64 h_ret; u32 hipz_acl; - struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0}; + struct ehca_mr_hipzout_parms hipzout; ehca_mrmw_map_acl(acl, &hipz_acl); ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl); if (ehca_use_hp_mr == 1) - hipz_acl |= 0x00000001; + hipz_acl |= 0x00000001; h_ret = hipz_h_alloc_resource_mr(shca->ipz_hca_handle, e_mr, (u64)iova_start, size, hipz_acl, @@ -963,7 +974,7 @@ int ehca_reg_mr(struct ehca_shca *shca, if (h_ret != H_SUCCESS) { ehca_err(&shca->ib_device, "hipz_alloc_mr failed, h_ret=%lx " "hca_hndl=%lx", h_ret, shca->ipz_hca_handle.handle); - ret = ehca_mrmw_map_hrc_alloc(h_ret); + ret = ehca2ib_return_code(h_ret); goto ehca_reg_mr_exit0; } @@ -974,11 +985,11 @@ int ehca_reg_mr(struct ehca_shca *shca, goto ehca_reg_mr_exit1; /* successful registration */ - e_mr->num_pages = pginfo->num_pages; - e_mr->num_4k = pginfo->num_4k; - e_mr->start = iova_start; - e_mr->size = size; - e_mr->acl = acl; + e_mr->num_kpages = pginfo->num_kpages; + e_mr->num_hwpages = pginfo->num_hwpages; + e_mr->start = iova_start; + e_mr->size = size; + e_mr->acl = acl; *lkey = hipzout.lkey; *rkey = hipzout.rkey; return 0; @@ -988,10 +999,10 @@ ehca_reg_mr_exit1: if (h_ret != H_SUCCESS) { ehca_err(&shca->ib_device, "h_ret=%lx shca=%p e_mr=%p " "iova_start=%p size=%lx acl=%x e_pd=%p lkey=%x " - "pginfo=%p num_pages=%lx num_4k=%lx ret=%x", + "pginfo=%p num_kpages=%lx num_hwpages=%lx ret=%x", h_ret, shca, e_mr, iova_start, size, acl, e_pd, - hipzout.lkey, pginfo, pginfo->num_pages, - pginfo->num_4k, ret); + hipzout.lkey, pginfo, pginfo->num_kpages, + pginfo->num_hwpages, ret); ehca_err(&shca->ib_device, "internal error in ehca_reg_mr, " "not recoverable"); } @@ -999,9 +1010,9 @@ ehca_reg_mr_exit0: if (ret) ehca_err(&shca->ib_device, "ret=%x shca=%p e_mr=%p " "iova_start=%p size=%lx acl=%x e_pd=%p pginfo=%p " - "num_pages=%lx num_4k=%lx", + "num_kpages=%lx num_hwpages=%lx", ret, shca, e_mr, iova_start, size, acl, e_pd, pginfo, - pginfo->num_pages, pginfo->num_4k); + pginfo->num_kpages, pginfo->num_hwpages); return ret; } /* end ehca_reg_mr() */ @@ -1026,24 +1037,24 @@ int ehca_reg_mr_rpages(struct ehca_shca *shca, } /* max 512 pages per shot */ - for (i = 0; i < ((pginfo->num_4k + 512 - 1) / 512); i++) { + for (i = 0; i < NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES); i++) { - if (i == ((pginfo->num_4k + 512 - 1) / 512) - 1) { - rnum = pginfo->num_4k % 512; /* last shot */ + if (i == NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES) - 1) { + rnum = pginfo->num_hwpages % MAX_RPAGES; /* last shot */ if (rnum == 0) - rnum = 512; /* last shot is full */ + rnum = MAX_RPAGES; /* last shot is full */ } else - rnum = 512; + rnum = MAX_RPAGES; - if (rnum > 1) { - ret = ehca_set_pagebuf(e_mr, pginfo, rnum, kpage); - if (ret) { - ehca_err(&shca->ib_device, "ehca_set_pagebuf " + ret = ehca_set_pagebuf(pginfo, rnum, kpage); + if (ret) { + ehca_err(&shca->ib_device, "ehca_set_pagebuf " "bad rc, ret=%x rnum=%x kpage=%p", ret, rnum, kpage); - ret = -EFAULT; - goto ehca_reg_mr_rpages_exit1; - } + goto ehca_reg_mr_rpages_exit1; + } + + if (rnum > 1) { rpage = virt_to_abs(kpage); if (!rpage) { ehca_err(&shca->ib_device, "kpage=%p i=%x", @@ -1051,21 +1062,14 @@ int ehca_reg_mr_rpages(struct ehca_shca *shca, ret = -EFAULT; goto ehca_reg_mr_rpages_exit1; } - } else { /* rnum==1 */ - ret = ehca_set_pagebuf_1(e_mr, pginfo, &rpage); - if (ret) { - ehca_err(&shca->ib_device, "ehca_set_pagebuf_1 " - "bad rc, ret=%x i=%x", ret, i); - ret = -EFAULT; - goto ehca_reg_mr_rpages_exit1; - } - } + } else + rpage = *kpage; h_ret = hipz_h_register_rpage_mr(shca->ipz_hca_handle, e_mr, 0, /* pagesize 4k */ 0, rpage, rnum); - if (i == ((pginfo->num_4k + 512 - 1) / 512) - 1) { + if (i == NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES) - 1) { /* * check for 'registration complete'==H_SUCCESS * and for 'page registered'==H_PAGE_REGISTERED @@ -1078,7 +1082,7 @@ int ehca_reg_mr_rpages(struct ehca_shca *shca, shca->ipz_hca_handle.handle, e_mr->ipz_mr_handle.handle, e_mr->ib.ib_mr.lkey); - ret = ehca_mrmw_map_hrc_rrpg_last(h_ret); + ret = ehca2ib_return_code(h_ret); break; } else ret = 0; @@ -1089,7 +1093,7 @@ int ehca_reg_mr_rpages(struct ehca_shca *shca, e_mr->ib.ib_mr.lkey, shca->ipz_hca_handle.handle, e_mr->ipz_mr_handle.handle); - ret = ehca_mrmw_map_hrc_rrpg_notlast(h_ret); + ret = ehca2ib_return_code(h_ret); break; } else ret = 0; @@ -1101,8 +1105,8 @@ ehca_reg_mr_rpages_exit1: ehca_reg_mr_rpages_exit0: if (ret) ehca_err(&shca->ib_device, "ret=%x shca=%p e_mr=%p pginfo=%p " - "num_pages=%lx num_4k=%lx", ret, shca, e_mr, pginfo, - pginfo->num_pages, pginfo->num_4k); + "num_kpages=%lx num_hwpages=%lx", ret, shca, e_mr, + pginfo, pginfo->num_kpages, pginfo->num_hwpages); return ret; } /* end ehca_reg_mr_rpages() */ @@ -1124,7 +1128,7 @@ inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca, u64 *kpage; u64 rpage; struct ehca_mr_pginfo pginfo_save; - struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0}; + struct ehca_mr_hipzout_parms hipzout; ehca_mrmw_map_acl(acl, &hipz_acl); ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl); @@ -1137,12 +1141,12 @@ inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca, } pginfo_save = *pginfo; - ret = ehca_set_pagebuf(e_mr, pginfo, pginfo->num_4k, kpage); + ret = ehca_set_pagebuf(pginfo, pginfo->num_hwpages, kpage); if (ret) { ehca_err(&shca->ib_device, "set pagebuf failed, e_mr=%p " - "pginfo=%p type=%x num_pages=%lx num_4k=%lx kpage=%p", - e_mr, pginfo, pginfo->type, pginfo->num_pages, - pginfo->num_4k,kpage); + "pginfo=%p type=%x num_kpages=%lx num_hwpages=%lx " + "kpage=%p", e_mr, pginfo, pginfo->type, + pginfo->num_kpages, pginfo->num_hwpages, kpage); goto ehca_rereg_mr_rereg1_exit1; } rpage = virt_to_abs(kpage); @@ -1164,7 +1168,7 @@ inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca, "(Rereg1), h_ret=%lx e_mr=%p", h_ret, e_mr); *pginfo = pginfo_save; ret = -EAGAIN; - } else if ((u64*)hipzout.vaddr != iova_start) { + } else if ((u64 *)hipzout.vaddr != iova_start) { ehca_err(&shca->ib_device, "PHYP changed iova_start in " "rereg_pmr, iova_start=%p iova_start_out=%lx e_mr=%p " "mr_handle=%lx lkey=%x lkey_out=%x", iova_start, @@ -1176,11 +1180,11 @@ inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca, * successful reregistration * note: start and start_out are identical for eServer HCAs */ - e_mr->num_pages = pginfo->num_pages; - e_mr->num_4k = pginfo->num_4k; - e_mr->start = iova_start; - e_mr->size = size; - e_mr->acl = acl; + e_mr->num_kpages = pginfo->num_kpages; + e_mr->num_hwpages = pginfo->num_hwpages; + e_mr->start = iova_start; + e_mr->size = size; + e_mr->acl = acl; *lkey = hipzout.lkey; *rkey = hipzout.rkey; } @@ -1190,9 +1194,9 @@ ehca_rereg_mr_rereg1_exit1: ehca_rereg_mr_rereg1_exit0: if ( ret && (ret != -EAGAIN) ) ehca_err(&shca->ib_device, "ret=%x lkey=%x rkey=%x " - "pginfo=%p num_pages=%lx num_4k=%lx", - ret, *lkey, *rkey, pginfo, pginfo->num_pages, - pginfo->num_4k); + "pginfo=%p num_kpages=%lx num_hwpages=%lx", + ret, *lkey, *rkey, pginfo, pginfo->num_kpages, + pginfo->num_hwpages); return ret; } /* end ehca_rereg_mr_rereg1() */ @@ -1214,10 +1218,12 @@ int ehca_rereg_mr(struct ehca_shca *shca, int rereg_3_hcall = 0; /* 1: use 3 hipz calls for reregistration */ /* first determine reregistration hCall(s) */ - if ((pginfo->num_4k > 512) || (e_mr->num_4k > 512) || - (pginfo->num_4k > e_mr->num_4k)) { - ehca_dbg(&shca->ib_device, "Rereg3 case, pginfo->num_4k=%lx " - "e_mr->num_4k=%x", pginfo->num_4k, e_mr->num_4k); + if ((pginfo->num_hwpages > MAX_RPAGES) || + (e_mr->num_hwpages > MAX_RPAGES) || + (pginfo->num_hwpages > e_mr->num_hwpages)) { + ehca_dbg(&shca->ib_device, "Rereg3 case, " + "pginfo->num_hwpages=%lx e_mr->num_hwpages=%x", + pginfo->num_hwpages, e_mr->num_hwpages); rereg_1_hcall = 0; rereg_3_hcall = 1; } @@ -1253,7 +1259,7 @@ int ehca_rereg_mr(struct ehca_shca *shca, h_ret, e_mr, shca->ipz_hca_handle.handle, e_mr->ipz_mr_handle.handle, e_mr->ib.ib_mr.lkey); - ret = ehca_mrmw_map_hrc_free_mr(h_ret); + ret = ehca2ib_return_code(h_ret); goto ehca_rereg_mr_exit0; } /* clean ehca_mr_t, without changing struct ib_mr and lock */ @@ -1281,9 +1287,9 @@ ehca_rereg_mr_exit0: if (ret) ehca_err(&shca->ib_device, "ret=%x shca=%p e_mr=%p " "iova_start=%p size=%lx acl=%x e_pd=%p pginfo=%p " - "num_pages=%lx lkey=%x rkey=%x rereg_1_hcall=%x " + "num_kpages=%lx lkey=%x rkey=%x rereg_1_hcall=%x " "rereg_3_hcall=%x", ret, shca, e_mr, iova_start, size, - acl, e_pd, pginfo, pginfo->num_pages, *lkey, *rkey, + acl, e_pd, pginfo, pginfo->num_kpages, *lkey, *rkey, rereg_1_hcall, rereg_3_hcall); return ret; } /* end ehca_rereg_mr() */ @@ -1295,97 +1301,86 @@ int ehca_unmap_one_fmr(struct ehca_shca *shca, { int ret = 0; u64 h_ret; - int rereg_1_hcall = 1; /* 1: use hipz_mr_reregister directly */ - int rereg_3_hcall = 0; /* 1: use 3 hipz calls for unmapping */ struct ehca_pd *e_pd = container_of(e_fmr->ib.ib_fmr.pd, struct ehca_pd, ib_pd); struct ehca_mr save_fmr; u32 tmp_lkey, tmp_rkey; - struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0}; - struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0}; + struct ehca_mr_pginfo pginfo; + struct ehca_mr_hipzout_parms hipzout; + struct ehca_mr save_mr; - /* first check if reregistration hCall can be used for unmap */ - if (e_fmr->fmr_max_pages > 512) { - rereg_1_hcall = 0; - rereg_3_hcall = 1; - } - - if (rereg_1_hcall) { + if (e_fmr->fmr_max_pages <= MAX_RPAGES) { /* * note: after using rereg hcall with len=0, * rereg hcall must be used again for registering pages */ h_ret = hipz_h_reregister_pmr(shca->ipz_hca_handle, e_fmr, 0, 0, 0, e_pd->fw_pd, 0, &hipzout); - if (h_ret != H_SUCCESS) { - /* - * should not happen, because length checked above, - * FMRs are not shared and no MW bound to FMRs - */ - ehca_err(&shca->ib_device, "hipz_reregister_pmr failed " - "(Rereg1), h_ret=%lx e_fmr=%p hca_hndl=%lx " - "mr_hndl=%lx lkey=%x lkey_out=%x", - h_ret, e_fmr, shca->ipz_hca_handle.handle, - e_fmr->ipz_mr_handle.handle, - e_fmr->ib.ib_fmr.lkey, hipzout.lkey); - rereg_3_hcall = 1; - } else { + if (h_ret == H_SUCCESS) { /* successful reregistration */ e_fmr->start = NULL; e_fmr->size = 0; tmp_lkey = hipzout.lkey; tmp_rkey = hipzout.rkey; + return 0; } + /* + * should not happen, because length checked above, + * FMRs are not shared and no MW bound to FMRs + */ + ehca_err(&shca->ib_device, "hipz_reregister_pmr failed " + "(Rereg1), h_ret=%lx e_fmr=%p hca_hndl=%lx " + "mr_hndl=%lx lkey=%x lkey_out=%x", + h_ret, e_fmr, shca->ipz_hca_handle.handle, + e_fmr->ipz_mr_handle.handle, + e_fmr->ib.ib_fmr.lkey, hipzout.lkey); + /* try free and rereg */ } - if (rereg_3_hcall) { - struct ehca_mr save_mr; - - /* first free old FMR */ - h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr); - if (h_ret != H_SUCCESS) { - ehca_err(&shca->ib_device, "hipz_free_mr failed, " - "h_ret=%lx e_fmr=%p hca_hndl=%lx mr_hndl=%lx " - "lkey=%x", - h_ret, e_fmr, shca->ipz_hca_handle.handle, - e_fmr->ipz_mr_handle.handle, - e_fmr->ib.ib_fmr.lkey); - ret = ehca_mrmw_map_hrc_free_mr(h_ret); - goto ehca_unmap_one_fmr_exit0; - } - /* clean ehca_mr_t, without changing lock */ - save_fmr = *e_fmr; - ehca_mr_deletenew(e_fmr); - - /* set some MR values */ - e_fmr->flags = save_fmr.flags; - e_fmr->fmr_page_size = save_fmr.fmr_page_size; - e_fmr->fmr_max_pages = save_fmr.fmr_max_pages; - e_fmr->fmr_max_maps = save_fmr.fmr_max_maps; - e_fmr->fmr_map_cnt = save_fmr.fmr_map_cnt; - e_fmr->acl = save_fmr.acl; - - pginfo.type = EHCA_MR_PGI_FMR; - pginfo.num_pages = 0; - pginfo.num_4k = 0; - ret = ehca_reg_mr(shca, e_fmr, NULL, - (e_fmr->fmr_max_pages * e_fmr->fmr_page_size), - e_fmr->acl, e_pd, &pginfo, &tmp_lkey, - &tmp_rkey); - if (ret) { - u32 offset = (u64)(&e_fmr->flags) - (u64)e_fmr; - memcpy(&e_fmr->flags, &(save_mr.flags), - sizeof(struct ehca_mr) - offset); - goto ehca_unmap_one_fmr_exit0; - } + /* first free old FMR */ + h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr); + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "hipz_free_mr failed, " + "h_ret=%lx e_fmr=%p hca_hndl=%lx mr_hndl=%lx " + "lkey=%x", + h_ret, e_fmr, shca->ipz_hca_handle.handle, + e_fmr->ipz_mr_handle.handle, + e_fmr->ib.ib_fmr.lkey); + ret = ehca2ib_return_code(h_ret); + goto ehca_unmap_one_fmr_exit0; + } + /* clean ehca_mr_t, without changing lock */ + save_fmr = *e_fmr; + ehca_mr_deletenew(e_fmr); + + /* set some MR values */ + e_fmr->flags = save_fmr.flags; + e_fmr->fmr_page_size = save_fmr.fmr_page_size; + e_fmr->fmr_max_pages = save_fmr.fmr_max_pages; + e_fmr->fmr_max_maps = save_fmr.fmr_max_maps; + e_fmr->fmr_map_cnt = save_fmr.fmr_map_cnt; + e_fmr->acl = save_fmr.acl; + + memset(&pginfo, 0, sizeof(pginfo)); + pginfo.type = EHCA_MR_PGI_FMR; + pginfo.num_kpages = 0; + pginfo.num_hwpages = 0; + ret = ehca_reg_mr(shca, e_fmr, NULL, + (e_fmr->fmr_max_pages * e_fmr->fmr_page_size), + e_fmr->acl, e_pd, &pginfo, &tmp_lkey, + &tmp_rkey); + if (ret) { + u32 offset = (u64)(&e_fmr->flags) - (u64)e_fmr; + memcpy(&e_fmr->flags, &(save_mr.flags), + sizeof(struct ehca_mr) - offset); + goto ehca_unmap_one_fmr_exit0; } ehca_unmap_one_fmr_exit0: if (ret) ehca_err(&shca->ib_device, "ret=%x tmp_lkey=%x tmp_rkey=%x " - "fmr_max_pages=%x rereg_1_hcall=%x rereg_3_hcall=%x", - ret, tmp_lkey, tmp_rkey, e_fmr->fmr_max_pages, - rereg_1_hcall, rereg_3_hcall); + "fmr_max_pages=%x", + ret, tmp_lkey, tmp_rkey, e_fmr->fmr_max_pages); return ret; } /* end ehca_unmap_one_fmr() */ @@ -1403,7 +1398,7 @@ int ehca_reg_smr(struct ehca_shca *shca, int ret = 0; u64 h_ret; u32 hipz_acl; - struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0}; + struct ehca_mr_hipzout_parms hipzout; ehca_mrmw_map_acl(acl, &hipz_acl); ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl); @@ -1419,15 +1414,15 @@ int ehca_reg_smr(struct ehca_shca *shca, shca->ipz_hca_handle.handle, e_origmr->ipz_mr_handle.handle, e_origmr->ib.ib_mr.lkey); - ret = ehca_mrmw_map_hrc_reg_smr(h_ret); + ret = ehca2ib_return_code(h_ret); goto ehca_reg_smr_exit0; } /* successful registration */ - e_newmr->num_pages = e_origmr->num_pages; - e_newmr->num_4k = e_origmr->num_4k; - e_newmr->start = iova_start; - e_newmr->size = e_origmr->size; - e_newmr->acl = acl; + e_newmr->num_kpages = e_origmr->num_kpages; + e_newmr->num_hwpages = e_origmr->num_hwpages; + e_newmr->start = iova_start; + e_newmr->size = e_origmr->size; + e_newmr->acl = acl; e_newmr->ipz_mr_handle = hipzout.handle; *lkey = hipzout.lkey; *rkey = hipzout.rkey; @@ -1453,10 +1448,10 @@ int ehca_reg_internal_maxmr( struct ehca_mr *e_mr; u64 *iova_start; u64 size_maxmr; - struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0}; + struct ehca_mr_pginfo pginfo; struct ib_phys_buf ib_pbuf; - u32 num_pages_mr; - u32 num_pages_4k; /* 4k portion "pages" */ + u32 num_kpages; + u32 num_hwpages; e_mr = ehca_mr_new(); if (!e_mr) { @@ -1468,28 +1463,29 @@ int ehca_reg_internal_maxmr( /* register internal max-MR on HCA */ size_maxmr = (u64)high_memory - PAGE_OFFSET; - iova_start = (u64*)KERNELBASE; + iova_start = (u64 *)KERNELBASE; ib_pbuf.addr = 0; ib_pbuf.size = size_maxmr; - num_pages_mr = ((((u64)iova_start % PAGE_SIZE) + size_maxmr + - PAGE_SIZE - 1) / PAGE_SIZE); - num_pages_4k = ((((u64)iova_start % EHCA_PAGESIZE) + size_maxmr + - EHCA_PAGESIZE - 1) / EHCA_PAGESIZE); - - pginfo.type = EHCA_MR_PGI_PHYS; - pginfo.num_pages = num_pages_mr; - pginfo.num_4k = num_pages_4k; - pginfo.num_phys_buf = 1; - pginfo.phys_buf_array = &ib_pbuf; + num_kpages = NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size_maxmr, + PAGE_SIZE); + num_hwpages = NUM_CHUNKS(((u64)iova_start % EHCA_PAGESIZE) + size_maxmr, + EHCA_PAGESIZE); + + memset(&pginfo, 0, sizeof(pginfo)); + pginfo.type = EHCA_MR_PGI_PHYS; + pginfo.num_kpages = num_kpages; + pginfo.num_hwpages = num_hwpages; + pginfo.u.phy.num_phys_buf = 1; + pginfo.u.phy.phys_buf_array = &ib_pbuf; ret = ehca_reg_mr(shca, e_mr, iova_start, size_maxmr, 0, e_pd, &pginfo, &e_mr->ib.ib_mr.lkey, &e_mr->ib.ib_mr.rkey); if (ret) { ehca_err(&shca->ib_device, "reg of internal max MR failed, " - "e_mr=%p iova_start=%p size_maxmr=%lx num_pages_mr=%x " - "num_pages_4k=%x", e_mr, iova_start, size_maxmr, - num_pages_mr, num_pages_4k); + "e_mr=%p iova_start=%p size_maxmr=%lx num_kpages=%x " + "num_hwpages=%x", e_mr, iova_start, size_maxmr, + num_kpages, num_hwpages); goto ehca_reg_internal_maxmr_exit1; } @@ -1524,7 +1520,7 @@ int ehca_reg_maxmr(struct ehca_shca *shca, u64 h_ret; struct ehca_mr *e_origmr = shca->maxmr; u32 hipz_acl; - struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0}; + struct ehca_mr_hipzout_parms hipzout; ehca_mrmw_map_acl(acl, &hipz_acl); ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl); @@ -1538,14 +1534,14 @@ int ehca_reg_maxmr(struct ehca_shca *shca, h_ret, e_origmr, shca->ipz_hca_handle.handle, e_origmr->ipz_mr_handle.handle, e_origmr->ib.ib_mr.lkey); - return ehca_mrmw_map_hrc_reg_smr(h_ret); + return ehca2ib_return_code(h_ret); } /* successful registration */ - e_newmr->num_pages = e_origmr->num_pages; - e_newmr->num_4k = e_origmr->num_4k; - e_newmr->start = iova_start; - e_newmr->size = e_origmr->size; - e_newmr->acl = acl; + e_newmr->num_kpages = e_origmr->num_kpages; + e_newmr->num_hwpages = e_origmr->num_hwpages; + e_newmr->start = iova_start; + e_newmr->size = e_origmr->size; + e_newmr->acl = acl; e_newmr->ipz_mr_handle = hipzout.handle; *lkey = hipzout.lkey; *rkey = hipzout.rkey; @@ -1677,299 +1673,187 @@ int ehca_fmr_check_page_list(struct ehca_mr *e_fmr, /*----------------------------------------------------------------------*/ -/* setup page buffer from page info */ -int ehca_set_pagebuf(struct ehca_mr *e_mr, - struct ehca_mr_pginfo *pginfo, - u32 number, - u64 *kpage) +/* PAGE_SIZE >= pginfo->hwpage_size */ +static int ehca_set_pagebuf_user1(struct ehca_mr_pginfo *pginfo, + u32 number, + u64 *kpage) { int ret = 0; struct ib_umem_chunk *prev_chunk; struct ib_umem_chunk *chunk; - struct ib_phys_buf *pbuf; - u64 *fmrlist; - u64 num4k, pgaddr, offs4k; + u64 pgaddr; u32 i = 0; u32 j = 0; - if (pginfo->type == EHCA_MR_PGI_PHYS) { - /* loop over desired phys_buf_array entries */ - while (i < number) { - pbuf = pginfo->phys_buf_array + pginfo->next_buf; - num4k = ((pbuf->addr % EHCA_PAGESIZE) + pbuf->size + - EHCA_PAGESIZE - 1) / EHCA_PAGESIZE; - offs4k = (pbuf->addr & ~PAGE_MASK) / EHCA_PAGESIZE; - while (pginfo->next_4k < offs4k + num4k) { - /* sanity check */ - if ((pginfo->page_cnt >= pginfo->num_pages) || - (pginfo->page_4k_cnt >= pginfo->num_4k)) { - ehca_gen_err("page_cnt >= num_pages, " - "page_cnt=%lx " - "num_pages=%lx " - "page_4k_cnt=%lx " - "num_4k=%lx i=%x", - pginfo->page_cnt, - pginfo->num_pages, - pginfo->page_4k_cnt, - pginfo->num_4k, i); - ret = -EFAULT; - goto ehca_set_pagebuf_exit0; - } - *kpage = phys_to_abs( - (pbuf->addr & EHCA_PAGEMASK) - + (pginfo->next_4k * EHCA_PAGESIZE)); - if ( !(*kpage) && pbuf->addr ) { - ehca_gen_err("pbuf->addr=%lx " - "pbuf->size=%lx " - "next_4k=%lx", pbuf->addr, - pbuf->size, - pginfo->next_4k); - ret = -EFAULT; - goto ehca_set_pagebuf_exit0; - } - (pginfo->page_4k_cnt)++; - (pginfo->next_4k)++; - if (pginfo->next_4k % - (PAGE_SIZE / EHCA_PAGESIZE) == 0) - (pginfo->page_cnt)++; - kpage++; - i++; - if (i >= number) break; - } - if (pginfo->next_4k >= offs4k + num4k) { - (pginfo->next_buf)++; - pginfo->next_4k = 0; - } - } - } else if (pginfo->type == EHCA_MR_PGI_USER) { - /* loop over desired chunk entries */ - chunk = pginfo->next_chunk; - prev_chunk = pginfo->next_chunk; - list_for_each_entry_continue(chunk, - (&(pginfo->region->chunk_list)), - list) { - for (i = pginfo->next_nmap; i < chunk->nmap; ) { - pgaddr = ( page_to_pfn(chunk->page_list[i].page) - << PAGE_SHIFT ); - *kpage = phys_to_abs(pgaddr + - (pginfo->next_4k * - EHCA_PAGESIZE)); - if ( !(*kpage) ) { - ehca_gen_err("pgaddr=%lx " - "chunk->page_list[i]=%lx " - "i=%x next_4k=%lx mr=%p", - pgaddr, - (u64)sg_dma_address( - &chunk-> - page_list[i]), - i, pginfo->next_4k, e_mr); - ret = -EFAULT; - goto ehca_set_pagebuf_exit0; - } - (pginfo->page_4k_cnt)++; - (pginfo->next_4k)++; - kpage++; - if (pginfo->next_4k % - (PAGE_SIZE / EHCA_PAGESIZE) == 0) { - (pginfo->page_cnt)++; - (pginfo->next_nmap)++; - pginfo->next_4k = 0; - i++; - } - j++; - if (j >= number) break; - } - if ((pginfo->next_nmap >= chunk->nmap) && - (j >= number)) { - pginfo->next_nmap = 0; - prev_chunk = chunk; - break; - } else if (pginfo->next_nmap >= chunk->nmap) { - pginfo->next_nmap = 0; - prev_chunk = chunk; - } else if (j >= number) - break; - else - prev_chunk = chunk; - } - pginfo->next_chunk = - list_prepare_entry(prev_chunk, - (&(pginfo->region->chunk_list)), - list); - } else if (pginfo->type == EHCA_MR_PGI_FMR) { - /* loop over desired page_list entries */ - fmrlist = pginfo->page_list + pginfo->next_listelem; - for (i = 0; i < number; i++) { - *kpage = phys_to_abs((*fmrlist & EHCA_PAGEMASK) + - pginfo->next_4k * EHCA_PAGESIZE); + /* loop over desired chunk entries */ + chunk = pginfo->u.usr.next_chunk; + prev_chunk = pginfo->u.usr.next_chunk; + list_for_each_entry_continue( + chunk, (&(pginfo->u.usr.region->chunk_list)), list) { + for (i = pginfo->u.usr.next_nmap; i < chunk->nmap; ) { + pgaddr = page_to_pfn(chunk->page_list[i].page) + << PAGE_SHIFT ; + *kpage = phys_to_abs(pgaddr + + (pginfo->next_hwpage * + EHCA_PAGESIZE)); if ( !(*kpage) ) { - ehca_gen_err("*fmrlist=%lx fmrlist=%p " - "next_listelem=%lx next_4k=%lx", - *fmrlist, fmrlist, - pginfo->next_listelem, - pginfo->next_4k); - ret = -EFAULT; - goto ehca_set_pagebuf_exit0; + ehca_gen_err("pgaddr=%lx " + "chunk->page_list[i]=%lx " + "i=%x next_hwpage=%lx", + pgaddr, (u64)sg_dma_address( + &chunk->page_list[i]), + i, pginfo->next_hwpage); + return -EFAULT; } - (pginfo->page_4k_cnt)++; - (pginfo->next_4k)++; + (pginfo->hwpage_cnt)++; + (pginfo->next_hwpage)++; kpage++; - if (pginfo->next_4k % - (e_mr->fmr_page_size / EHCA_PAGESIZE) == 0) { - (pginfo->page_cnt)++; - (pginfo->next_listelem)++; - fmrlist++; - pginfo->next_4k = 0; + if (pginfo->next_hwpage % + (PAGE_SIZE / EHCA_PAGESIZE) == 0) { + (pginfo->kpage_cnt)++; + (pginfo->u.usr.next_nmap)++; + pginfo->next_hwpage = 0; + i++; } + j++; + if (j >= number) break; } - } else { - ehca_gen_err("bad pginfo->type=%x", pginfo->type); - ret = -EFAULT; - goto ehca_set_pagebuf_exit0; + if ((pginfo->u.usr.next_nmap >= chunk->nmap) && + (j >= number)) { + pginfo->u.usr.next_nmap = 0; + prev_chunk = chunk; + break; + } else if (pginfo->u.usr.next_nmap >= chunk->nmap) { + pginfo->u.usr.next_nmap = 0; + prev_chunk = chunk; + } else if (j >= number) + break; + else + prev_chunk = chunk; } - -ehca_set_pagebuf_exit0: - if (ret) - ehca_gen_err("ret=%x e_mr=%p pginfo=%p type=%x num_pages=%lx " - "num_4k=%lx next_buf=%lx next_4k=%lx number=%x " - "kpage=%p page_cnt=%lx page_4k_cnt=%lx i=%x " - "next_listelem=%lx region=%p next_chunk=%p " - "next_nmap=%lx", ret, e_mr, pginfo, pginfo->type, - pginfo->num_pages, pginfo->num_4k, - pginfo->next_buf, pginfo->next_4k, number, kpage, - pginfo->page_cnt, pginfo->page_4k_cnt, i, - pginfo->next_listelem, pginfo->region, - pginfo->next_chunk, pginfo->next_nmap); + pginfo->u.usr.next_chunk = + list_prepare_entry(prev_chunk, + (&(pginfo->u.usr.region->chunk_list)), + list); return ret; -} /* end ehca_set_pagebuf() */ - -/*----------------------------------------------------------------------*/ +} -/* setup 1 page from page info page buffer */ -int ehca_set_pagebuf_1(struct ehca_mr *e_mr, - struct ehca_mr_pginfo *pginfo, - u64 *rpage) +int ehca_set_pagebuf_phys(struct ehca_mr_pginfo *pginfo, + u32 number, + u64 *kpage) { int ret = 0; - struct ib_phys_buf *tmp_pbuf; - u64 *fmrlist; - struct ib_umem_chunk *chunk; - struct ib_umem_chunk *prev_chunk; - u64 pgaddr, num4k, offs4k; - - if (pginfo->type == EHCA_MR_PGI_PHYS) { - /* sanity check */ - if ((pginfo->page_cnt >= pginfo->num_pages) || - (pginfo->page_4k_cnt >= pginfo->num_4k)) { - ehca_gen_err("page_cnt >= num_pages, page_cnt=%lx " - "num_pages=%lx page_4k_cnt=%lx num_4k=%lx", - pginfo->page_cnt, pginfo->num_pages, - pginfo->page_4k_cnt, pginfo->num_4k); - ret = -EFAULT; - goto ehca_set_pagebuf_1_exit0; - } - tmp_pbuf = pginfo->phys_buf_array + pginfo->next_buf; - num4k = ((tmp_pbuf->addr % EHCA_PAGESIZE) + tmp_pbuf->size + - EHCA_PAGESIZE - 1) / EHCA_PAGESIZE; - offs4k = (tmp_pbuf->addr & ~PAGE_MASK) / EHCA_PAGESIZE; - *rpage = phys_to_abs((tmp_pbuf->addr & EHCA_PAGEMASK) + - (pginfo->next_4k * EHCA_PAGESIZE)); - if ( !(*rpage) && tmp_pbuf->addr ) { - ehca_gen_err("tmp_pbuf->addr=%lx" - " tmp_pbuf->size=%lx next_4k=%lx", - tmp_pbuf->addr, tmp_pbuf->size, - pginfo->next_4k); - ret = -EFAULT; - goto ehca_set_pagebuf_1_exit0; - } - (pginfo->page_4k_cnt)++; - (pginfo->next_4k)++; - if (pginfo->next_4k % (PAGE_SIZE / EHCA_PAGESIZE) == 0) - (pginfo->page_cnt)++; - if (pginfo->next_4k >= offs4k + num4k) { - (pginfo->next_buf)++; - pginfo->next_4k = 0; - } - } else if (pginfo->type == EHCA_MR_PGI_USER) { - chunk = pginfo->next_chunk; - prev_chunk = pginfo->next_chunk; - list_for_each_entry_continue(chunk, - (&(pginfo->region->chunk_list)), - list) { - pgaddr = ( page_to_pfn(chunk->page_list[ - pginfo->next_nmap].page) - << PAGE_SHIFT); - *rpage = phys_to_abs(pgaddr + - (pginfo->next_4k * EHCA_PAGESIZE)); - if ( !(*rpage) ) { - ehca_gen_err("pgaddr=%lx chunk->page_list[]=%lx" - " next_nmap=%lx next_4k=%lx mr=%p", - pgaddr, (u64)sg_dma_address( - &chunk->page_list[ - pginfo-> - next_nmap]), - pginfo->next_nmap, pginfo->next_4k, - e_mr); - ret = -EFAULT; - goto ehca_set_pagebuf_1_exit0; - } - (pginfo->page_4k_cnt)++; - (pginfo->next_4k)++; - if (pginfo->next_4k % - (PAGE_SIZE / EHCA_PAGESIZE) == 0) { - (pginfo->page_cnt)++; - (pginfo->next_nmap)++; - pginfo->next_4k = 0; + struct ib_phys_buf *pbuf; + u64 num_hw, offs_hw; + u32 i = 0; + + /* loop over desired phys_buf_array entries */ + while (i < number) { + pbuf = pginfo->u.phy.phys_buf_array + pginfo->u.phy.next_buf; + num_hw = NUM_CHUNKS((pbuf->addr % EHCA_PAGESIZE) + + pbuf->size, EHCA_PAGESIZE); + offs_hw = (pbuf->addr & ~PAGE_MASK) / EHCA_PAGESIZE; + while (pginfo->next_hwpage < offs_hw + num_hw) { + /* sanity check */ + if ((pginfo->kpage_cnt >= pginfo->num_kpages) || + (pginfo->hwpage_cnt >= pginfo->num_hwpages)) { + ehca_gen_err("kpage_cnt >= num_kpages, " + "kpage_cnt=%lx num_kpages=%lx " + "hwpage_cnt=%lx " + "num_hwpages=%lx i=%x", + pginfo->kpage_cnt, + pginfo->num_kpages, + pginfo->hwpage_cnt, + pginfo->num_hwpages, i); + return -EFAULT; } - if (pginfo->next_nmap >= chunk->nmap) { - pginfo->next_nmap = 0; - prev_chunk = chunk; + *kpage = phys_to_abs( + (pbuf->addr & EHCA_PAGEMASK) + + (pginfo->next_hwpage * EHCA_PAGESIZE)); + if ( !(*kpage) && pbuf->addr ) { + ehca_gen_err("pbuf->addr=%lx " + "pbuf->size=%lx " + "next_hwpage=%lx", pbuf->addr, + pbuf->size, + pginfo->next_hwpage); + return -EFAULT; } - break; + (pginfo->hwpage_cnt)++; + (pginfo->next_hwpage)++; + if (pginfo->next_hwpage % + (PAGE_SIZE / EHCA_PAGESIZE) == 0) + (pginfo->kpage_cnt)++; + kpage++; + i++; + if (i >= number) break; + } + if (pginfo->next_hwpage >= offs_hw + num_hw) { + (pginfo->u.phy.next_buf)++; + pginfo->next_hwpage = 0; } - pginfo->next_chunk = - list_prepare_entry(prev_chunk, - (&(pginfo->region->chunk_list)), - list); - } else if (pginfo->type == EHCA_MR_PGI_FMR) { - fmrlist = pginfo->page_list + pginfo->next_listelem; - *rpage = phys_to_abs((*fmrlist & EHCA_PAGEMASK) + - pginfo->next_4k * EHCA_PAGESIZE); - if ( !(*rpage) ) { + } + return ret; +} + +int ehca_set_pagebuf_fmr(struct ehca_mr_pginfo *pginfo, + u32 number, + u64 *kpage) +{ + int ret = 0; + u64 *fmrlist; + u32 i; + + /* loop over desired page_list entries */ + fmrlist = pginfo->u.fmr.page_list + pginfo->u.fmr.next_listelem; + for (i = 0; i < number; i++) { + *kpage = phys_to_abs((*fmrlist & EHCA_PAGEMASK) + + pginfo->next_hwpage * EHCA_PAGESIZE); + if ( !(*kpage) ) { ehca_gen_err("*fmrlist=%lx fmrlist=%p " - "next_listelem=%lx next_4k=%lx", - *fmrlist, fmrlist, pginfo->next_listelem, - pginfo->next_4k); - ret = -EFAULT; - goto ehca_set_pagebuf_1_exit0; + "next_listelem=%lx next_hwpage=%lx", + *fmrlist, fmrlist, + pginfo->u.fmr.next_listelem, + pginfo->next_hwpage); + return -EFAULT; } - (pginfo->page_4k_cnt)++; - (pginfo->next_4k)++; - if (pginfo->next_4k % - (e_mr->fmr_page_size / EHCA_PAGESIZE) == 0) { - (pginfo->page_cnt)++; - (pginfo->next_listelem)++; - pginfo->next_4k = 0; + (pginfo->hwpage_cnt)++; + (pginfo->next_hwpage)++; + kpage++; + if (pginfo->next_hwpage % + (pginfo->u.fmr.fmr_pgsize / EHCA_PAGESIZE) == 0) { + (pginfo->kpage_cnt)++; + (pginfo->u.fmr.next_listelem)++; + fmrlist++; + pginfo->next_hwpage = 0; } - } else { + } + return ret; +} + +/* setup page buffer from page info */ +int ehca_set_pagebuf(struct ehca_mr_pginfo *pginfo, + u32 number, + u64 *kpage) +{ + int ret; + + switch (pginfo->type) { + case EHCA_MR_PGI_PHYS: + ret = ehca_set_pagebuf_phys(pginfo, number, kpage); + break; + case EHCA_MR_PGI_USER: + ret = ehca_set_pagebuf_user1(pginfo, number, kpage); + break; + case EHCA_MR_PGI_FMR: + ret = ehca_set_pagebuf_fmr(pginfo, number, kpage); + break; + default: ehca_gen_err("bad pginfo->type=%x", pginfo->type); ret = -EFAULT; - goto ehca_set_pagebuf_1_exit0; + break; } - -ehca_set_pagebuf_1_exit0: - if (ret) - ehca_gen_err("ret=%x e_mr=%p pginfo=%p type=%x num_pages=%lx " - "num_4k=%lx next_buf=%lx next_4k=%lx rpage=%p " - "page_cnt=%lx page_4k_cnt=%lx next_listelem=%lx " - "region=%p next_chunk=%p next_nmap=%lx", ret, e_mr, - pginfo, pginfo->type, pginfo->num_pages, - pginfo->num_4k, pginfo->next_buf, pginfo->next_4k, - rpage, pginfo->page_cnt, pginfo->page_4k_cnt, - pginfo->next_listelem, pginfo->region, - pginfo->next_chunk, pginfo->next_nmap); return ret; -} /* end ehca_set_pagebuf_1() */ +} /* end ehca_set_pagebuf() */ /*----------------------------------------------------------------------*/ @@ -1982,7 +1866,7 @@ int ehca_mr_is_maxmr(u64 size, { /* a MR is treated as max-MR only if it fits following: */ if ((size == ((u64)high_memory - PAGE_OFFSET)) && - (iova_start == (void*)KERNELBASE)) { + (iova_start == (void *)KERNELBASE)) { ehca_gen_dbg("this is a max-MR"); return 1; } else @@ -2042,196 +1926,23 @@ void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl, /*----------------------------------------------------------------------*/ /* - * map HIPZ rc to IB retcodes for MR/MW allocations - * Used for hipz_mr_reg_alloc and hipz_mw_alloc. - */ -int ehca_mrmw_map_hrc_alloc(const u64 hipz_rc) -{ - switch (hipz_rc) { - case H_SUCCESS: /* successful completion */ - return 0; - case H_NOT_ENOUGH_RESOURCES: /* insufficient resources */ - case H_CONSTRAINED: /* resource constraint */ - case H_NO_MEM: - return -ENOMEM; - case H_BUSY: /* long busy */ - return -EBUSY; - default: - return -EINVAL; - } -} /* end ehca_mrmw_map_hrc_alloc() */ - -/*----------------------------------------------------------------------*/ - -/* - * map HIPZ rc to IB retcodes for MR register rpage - * Used for hipz_h_register_rpage_mr at registering last page - */ -int ehca_mrmw_map_hrc_rrpg_last(const u64 hipz_rc) -{ - switch (hipz_rc) { - case H_SUCCESS: /* registration complete */ - return 0; - case H_PAGE_REGISTERED: /* page registered */ - case H_ADAPTER_PARM: /* invalid adapter handle */ - case H_RH_PARM: /* invalid resource handle */ -/* case H_QT_PARM: invalid queue type */ - case H_PARAMETER: /* - * invalid logical address, - * or count zero or greater 512 - */ - case H_TABLE_FULL: /* page table full */ - case H_HARDWARE: /* HCA not operational */ - return -EINVAL; - case H_BUSY: /* long busy */ - return -EBUSY; - default: - return -EINVAL; - } -} /* end ehca_mrmw_map_hrc_rrpg_last() */ - -/*----------------------------------------------------------------------*/ - -/* - * map HIPZ rc to IB retcodes for MR register rpage - * Used for hipz_h_register_rpage_mr at registering one page, but not last page - */ -int ehca_mrmw_map_hrc_rrpg_notlast(const u64 hipz_rc) -{ - switch (hipz_rc) { - case H_PAGE_REGISTERED: /* page registered */ - return 0; - case H_SUCCESS: /* registration complete */ - case H_ADAPTER_PARM: /* invalid adapter handle */ - case H_RH_PARM: /* invalid resource handle */ -/* case H_QT_PARM: invalid queue type */ - case H_PARAMETER: /* - * invalid logical address, - * or count zero or greater 512 - */ - case H_TABLE_FULL: /* page table full */ - case H_HARDWARE: /* HCA not operational */ - return -EINVAL; - case H_BUSY: /* long busy */ - return -EBUSY; - default: - return -EINVAL; - } -} /* end ehca_mrmw_map_hrc_rrpg_notlast() */ - -/*----------------------------------------------------------------------*/ - -/* map HIPZ rc to IB retcodes for MR query. Used for hipz_mr_query. */ -int ehca_mrmw_map_hrc_query_mr(const u64 hipz_rc) -{ - switch (hipz_rc) { - case H_SUCCESS: /* successful completion */ - return 0; - case H_ADAPTER_PARM: /* invalid adapter handle */ - case H_RH_PARM: /* invalid resource handle */ - return -EINVAL; - case H_BUSY: /* long busy */ - return -EBUSY; - default: - return -EINVAL; - } -} /* end ehca_mrmw_map_hrc_query_mr() */ - -/*----------------------------------------------------------------------*/ -/*----------------------------------------------------------------------*/ - -/* - * map HIPZ rc to IB retcodes for freeing MR resource - * Used for hipz_h_free_resource_mr - */ -int ehca_mrmw_map_hrc_free_mr(const u64 hipz_rc) -{ - switch (hipz_rc) { - case H_SUCCESS: /* resource freed */ - return 0; - case H_ADAPTER_PARM: /* invalid adapter handle */ - case H_RH_PARM: /* invalid resource handle */ - case H_R_STATE: /* invalid resource state */ - case H_HARDWARE: /* HCA not operational */ - return -EINVAL; - case H_RESOURCE: /* Resource in use */ - case H_BUSY: /* long busy */ - return -EBUSY; - default: - return -EINVAL; - } -} /* end ehca_mrmw_map_hrc_free_mr() */ - -/*----------------------------------------------------------------------*/ - -/* - * map HIPZ rc to IB retcodes for freeing MW resource - * Used for hipz_h_free_resource_mw - */ -int ehca_mrmw_map_hrc_free_mw(const u64 hipz_rc) -{ - switch (hipz_rc) { - case H_SUCCESS: /* resource freed */ - return 0; - case H_ADAPTER_PARM: /* invalid adapter handle */ - case H_RH_PARM: /* invalid resource handle */ - case H_R_STATE: /* invalid resource state */ - case H_HARDWARE: /* HCA not operational */ - return -EINVAL; - case H_RESOURCE: /* Resource in use */ - case H_BUSY: /* long busy */ - return -EBUSY; - default: - return -EINVAL; - } -} /* end ehca_mrmw_map_hrc_free_mw() */ - -/*----------------------------------------------------------------------*/ - -/* - * map HIPZ rc to IB retcodes for SMR registrations - * Used for hipz_h_register_smr. - */ -int ehca_mrmw_map_hrc_reg_smr(const u64 hipz_rc) -{ - switch (hipz_rc) { - case H_SUCCESS: /* successful completion */ - return 0; - case H_ADAPTER_PARM: /* invalid adapter handle */ - case H_RH_PARM: /* invalid resource handle */ - case H_MEM_PARM: /* invalid MR virtual address */ - case H_MEM_ACCESS_PARM: /* invalid access controls */ - case H_NOT_ENOUGH_RESOURCES: /* insufficient resources */ - return -EINVAL; - case H_BUSY: /* long busy */ - return -EBUSY; - default: - return -EINVAL; - } -} /* end ehca_mrmw_map_hrc_reg_smr() */ - -/*----------------------------------------------------------------------*/ - -/* * MR destructor and constructor * used in Reregister MR verb, sets all fields in ehca_mr_t to 0, * except struct ib_mr and spinlock */ void ehca_mr_deletenew(struct ehca_mr *mr) { - mr->flags = 0; - mr->num_pages = 0; - mr->num_4k = 0; - mr->acl = 0; - mr->start = NULL; + mr->flags = 0; + mr->num_kpages = 0; + mr->num_hwpages = 0; + mr->acl = 0; + mr->start = NULL; mr->fmr_page_size = 0; mr->fmr_max_pages = 0; - mr->fmr_max_maps = 0; - mr->fmr_map_cnt = 0; + mr->fmr_max_maps = 0; + mr->fmr_map_cnt = 0; memset(&mr->ipz_mr_handle, 0, sizeof(mr->ipz_mr_handle)); memset(&mr->galpas, 0, sizeof(mr->galpas)); - mr->nr_of_pages = 0; - mr->pagearray = NULL; } /* end ehca_mr_deletenew() */ int ehca_init_mrmw_cache(void) diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.h b/drivers/infiniband/hw/ehca/ehca_mrmw.h index d936e40a5748..24f13fe3708b 100644 --- a/drivers/infiniband/hw/ehca/ehca_mrmw.h +++ b/drivers/infiniband/hw/ehca/ehca_mrmw.h @@ -101,15 +101,10 @@ int ehca_fmr_check_page_list(struct ehca_mr *e_fmr, u64 *page_list, int list_len); -int ehca_set_pagebuf(struct ehca_mr *e_mr, - struct ehca_mr_pginfo *pginfo, +int ehca_set_pagebuf(struct ehca_mr_pginfo *pginfo, u32 number, u64 *kpage); -int ehca_set_pagebuf_1(struct ehca_mr *e_mr, - struct ehca_mr_pginfo *pginfo, - u64 *rpage); - int ehca_mr_is_maxmr(u64 size, u64 *iova_start); @@ -121,20 +116,6 @@ void ehca_mrmw_set_pgsize_hipz_acl(u32 *hipz_acl); void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl, int *ib_acl); -int ehca_mrmw_map_hrc_alloc(const u64 hipz_rc); - -int ehca_mrmw_map_hrc_rrpg_last(const u64 hipz_rc); - -int ehca_mrmw_map_hrc_rrpg_notlast(const u64 hipz_rc); - -int ehca_mrmw_map_hrc_query_mr(const u64 hipz_rc); - -int ehca_mrmw_map_hrc_free_mr(const u64 hipz_rc); - -int ehca_mrmw_map_hrc_free_mw(const u64 hipz_rc); - -int ehca_mrmw_map_hrc_reg_smr(const u64 hipz_rc); - void ehca_mr_deletenew(struct ehca_mr *mr); #endif /*_EHCA_MRMW_H_*/ diff --git a/drivers/infiniband/hw/ehca/ehca_qes.h b/drivers/infiniband/hw/ehca/ehca_qes.h index 8707d297ce4c..818803057ebf 100644 --- a/drivers/infiniband/hw/ehca/ehca_qes.h +++ b/drivers/infiniband/hw/ehca/ehca_qes.h @@ -53,13 +53,13 @@ struct ehca_vsgentry { u32 length; }; -#define GRH_FLAG_MASK EHCA_BMASK_IBM(7,7) -#define GRH_IPVERSION_MASK EHCA_BMASK_IBM(0,3) -#define GRH_TCLASS_MASK EHCA_BMASK_IBM(4,12) -#define GRH_FLOWLABEL_MASK EHCA_BMASK_IBM(13,31) -#define GRH_PAYLEN_MASK EHCA_BMASK_IBM(32,47) -#define GRH_NEXTHEADER_MASK EHCA_BMASK_IBM(48,55) -#define GRH_HOPLIMIT_MASK EHCA_BMASK_IBM(56,63) +#define GRH_FLAG_MASK EHCA_BMASK_IBM( 7, 7) +#define GRH_IPVERSION_MASK EHCA_BMASK_IBM( 0, 3) +#define GRH_TCLASS_MASK EHCA_BMASK_IBM( 4, 12) +#define GRH_FLOWLABEL_MASK EHCA_BMASK_IBM(13, 31) +#define GRH_PAYLEN_MASK EHCA_BMASK_IBM(32, 47) +#define GRH_NEXTHEADER_MASK EHCA_BMASK_IBM(48, 55) +#define GRH_HOPLIMIT_MASK EHCA_BMASK_IBM(56, 63) /* * Unreliable Datagram Address Vector Format @@ -206,10 +206,10 @@ struct ehca_wqe { }; -#define WC_SEND_RECEIVE EHCA_BMASK_IBM(0,0) -#define WC_IMM_DATA EHCA_BMASK_IBM(1,1) -#define WC_GRH_PRESENT EHCA_BMASK_IBM(2,2) -#define WC_SE_BIT EHCA_BMASK_IBM(3,3) +#define WC_SEND_RECEIVE EHCA_BMASK_IBM(0, 0) +#define WC_IMM_DATA EHCA_BMASK_IBM(1, 1) +#define WC_GRH_PRESENT EHCA_BMASK_IBM(2, 2) +#define WC_SE_BIT EHCA_BMASK_IBM(3, 3) #define WC_STATUS_ERROR_BIT 0x80000000 #define WC_STATUS_REMOTE_ERROR_FLAGS 0x0000F800 #define WC_STATUS_PURGE_BIT 0x10 diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c index 74671250303f..48e9ceacd6fa 100644 --- a/drivers/infiniband/hw/ehca/ehca_qp.c +++ b/drivers/infiniband/hw/ehca/ehca_qp.c @@ -602,10 +602,10 @@ struct ehca_qp *internal_create_qp(struct ib_pd *pd, /* UD circumvention */ parms.act_nr_send_sges -= 2; parms.act_nr_recv_sges -= 2; - swqe_size = offsetof(struct ehca_wqe, - u.ud_av.sg_list[parms.act_nr_send_sges]); - rwqe_size = offsetof(struct ehca_wqe, - u.ud_av.sg_list[parms.act_nr_recv_sges]); + swqe_size = offsetof(struct ehca_wqe, u.ud_av.sg_list[ + parms.act_nr_send_sges]); + rwqe_size = offsetof(struct ehca_wqe, u.ud_av.sg_list[ + parms.act_nr_recv_sges]); } if (IB_QPT_GSI == qp_type || IB_QPT_SMI == qp_type) { @@ -690,8 +690,8 @@ struct ehca_qp *internal_create_qp(struct ib_pd *pd, if (my_qp->send_cq) { ret = ehca_cq_assign_qp(my_qp->send_cq, my_qp); if (ret) { - ehca_err(pd->device, "Couldn't assign qp to send_cq ret=%x", - ret); + ehca_err(pd->device, + "Couldn't assign qp to send_cq ret=%x", ret); goto create_qp_exit4; } } @@ -749,7 +749,7 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd, struct ehca_qp *ret; ret = internal_create_qp(pd, qp_init_attr, NULL, udata, 0); - return IS_ERR(ret) ? (struct ib_qp *) ret : &ret->ib_qp; + return IS_ERR(ret) ? (struct ib_qp *)ret : &ret->ib_qp; } int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp, @@ -780,7 +780,7 @@ struct ib_srq *ehca_create_srq(struct ib_pd *pd, my_qp = internal_create_qp(pd, &qp_init_attr, srq_init_attr, udata, 1); if (IS_ERR(my_qp)) - return (struct ib_srq *) my_qp; + return (struct ib_srq *)my_qp; /* copy back return values */ srq_init_attr->attr.max_wr = qp_init_attr.cap.max_recv_wr; @@ -875,7 +875,7 @@ static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca, my_qp, qp_num, h_ret); return ehca2ib_return_code(h_ret); } - bad_send_wqe_p = (void*)((u64)bad_send_wqe_p & (~(1L<<63))); + bad_send_wqe_p = (void *)((u64)bad_send_wqe_p & (~(1L << 63))); ehca_dbg(&shca->ib_device, "qp_num=%x bad_send_wqe_p=%p", qp_num, bad_send_wqe_p); /* convert wqe pointer to vadr */ @@ -890,7 +890,7 @@ static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca, } /* loop sets wqe's purge bit */ - wqe = (struct ehca_wqe*)ipz_qeit_calc(squeue, q_ofs); + wqe = (struct ehca_wqe *)ipz_qeit_calc(squeue, q_ofs); *bad_wqe_cnt = 0; while (wqe->optype != 0xff && wqe->wqef != 0xff) { if (ehca_debug_level) @@ -898,7 +898,7 @@ static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca, wqe->nr_of_data_seg = 0; /* suppress data access */ wqe->wqef = WQEF_PURGE; /* WQE to be purged */ q_ofs = ipz_queue_advance_offset(squeue, q_ofs); - wqe = (struct ehca_wqe*)ipz_qeit_calc(squeue, q_ofs); + wqe = (struct ehca_wqe *)ipz_qeit_calc(squeue, q_ofs); *bad_wqe_cnt = (*bad_wqe_cnt)+1; } /* @@ -1003,7 +1003,7 @@ static int internal_modify_qp(struct ib_qp *ibqp, goto modify_qp_exit1; } - ehca_dbg(ibqp->device,"ehca_qp=%p qp_num=%x current qp_state=%x " + ehca_dbg(ibqp->device, "ehca_qp=%p qp_num=%x current qp_state=%x " "new qp_state=%x attribute_mask=%x", my_qp, ibqp->qp_num, qp_cur_state, attr->qp_state, attr_mask); @@ -1019,7 +1019,8 @@ static int internal_modify_qp(struct ib_qp *ibqp, goto modify_qp_exit1; } - if ((mqpcb->qp_state = ib2ehca_qp_state(qp_new_state))) + mqpcb->qp_state = ib2ehca_qp_state(qp_new_state); + if (mqpcb->qp_state) update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1); else { ret = -EINVAL; @@ -1077,7 +1078,7 @@ static int internal_modify_qp(struct ib_qp *ibqp, spin_lock_irqsave(&my_qp->spinlock_s, flags); squeue_locked = 1; /* mark next free wqe */ - wqe = (struct ehca_wqe*) + wqe = (struct ehca_wqe *) ipz_qeit_get(&my_qp->ipz_squeue); wqe->optype = wqe->wqef = 0xff; ehca_dbg(ibqp->device, "qp_num=%x next_free_wqe=%p", @@ -1312,7 +1313,7 @@ static int internal_modify_qp(struct ib_qp *ibqp, if (h_ret != H_SUCCESS) { ret = ehca2ib_return_code(h_ret); ehca_err(ibqp->device, "hipz_h_modify_qp() failed rc=%lx " - "ehca_qp=%p qp_num=%x",h_ret, my_qp, ibqp->qp_num); + "ehca_qp=%p qp_num=%x", h_ret, my_qp, ibqp->qp_num); goto modify_qp_exit2; } @@ -1411,7 +1412,7 @@ int ehca_query_qp(struct ib_qp *qp, } if (qp_attr_mask & QP_ATTR_QUERY_NOT_SUPPORTED) { - ehca_err(qp->device,"Invalid attribute mask " + ehca_err(qp->device, "Invalid attribute mask " "ehca_qp=%p qp_num=%x qp_attr_mask=%x ", my_qp, qp->qp_num, qp_attr_mask); return -EINVAL; @@ -1419,7 +1420,7 @@ int ehca_query_qp(struct ib_qp *qp, qpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL); if (!qpcb) { - ehca_err(qp->device,"Out of memory for qpcb " + ehca_err(qp->device, "Out of memory for qpcb " "ehca_qp=%p qp_num=%x", my_qp, qp->qp_num); return -ENOMEM; } @@ -1431,7 +1432,7 @@ int ehca_query_qp(struct ib_qp *qp, if (h_ret != H_SUCCESS) { ret = ehca2ib_return_code(h_ret); - ehca_err(qp->device,"hipz_h_query_qp() failed " + ehca_err(qp->device, "hipz_h_query_qp() failed " "ehca_qp=%p qp_num=%x h_ret=%lx", my_qp, qp->qp_num, h_ret); goto query_qp_exit1; @@ -1442,7 +1443,7 @@ int ehca_query_qp(struct ib_qp *qp, if (qp_attr->cur_qp_state == -EINVAL) { ret = -EINVAL; - ehca_err(qp->device,"Got invalid ehca_qp_state=%x " + ehca_err(qp->device, "Got invalid ehca_qp_state=%x " "ehca_qp=%p qp_num=%x", qpcb->qp_state, my_qp, qp->qp_num); goto query_qp_exit1; diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c index 61da65e6e5e0..94eed70fedf5 100644 --- a/drivers/infiniband/hw/ehca/ehca_reqs.c +++ b/drivers/infiniband/hw/ehca/ehca_reqs.c @@ -79,7 +79,8 @@ static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue, } if (ehca_debug_level) { - ehca_gen_dbg("RECEIVE WQE written into ipz_rqueue=%p", ipz_rqueue); + ehca_gen_dbg("RECEIVE WQE written into ipz_rqueue=%p", + ipz_rqueue); ehca_dmp( wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "recv wqe"); } @@ -99,7 +100,7 @@ static void trace_send_wr_ud(const struct ib_send_wr *send_wr) struct ib_mad_hdr *mad_hdr = send_wr->wr.ud.mad_hdr; struct ib_sge *sge = send_wr->sg_list; ehca_gen_dbg("send_wr#%x wr_id=%lx num_sge=%x " - "send_flags=%x opcode=%x",idx, send_wr->wr_id, + "send_flags=%x opcode=%x", idx, send_wr->wr_id, send_wr->num_sge, send_wr->send_flags, send_wr->opcode); if (mad_hdr) { @@ -116,7 +117,7 @@ static void trace_send_wr_ud(const struct ib_send_wr *send_wr) mad_hdr->attr_mod); } for (j = 0; j < send_wr->num_sge; j++) { - u8 *data = (u8 *) abs_to_virt(sge->addr); + u8 *data = (u8 *)abs_to_virt(sge->addr); ehca_gen_dbg("send_wr#%x sge#%x addr=%p length=%x " "lkey=%x", idx, j, data, sge->length, sge->lkey); @@ -534,9 +535,11 @@ poll_cq_one_read_cqe: cqe_count++; if (unlikely(cqe->status & WC_STATUS_PURGE_BIT)) { - struct ehca_qp *qp=ehca_cq_get_qp(my_cq, cqe->local_qp_number); + struct ehca_qp *qp; int purgeflag; unsigned long flags; + + qp = ehca_cq_get_qp(my_cq, cqe->local_qp_number); if (!qp) { ehca_err(cq->device, "cq_num=%x qp_num=%x " "could not find qp -> ignore cqe", @@ -551,8 +554,8 @@ poll_cq_one_read_cqe: spin_unlock_irqrestore(&qp->spinlock_s, flags); if (purgeflag) { - ehca_dbg(cq->device, "Got CQE with purged bit qp_num=%x " - "src_qp=%x", + ehca_dbg(cq->device, + "Got CQE with purged bit qp_num=%x src_qp=%x", cqe->local_qp_number, cqe->remote_qp_number); if (ehca_debug_level) ehca_dmp(cqe, 64, "qp_num=%x src_qp=%x", diff --git a/drivers/infiniband/hw/ehca/ehca_tools.h b/drivers/infiniband/hw/ehca/ehca_tools.h index 03b185f873da..678b81391861 100644 --- a/drivers/infiniband/hw/ehca/ehca_tools.h +++ b/drivers/infiniband/hw/ehca/ehca_tools.h @@ -93,14 +93,14 @@ extern int ehca_debug_level; #define ehca_gen_dbg(format, arg...) \ do { \ if (unlikely(ehca_debug_level)) \ - printk(KERN_DEBUG "PU%04x EHCA_DBG:%s " format "\n",\ + printk(KERN_DEBUG "PU%04x EHCA_DBG:%s " format "\n", \ get_paca()->paca_index, __FUNCTION__, ## arg); \ } while (0) #define ehca_gen_warn(format, arg...) \ do { \ if (unlikely(ehca_debug_level)) \ - printk(KERN_INFO "PU%04x EHCA_WARN:%s " format "\n",\ + printk(KERN_INFO "PU%04x EHCA_WARN:%s " format "\n", \ get_paca()->paca_index, __FUNCTION__, ## arg); \ } while (0) @@ -114,12 +114,12 @@ extern int ehca_debug_level; * <format string> adr=X ofs=Y <8 bytes hex> <8 bytes hex> */ #define ehca_dmp(adr, len, format, args...) \ - do { \ - unsigned int x; \ + do { \ + unsigned int x; \ unsigned int l = (unsigned int)(len); \ - unsigned char *deb = (unsigned char*)(adr); \ + unsigned char *deb = (unsigned char *)(adr); \ for (x = 0; x < l; x += 16) { \ - printk("EHCA_DMP:%s " format \ + printk(KERN_INFO "EHCA_DMP:%s " format \ " adr=%p ofs=%04x %016lx %016lx\n", \ __FUNCTION__, ##args, deb, x, \ *((u64 *)&deb[0]), *((u64 *)&deb[8])); \ @@ -128,16 +128,16 @@ extern int ehca_debug_level; } while (0) /* define a bitmask, little endian version */ -#define EHCA_BMASK(pos,length) (((pos)<<16)+(length)) +#define EHCA_BMASK(pos, length) (((pos) << 16) + (length)) /* define a bitmask, the ibm way... */ -#define EHCA_BMASK_IBM(from,to) (((63-to)<<16)+((to)-(from)+1)) +#define EHCA_BMASK_IBM(from, to) (((63 - to) << 16) + ((to) - (from) + 1)) /* internal function, don't use */ -#define EHCA_BMASK_SHIFTPOS(mask) (((mask)>>16)&0xffff) +#define EHCA_BMASK_SHIFTPOS(mask) (((mask) >> 16) & 0xffff) /* internal function, don't use */ -#define EHCA_BMASK_MASK(mask) (0xffffffffffffffffULL >> ((64-(mask))&0xffff)) +#define EHCA_BMASK_MASK(mask) (~0ULL >> ((64 - (mask)) & 0xffff)) /** * EHCA_BMASK_SET - return value shifted and masked by mask @@ -145,14 +145,14 @@ extern int ehca_debug_level; * variable&=~EHCA_BMASK_SET(MY_MASK,-1) clears the bits from the mask * in variable */ -#define EHCA_BMASK_SET(mask,value) \ - ((EHCA_BMASK_MASK(mask) & ((u64)(value)))<<EHCA_BMASK_SHIFTPOS(mask)) +#define EHCA_BMASK_SET(mask, value) \ + ((EHCA_BMASK_MASK(mask) & ((u64)(value))) << EHCA_BMASK_SHIFTPOS(mask)) /** * EHCA_BMASK_GET - extract a parameter from value by mask */ -#define EHCA_BMASK_GET(mask,value) \ - (EHCA_BMASK_MASK(mask)& (((u64)(value))>>EHCA_BMASK_SHIFTPOS(mask))) +#define EHCA_BMASK_GET(mask, value) \ + (EHCA_BMASK_MASK(mask) & (((u64)(value)) >> EHCA_BMASK_SHIFTPOS(mask))) /* Converts ehca to ib return code */ @@ -161,8 +161,11 @@ static inline int ehca2ib_return_code(u64 ehca_rc) switch (ehca_rc) { case H_SUCCESS: return 0; + case H_RESOURCE: /* Resource in use */ case H_BUSY: return -EBUSY; + case H_NOT_ENOUGH_RESOURCES: /* insufficient resources */ + case H_CONSTRAINED: /* resource constraint */ case H_NO_MEM: return -ENOMEM; default: diff --git a/drivers/infiniband/hw/ehca/ehca_uverbs.c b/drivers/infiniband/hw/ehca/ehca_uverbs.c index 3031b3bb56f9..05c415744e3b 100644 --- a/drivers/infiniband/hw/ehca/ehca_uverbs.c +++ b/drivers/infiniband/hw/ehca/ehca_uverbs.c @@ -70,7 +70,7 @@ int ehca_dealloc_ucontext(struct ib_ucontext *context) static void ehca_mm_open(struct vm_area_struct *vma) { - u32 *count = (u32*)vma->vm_private_data; + u32 *count = (u32 *)vma->vm_private_data; if (!count) { ehca_gen_err("Invalid vma struct vm_start=%lx vm_end=%lx", vma->vm_start, vma->vm_end); @@ -86,7 +86,7 @@ static void ehca_mm_open(struct vm_area_struct *vma) static void ehca_mm_close(struct vm_area_struct *vma) { - u32 *count = (u32*)vma->vm_private_data; + u32 *count = (u32 *)vma->vm_private_data; if (!count) { ehca_gen_err("Invalid vma struct vm_start=%lx vm_end=%lx", vma->vm_start, vma->vm_end); @@ -215,7 +215,8 @@ static int ehca_mmap_qp(struct vm_area_struct *vma, struct ehca_qp *qp, case 2: /* qp rqueue_addr */ ehca_dbg(qp->ib_qp.device, "qp_num=%x rqueue", qp->ib_qp.qp_num); - ret = ehca_mmap_queue(vma, &qp->ipz_rqueue, &qp->mm_count_rqueue); + ret = ehca_mmap_queue(vma, &qp->ipz_rqueue, + &qp->mm_count_rqueue); if (unlikely(ret)) { ehca_err(qp->ib_qp.device, "ehca_mmap_queue(rq) failed rc=%x qp_num=%x", @@ -227,7 +228,8 @@ static int ehca_mmap_qp(struct vm_area_struct *vma, struct ehca_qp *qp, case 3: /* qp squeue_addr */ ehca_dbg(qp->ib_qp.device, "qp_num=%x squeue", qp->ib_qp.qp_num); - ret = ehca_mmap_queue(vma, &qp->ipz_squeue, &qp->mm_count_squeue); + ret = ehca_mmap_queue(vma, &qp->ipz_squeue, + &qp->mm_count_squeue); if (unlikely(ret)) { ehca_err(qp->ib_qp.device, "ehca_mmap_queue(sq) failed rc=%x qp_num=%x", diff --git a/drivers/infiniband/hw/ehca/hcp_if.c b/drivers/infiniband/hw/ehca/hcp_if.c index 4776a8b0feec..3394e05f4b4f 100644 --- a/drivers/infiniband/hw/ehca/hcp_if.c +++ b/drivers/infiniband/hw/ehca/hcp_if.c @@ -501,8 +501,8 @@ u64 hipz_h_register_rpage_qp(const struct ipz_adapter_handle adapter_handle, return H_PARAMETER; } - return hipz_h_register_rpage(adapter_handle,pagesize,queue_type, - qp_handle.handle,logical_address_of_page, + return hipz_h_register_rpage(adapter_handle, pagesize, queue_type, + qp_handle.handle, logical_address_of_page, count); } @@ -522,9 +522,9 @@ u64 hipz_h_disable_and_get_wqe(const struct ipz_adapter_handle adapter_handle, qp_handle.handle, /* r6 */ 0, 0, 0, 0, 0, 0); if (log_addr_next_sq_wqe2processed) - *log_addr_next_sq_wqe2processed = (void*)outs[0]; + *log_addr_next_sq_wqe2processed = (void *)outs[0]; if (log_addr_next_rq_wqe2processed) - *log_addr_next_rq_wqe2processed = (void*)outs[1]; + *log_addr_next_rq_wqe2processed = (void *)outs[1]; return ret; } diff --git a/drivers/infiniband/hw/ehca/hcp_phyp.c b/drivers/infiniband/hw/ehca/hcp_phyp.c index 0b1a4772c78a..214821095cb1 100644 --- a/drivers/infiniband/hw/ehca/hcp_phyp.c +++ b/drivers/infiniband/hw/ehca/hcp_phyp.c @@ -50,7 +50,7 @@ int hcall_map_page(u64 physaddr, u64 *mapaddr) int hcall_unmap_page(u64 mapaddr) { - iounmap((volatile void __iomem*)mapaddr); + iounmap((volatile void __iomem *) mapaddr); return 0; } diff --git a/drivers/infiniband/hw/ehca/hipz_fns_core.h b/drivers/infiniband/hw/ehca/hipz_fns_core.h index 20898a153446..868735fd3187 100644 --- a/drivers/infiniband/hw/ehca/hipz_fns_core.h +++ b/drivers/infiniband/hw/ehca/hipz_fns_core.h @@ -53,10 +53,10 @@ #define hipz_galpa_load_cq(gal, offset) \ hipz_galpa_load(gal, CQTEMM_OFFSET(offset)) -#define hipz_galpa_store_qp(gal,offset, value) \ +#define hipz_galpa_store_qp(gal, offset, value) \ hipz_galpa_store(gal, QPTEMM_OFFSET(offset), value) #define hipz_galpa_load_qp(gal, offset) \ - hipz_galpa_load(gal,QPTEMM_OFFSET(offset)) + hipz_galpa_load(gal, QPTEMM_OFFSET(offset)) static inline void hipz_update_sqa(struct ehca_qp *qp, u16 nr_wqes) { diff --git a/drivers/infiniband/hw/ehca/hipz_hw.h b/drivers/infiniband/hw/ehca/hipz_hw.h index dad6dea5636b..d9739e554515 100644 --- a/drivers/infiniband/hw/ehca/hipz_hw.h +++ b/drivers/infiniband/hw/ehca/hipz_hw.h @@ -161,11 +161,11 @@ struct hipz_qptemm { /* 0x1000 */ }; -#define QPX_SQADDER EHCA_BMASK_IBM(48,63) -#define QPX_RQADDER EHCA_BMASK_IBM(48,63) -#define QPX_AAELOG_RESET_SRQ_LIMIT EHCA_BMASK_IBM(3,3) +#define QPX_SQADDER EHCA_BMASK_IBM(48, 63) +#define QPX_RQADDER EHCA_BMASK_IBM(48, 63) +#define QPX_AAELOG_RESET_SRQ_LIMIT EHCA_BMASK_IBM(3, 3) -#define QPTEMM_OFFSET(x) offsetof(struct hipz_qptemm,x) +#define QPTEMM_OFFSET(x) offsetof(struct hipz_qptemm, x) /* MRMWPT Entry Memory Map */ struct hipz_mrmwmm { @@ -187,7 +187,7 @@ struct hipz_mrmwmm { }; -#define MRMWMM_OFFSET(x) offsetof(struct hipz_mrmwmm,x) +#define MRMWMM_OFFSET(x) offsetof(struct hipz_mrmwmm, x) struct hipz_qpedmm { /* 0x00 */ @@ -238,7 +238,7 @@ struct hipz_qpedmm { u64 qpedx_rrva3; }; -#define QPEDMM_OFFSET(x) offsetof(struct hipz_qpedmm,x) +#define QPEDMM_OFFSET(x) offsetof(struct hipz_qpedmm, x) /* CQ Table Entry Memory Map */ struct hipz_cqtemm { @@ -263,12 +263,12 @@ struct hipz_cqtemm { /* 0x1000 */ }; -#define CQX_FEC_CQE_CNT EHCA_BMASK_IBM(32,63) -#define CQX_FECADDER EHCA_BMASK_IBM(32,63) -#define CQX_N0_GENERATE_SOLICITED_COMP_EVENT EHCA_BMASK_IBM(0,0) -#define CQX_N1_GENERATE_COMP_EVENT EHCA_BMASK_IBM(0,0) +#define CQX_FEC_CQE_CNT EHCA_BMASK_IBM(32, 63) +#define CQX_FECADDER EHCA_BMASK_IBM(32, 63) +#define CQX_N0_GENERATE_SOLICITED_COMP_EVENT EHCA_BMASK_IBM(0, 0) +#define CQX_N1_GENERATE_COMP_EVENT EHCA_BMASK_IBM(0, 0) -#define CQTEMM_OFFSET(x) offsetof(struct hipz_cqtemm,x) +#define CQTEMM_OFFSET(x) offsetof(struct hipz_cqtemm, x) /* EQ Table Entry Memory Map */ struct hipz_eqtemm { @@ -293,7 +293,7 @@ struct hipz_eqtemm { }; -#define EQTEMM_OFFSET(x) offsetof(struct hipz_eqtemm,x) +#define EQTEMM_OFFSET(x) offsetof(struct hipz_eqtemm, x) /* access control defines for MR/MW */ #define HIPZ_ACCESSCTRL_L_WRITE 0x00800000 diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.c b/drivers/infiniband/hw/ehca/ipz_pt_fn.c index bf7a40088f61..9606f13ed092 100644 --- a/drivers/infiniband/hw/ehca/ipz_pt_fn.c +++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.c @@ -114,7 +114,7 @@ int ipz_queue_ctor(struct ipz_queue *queue, */ f = 0; while (f < nr_of_pages) { - u8 *kpage = (u8*)get_zeroed_page(GFP_KERNEL); + u8 *kpage = (u8 *)get_zeroed_page(GFP_KERNEL); int k; if (!kpage) goto ipz_queue_ctor_exit0; /*NOMEM*/ diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.h b/drivers/infiniband/hw/ehca/ipz_pt_fn.h index 007f0882fd40..39a4f64aff41 100644 --- a/drivers/infiniband/hw/ehca/ipz_pt_fn.h +++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.h @@ -240,7 +240,7 @@ void *ipz_qeit_eq_get_inc(struct ipz_queue *queue); static inline void *ipz_eqit_eq_get_inc_valid(struct ipz_queue *queue) { void *ret = ipz_qeit_get(queue); - u32 qe = *(u8 *) ret; + u32 qe = *(u8 *)ret; if ((qe >> 7) != (queue->toggle_state & 1)) return NULL; ipz_qeit_eq_get_inc(queue); /* this is a good one */ @@ -250,7 +250,7 @@ static inline void *ipz_eqit_eq_get_inc_valid(struct ipz_queue *queue) static inline void *ipz_eqit_eq_peek_valid(struct ipz_queue *queue) { void *ret = ipz_qeit_get(queue); - u32 qe = *(u8 *) ret; + u32 qe = *(u8 *)ret; if ((qe >> 7) != (queue->toggle_state & 1)) return NULL; return ret; diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c index 9361f5ab8bd6..09c5fd84b1e3 100644 --- a/drivers/infiniband/hw/ipath/ipath_driver.c +++ b/drivers/infiniband/hw/ipath/ipath_driver.c @@ -1889,7 +1889,7 @@ void ipath_write_kreg_port(const struct ipath_devdata *dd, ipath_kreg regno, /* Below is "non-zero" to force override, but both actual LEDs are off */ #define LED_OVER_BOTH_OFF (8) -void ipath_run_led_override(unsigned long opaque) +static void ipath_run_led_override(unsigned long opaque) { struct ipath_devdata *dd = (struct ipath_devdata *)opaque; int timeoff; diff --git a/drivers/infiniband/hw/ipath/ipath_eeprom.c b/drivers/infiniband/hw/ipath/ipath_eeprom.c index 6b9147964a4f..b4503e9c1e95 100644 --- a/drivers/infiniband/hw/ipath/ipath_eeprom.c +++ b/drivers/infiniband/hw/ipath/ipath_eeprom.c @@ -426,8 +426,8 @@ bail: * @buffer: data to write * @len: number of bytes to write */ -int ipath_eeprom_internal_write(struct ipath_devdata *dd, u8 eeprom_offset, - const void *buffer, int len) +static int ipath_eeprom_internal_write(struct ipath_devdata *dd, u8 eeprom_offset, + const void *buffer, int len) { u8 single_byte; int sub_len; diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c index 47aa43428fbf..1fd91c59f246 100644 --- a/drivers/infiniband/hw/ipath/ipath_intr.c +++ b/drivers/infiniband/hw/ipath/ipath_intr.c @@ -70,7 +70,7 @@ static void ipath_clrpiobuf(struct ipath_devdata *dd, u32 pnum) * If rewrite is true, and bits are set in the sendbufferror registers, * we'll write to the buffer, for error recovery on parity errors. */ -void ipath_disarm_senderrbufs(struct ipath_devdata *dd, int rewrite) +static void ipath_disarm_senderrbufs(struct ipath_devdata *dd, int rewrite) { u32 piobcnt; unsigned long sbuf[4]; diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h index 3105005fc9d2..ace63ef78e6f 100644 --- a/drivers/infiniband/hw/ipath/ipath_kernel.h +++ b/drivers/infiniband/hw/ipath/ipath_kernel.h @@ -776,7 +776,6 @@ void ipath_get_eeprom_info(struct ipath_devdata *); int ipath_update_eeprom_log(struct ipath_devdata *dd); void ipath_inc_eeprom_err(struct ipath_devdata *dd, u32 eidx, u32 incr); u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg); -void ipath_disarm_senderrbufs(struct ipath_devdata *, int); /* * Set LED override, only the two LSBs have "public" meaning, but @@ -820,7 +819,6 @@ static inline u64 ipath_mdio_req(int cmd, int dev, int reg, int data) #define IPATH_MDIO_CTRL_8355_REG_10 0x1D int ipath_get_user_pages(unsigned long, size_t, struct page **); -int ipath_get_user_pages_nocopy(unsigned long, struct page **); void ipath_release_user_pages(struct page **, size_t); void ipath_release_user_pages_on_close(struct page **, size_t); int ipath_eeprom_read(struct ipath_devdata *, u8, void *, int); diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c index 85256747d8a1..c69c25239443 100644 --- a/drivers/infiniband/hw/ipath/ipath_ruc.c +++ b/drivers/infiniband/hw/ipath/ipath_ruc.c @@ -507,7 +507,7 @@ static int want_buffer(struct ipath_devdata *dd) * * Called when we run out of PIO buffers. */ -void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev) +static void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev) { unsigned long flags; diff --git a/drivers/infiniband/hw/ipath/ipath_user_pages.c b/drivers/infiniband/hw/ipath/ipath_user_pages.c index 27034d38b3dd..0190edc8044e 100644 --- a/drivers/infiniband/hw/ipath/ipath_user_pages.c +++ b/drivers/infiniband/hw/ipath/ipath_user_pages.c @@ -171,32 +171,6 @@ int ipath_get_user_pages(unsigned long start_page, size_t num_pages, return ret; } -/** - * ipath_get_user_pages_nocopy - lock a single page for I/O and mark shared - * @start_page: the page to lock - * @p: the output page structure - * - * This is similar to ipath_get_user_pages, but it's always one page, and we - * mark the page as locked for I/O, and shared. This is used for the user - * process page that contains the destination address for the rcvhdrq tail - * update, so we need to have the vma. If we don't do this, the page can be - * taken away from us on fork, even if the child never touches it, and then - * the user process never sees the tail register updates. - */ -int ipath_get_user_pages_nocopy(unsigned long page, struct page **p) -{ - struct vm_area_struct *vma; - int ret; - - down_write(¤t->mm->mmap_sem); - - ret = __get_user_pages(page, 1, p, &vma); - - up_write(¤t->mm->mmap_sem); - - return ret; -} - void ipath_release_user_pages(struct page **p, size_t num_pages) { down_write(¤t->mm->mmap_sem); diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c index 65f7181e9cf8..16aa61fd8085 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs.c @@ -488,7 +488,7 @@ bail:; * This is called from ipath_do_rcv_timer() at interrupt level to check for * QPs which need retransmits and to collect performance numbers. */ -void ipath_ib_timer(struct ipath_ibdev *dev) +static void ipath_ib_timer(struct ipath_ibdev *dev) { struct ipath_qp *resend = NULL; struct list_head *last; diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h index f3d1f2cee6f8..9bbe81967f14 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.h +++ b/drivers/infiniband/hw/ipath/ipath_verbs.h @@ -782,8 +782,6 @@ void ipath_update_mmap_info(struct ipath_ibdev *dev, int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); -void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev); - void ipath_insert_rnr_queue(struct ipath_qp *qp); int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only); @@ -807,8 +805,6 @@ void ipath_ib_rcv(struct ipath_ibdev *, void *, void *, u32); int ipath_ib_piobufavail(struct ipath_ibdev *); -void ipath_ib_timer(struct ipath_ibdev *); - unsigned ipath_get_npkeys(struct ipath_devdata *); u32 ipath_get_cr_errpkey(struct ipath_devdata *); diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 40042184ad58..b5a24fbef70d 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1183,6 +1183,43 @@ static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq, struct ib_cq *ib_cq return cur + nreq >= wq->max_post; } +static __always_inline void set_raddr_seg(struct mlx4_wqe_raddr_seg *rseg, + u64 remote_addr, u32 rkey) +{ + rseg->raddr = cpu_to_be64(remote_addr); + rseg->rkey = cpu_to_be32(rkey); + rseg->reserved = 0; +} + +static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, struct ib_send_wr *wr) +{ + if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) { + aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap); + aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add); + } else { + aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add); + aseg->compare = 0; + } + +} + +static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg, + struct ib_send_wr *wr) +{ + memcpy(dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof (struct mlx4_av)); + dseg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn); + dseg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey); + +} + +static void set_data_seg(struct mlx4_wqe_data_seg *dseg, + struct ib_sge *sg) +{ + dseg->byte_count = cpu_to_be32(sg->length); + dseg->lkey = cpu_to_be32(sg->lkey); + dseg->addr = cpu_to_be64(sg->addr); +} + int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr) { @@ -1238,26 +1275,13 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, switch (wr->opcode) { case IB_WR_ATOMIC_CMP_AND_SWP: case IB_WR_ATOMIC_FETCH_AND_ADD: - ((struct mlx4_wqe_raddr_seg *) wqe)->raddr = - cpu_to_be64(wr->wr.atomic.remote_addr); - ((struct mlx4_wqe_raddr_seg *) wqe)->rkey = - cpu_to_be32(wr->wr.atomic.rkey); - ((struct mlx4_wqe_raddr_seg *) wqe)->reserved = 0; - + set_raddr_seg(wqe, wr->wr.atomic.remote_addr, + wr->wr.atomic.rkey); wqe += sizeof (struct mlx4_wqe_raddr_seg); - if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) { - ((struct mlx4_wqe_atomic_seg *) wqe)->swap_add = - cpu_to_be64(wr->wr.atomic.swap); - ((struct mlx4_wqe_atomic_seg *) wqe)->compare = - cpu_to_be64(wr->wr.atomic.compare_add); - } else { - ((struct mlx4_wqe_atomic_seg *) wqe)->swap_add = - cpu_to_be64(wr->wr.atomic.compare_add); - ((struct mlx4_wqe_atomic_seg *) wqe)->compare = 0; - } - + set_atomic_seg(wqe, wr); wqe += sizeof (struct mlx4_wqe_atomic_seg); + size += (sizeof (struct mlx4_wqe_raddr_seg) + sizeof (struct mlx4_wqe_atomic_seg)) / 16; @@ -1266,15 +1290,10 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, case IB_WR_RDMA_READ: case IB_WR_RDMA_WRITE: case IB_WR_RDMA_WRITE_WITH_IMM: - ((struct mlx4_wqe_raddr_seg *) wqe)->raddr = - cpu_to_be64(wr->wr.rdma.remote_addr); - ((struct mlx4_wqe_raddr_seg *) wqe)->rkey = - cpu_to_be32(wr->wr.rdma.rkey); - ((struct mlx4_wqe_raddr_seg *) wqe)->reserved = 0; - + set_raddr_seg(wqe, wr->wr.rdma.remote_addr, + wr->wr.rdma.rkey); wqe += sizeof (struct mlx4_wqe_raddr_seg); size += sizeof (struct mlx4_wqe_raddr_seg) / 16; - break; default: @@ -1284,13 +1303,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, break; case IB_QPT_UD: - memcpy(((struct mlx4_wqe_datagram_seg *) wqe)->av, - &to_mah(wr->wr.ud.ah)->av, sizeof (struct mlx4_av)); - ((struct mlx4_wqe_datagram_seg *) wqe)->dqpn = - cpu_to_be32(wr->wr.ud.remote_qpn); - ((struct mlx4_wqe_datagram_seg *) wqe)->qkey = - cpu_to_be32(wr->wr.ud.remote_qkey); - + set_datagram_seg(wqe, wr); wqe += sizeof (struct mlx4_wqe_datagram_seg); size += sizeof (struct mlx4_wqe_datagram_seg) / 16; break; @@ -1313,12 +1326,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, } for (i = 0; i < wr->num_sge; ++i) { - ((struct mlx4_wqe_data_seg *) wqe)->byte_count = - cpu_to_be32(wr->sg_list[i].length); - ((struct mlx4_wqe_data_seg *) wqe)->lkey = - cpu_to_be32(wr->sg_list[i].lkey); - ((struct mlx4_wqe_data_seg *) wqe)->addr = - cpu_to_be64(wr->sg_list[i].addr); + set_data_seg(wqe, wr->sg_list + i); wqe += sizeof (struct mlx4_wqe_data_seg); size += sizeof (struct mlx4_wqe_data_seg) / 16; @@ -1498,7 +1506,7 @@ static int to_ib_qp_access_flags(int mlx4_flags) static void to_ib_ah_attr(struct mlx4_dev *dev, struct ib_ah_attr *ib_ah_attr, struct mlx4_qp_path *path) { - memset(ib_ah_attr, 0, sizeof *path); + memset(ib_ah_attr, 0, sizeof *ib_ah_attr); ib_ah_attr->port_num = path->sched_queue & 0x40 ? 2 : 1; if (ib_ah_attr->port_num == 0 || ib_ah_attr->port_num > dev->caps.num_ports) @@ -1515,7 +1523,7 @@ static void to_ib_ah_attr(struct mlx4_dev *dev, struct ib_ah_attr *ib_ah_attr, ib_ah_attr->grh.traffic_class = (be32_to_cpu(path->tclass_flowlabel) >> 20) & 0xff; ib_ah_attr->grh.flow_label = - be32_to_cpu(path->tclass_flowlabel) & 0xffffff; + be32_to_cpu(path->tclass_flowlabel) & 0xfffff; memcpy(ib_ah_attr->grh.dgid.raw, path->rgid, sizeof ib_ah_attr->grh.dgid.raw); } @@ -1560,7 +1568,10 @@ int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr } qp_attr->pkey_index = context.pri_path.pkey_index & 0x7f; - qp_attr->port_num = context.pri_path.sched_queue & 0x40 ? 2 : 1; + if (qp_attr->qp_state == IB_QPS_INIT) + qp_attr->port_num = qp->port; + else + qp_attr->port_num = context.pri_path.sched_queue & 0x40 ? 2 : 1; /* qp_attr->en_sqd_async_notify is only applicable in modify qp */ qp_attr->sq_draining = mlx4_state == MLX4_QP_STATE_SQ_DRAINING; @@ -1578,17 +1589,25 @@ int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr done: qp_attr->cur_qp_state = qp_attr->qp_state; + qp_attr->cap.max_recv_wr = qp->rq.wqe_cnt; + qp_attr->cap.max_recv_sge = qp->rq.max_gs; + if (!ibqp->uobject) { - qp_attr->cap.max_send_wr = qp->sq.wqe_cnt; - qp_attr->cap.max_recv_wr = qp->rq.wqe_cnt; - qp_attr->cap.max_send_sge = qp->sq.max_gs; - qp_attr->cap.max_recv_sge = qp->rq.max_gs; - qp_attr->cap.max_inline_data = (1 << qp->sq.wqe_shift) - - send_wqe_overhead(qp->ibqp.qp_type) - - sizeof (struct mlx4_wqe_inline_seg); - qp_init_attr->cap = qp_attr->cap; + qp_attr->cap.max_send_wr = qp->sq.wqe_cnt; + qp_attr->cap.max_send_sge = qp->sq.max_gs; + } else { + qp_attr->cap.max_send_wr = 0; + qp_attr->cap.max_send_sge = 0; } + /* + * We don't support inline sends for kernel QPs (yet), and we + * don't know what userspace's value should be. + */ + qp_attr->cap.max_inline_data = 0; + + qp_init_attr->cap = qp_attr->cap; + return 0; } diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c index aa563e61de65..76fed7545c53 100644 --- a/drivers/infiniband/hw/mthca/mthca_main.c +++ b/drivers/infiniband/hw/mthca/mthca_main.c @@ -67,7 +67,7 @@ MODULE_PARM_DESC(msi_x, "attempt to use MSI-X if nonzero"); static int msi = 0; module_param(msi, int, 0444); -MODULE_PARM_DESC(msi, "attempt to use MSI if nonzero"); +MODULE_PARM_DESC(msi, "attempt to use MSI if nonzero (deprecated, use MSI-X instead)"); #else /* CONFIG_PCI_MSI */ @@ -1117,9 +1117,21 @@ static int __mthca_init_one(struct pci_dev *pdev, int hca_type) if (msi_x && !mthca_enable_msi_x(mdev)) mdev->mthca_flags |= MTHCA_FLAG_MSI_X; - if (msi && !(mdev->mthca_flags & MTHCA_FLAG_MSI_X) && - !pci_enable_msi(pdev)) - mdev->mthca_flags |= MTHCA_FLAG_MSI; + else if (msi) { + static int warned; + + if (!warned) { + printk(KERN_WARNING PFX "WARNING: MSI support will be " + "removed from the ib_mthca driver in January 2008.\n"); + printk(KERN_WARNING " If you are using MSI and cannot " + "switch to MSI-X, please tell " + "<general@lists.openfabrics.org>.\n"); + ++warned; + } + + if (!pci_enable_msi(pdev)) + mdev->mthca_flags |= MTHCA_FLAG_MSI; + } if (mthca_cmd_init(mdev)) { mthca_err(mdev, "Failed to init command interface, aborting.\n"); @@ -1135,7 +1147,7 @@ static int __mthca_init_one(struct pci_dev *pdev, int hca_type) goto err_cmd; if (mdev->fw_ver < mthca_hca_table[hca_type].latest_fw) { - mthca_warn(mdev, "HCA FW version %d.%d.%3d is old (%d.%d.%3d is current).\n", + mthca_warn(mdev, "HCA FW version %d.%d.%03d is old (%d.%d.%03d is current).\n", (int) (mdev->fw_ver >> 32), (int) (mdev->fw_ver >> 16) & 0xffff, (int) (mdev->fw_ver & 0xffff), (int) (mthca_hca_table[hca_type].latest_fw >> 32), diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index 11f1d99db40b..df01b2026a64 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -1578,6 +1578,45 @@ static inline int mthca_wq_overflow(struct mthca_wq *wq, int nreq, return cur + nreq >= wq->max; } +static __always_inline void set_raddr_seg(struct mthca_raddr_seg *rseg, + u64 remote_addr, u32 rkey) +{ + rseg->raddr = cpu_to_be64(remote_addr); + rseg->rkey = cpu_to_be32(rkey); + rseg->reserved = 0; +} + +static __always_inline void set_atomic_seg(struct mthca_atomic_seg *aseg, + struct ib_send_wr *wr) +{ + if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) { + aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap); + aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add); + } else { + aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add); + aseg->compare = 0; + } + +} + +static void set_tavor_ud_seg(struct mthca_tavor_ud_seg *useg, + struct ib_send_wr *wr) +{ + useg->lkey = cpu_to_be32(to_mah(wr->wr.ud.ah)->key); + useg->av_addr = cpu_to_be64(to_mah(wr->wr.ud.ah)->avdma); + useg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn); + useg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey); + +} + +static void set_arbel_ud_seg(struct mthca_arbel_ud_seg *useg, + struct ib_send_wr *wr) +{ + memcpy(useg->av, to_mah(wr->wr.ud.ah)->av, MTHCA_AV_SIZE); + useg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn); + useg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey); +} + int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr) { @@ -1590,8 +1629,15 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, int nreq; int i; int size; - int size0 = 0; - u32 f0 = 0; + /* + * f0 and size0 are only used if nreq != 0, and they will + * always be initialized the first time through the main loop + * before nreq is incremented. So nreq cannot become non-zero + * without initializing f0 and size0, and they are in fact + * never used uninitialized. + */ + int uninitialized_var(size0); + u32 uninitialized_var(f0); int ind; u8 op0 = 0; @@ -1636,25 +1682,11 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, switch (wr->opcode) { case IB_WR_ATOMIC_CMP_AND_SWP: case IB_WR_ATOMIC_FETCH_AND_ADD: - ((struct mthca_raddr_seg *) wqe)->raddr = - cpu_to_be64(wr->wr.atomic.remote_addr); - ((struct mthca_raddr_seg *) wqe)->rkey = - cpu_to_be32(wr->wr.atomic.rkey); - ((struct mthca_raddr_seg *) wqe)->reserved = 0; - + set_raddr_seg(wqe, wr->wr.atomic.remote_addr, + wr->wr.atomic.rkey); wqe += sizeof (struct mthca_raddr_seg); - if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) { - ((struct mthca_atomic_seg *) wqe)->swap_add = - cpu_to_be64(wr->wr.atomic.swap); - ((struct mthca_atomic_seg *) wqe)->compare = - cpu_to_be64(wr->wr.atomic.compare_add); - } else { - ((struct mthca_atomic_seg *) wqe)->swap_add = - cpu_to_be64(wr->wr.atomic.compare_add); - ((struct mthca_atomic_seg *) wqe)->compare = 0; - } - + set_atomic_seg(wqe, wr); wqe += sizeof (struct mthca_atomic_seg); size += (sizeof (struct mthca_raddr_seg) + sizeof (struct mthca_atomic_seg)) / 16; @@ -1663,12 +1695,9 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, case IB_WR_RDMA_WRITE: case IB_WR_RDMA_WRITE_WITH_IMM: case IB_WR_RDMA_READ: - ((struct mthca_raddr_seg *) wqe)->raddr = - cpu_to_be64(wr->wr.rdma.remote_addr); - ((struct mthca_raddr_seg *) wqe)->rkey = - cpu_to_be32(wr->wr.rdma.rkey); - ((struct mthca_raddr_seg *) wqe)->reserved = 0; - wqe += sizeof (struct mthca_raddr_seg); + set_raddr_seg(wqe, wr->wr.rdma.remote_addr, + wr->wr.rdma.rkey); + wqe += sizeof (struct mthca_raddr_seg); size += sizeof (struct mthca_raddr_seg) / 16; break; @@ -1683,12 +1712,9 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, switch (wr->opcode) { case IB_WR_RDMA_WRITE: case IB_WR_RDMA_WRITE_WITH_IMM: - ((struct mthca_raddr_seg *) wqe)->raddr = - cpu_to_be64(wr->wr.rdma.remote_addr); - ((struct mthca_raddr_seg *) wqe)->rkey = - cpu_to_be32(wr->wr.rdma.rkey); - ((struct mthca_raddr_seg *) wqe)->reserved = 0; - wqe += sizeof (struct mthca_raddr_seg); + set_raddr_seg(wqe, wr->wr.rdma.remote_addr, + wr->wr.rdma.rkey); + wqe += sizeof (struct mthca_raddr_seg); size += sizeof (struct mthca_raddr_seg) / 16; break; @@ -1700,16 +1726,8 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, break; case UD: - ((struct mthca_tavor_ud_seg *) wqe)->lkey = - cpu_to_be32(to_mah(wr->wr.ud.ah)->key); - ((struct mthca_tavor_ud_seg *) wqe)->av_addr = - cpu_to_be64(to_mah(wr->wr.ud.ah)->avdma); - ((struct mthca_tavor_ud_seg *) wqe)->dqpn = - cpu_to_be32(wr->wr.ud.remote_qpn); - ((struct mthca_tavor_ud_seg *) wqe)->qkey = - cpu_to_be32(wr->wr.ud.remote_qkey); - - wqe += sizeof (struct mthca_tavor_ud_seg); + set_tavor_ud_seg(wqe, wr); + wqe += sizeof (struct mthca_tavor_ud_seg); size += sizeof (struct mthca_tavor_ud_seg) / 16; break; @@ -1734,13 +1752,8 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, } for (i = 0; i < wr->num_sge; ++i) { - ((struct mthca_data_seg *) wqe)->byte_count = - cpu_to_be32(wr->sg_list[i].length); - ((struct mthca_data_seg *) wqe)->lkey = - cpu_to_be32(wr->sg_list[i].lkey); - ((struct mthca_data_seg *) wqe)->addr = - cpu_to_be64(wr->sg_list[i].addr); - wqe += sizeof (struct mthca_data_seg); + mthca_set_data_seg(wqe, wr->sg_list + i); + wqe += sizeof (struct mthca_data_seg); size += sizeof (struct mthca_data_seg) / 16; } @@ -1768,11 +1781,11 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, mthca_opcode[wr->opcode]); wmb(); ((struct mthca_next_seg *) prev_wqe)->ee_nds = - cpu_to_be32((size0 ? 0 : MTHCA_NEXT_DBD) | size | + cpu_to_be32((nreq ? 0 : MTHCA_NEXT_DBD) | size | ((wr->send_flags & IB_SEND_FENCE) ? MTHCA_NEXT_FENCE : 0)); - if (!size0) { + if (!nreq) { size0 = size; op0 = mthca_opcode[wr->opcode]; f0 = wr->send_flags & IB_SEND_FENCE ? @@ -1822,7 +1835,14 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, int nreq; int i; int size; - int size0 = 0; + /* + * size0 is only used if nreq != 0, and it will always be + * initialized the first time through the main loop before + * nreq is incremented. So nreq cannot become non-zero + * without initializing size0, and it is in fact never used + * uninitialized. + */ + int uninitialized_var(size0); int ind; void *wqe; void *prev_wqe; @@ -1863,13 +1883,8 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, } for (i = 0; i < wr->num_sge; ++i) { - ((struct mthca_data_seg *) wqe)->byte_count = - cpu_to_be32(wr->sg_list[i].length); - ((struct mthca_data_seg *) wqe)->lkey = - cpu_to_be32(wr->sg_list[i].lkey); - ((struct mthca_data_seg *) wqe)->addr = - cpu_to_be64(wr->sg_list[i].addr); - wqe += sizeof (struct mthca_data_seg); + mthca_set_data_seg(wqe, wr->sg_list + i); + wqe += sizeof (struct mthca_data_seg); size += sizeof (struct mthca_data_seg) / 16; } @@ -1881,7 +1896,7 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, ((struct mthca_next_seg *) prev_wqe)->ee_nds = cpu_to_be32(MTHCA_NEXT_DBD | size); - if (!size0) + if (!nreq) size0 = size; ++ind; @@ -1903,7 +1918,6 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, qp->rq.next_ind = ind; qp->rq.head += MTHCA_TAVOR_MAX_WQES_PER_RECV_DB; - size0 = 0; } } @@ -1945,8 +1959,15 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, int nreq; int i; int size; - int size0 = 0; - u32 f0 = 0; + /* + * f0 and size0 are only used if nreq != 0, and they will + * always be initialized the first time through the main loop + * before nreq is incremented. So nreq cannot become non-zero + * without initializing f0 and size0, and they are in fact + * never used uninitialized. + */ + int uninitialized_var(size0); + u32 uninitialized_var(f0); int ind; u8 op0 = 0; @@ -1966,7 +1987,6 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, doorbell[1] = cpu_to_be32((qp->qpn << 8) | size0); qp->sq.head += MTHCA_ARBEL_MAX_WQES_PER_SEND_DB; - size0 = 0; /* * Make sure that descriptors are written before @@ -2017,26 +2037,12 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, switch (wr->opcode) { case IB_WR_ATOMIC_CMP_AND_SWP: case IB_WR_ATOMIC_FETCH_AND_ADD: - ((struct mthca_raddr_seg *) wqe)->raddr = - cpu_to_be64(wr->wr.atomic.remote_addr); - ((struct mthca_raddr_seg *) wqe)->rkey = - cpu_to_be32(wr->wr.atomic.rkey); - ((struct mthca_raddr_seg *) wqe)->reserved = 0; - + set_raddr_seg(wqe, wr->wr.atomic.remote_addr, + wr->wr.atomic.rkey); wqe += sizeof (struct mthca_raddr_seg); - if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) { - ((struct mthca_atomic_seg *) wqe)->swap_add = - cpu_to_be64(wr->wr.atomic.swap); - ((struct mthca_atomic_seg *) wqe)->compare = - cpu_to_be64(wr->wr.atomic.compare_add); - } else { - ((struct mthca_atomic_seg *) wqe)->swap_add = - cpu_to_be64(wr->wr.atomic.compare_add); - ((struct mthca_atomic_seg *) wqe)->compare = 0; - } - - wqe += sizeof (struct mthca_atomic_seg); + set_atomic_seg(wqe, wr); + wqe += sizeof (struct mthca_atomic_seg); size += (sizeof (struct mthca_raddr_seg) + sizeof (struct mthca_atomic_seg)) / 16; break; @@ -2044,12 +2050,9 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, case IB_WR_RDMA_READ: case IB_WR_RDMA_WRITE: case IB_WR_RDMA_WRITE_WITH_IMM: - ((struct mthca_raddr_seg *) wqe)->raddr = - cpu_to_be64(wr->wr.rdma.remote_addr); - ((struct mthca_raddr_seg *) wqe)->rkey = - cpu_to_be32(wr->wr.rdma.rkey); - ((struct mthca_raddr_seg *) wqe)->reserved = 0; - wqe += sizeof (struct mthca_raddr_seg); + set_raddr_seg(wqe, wr->wr.rdma.remote_addr, + wr->wr.rdma.rkey); + wqe += sizeof (struct mthca_raddr_seg); size += sizeof (struct mthca_raddr_seg) / 16; break; @@ -2064,12 +2067,9 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, switch (wr->opcode) { case IB_WR_RDMA_WRITE: case IB_WR_RDMA_WRITE_WITH_IMM: - ((struct mthca_raddr_seg *) wqe)->raddr = - cpu_to_be64(wr->wr.rdma.remote_addr); - ((struct mthca_raddr_seg *) wqe)->rkey = - cpu_to_be32(wr->wr.rdma.rkey); - ((struct mthca_raddr_seg *) wqe)->reserved = 0; - wqe += sizeof (struct mthca_raddr_seg); + set_raddr_seg(wqe, wr->wr.rdma.remote_addr, + wr->wr.rdma.rkey); + wqe += sizeof (struct mthca_raddr_seg); size += sizeof (struct mthca_raddr_seg) / 16; break; @@ -2081,14 +2081,8 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, break; case UD: - memcpy(((struct mthca_arbel_ud_seg *) wqe)->av, - to_mah(wr->wr.ud.ah)->av, MTHCA_AV_SIZE); - ((struct mthca_arbel_ud_seg *) wqe)->dqpn = - cpu_to_be32(wr->wr.ud.remote_qpn); - ((struct mthca_arbel_ud_seg *) wqe)->qkey = - cpu_to_be32(wr->wr.ud.remote_qkey); - - wqe += sizeof (struct mthca_arbel_ud_seg); + set_arbel_ud_seg(wqe, wr); + wqe += sizeof (struct mthca_arbel_ud_seg); size += sizeof (struct mthca_arbel_ud_seg) / 16; break; @@ -2113,13 +2107,8 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, } for (i = 0; i < wr->num_sge; ++i) { - ((struct mthca_data_seg *) wqe)->byte_count = - cpu_to_be32(wr->sg_list[i].length); - ((struct mthca_data_seg *) wqe)->lkey = - cpu_to_be32(wr->sg_list[i].lkey); - ((struct mthca_data_seg *) wqe)->addr = - cpu_to_be64(wr->sg_list[i].addr); - wqe += sizeof (struct mthca_data_seg); + mthca_set_data_seg(wqe, wr->sg_list + i); + wqe += sizeof (struct mthca_data_seg); size += sizeof (struct mthca_data_seg) / 16; } @@ -2151,7 +2140,7 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, ((wr->send_flags & IB_SEND_FENCE) ? MTHCA_NEXT_FENCE : 0)); - if (!size0) { + if (!nreq) { size0 = size; op0 = mthca_opcode[wr->opcode]; f0 = wr->send_flags & IB_SEND_FENCE ? @@ -2241,20 +2230,12 @@ int mthca_arbel_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, } for (i = 0; i < wr->num_sge; ++i) { - ((struct mthca_data_seg *) wqe)->byte_count = - cpu_to_be32(wr->sg_list[i].length); - ((struct mthca_data_seg *) wqe)->lkey = - cpu_to_be32(wr->sg_list[i].lkey); - ((struct mthca_data_seg *) wqe)->addr = - cpu_to_be64(wr->sg_list[i].addr); + mthca_set_data_seg(wqe, wr->sg_list + i); wqe += sizeof (struct mthca_data_seg); } - if (i < qp->rq.max_gs) { - ((struct mthca_data_seg *) wqe)->byte_count = 0; - ((struct mthca_data_seg *) wqe)->lkey = cpu_to_be32(MTHCA_INVAL_LKEY); - ((struct mthca_data_seg *) wqe)->addr = 0; - } + if (i < qp->rq.max_gs) + mthca_set_data_seg_inval(wqe); qp->wrid[ind] = wr->wr_id; diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c index b8f05a526673..88d219e730ad 100644 --- a/drivers/infiniband/hw/mthca/mthca_srq.c +++ b/drivers/infiniband/hw/mthca/mthca_srq.c @@ -543,20 +543,12 @@ int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, } for (i = 0; i < wr->num_sge; ++i) { - ((struct mthca_data_seg *) wqe)->byte_count = - cpu_to_be32(wr->sg_list[i].length); - ((struct mthca_data_seg *) wqe)->lkey = - cpu_to_be32(wr->sg_list[i].lkey); - ((struct mthca_data_seg *) wqe)->addr = - cpu_to_be64(wr->sg_list[i].addr); + mthca_set_data_seg(wqe, wr->sg_list + i); wqe += sizeof (struct mthca_data_seg); } - if (i < srq->max_gs) { - ((struct mthca_data_seg *) wqe)->byte_count = 0; - ((struct mthca_data_seg *) wqe)->lkey = cpu_to_be32(MTHCA_INVAL_LKEY); - ((struct mthca_data_seg *) wqe)->addr = 0; - } + if (i < srq->max_gs) + mthca_set_data_seg_inval(wqe); ((struct mthca_next_seg *) prev_wqe)->nda_op = cpu_to_be32((ind << srq->wqe_shift) | 1); @@ -662,20 +654,12 @@ int mthca_arbel_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, } for (i = 0; i < wr->num_sge; ++i) { - ((struct mthca_data_seg *) wqe)->byte_count = - cpu_to_be32(wr->sg_list[i].length); - ((struct mthca_data_seg *) wqe)->lkey = - cpu_to_be32(wr->sg_list[i].lkey); - ((struct mthca_data_seg *) wqe)->addr = - cpu_to_be64(wr->sg_list[i].addr); + mthca_set_data_seg(wqe, wr->sg_list + i); wqe += sizeof (struct mthca_data_seg); } - if (i < srq->max_gs) { - ((struct mthca_data_seg *) wqe)->byte_count = 0; - ((struct mthca_data_seg *) wqe)->lkey = cpu_to_be32(MTHCA_INVAL_LKEY); - ((struct mthca_data_seg *) wqe)->addr = 0; - } + if (i < srq->max_gs) + mthca_set_data_seg_inval(wqe); srq->wrid[ind] = wr->wr_id; srq->first_free = next_ind; diff --git a/drivers/infiniband/hw/mthca/mthca_wqe.h b/drivers/infiniband/hw/mthca/mthca_wqe.h index e7d2c1e86199..f6a66fe78e48 100644 --- a/drivers/infiniband/hw/mthca/mthca_wqe.h +++ b/drivers/infiniband/hw/mthca/mthca_wqe.h @@ -113,4 +113,19 @@ struct mthca_mlx_seg { __be16 vcrc; }; +static __always_inline void mthca_set_data_seg(struct mthca_data_seg *dseg, + struct ib_sge *sg) +{ + dseg->byte_count = cpu_to_be32(sg->length); + dseg->lkey = cpu_to_be32(sg->lkey); + dseg->addr = cpu_to_be64(sg->addr); +} + +static __always_inline void mthca_set_data_seg_inval(struct mthca_data_seg *dseg) +{ + dseg->byte_count = 0; + dseg->lkey = cpu_to_be32(MTHCA_INVAL_LKEY); + dseg->addr = 0; +} + #endif /* MTHCA_WQE_H */ diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index e2353701e8bb..1ee867b1b341 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -310,8 +310,6 @@ int iser_conn_init(struct iser_conn **ib_conn); void iser_conn_terminate(struct iser_conn *ib_conn); -void iser_conn_release(struct iser_conn *ib_conn); - void iser_rcv_completion(struct iser_desc *desc, unsigned long dto_xfer_len); @@ -329,9 +327,6 @@ void iser_reg_single(struct iser_device *device, struct iser_regd_buf *regd_buf, enum dma_data_direction direction); -int iser_start_rdma_unaligned_sg(struct iscsi_iser_cmd_task *ctask, - enum iser_data_dir cmd_dir); - void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_cmd_task *ctask, enum iser_data_dir cmd_dir); diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index fc9f1fd0ae54..36cdf77ae92a 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -103,8 +103,8 @@ void iser_reg_single(struct iser_device *device, /** * iser_start_rdma_unaligned_sg */ -int iser_start_rdma_unaligned_sg(struct iscsi_iser_cmd_task *iser_ctask, - enum iser_data_dir cmd_dir) +static int iser_start_rdma_unaligned_sg(struct iscsi_iser_cmd_task *iser_ctask, + enum iser_data_dir cmd_dir) { int dma_nents; struct ib_device *dev; diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 2044de1164ac..d42ec0156eec 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -311,6 +311,29 @@ static int iser_conn_state_comp_exch(struct iser_conn *ib_conn, } /** + * Frees all conn objects and deallocs conn descriptor + */ +static void iser_conn_release(struct iser_conn *ib_conn) +{ + struct iser_device *device = ib_conn->device; + + BUG_ON(ib_conn->state != ISER_CONN_DOWN); + + mutex_lock(&ig.connlist_mutex); + list_del(&ib_conn->conn_list); + mutex_unlock(&ig.connlist_mutex); + + iser_free_ib_conn_res(ib_conn); + ib_conn->device = NULL; + /* on EVENT_ADDR_ERROR there's no device yet for this conn */ + if (device != NULL) + iser_device_try_release(device); + if (ib_conn->iser_conn) + ib_conn->iser_conn->ib_conn = NULL; + kfree(ib_conn); +} + +/** * triggers start of the disconnect procedures and wait for them to be done */ void iser_conn_terminate(struct iser_conn *ib_conn) @@ -550,30 +573,6 @@ connect_failure: } /** - * Frees all conn objects and deallocs conn descriptor - */ -void iser_conn_release(struct iser_conn *ib_conn) -{ - struct iser_device *device = ib_conn->device; - - BUG_ON(ib_conn->state != ISER_CONN_DOWN); - - mutex_lock(&ig.connlist_mutex); - list_del(&ib_conn->conn_list); - mutex_unlock(&ig.connlist_mutex); - - iser_free_ib_conn_res(ib_conn); - ib_conn->device = NULL; - /* on EVENT_ADDR_ERROR there's no device yet for this conn */ - if (device != NULL) - iser_device_try_release(device); - if (ib_conn->iser_conn) - ib_conn->iser_conn->ib_conn = NULL; - kfree(ib_conn); -} - - -/** * iser_reg_page_vec - Register physical memory * * returns: 0 on success, errno code on failure diff --git a/drivers/net/mlx4/catas.c b/drivers/net/mlx4/catas.c index 1bb088aeaf71..6b32ec94b3a8 100644 --- a/drivers/net/mlx4/catas.c +++ b/drivers/net/mlx4/catas.c @@ -30,41 +30,133 @@ * SOFTWARE. */ +#include <linux/workqueue.h> + #include "mlx4.h" -void mlx4_handle_catas_err(struct mlx4_dev *dev) +enum { + MLX4_CATAS_POLL_INTERVAL = 5 * HZ, +}; + +static DEFINE_SPINLOCK(catas_lock); + +static LIST_HEAD(catas_list); +static struct workqueue_struct *catas_wq; +static struct work_struct catas_work; + +static int internal_err_reset = 1; +module_param(internal_err_reset, int, 0644); +MODULE_PARM_DESC(internal_err_reset, + "Reset device on internal errors if non-zero (default 1)"); + +static void dump_err_buf(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); int i; - mlx4_err(dev, "Catastrophic error detected:\n"); + mlx4_err(dev, "Internal error detected:\n"); for (i = 0; i < priv->fw.catas_size; ++i) mlx4_err(dev, " buf[%02x]: %08x\n", i, swab32(readl(priv->catas_err.map + i))); +} - mlx4_dispatch_event(dev, MLX4_EVENT_TYPE_LOCAL_CATAS_ERROR, 0, 0); +static void poll_catas(unsigned long dev_ptr) +{ + struct mlx4_dev *dev = (struct mlx4_dev *) dev_ptr; + struct mlx4_priv *priv = mlx4_priv(dev); + + if (readl(priv->catas_err.map)) { + dump_err_buf(dev); + + mlx4_dispatch_event(dev, MLX4_EVENT_TYPE_LOCAL_CATAS_ERROR, 0, 0); + + if (internal_err_reset) { + spin_lock(&catas_lock); + list_add(&priv->catas_err.list, &catas_list); + spin_unlock(&catas_lock); + + queue_work(catas_wq, &catas_work); + } + } else + mod_timer(&priv->catas_err.timer, + round_jiffies(jiffies + MLX4_CATAS_POLL_INTERVAL)); } -void mlx4_map_catas_buf(struct mlx4_dev *dev) +static void catas_reset(struct work_struct *work) +{ + struct mlx4_priv *priv, *tmppriv; + struct mlx4_dev *dev; + + LIST_HEAD(tlist); + int ret; + + spin_lock_irq(&catas_lock); + list_splice_init(&catas_list, &tlist); + spin_unlock_irq(&catas_lock); + + list_for_each_entry_safe(priv, tmppriv, &tlist, catas_err.list) { + ret = mlx4_restart_one(priv->dev.pdev); + dev = &priv->dev; + if (ret) + mlx4_err(dev, "Reset failed (%d)\n", ret); + else + mlx4_dbg(dev, "Reset succeeded\n"); + } +} + +void mlx4_start_catas_poll(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); unsigned long addr; + INIT_LIST_HEAD(&priv->catas_err.list); + init_timer(&priv->catas_err.timer); + priv->catas_err.map = NULL; + addr = pci_resource_start(dev->pdev, priv->fw.catas_bar) + priv->fw.catas_offset; priv->catas_err.map = ioremap(addr, priv->fw.catas_size * 4); - if (!priv->catas_err.map) - mlx4_warn(dev, "Failed to map catastrophic error buffer at 0x%lx\n", + if (!priv->catas_err.map) { + mlx4_warn(dev, "Failed to map internal error buffer at 0x%lx\n", addr); + return; + } + priv->catas_err.timer.data = (unsigned long) dev; + priv->catas_err.timer.function = poll_catas; + priv->catas_err.timer.expires = + round_jiffies(jiffies + MLX4_CATAS_POLL_INTERVAL); + add_timer(&priv->catas_err.timer); } -void mlx4_unmap_catas_buf(struct mlx4_dev *dev) +void mlx4_stop_catas_poll(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); + del_timer_sync(&priv->catas_err.timer); + if (priv->catas_err.map) iounmap(priv->catas_err.map); + + spin_lock_irq(&catas_lock); + list_del(&priv->catas_err.list); + spin_unlock_irq(&catas_lock); +} + +int __init mlx4_catas_init(void) +{ + INIT_WORK(&catas_work, catas_reset); + + catas_wq = create_singlethread_workqueue("mlx4_err"); + if (!catas_wq) + return -ENOMEM; + + return 0; +} + +void mlx4_catas_cleanup(void) +{ + destroy_workqueue(catas_wq); } diff --git a/drivers/net/mlx4/eq.c b/drivers/net/mlx4/eq.c index 27a82cecd693..2095c843fa15 100644 --- a/drivers/net/mlx4/eq.c +++ b/drivers/net/mlx4/eq.c @@ -89,14 +89,12 @@ struct mlx4_eq_context { (1ull << MLX4_EVENT_TYPE_PATH_MIG_FAILED) | \ (1ull << MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR) | \ (1ull << MLX4_EVENT_TYPE_WQ_ACCESS_ERROR) | \ - (1ull << MLX4_EVENT_TYPE_LOCAL_CATAS_ERROR) | \ (1ull << MLX4_EVENT_TYPE_PORT_CHANGE) | \ (1ull << MLX4_EVENT_TYPE_ECC_DETECT) | \ (1ull << MLX4_EVENT_TYPE_SRQ_CATAS_ERROR) | \ (1ull << MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE) | \ (1ull << MLX4_EVENT_TYPE_SRQ_LIMIT) | \ (1ull << MLX4_EVENT_TYPE_CMD)) -#define MLX4_CATAS_EVENT_MASK (1ull << MLX4_EVENT_TYPE_LOCAL_CATAS_ERROR) struct mlx4_eqe { u8 reserved1; @@ -264,7 +262,7 @@ static irqreturn_t mlx4_interrupt(int irq, void *dev_ptr) writel(priv->eq_table.clr_mask, priv->eq_table.clr_int); - for (i = 0; i < MLX4_EQ_CATAS; ++i) + for (i = 0; i < MLX4_NUM_EQ; ++i) work |= mlx4_eq_int(dev, &priv->eq_table.eq[i]); return IRQ_RETVAL(work); @@ -281,14 +279,6 @@ static irqreturn_t mlx4_msi_x_interrupt(int irq, void *eq_ptr) return IRQ_HANDLED; } -static irqreturn_t mlx4_catas_interrupt(int irq, void *dev_ptr) -{ - mlx4_handle_catas_err(dev_ptr); - - /* MSI-X vectors always belong to us */ - return IRQ_HANDLED; -} - static int mlx4_MAP_EQ(struct mlx4_dev *dev, u64 event_mask, int unmap, int eq_num) { @@ -490,11 +480,9 @@ static void mlx4_free_irqs(struct mlx4_dev *dev) if (eq_table->have_irq) free_irq(dev->pdev->irq, dev); - for (i = 0; i < MLX4_EQ_CATAS; ++i) + for (i = 0; i < MLX4_NUM_EQ; ++i) if (eq_table->eq[i].have_irq) free_irq(eq_table->eq[i].irq, eq_table->eq + i); - if (eq_table->eq[MLX4_EQ_CATAS].have_irq) - free_irq(eq_table->eq[MLX4_EQ_CATAS].irq, dev); } static int __devinit mlx4_map_clr_int(struct mlx4_dev *dev) @@ -598,32 +586,19 @@ int __devinit mlx4_init_eq_table(struct mlx4_dev *dev) if (dev->flags & MLX4_FLAG_MSI_X) { static const char *eq_name[] = { [MLX4_EQ_COMP] = DRV_NAME " (comp)", - [MLX4_EQ_ASYNC] = DRV_NAME " (async)", - [MLX4_EQ_CATAS] = DRV_NAME " (catas)" + [MLX4_EQ_ASYNC] = DRV_NAME " (async)" }; - err = mlx4_create_eq(dev, 1, MLX4_EQ_CATAS, - &priv->eq_table.eq[MLX4_EQ_CATAS]); - if (err) - goto err_out_async; - - for (i = 0; i < MLX4_EQ_CATAS; ++i) { + for (i = 0; i < MLX4_NUM_EQ; ++i) { err = request_irq(priv->eq_table.eq[i].irq, mlx4_msi_x_interrupt, 0, eq_name[i], priv->eq_table.eq + i); if (err) - goto err_out_catas; + goto err_out_async; priv->eq_table.eq[i].have_irq = 1; } - err = request_irq(priv->eq_table.eq[MLX4_EQ_CATAS].irq, - mlx4_catas_interrupt, 0, - eq_name[MLX4_EQ_CATAS], dev); - if (err) - goto err_out_catas; - - priv->eq_table.eq[MLX4_EQ_CATAS].have_irq = 1; } else { err = request_irq(dev->pdev->irq, mlx4_interrupt, IRQF_SHARED, DRV_NAME, dev); @@ -639,22 +614,11 @@ int __devinit mlx4_init_eq_table(struct mlx4_dev *dev) mlx4_warn(dev, "MAP_EQ for async EQ %d failed (%d)\n", priv->eq_table.eq[MLX4_EQ_ASYNC].eqn, err); - for (i = 0; i < MLX4_EQ_CATAS; ++i) + for (i = 0; i < MLX4_NUM_EQ; ++i) eq_set_ci(&priv->eq_table.eq[i], 1); - if (dev->flags & MLX4_FLAG_MSI_X) { - err = mlx4_MAP_EQ(dev, MLX4_CATAS_EVENT_MASK, 0, - priv->eq_table.eq[MLX4_EQ_CATAS].eqn); - if (err) - mlx4_warn(dev, "MAP_EQ for catas EQ %d failed (%d)\n", - priv->eq_table.eq[MLX4_EQ_CATAS].eqn, err); - } - return 0; -err_out_catas: - mlx4_free_eq(dev, &priv->eq_table.eq[MLX4_EQ_CATAS]); - err_out_async: mlx4_free_eq(dev, &priv->eq_table.eq[MLX4_EQ_ASYNC]); @@ -675,19 +639,13 @@ void mlx4_cleanup_eq_table(struct mlx4_dev *dev) struct mlx4_priv *priv = mlx4_priv(dev); int i; - if (dev->flags & MLX4_FLAG_MSI_X) - mlx4_MAP_EQ(dev, MLX4_CATAS_EVENT_MASK, 1, - priv->eq_table.eq[MLX4_EQ_CATAS].eqn); - mlx4_MAP_EQ(dev, MLX4_ASYNC_EVENT_MASK, 1, priv->eq_table.eq[MLX4_EQ_ASYNC].eqn); mlx4_free_irqs(dev); - for (i = 0; i < MLX4_EQ_CATAS; ++i) + for (i = 0; i < MLX4_NUM_EQ; ++i) mlx4_free_eq(dev, &priv->eq_table.eq[i]); - if (dev->flags & MLX4_FLAG_MSI_X) - mlx4_free_eq(dev, &priv->eq_table.eq[MLX4_EQ_CATAS]); mlx4_unmap_clr_int(dev); diff --git a/drivers/net/mlx4/intf.c b/drivers/net/mlx4/intf.c index 9ae951bf6aa6..be5d9e90ccf2 100644 --- a/drivers/net/mlx4/intf.c +++ b/drivers/net/mlx4/intf.c @@ -142,6 +142,7 @@ int mlx4_register_device(struct mlx4_dev *dev) mlx4_add_device(intf, priv); mutex_unlock(&intf_mutex); + mlx4_start_catas_poll(dev); return 0; } @@ -151,6 +152,7 @@ void mlx4_unregister_device(struct mlx4_dev *dev) struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_interface *intf; + mlx4_stop_catas_poll(dev); mutex_lock(&intf_mutex); list_for_each_entry(intf, &intf_list, list) diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c index a4f2e0475a71..4dc9dc19b716 100644 --- a/drivers/net/mlx4/main.c +++ b/drivers/net/mlx4/main.c @@ -78,7 +78,7 @@ static const char mlx4_version[] __devinitdata = static struct mlx4_profile default_profile = { .num_qp = 1 << 16, .num_srq = 1 << 16, - .rdmarc_per_qp = 4, + .rdmarc_per_qp = 1 << 4, .num_cq = 1 << 16, .num_mcg = 1 << 13, .num_mpt = 1 << 17, @@ -583,13 +583,11 @@ static int __devinit mlx4_setup_hca(struct mlx4_dev *dev) goto err_pd_table_free; } - mlx4_map_catas_buf(dev); - err = mlx4_init_eq_table(dev); if (err) { mlx4_err(dev, "Failed to initialize " "event queue table, aborting.\n"); - goto err_catas_buf; + goto err_mr_table_free; } err = mlx4_cmd_use_events(dev); @@ -659,8 +657,7 @@ err_cmd_poll: err_eq_table_free: mlx4_cleanup_eq_table(dev); -err_catas_buf: - mlx4_unmap_catas_buf(dev); +err_mr_table_free: mlx4_cleanup_mr_table(dev); err_pd_table_free: @@ -836,9 +833,6 @@ err_cleanup: mlx4_cleanup_cq_table(dev); mlx4_cmd_use_polling(dev); mlx4_cleanup_eq_table(dev); - - mlx4_unmap_catas_buf(dev); - mlx4_cleanup_mr_table(dev); mlx4_cleanup_pd_table(dev); mlx4_cleanup_uar_table(dev); @@ -885,9 +879,6 @@ static void __devexit mlx4_remove_one(struct pci_dev *pdev) mlx4_cleanup_cq_table(dev); mlx4_cmd_use_polling(dev); mlx4_cleanup_eq_table(dev); - - mlx4_unmap_catas_buf(dev); - mlx4_cleanup_mr_table(dev); mlx4_cleanup_pd_table(dev); @@ -908,6 +899,12 @@ static void __devexit mlx4_remove_one(struct pci_dev *pdev) } } +int mlx4_restart_one(struct pci_dev *pdev) +{ + mlx4_remove_one(pdev); + return mlx4_init_one(pdev, NULL); +} + static struct pci_device_id mlx4_pci_table[] = { { PCI_VDEVICE(MELLANOX, 0x6340) }, /* MT25408 "Hermon" SDR */ { PCI_VDEVICE(MELLANOX, 0x634a) }, /* MT25408 "Hermon" DDR */ @@ -930,6 +927,10 @@ static int __init mlx4_init(void) { int ret; + ret = mlx4_catas_init(); + if (ret) + return ret; + ret = pci_register_driver(&mlx4_driver); return ret < 0 ? ret : 0; } @@ -937,6 +938,7 @@ static int __init mlx4_init(void) static void __exit mlx4_cleanup(void) { pci_unregister_driver(&mlx4_driver); + mlx4_catas_cleanup(); } module_init(mlx4_init); diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h index d9c91a71fc87..be304a7c2c91 100644 --- a/drivers/net/mlx4/mlx4.h +++ b/drivers/net/mlx4/mlx4.h @@ -39,6 +39,7 @@ #include <linux/mutex.h> #include <linux/radix-tree.h> +#include <linux/timer.h> #include <linux/mlx4/device.h> #include <linux/mlx4/doorbell.h> @@ -67,7 +68,6 @@ enum { enum { MLX4_EQ_ASYNC, MLX4_EQ_COMP, - MLX4_EQ_CATAS, MLX4_NUM_EQ }; @@ -248,7 +248,8 @@ struct mlx4_mcg_table { struct mlx4_catas_err { u32 __iomem *map; - int size; + struct timer_list timer; + struct list_head list; }; struct mlx4_priv { @@ -311,9 +312,11 @@ void mlx4_cleanup_qp_table(struct mlx4_dev *dev); void mlx4_cleanup_srq_table(struct mlx4_dev *dev); void mlx4_cleanup_mcg_table(struct mlx4_dev *dev); -void mlx4_map_catas_buf(struct mlx4_dev *dev); -void mlx4_unmap_catas_buf(struct mlx4_dev *dev); - +void mlx4_start_catas_poll(struct mlx4_dev *dev); +void mlx4_stop_catas_poll(struct mlx4_dev *dev); +int mlx4_catas_init(void); +void mlx4_catas_cleanup(void); +int mlx4_restart_one(struct pci_dev *pdev); int mlx4_register_device(struct mlx4_dev *dev); void mlx4_unregister_device(struct mlx4_dev *dev); void mlx4_dispatch_event(struct mlx4_dev *dev, enum mlx4_event type, diff --git a/drivers/uio/Kconfig b/drivers/uio/Kconfig new file mode 100644 index 000000000000..b778ed71f636 --- /dev/null +++ b/drivers/uio/Kconfig @@ -0,0 +1,29 @@ +menu "Userspace I/O" + depends on !S390 + +config UIO + tristate "Userspace I/O drivers" + default n + help + Enable this to allow the userspace driver core code to be + built. This code allows userspace programs easy access to + kernel interrupts and memory locations, allowing some drivers + to be written in userspace. Note that a small kernel driver + is also required for interrupt handling to work properly. + + If you don't know what to do here, say N. + +config UIO_CIF + tristate "generic Hilscher CIF Card driver" + depends on UIO && PCI + default n + help + Driver for Hilscher CIF DeviceNet and Profibus cards. This + driver requires a userspace component that handles all of the + heavy lifting and can be found at: + http://www.osadl.org/projects/downloads/UIO/user/cif-* + + To compile this driver as a module, choose M here: the module + will be called uio_cif. + +endmenu diff --git a/drivers/uio/Makefile b/drivers/uio/Makefile new file mode 100644 index 000000000000..7fecfb459da5 --- /dev/null +++ b/drivers/uio/Makefile @@ -0,0 +1,2 @@ +obj-$(CONFIG_UIO) += uio.o +obj-$(CONFIG_UIO_CIF) += uio_cif.o diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c new file mode 100644 index 000000000000..865f32b63b5c --- /dev/null +++ b/drivers/uio/uio.c @@ -0,0 +1,701 @@ +/* + * drivers/uio/uio.c + * + * Copyright(C) 2005, Benedikt Spranger <b.spranger@linutronix.de> + * Copyright(C) 2005, Thomas Gleixner <tglx@linutronix.de> + * Copyright(C) 2006, Hans J. Koch <hjk@linutronix.de> + * Copyright(C) 2006, Greg Kroah-Hartman <greg@kroah.com> + * + * Userspace IO + * + * Base Functions + * + * Licensed under the GPLv2 only. + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/poll.h> +#include <linux/device.h> +#include <linux/mm.h> +#include <linux/idr.h> +#include <linux/string.h> +#include <linux/kobject.h> +#include <linux/uio_driver.h> + +#define UIO_MAX_DEVICES 255 + +struct uio_device { + struct module *owner; + struct device *dev; + int minor; + atomic_t event; + struct fasync_struct *async_queue; + wait_queue_head_t wait; + int vma_count; + struct uio_info *info; + struct kset map_attr_kset; +}; + +static int uio_major; +static DEFINE_IDR(uio_idr); +static struct file_operations uio_fops; + +/* UIO class infrastructure */ +static struct uio_class { + struct kref kref; + struct class *class; +} *uio_class; + +/* + * attributes + */ + +static struct attribute attr_addr = { + .name = "addr", + .mode = S_IRUGO, +}; + +static struct attribute attr_size = { + .name = "size", + .mode = S_IRUGO, +}; + +static struct attribute* map_attrs[] = { + &attr_addr, &attr_size, NULL +}; + +static ssize_t map_attr_show(struct kobject *kobj, struct attribute *attr, + char *buf) +{ + struct uio_mem *mem = container_of(kobj, struct uio_mem, kobj); + + if (strncmp(attr->name,"addr",4) == 0) + return sprintf(buf, "0x%lx\n", mem->addr); + + if (strncmp(attr->name,"size",4) == 0) + return sprintf(buf, "0x%lx\n", mem->size); + + return -ENODEV; +} + +static void map_attr_release(struct kobject *kobj) +{ + /* TODO ??? */ +} + +static struct sysfs_ops map_attr_ops = { + .show = map_attr_show, +}; + +static struct kobj_type map_attr_type = { + .release = map_attr_release, + .sysfs_ops = &map_attr_ops, + .default_attrs = map_attrs, +}; + +static ssize_t show_name(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct uio_device *idev = dev_get_drvdata(dev); + if (idev) + return sprintf(buf, "%s\n", idev->info->name); + else + return -ENODEV; +} +static DEVICE_ATTR(name, S_IRUGO, show_name, NULL); + +static ssize_t show_version(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct uio_device *idev = dev_get_drvdata(dev); + if (idev) + return sprintf(buf, "%s\n", idev->info->version); + else + return -ENODEV; +} +static DEVICE_ATTR(version, S_IRUGO, show_version, NULL); + +static ssize_t show_event(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct uio_device *idev = dev_get_drvdata(dev); + if (idev) + return sprintf(buf, "%u\n", + (unsigned int)atomic_read(&idev->event)); + else + return -ENODEV; +} +static DEVICE_ATTR(event, S_IRUGO, show_event, NULL); + +static struct attribute *uio_attrs[] = { + &dev_attr_name.attr, + &dev_attr_version.attr, + &dev_attr_event.attr, + NULL, +}; + +static struct attribute_group uio_attr_grp = { + .attrs = uio_attrs, +}; + +/* + * device functions + */ +static int uio_dev_add_attributes(struct uio_device *idev) +{ + int ret; + int mi; + int map_found = 0; + struct uio_mem *mem; + + ret = sysfs_create_group(&idev->dev->kobj, &uio_attr_grp); + if (ret) + goto err_group; + + for (mi = 0; mi < MAX_UIO_MAPS; mi++) { + mem = &idev->info->mem[mi]; + if (mem->size == 0) + break; + if (!map_found) { + map_found = 1; + kobject_set_name(&idev->map_attr_kset.kobj,"maps"); + idev->map_attr_kset.ktype = &map_attr_type; + idev->map_attr_kset.kobj.parent = &idev->dev->kobj; + ret = kset_register(&idev->map_attr_kset); + if (ret) + goto err_remove_group; + } + kobject_init(&mem->kobj); + kobject_set_name(&mem->kobj,"map%d",mi); + mem->kobj.parent = &idev->map_attr_kset.kobj; + mem->kobj.kset = &idev->map_attr_kset; + ret = kobject_add(&mem->kobj); + if (ret) + goto err_remove_maps; + } + + return 0; + +err_remove_maps: + for (mi--; mi>=0; mi--) { + mem = &idev->info->mem[mi]; + kobject_unregister(&mem->kobj); + } + kset_unregister(&idev->map_attr_kset); /* Needed ? */ +err_remove_group: + sysfs_remove_group(&idev->dev->kobj, &uio_attr_grp); +err_group: + dev_err(idev->dev, "error creating sysfs files (%d)\n", ret); + return ret; +} + +static void uio_dev_del_attributes(struct uio_device *idev) +{ + int mi; + struct uio_mem *mem; + for (mi = 0; mi < MAX_UIO_MAPS; mi++) { + mem = &idev->info->mem[mi]; + if (mem->size == 0) + break; + kobject_unregister(&mem->kobj); + } + kset_unregister(&idev->map_attr_kset); + sysfs_remove_group(&idev->dev->kobj, &uio_attr_grp); +} + +static int uio_get_minor(struct uio_device *idev) +{ + static DEFINE_MUTEX(minor_lock); + int retval = -ENOMEM; + int id; + + mutex_lock(&minor_lock); + if (idr_pre_get(&uio_idr, GFP_KERNEL) == 0) + goto exit; + + retval = idr_get_new(&uio_idr, idev, &id); + if (retval < 0) { + if (retval == -EAGAIN) + retval = -ENOMEM; + goto exit; + } + idev->minor = id & MAX_ID_MASK; +exit: + mutex_unlock(&minor_lock); + return retval; +} + +static void uio_free_minor(struct uio_device *idev) +{ + idr_remove(&uio_idr, idev->minor); +} + +/** + * uio_event_notify - trigger an interrupt event + * @info: UIO device capabilities + */ +void uio_event_notify(struct uio_info *info) +{ + struct uio_device *idev = info->uio_dev; + + atomic_inc(&idev->event); + wake_up_interruptible(&idev->wait); + kill_fasync(&idev->async_queue, SIGIO, POLL_IN); +} +EXPORT_SYMBOL_GPL(uio_event_notify); + +/** + * uio_interrupt - hardware interrupt handler + * @irq: IRQ number, can be UIO_IRQ_CYCLIC for cyclic timer + * @dev_id: Pointer to the devices uio_device structure + */ +static irqreturn_t uio_interrupt(int irq, void *dev_id) +{ + struct uio_device *idev = (struct uio_device *)dev_id; + irqreturn_t ret = idev->info->handler(irq, idev->info); + + if (ret == IRQ_HANDLED) + uio_event_notify(idev->info); + + return ret; +} + +struct uio_listener { + struct uio_device *dev; + s32 event_count; +}; + +static int uio_open(struct inode *inode, struct file *filep) +{ + struct uio_device *idev; + struct uio_listener *listener; + int ret = 0; + + idev = idr_find(&uio_idr, iminor(inode)); + if (!idev) + return -ENODEV; + + listener = kmalloc(sizeof(*listener), GFP_KERNEL); + if (!listener) + return -ENOMEM; + + listener->dev = idev; + listener->event_count = atomic_read(&idev->event); + filep->private_data = listener; + + if (idev->info->open) { + if (!try_module_get(idev->owner)) + return -ENODEV; + ret = idev->info->open(idev->info, inode); + module_put(idev->owner); + } + + if (ret) + kfree(listener); + + return ret; +} + +static int uio_fasync(int fd, struct file *filep, int on) +{ + struct uio_listener *listener = filep->private_data; + struct uio_device *idev = listener->dev; + + return fasync_helper(fd, filep, on, &idev->async_queue); +} + +static int uio_release(struct inode *inode, struct file *filep) +{ + int ret = 0; + struct uio_listener *listener = filep->private_data; + struct uio_device *idev = listener->dev; + + if (idev->info->release) { + if (!try_module_get(idev->owner)) + return -ENODEV; + ret = idev->info->release(idev->info, inode); + module_put(idev->owner); + } + if (filep->f_flags & FASYNC) + ret = uio_fasync(-1, filep, 0); + kfree(listener); + return ret; +} + +static unsigned int uio_poll(struct file *filep, poll_table *wait) +{ + struct uio_listener *listener = filep->private_data; + struct uio_device *idev = listener->dev; + + if (idev->info->irq == UIO_IRQ_NONE) + return -EIO; + + poll_wait(filep, &idev->wait, wait); + if (listener->event_count != atomic_read(&idev->event)) + return POLLIN | POLLRDNORM; + return 0; +} + +static ssize_t uio_read(struct file *filep, char __user *buf, + size_t count, loff_t *ppos) +{ + struct uio_listener *listener = filep->private_data; + struct uio_device *idev = listener->dev; + DECLARE_WAITQUEUE(wait, current); + ssize_t retval; + s32 event_count; + + if (idev->info->irq == UIO_IRQ_NONE) + return -EIO; + + if (count != sizeof(s32)) + return -EINVAL; + + add_wait_queue(&idev->wait, &wait); + + do { + set_current_state(TASK_INTERRUPTIBLE); + + event_count = atomic_read(&idev->event); + if (event_count != listener->event_count) { + if (copy_to_user(buf, &event_count, count)) + retval = -EFAULT; + else { + listener->event_count = event_count; + retval = count; + } + break; + } + + if (filep->f_flags & O_NONBLOCK) { + retval = -EAGAIN; + break; + } + + if (signal_pending(current)) { + retval = -ERESTARTSYS; + break; + } + schedule(); + } while (1); + + __set_current_state(TASK_RUNNING); + remove_wait_queue(&idev->wait, &wait); + + return retval; +} + +static int uio_find_mem_index(struct vm_area_struct *vma) +{ + int mi; + struct uio_device *idev = vma->vm_private_data; + + for (mi = 0; mi < MAX_UIO_MAPS; mi++) { + if (idev->info->mem[mi].size == 0) + return -1; + if (vma->vm_pgoff == mi) + return mi; + } + return -1; +} + +static void uio_vma_open(struct vm_area_struct *vma) +{ + struct uio_device *idev = vma->vm_private_data; + idev->vma_count++; +} + +static void uio_vma_close(struct vm_area_struct *vma) +{ + struct uio_device *idev = vma->vm_private_data; + idev->vma_count--; +} + +static struct page *uio_vma_nopage(struct vm_area_struct *vma, + unsigned long address, int *type) +{ + struct uio_device *idev = vma->vm_private_data; + struct page* page = NOPAGE_SIGBUS; + + int mi = uio_find_mem_index(vma); + if (mi < 0) + return page; + + if (idev->info->mem[mi].memtype == UIO_MEM_LOGICAL) + page = virt_to_page(idev->info->mem[mi].addr); + else + page = vmalloc_to_page((void*)idev->info->mem[mi].addr); + get_page(page); + if (type) + *type = VM_FAULT_MINOR; + return page; +} + +static struct vm_operations_struct uio_vm_ops = { + .open = uio_vma_open, + .close = uio_vma_close, + .nopage = uio_vma_nopage, +}; + +static int uio_mmap_physical(struct vm_area_struct *vma) +{ + struct uio_device *idev = vma->vm_private_data; + int mi = uio_find_mem_index(vma); + if (mi < 0) + return -EINVAL; + + vma->vm_flags |= VM_IO | VM_RESERVED; + + return remap_pfn_range(vma, + vma->vm_start, + idev->info->mem[mi].addr >> PAGE_SHIFT, + vma->vm_end - vma->vm_start, + vma->vm_page_prot); +} + +static int uio_mmap_logical(struct vm_area_struct *vma) +{ + vma->vm_flags |= VM_RESERVED; + vma->vm_ops = &uio_vm_ops; + uio_vma_open(vma); + return 0; +} + +static int uio_mmap(struct file *filep, struct vm_area_struct *vma) +{ + struct uio_listener *listener = filep->private_data; + struct uio_device *idev = listener->dev; + int mi; + unsigned long requested_pages, actual_pages; + int ret = 0; + + if (vma->vm_end < vma->vm_start) + return -EINVAL; + + vma->vm_private_data = idev; + + mi = uio_find_mem_index(vma); + if (mi < 0) + return -EINVAL; + + requested_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + actual_pages = (idev->info->mem[mi].size + PAGE_SIZE -1) >> PAGE_SHIFT; + if (requested_pages > actual_pages) + return -EINVAL; + + if (idev->info->mmap) { + if (!try_module_get(idev->owner)) + return -ENODEV; + ret = idev->info->mmap(idev->info, vma); + module_put(idev->owner); + return ret; + } + + switch (idev->info->mem[mi].memtype) { + case UIO_MEM_PHYS: + return uio_mmap_physical(vma); + case UIO_MEM_LOGICAL: + case UIO_MEM_VIRTUAL: + return uio_mmap_logical(vma); + default: + return -EINVAL; + } +} + +static struct file_operations uio_fops = { + .owner = THIS_MODULE, + .open = uio_open, + .release = uio_release, + .read = uio_read, + .mmap = uio_mmap, + .poll = uio_poll, + .fasync = uio_fasync, +}; + +static int uio_major_init(void) +{ + uio_major = register_chrdev(0, "uio", &uio_fops); + if (uio_major < 0) + return uio_major; + return 0; +} + +static void uio_major_cleanup(void) +{ + unregister_chrdev(uio_major, "uio"); +} + +static int init_uio_class(void) +{ + int ret = 0; + + if (uio_class != NULL) { + kref_get(&uio_class->kref); + goto exit; + } + + /* This is the first time in here, set everything up properly */ + ret = uio_major_init(); + if (ret) + goto exit; + + uio_class = kzalloc(sizeof(*uio_class), GFP_KERNEL); + if (!uio_class) { + ret = -ENOMEM; + goto err_kzalloc; + } + + kref_init(&uio_class->kref); + uio_class->class = class_create(THIS_MODULE, "uio"); + if (IS_ERR(uio_class->class)) { + ret = IS_ERR(uio_class->class); + printk(KERN_ERR "class_create failed for uio\n"); + goto err_class_create; + } + return 0; + +err_class_create: + kfree(uio_class); + uio_class = NULL; +err_kzalloc: + uio_major_cleanup(); +exit: + return ret; +} + +static void release_uio_class(struct kref *kref) +{ + /* Ok, we cheat as we know we only have one uio_class */ + class_destroy(uio_class->class); + kfree(uio_class); + uio_major_cleanup(); + uio_class = NULL; +} + +static void uio_class_destroy(void) +{ + if (uio_class) + kref_put(&uio_class->kref, release_uio_class); +} + +/** + * uio_register_device - register a new userspace IO device + * @owner: module that creates the new device + * @parent: parent device + * @info: UIO device capabilities + * + * returns zero on success or a negative error code. + */ +int __uio_register_device(struct module *owner, + struct device *parent, + struct uio_info *info) +{ + struct uio_device *idev; + int ret = 0; + + if (!parent || !info || !info->name || !info->version) + return -EINVAL; + + info->uio_dev = NULL; + + ret = init_uio_class(); + if (ret) + return ret; + + idev = kzalloc(sizeof(*idev), GFP_KERNEL); + if (!idev) { + ret = -ENOMEM; + goto err_kzalloc; + } + + idev->owner = owner; + idev->info = info; + init_waitqueue_head(&idev->wait); + atomic_set(&idev->event, 0); + + ret = uio_get_minor(idev); + if (ret) + goto err_get_minor; + + idev->dev = device_create(uio_class->class, parent, + MKDEV(uio_major, idev->minor), + "uio%d", idev->minor); + if (IS_ERR(idev->dev)) { + printk(KERN_ERR "UIO: device register failed\n"); + ret = PTR_ERR(idev->dev); + goto err_device_create; + } + dev_set_drvdata(idev->dev, idev); + + ret = uio_dev_add_attributes(idev); + if (ret) + goto err_uio_dev_add_attributes; + + info->uio_dev = idev; + + if (idev->info->irq >= 0) { + ret = request_irq(idev->info->irq, uio_interrupt, + idev->info->irq_flags, idev->info->name, idev); + if (ret) + goto err_request_irq; + } + + return 0; + +err_request_irq: + uio_dev_del_attributes(idev); +err_uio_dev_add_attributes: + device_destroy(uio_class->class, MKDEV(uio_major, idev->minor)); +err_device_create: + uio_free_minor(idev); +err_get_minor: + kfree(idev); +err_kzalloc: + uio_class_destroy(); + return ret; +} +EXPORT_SYMBOL_GPL(__uio_register_device); + +/** + * uio_unregister_device - unregister a industrial IO device + * @info: UIO device capabilities + * + */ +void uio_unregister_device(struct uio_info *info) +{ + struct uio_device *idev; + + if (!info || !info->uio_dev) + return; + + idev = info->uio_dev; + + uio_free_minor(idev); + + if (info->irq >= 0) + free_irq(info->irq, idev); + + uio_dev_del_attributes(idev); + + dev_set_drvdata(idev->dev, NULL); + device_destroy(uio_class->class, MKDEV(uio_major, idev->minor)); + kfree(idev); + uio_class_destroy(); + + return; +} +EXPORT_SYMBOL_GPL(uio_unregister_device); + +static int __init uio_init(void) +{ + return 0; +} + +static void __exit uio_exit(void) +{ +} + +module_init(uio_init) +module_exit(uio_exit) +MODULE_LICENSE("GPL v2"); diff --git a/drivers/uio/uio_cif.c b/drivers/uio/uio_cif.c new file mode 100644 index 000000000000..838bae460831 --- /dev/null +++ b/drivers/uio/uio_cif.c @@ -0,0 +1,156 @@ +/* + * UIO Hilscher CIF card driver + * + * (C) 2007 Hans J. Koch <hjk@linutronix.de> + * Original code (C) 2005 Benedikt Spranger <b.spranger@linutronix.de> + * + * Licensed under GPL version 2 only. + * + */ + +#include <linux/device.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/uio_driver.h> + +#include <asm/io.h> + +#ifndef PCI_DEVICE_ID_PLX_9030 +#define PCI_DEVICE_ID_PLX_9030 0x9030 +#endif + +#define PLX9030_INTCSR 0x4C +#define INTSCR_INT1_ENABLE 0x01 +#define INTSCR_INT1_STATUS 0x04 +#define INT1_ENABLED_AND_ACTIVE (INTSCR_INT1_ENABLE | INTSCR_INT1_STATUS) + +#define PCI_SUBVENDOR_ID_PEP 0x1518 +#define CIF_SUBDEVICE_PROFIBUS 0x430 +#define CIF_SUBDEVICE_DEVICENET 0x432 + + +static irqreturn_t hilscher_handler(int irq, struct uio_info *dev_info) +{ + void __iomem *plx_intscr = dev_info->mem[0].internal_addr + + PLX9030_INTCSR; + + if ((ioread8(plx_intscr) & INT1_ENABLED_AND_ACTIVE) + != INT1_ENABLED_AND_ACTIVE) + return IRQ_NONE; + + /* Disable interrupt */ + iowrite8(ioread8(plx_intscr) & ~INTSCR_INT1_ENABLE, plx_intscr); + return IRQ_HANDLED; +} + +static int __devinit hilscher_pci_probe(struct pci_dev *dev, + const struct pci_device_id *id) +{ + struct uio_info *info; + + info = kzalloc(sizeof(struct uio_info), GFP_KERNEL); + if (!info) + return -ENOMEM; + + if (pci_enable_device(dev)) + goto out_free; + + if (pci_request_regions(dev, "hilscher")) + goto out_disable; + + info->mem[0].addr = pci_resource_start(dev, 0); + if (!info->mem[0].addr) + goto out_release; + info->mem[0].internal_addr = ioremap(pci_resource_start(dev, 0), + pci_resource_len(dev, 0)); + if (!info->mem[0].internal_addr) + goto out_release; + + info->mem[0].size = pci_resource_len(dev, 0); + info->mem[0].memtype = UIO_MEM_PHYS; + info->mem[1].addr = pci_resource_start(dev, 2); + info->mem[1].size = pci_resource_len(dev, 2); + info->mem[1].memtype = UIO_MEM_PHYS; + switch (id->subdevice) { + case CIF_SUBDEVICE_PROFIBUS: + info->name = "CIF_Profibus"; + break; + case CIF_SUBDEVICE_DEVICENET: + info->name = "CIF_Devicenet"; + break; + default: + info->name = "CIF_???"; + } + info->version = "0.0.1"; + info->irq = dev->irq; + info->irq_flags = IRQF_DISABLED | IRQF_SHARED; + info->handler = hilscher_handler; + + if (uio_register_device(&dev->dev, info)) + goto out_unmap; + + pci_set_drvdata(dev, info); + + return 0; +out_unmap: + iounmap(info->mem[0].internal_addr); +out_release: + pci_release_regions(dev); +out_disable: + pci_disable_device(dev); +out_free: + kfree (info); + return -ENODEV; +} + +static void hilscher_pci_remove(struct pci_dev *dev) +{ + struct uio_info *info = pci_get_drvdata(dev); + + uio_unregister_device(info); + pci_release_regions(dev); + pci_disable_device(dev); + pci_set_drvdata(dev, NULL); + iounmap(info->mem[0].internal_addr); + + kfree (info); +} + +static struct pci_device_id hilscher_pci_ids[] = { + { + .vendor = PCI_VENDOR_ID_PLX, + .device = PCI_DEVICE_ID_PLX_9030, + .subvendor = PCI_SUBVENDOR_ID_PEP, + .subdevice = CIF_SUBDEVICE_PROFIBUS, + }, + { + .vendor = PCI_VENDOR_ID_PLX, + .device = PCI_DEVICE_ID_PLX_9030, + .subvendor = PCI_SUBVENDOR_ID_PEP, + .subdevice = CIF_SUBDEVICE_DEVICENET, + }, + { 0, } +}; + +static struct pci_driver hilscher_pci_driver = { + .name = "hilscher", + .id_table = hilscher_pci_ids, + .probe = hilscher_pci_probe, + .remove = hilscher_pci_remove, +}; + +static int __init hilscher_init_module(void) +{ + return pci_register_driver(&hilscher_pci_driver); +} + +static void __exit hilscher_exit_module(void) +{ + pci_unregister_driver(&hilscher_pci_driver); +} + +module_init(hilscher_init_module); +module_exit(hilscher_exit_module); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Hans J. Koch, Benedikt Spranger"); diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c index 196d83266e34..1a5e8e893d75 100644 --- a/fs/gfs2/ops_file.c +++ b/fs/gfs2/ops_file.c @@ -489,6 +489,29 @@ static int gfs2_fsync(struct file *file, struct dentry *dentry, int datasync) } /** + * gfs2_setlease - acquire/release a file lease + * @file: the file pointer + * @arg: lease type + * @fl: file lock + * + * Returns: errno + */ + +static int gfs2_setlease(struct file *file, long arg, struct file_lock **fl) +{ + struct gfs2_sbd *sdp = GFS2_SB(file->f_mapping->host); + + /* + * We don't currently have a way to enforce a lease across the whole + * cluster; until we do, disable leases (by just returning -EINVAL), + * unless the administrator has requested purely local locking. + */ + if (!sdp->sd_args.ar_localflocks) + return -EINVAL; + return setlease(file, arg, fl); +} + +/** * gfs2_lock - acquire/release a posix lock on a file * @file: the file pointer * @cmd: either modify or retrieve lock state, possibly wait @@ -638,6 +661,7 @@ const struct file_operations gfs2_file_fops = { .flock = gfs2_flock, .splice_read = generic_file_splice_read, .splice_write = generic_file_splice_write, + .setlease = gfs2_setlease, }; const struct file_operations gfs2_dir_fops = { diff --git a/fs/locks.c b/fs/locks.c index 431a8b871fce..4f2d749ac624 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -458,22 +458,20 @@ static int lease_init(struct file *filp, int type, struct file_lock *fl) } /* Allocate a file_lock initialised to this type of lease */ -static int lease_alloc(struct file *filp, int type, struct file_lock **flp) +static struct file_lock *lease_alloc(struct file *filp, int type) { struct file_lock *fl = locks_alloc_lock(); int error = -ENOMEM; if (fl == NULL) - goto out; + return ERR_PTR(error); error = lease_init(filp, type, fl); if (error) { locks_free_lock(fl); - fl = NULL; + return ERR_PTR(error); } -out: - *flp = fl; - return error; + return fl; } /* Check if two locks overlap each other. @@ -661,7 +659,7 @@ static int locks_block_on_timeout(struct file_lock *blocker, struct file_lock *w return result; } -int +void posix_test_lock(struct file *filp, struct file_lock *fl) { struct file_lock *cfl; @@ -673,14 +671,12 @@ posix_test_lock(struct file *filp, struct file_lock *fl) if (posix_locks_conflict(cfl, fl)) break; } - if (cfl) { + if (cfl) __locks_copy_lock(fl, cfl); - unlock_kernel(); - return 1; - } else + else fl->fl_type = F_UNLCK; unlock_kernel(); - return 0; + return; } EXPORT_SYMBOL(posix_test_lock); @@ -1169,9 +1165,9 @@ static void time_out_leases(struct inode *inode) * @inode: the inode of the file to return * @mode: the open mode (read or write) * - * break_lease (inlined for speed) has checked there already - * is a lease on this file. Leases are broken on a call to open() - * or truncate(). This function can sleep unless you + * break_lease (inlined for speed) has checked there already is at least + * some kind of lock (maybe a lease) on this file. Leases are broken on + * a call to open() or truncate(). This function can sleep unless you * specified %O_NONBLOCK to your open(). */ int __break_lease(struct inode *inode, unsigned int mode) @@ -1179,12 +1175,10 @@ int __break_lease(struct inode *inode, unsigned int mode) int error = 0, future; struct file_lock *new_fl, *flock; struct file_lock *fl; - int alloc_err; unsigned long break_time; int i_have_this_lease = 0; - alloc_err = lease_alloc(NULL, mode & FMODE_WRITE ? F_WRLCK : F_RDLCK, - &new_fl); + new_fl = lease_alloc(NULL, mode & FMODE_WRITE ? F_WRLCK : F_RDLCK); lock_kernel(); @@ -1212,8 +1206,9 @@ int __break_lease(struct inode *inode, unsigned int mode) goto out; } - if (alloc_err && !i_have_this_lease && ((mode & O_NONBLOCK) == 0)) { - error = alloc_err; + if (IS_ERR(new_fl) && !i_have_this_lease + && ((mode & O_NONBLOCK) == 0)) { + error = PTR_ERR(new_fl); goto out; } @@ -1260,7 +1255,7 @@ restart: out: unlock_kernel(); - if (!alloc_err) + if (!IS_ERR(new_fl)) locks_free_lock(new_fl); return error; } @@ -1329,7 +1324,7 @@ int fcntl_getlease(struct file *filp) } /** - * __setlease - sets a lease on an open file + * setlease - sets a lease on an open file * @filp: file pointer * @arg: type of lease to obtain * @flp: input - file_lock to use, output - file_lock inserted @@ -1339,18 +1334,24 @@ int fcntl_getlease(struct file *filp) * * Called with kernel lock held. */ -static int __setlease(struct file *filp, long arg, struct file_lock **flp) +int setlease(struct file *filp, long arg, struct file_lock **flp) { struct file_lock *fl, **before, **my_before = NULL, *lease; struct dentry *dentry = filp->f_path.dentry; struct inode *inode = dentry->d_inode; int error, rdlease_count = 0, wrlease_count = 0; + if ((current->fsuid != inode->i_uid) && !capable(CAP_LEASE)) + return -EACCES; + if (!S_ISREG(inode->i_mode)) + return -EINVAL; + error = security_file_lock(filp, arg); + if (error) + return error; + time_out_leases(inode); - error = -EINVAL; - if (!flp || !(*flp) || !(*flp)->fl_lmops || !(*flp)->fl_lmops->fl_break) - goto out; + BUG_ON(!(*flp)->fl_lmops->fl_break); lease = *flp; @@ -1418,39 +1419,49 @@ static int __setlease(struct file *filp, long arg, struct file_lock **flp) out: return error; } +EXPORT_SYMBOL(setlease); /** - * setlease - sets a lease on an open file + * vfs_setlease - sets a lease on an open file * @filp: file pointer * @arg: type of lease to obtain * @lease: file_lock to use * * Call this to establish a lease on the file. - * The fl_lmops fl_break function is required by break_lease + * The (*lease)->fl_lmops->fl_break operation must be set; if not, + * break_lease will oops! + * + * This will call the filesystem's setlease file method, if + * defined. Note that there is no getlease method; instead, the + * filesystem setlease method should call back to setlease() to + * add a lease to the inode's lease list, where fcntl_getlease() can + * find it. Since fcntl_getlease() only reports whether the current + * task holds a lease, a cluster filesystem need only do this for + * leases held by processes on this node. + * + * There is also no break_lease method; filesystems that + * handle their own leases shoud break leases themselves from the + * filesystem's open, create, and (on truncate) setattr methods. + * + * Warning: the only current setlease methods exist only to disable + * leases in certain cases. More vfs changes may be required to + * allow a full filesystem lease implementation. */ -int setlease(struct file *filp, long arg, struct file_lock **lease) +int vfs_setlease(struct file *filp, long arg, struct file_lock **lease) { - struct dentry *dentry = filp->f_path.dentry; - struct inode *inode = dentry->d_inode; int error; - if ((current->fsuid != inode->i_uid) && !capable(CAP_LEASE)) - return -EACCES; - if (!S_ISREG(inode->i_mode)) - return -EINVAL; - error = security_file_lock(filp, arg); - if (error) - return error; - lock_kernel(); - error = __setlease(filp, arg, lease); + if (filp->f_op && filp->f_op->setlease) + error = filp->f_op->setlease(filp, arg, lease); + else + error = setlease(filp, arg, lease); unlock_kernel(); return error; } - -EXPORT_SYMBOL(setlease); +EXPORT_SYMBOL_GPL(vfs_setlease); /** * fcntl_setlease - sets a lease on an open file @@ -1469,14 +1480,6 @@ int fcntl_setlease(unsigned int fd, struct file *filp, long arg) struct inode *inode = dentry->d_inode; int error; - if ((current->fsuid != inode->i_uid) && !capable(CAP_LEASE)) - return -EACCES; - if (!S_ISREG(inode->i_mode)) - return -EINVAL; - error = security_file_lock(filp, arg); - if (error) - return error; - locks_init_lock(&fl); error = lease_init(filp, arg, &fl); if (error) @@ -1484,15 +1487,15 @@ int fcntl_setlease(unsigned int fd, struct file *filp, long arg) lock_kernel(); - error = __setlease(filp, arg, &flp); + error = vfs_setlease(filp, arg, &flp); if (error || arg == F_UNLCK) goto out_unlock; error = fasync_helper(fd, filp, 1, &flp->fl_fasync); if (error < 0) { - /* remove lease just inserted by __setlease */ + /* remove lease just inserted by setlease */ flp->fl_type = F_UNLCK | F_INPROGRESS; - flp->fl_break_time = jiffies- 10; + flp->fl_break_time = jiffies - 10; time_out_leases(inode); goto out_unlock; } @@ -1597,8 +1600,7 @@ asmlinkage long sys_flock(unsigned int fd, unsigned int cmd) /** * vfs_test_lock - test file byte range lock * @filp: The file to test lock for - * @fl: The lock to test - * @conf: Place to return a copy of the conflicting lock, if found + * @fl: The lock to test; also used to hold result * * Returns -ERRNO on failure. Indicates presence of conflicting lock by * setting conf->fl_type to something other than F_UNLCK. diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 8689b736fdd9..c87dc713b5d7 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -53,6 +53,7 @@ static int nfs_fsync(struct file *, struct dentry *dentry, int datasync); static int nfs_check_flags(int flags); static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl); static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl); +static int nfs_setlease(struct file *file, long arg, struct file_lock **fl); const struct file_operations nfs_file_operations = { .llseek = nfs_file_llseek, @@ -69,6 +70,7 @@ const struct file_operations nfs_file_operations = { .flock = nfs_flock, .splice_read = nfs_file_splice_read, .check_flags = nfs_check_flags, + .setlease = nfs_setlease, }; const struct inode_operations nfs_file_inode_operations = { @@ -400,7 +402,9 @@ static int do_getlk(struct file *filp, int cmd, struct file_lock *fl) lock_kernel(); /* Try local locking first */ - if (posix_test_lock(filp, fl)) { + posix_test_lock(filp, fl); + if (fl->fl_type != F_UNLCK) { + /* found a conflict */ goto out; } @@ -558,3 +562,13 @@ static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl) return do_unlk(filp, cmd, fl); return do_setlk(filp, cmd, fl); } + +static int nfs_setlease(struct file *file, long arg, struct file_lock **fl) +{ + /* + * There is no protocol support for leases, so we have no way + * to implement them correctly in the face of opens by other + * clients. + */ + return -EINVAL; +} diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index e4a4c87ec8c6..6284807bd37e 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -256,7 +256,7 @@ nfs4_close_delegation(struct nfs4_delegation *dp) /* The following nfsd_close may not actually close the file, * but we want to remove the lease in any case. */ if (dp->dl_flock) - setlease(filp, F_UNLCK, &dp->dl_flock); + vfs_setlease(filp, F_UNLCK, &dp->dl_flock); nfsd_close(filp); } @@ -1402,7 +1402,7 @@ void nfsd_release_deleg_cb(struct file_lock *fl) /* * Set the delegation file_lock back pointer. * - * Called from __setlease() with lock_kernel() held. + * Called from setlease() with lock_kernel() held. */ static void nfsd_copy_lock_deleg_cb(struct file_lock *new, struct file_lock *fl) @@ -1416,7 +1416,7 @@ void nfsd_copy_lock_deleg_cb(struct file_lock *new, struct file_lock *fl) } /* - * Called from __setlease() with lock_kernel() held + * Called from setlease() with lock_kernel() held */ static int nfsd_same_client_deleg_cb(struct file_lock *onlist, struct file_lock *try) @@ -1716,10 +1716,10 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta fl.fl_file = stp->st_vfs_file; fl.fl_pid = current->tgid; - /* setlease checks to see if delegation should be handed out. + /* vfs_setlease checks to see if delegation should be handed out. * the lock_manager callbacks fl_mylease and fl_change are used */ - if ((status = setlease(stp->st_vfs_file, + if ((status = vfs_setlease(stp->st_vfs_file, flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK, &flp))) { dprintk("NFSD: setlease failed [%d], no delegation\n", status); unhash_delegation(dp); diff --git a/include/linux/fs.h b/include/linux/fs.h index 0b806c5e32eb..9562a59b3703 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -862,7 +862,7 @@ extern void locks_init_lock(struct file_lock *); extern void locks_copy_lock(struct file_lock *, struct file_lock *); extern void locks_remove_posix(struct file *, fl_owner_t); extern void locks_remove_flock(struct file *); -extern int posix_test_lock(struct file *, struct file_lock *); +extern void posix_test_lock(struct file *, struct file_lock *); extern int posix_lock_file(struct file *, struct file_lock *, struct file_lock *); extern int posix_lock_file_wait(struct file *, struct file_lock *); extern int posix_unblock_lock(struct file *, struct file_lock *); @@ -873,6 +873,7 @@ extern int flock_lock_file_wait(struct file *filp, struct file_lock *fl); extern int __break_lease(struct inode *inode, unsigned int flags); extern void lease_get_mtime(struct inode *, struct timespec *time); extern int setlease(struct file *, long, struct file_lock **); +extern int vfs_setlease(struct file *, long, struct file_lock **); extern int lease_modify(struct file_lock **, int); extern int lock_may_read(struct inode *, loff_t start, unsigned long count); extern int lock_may_write(struct inode *, loff_t start, unsigned long count); @@ -1122,6 +1123,7 @@ struct file_operations { int (*flock) (struct file *, int, struct file_lock *); ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int); + int (*setlease)(struct file *, long, struct file_lock **); }; struct inode_operations { diff --git a/include/linux/uio_driver.h b/include/linux/uio_driver.h new file mode 100644 index 000000000000..44c28e94df50 --- /dev/null +++ b/include/linux/uio_driver.h @@ -0,0 +1,91 @@ +/* + * include/linux/uio_driver.h + * + * Copyright(C) 2005, Benedikt Spranger <b.spranger@linutronix.de> + * Copyright(C) 2005, Thomas Gleixner <tglx@linutronix.de> + * Copyright(C) 2006, Hans J. Koch <hjk@linutronix.de> + * Copyright(C) 2006, Greg Kroah-Hartman <greg@kroah.com> + * + * Userspace IO driver. + * + * Licensed under the GPLv2 only. + */ + +#ifndef _UIO_DRIVER_H_ +#define _UIO_DRIVER_H_ + +#include <linux/module.h> +#include <linux/fs.h> +#include <linux/interrupt.h> + +/** + * struct uio_mem - description of a UIO memory region + * @kobj: kobject for this mapping + * @addr: address of the device's memory + * @size: size of IO + * @memtype: type of memory addr points to + * @internal_addr: ioremap-ped version of addr, for driver internal use + */ +struct uio_mem { + struct kobject kobj; + unsigned long addr; + unsigned long size; + int memtype; + void __iomem *internal_addr; +}; + +#define MAX_UIO_MAPS 5 + +struct uio_device; + +/** + * struct uio_info - UIO device capabilities + * @uio_dev: the UIO device this info belongs to + * @name: device name + * @version: device driver version + * @mem: list of mappable memory regions, size==0 for end of list + * @irq: interrupt number or UIO_IRQ_CUSTOM + * @irq_flags: flags for request_irq() + * @priv: optional private data + * @handler: the device's irq handler + * @mmap: mmap operation for this uio device + * @open: open operation for this uio device + * @release: release operation for this uio device + */ +struct uio_info { + struct uio_device *uio_dev; + char *name; + char *version; + struct uio_mem mem[MAX_UIO_MAPS]; + long irq; + unsigned long irq_flags; + void *priv; + irqreturn_t (*handler)(int irq, struct uio_info *dev_info); + int (*mmap)(struct uio_info *info, struct vm_area_struct *vma); + int (*open)(struct uio_info *info, struct inode *inode); + int (*release)(struct uio_info *info, struct inode *inode); +}; + +extern int __must_check + __uio_register_device(struct module *owner, + struct device *parent, + struct uio_info *info); +static inline int __must_check + uio_register_device(struct device *parent, struct uio_info *info) +{ + return __uio_register_device(THIS_MODULE, parent, info); +} +extern void uio_unregister_device(struct uio_info *info); +extern void uio_event_notify(struct uio_info *info); + +/* defines for uio_device->irq */ +#define UIO_IRQ_CUSTOM -1 +#define UIO_IRQ_NONE -2 + +/* defines for uio_device->memtype */ +#define UIO_MEM_NONE 0 +#define UIO_MEM_PHYS 1 +#define UIO_MEM_LOGICAL 2 +#define UIO_MEM_VIRTUAL 3 + +#endif /* _LINUX_UIO_DRIVER_H_ */ |