diff options
627 files changed, 33958 insertions, 16343 deletions
diff --git a/Documentation/CodingStyle b/Documentation/CodingStyle index a667eb1fc26e..7f1730f1a1ae 100644 --- a/Documentation/CodingStyle +++ b/Documentation/CodingStyle @@ -633,12 +633,27 @@ covers RTL which is used frequently with assembly language in the kernel. Kernel developers like to be seen as literate. Do mind the spelling of kernel messages to make a good impression. Do not use crippled -words like "dont" and use "do not" or "don't" instead. +words like "dont"; use "do not" or "don't" instead. Make the messages +concise, clear, and unambiguous. Kernel messages do not have to be terminated with a period. Printing numbers in parentheses (%d) adds no value and should be avoided. +There are a number of driver model diagnostic macros in <linux/device.h> +which you should use to make sure messages are matched to the right device +and driver, and are tagged with the right level: dev_err(), dev_warn(), +dev_info(), and so forth. For messages that aren't associated with a +particular device, <linux/kernel.h> defines pr_debug() and pr_info(). + +Coming up with good debugging messages can be quite a challenge; and once +you have them, they can be a huge help for remote troubleshooting. Such +messages should be compiled out when the DEBUG symbol is not defined (that +is, by default they are not included). When you use dev_dbg() or pr_debug(), +that's automatic. Many subsystems have Kconfig options to turn on -DDEBUG. +A related convention uses VERBOSE_DEBUG to add dev_vdbg() messages to the +ones already enabled by DEBUG. + Chapter 14: Allocating memory @@ -790,4 +805,5 @@ Kernel CodingStyle, by greg@kroah.com at OLS 2002: http://www.kroah.com/linux/talks/ols_2002_kernel_codingstyle_talk/html/ -- -Last updated on 2006-December-06. +Last updated on 2007-July-13. + diff --git a/Documentation/DocBook/Makefile b/Documentation/DocBook/Makefile index 6fd1646d3204..08687e45e19d 100644 --- a/Documentation/DocBook/Makefile +++ b/Documentation/DocBook/Makefile @@ -15,11 +15,11 @@ DOCBOOKS := wanbook.xml z8530book.xml mcabook.xml videobook.xml \ ### # The build process is as follows (targets): -# (xmldocs) -# file.tmpl --> file.xml +--> file.ps (psdocs) -# +--> file.pdf (pdfdocs) -# +--> DIR=file (htmldocs) -# +--> man/ (mandocs) +# (xmldocs) [by docproc] +# file.tmpl --> file.xml +--> file.ps (psdocs) [by db2ps or xmlto] +# +--> file.pdf (pdfdocs) [by db2pdf or xmlto] +# +--> DIR=file (htmldocs) [by xmlto] +# +--> man/ (mandocs) [by xmlto] # for PDF and PS output you can choose between xmlto and docbook-utils tools diff --git a/Documentation/DocBook/kernel-api.tmpl b/Documentation/DocBook/kernel-api.tmpl index fd2ef4d29b6d..eb42bf9847cb 100644 --- a/Documentation/DocBook/kernel-api.tmpl +++ b/Documentation/DocBook/kernel-api.tmpl @@ -159,7 +159,6 @@ X!Ilib/string.c !Earch/i386/lib/usercopy.c </sect1> <sect1><title>More Memory Management Functions</title> -!Iinclude/linux/rmap.h !Emm/readahead.c !Emm/filemap.c !Emm/memory.c @@ -408,6 +407,10 @@ X!Edrivers/pnp/system.c !Edrivers/pnp/manager.c !Edrivers/pnp/support.c </sect1> + <sect1><title>Userspace IO devices</title> +!Edrivers/uio/uio.c +!Iinclude/linux/uio_driver.h + </sect1> </chapter> <chapter id="blkdev"> diff --git a/Documentation/DocBook/uio-howto.tmpl b/Documentation/DocBook/uio-howto.tmpl new file mode 100644 index 000000000000..e3bb29a8d8dd --- /dev/null +++ b/Documentation/DocBook/uio-howto.tmpl @@ -0,0 +1,611 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN" +"http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd" []> + +<book id="index"> +<bookinfo> +<title>The Userspace I/O HOWTO</title> + +<author> + <firstname>Hans-Jürgen</firstname> + <surname>Koch</surname> + <authorblurb><para>Linux developer, Linutronix</para></authorblurb> + <affiliation> + <orgname> + <ulink url="http://www.linutronix.de">Linutronix</ulink> + </orgname> + + <address> + <email>hjk@linutronix.de</email> + </address> + </affiliation> +</author> + +<pubdate>2006-12-11</pubdate> + +<abstract> + <para>This HOWTO describes concept and usage of Linux kernel's + Userspace I/O system.</para> +</abstract> + +<revhistory> + <revision> + <revnumber>0.3</revnumber> + <date>2007-04-29</date> + <authorinitials>hjk</authorinitials> + <revremark>Added section about userspace drivers.</revremark> + </revision> + <revision> + <revnumber>0.2</revnumber> + <date>2007-02-13</date> + <authorinitials>hjk</authorinitials> + <revremark>Update after multiple mappings were added.</revremark> + </revision> + <revision> + <revnumber>0.1</revnumber> + <date>2006-12-11</date> + <authorinitials>hjk</authorinitials> + <revremark>First draft.</revremark> + </revision> +</revhistory> +</bookinfo> + +<chapter id="aboutthisdoc"> +<?dbhtml filename="about.html"?> +<title>About this document</title> + +<sect1 id="copyright"> +<?dbhtml filename="copyright.html"?> +<title>Copyright and License</title> +<para> + Copyright (c) 2006 by Hans-Jürgen Koch.</para> +<para> +This documentation is Free Software licensed under the terms of the +GPL version 2. +</para> +</sect1> + +<sect1 id="translations"> +<?dbhtml filename="translations.html"?> +<title>Translations</title> + +<para>If you know of any translations for this document, or you are +interested in translating it, please email me +<email>hjk@linutronix.de</email>. +</para> +</sect1> + +<sect1 id="preface"> +<title>Preface</title> + <para> + For many types of devices, creating a Linux kernel driver is + overkill. All that is really needed is some way to handle an + interrupt and provide access to the memory space of the + device. The logic of controlling the device does not + necessarily have to be within the kernel, as the device does + not need to take advantage of any of other resources that the + kernel provides. One such common class of devices that are + like this are for industrial I/O cards. + </para> + <para> + To address this situation, the userspace I/O system (UIO) was + designed. For typical industrial I/O cards, only a very small + kernel module is needed. The main part of the driver will run in + user space. This simplifies development and reduces the risk of + serious bugs within a kernel module. + </para> +</sect1> + +<sect1 id="thanks"> +<title>Acknowledgments</title> + <para>I'd like to thank Thomas Gleixner and Benedikt Spranger of + Linutronix, who have not only written most of the UIO code, but also + helped greatly writing this HOWTO by giving me all kinds of background + information.</para> +</sect1> + +<sect1 id="feedback"> +<title>Feedback</title> + <para>Find something wrong with this document? (Or perhaps something + right?) I would love to hear from you. Please email me at + <email>hjk@linutronix.de</email>.</para> +</sect1> +</chapter> + +<chapter id="about"> +<?dbhtml filename="about.html"?> +<title>About UIO</title> + +<para>If you use UIO for your card's driver, here's what you get:</para> + +<itemizedlist> +<listitem> + <para>only one small kernel module to write and maintain.</para> +</listitem> +<listitem> + <para>develop the main part of your driver in user space, + with all the tools and libraries you're used to.</para> +</listitem> +<listitem> + <para>bugs in your driver won't crash the kernel.</para> +</listitem> +<listitem> + <para>updates of your driver can take place without recompiling + the kernel.</para> +</listitem> +<listitem> + <para>if you need to keep some parts of your driver closed source, + you can do so without violating the GPL license on the kernel.</para> +</listitem> +</itemizedlist> + +<sect1 id="how_uio_works"> +<title>How UIO works</title> + <para> + Each UIO device is accessed through a device file and several + sysfs attribute files. The device file will be called + <filename>/dev/uio0</filename> for the first device, and + <filename>/dev/uio1</filename>, <filename>/dev/uio2</filename> + and so on for subsequent devices. + </para> + + <para><filename>/dev/uioX</filename> is used to access the + address space of the card. Just use + <function>mmap()</function> to access registers or RAM + locations of your card. + </para> + + <para> + Interrupts are handled by reading from + <filename>/dev/uioX</filename>. A blocking + <function>read()</function> from + <filename>/dev/uioX</filename> will return as soon as an + interrupt occurs. You can also use + <function>select()</function> on + <filename>/dev/uioX</filename> to wait for an interrupt. The + integer value read from <filename>/dev/uioX</filename> + represents the total interrupt count. You can use this number + to figure out if you missed some interrupts. + </para> + + <para> + To handle interrupts properly, your custom kernel module can + provide its own interrupt handler. It will automatically be + called by the built-in handler. + </para> + + <para> + For cards that don't generate interrupts but need to be + polled, there is the possibility to set up a timer that + triggers the interrupt handler at configurable time intervals. + See <filename>drivers/uio/uio_dummy.c</filename> for an + example of this technique. + </para> + + <para> + Each driver provides attributes that are used to read or write + variables. These attributes are accessible through sysfs + files. A custom kernel driver module can add its own + attributes to the device owned by the uio driver, but not added + to the UIO device itself at this time. This might change in the + future if it would be found to be useful. + </para> + + <para> + The following standard attributes are provided by the UIO + framework: + </para> +<itemizedlist> +<listitem> + <para> + <filename>name</filename>: The name of your device. It is + recommended to use the name of your kernel module for this. + </para> +</listitem> +<listitem> + <para> + <filename>version</filename>: A version string defined by your + driver. This allows the user space part of your driver to deal + with different versions of the kernel module. + </para> +</listitem> +<listitem> + <para> + <filename>event</filename>: The total number of interrupts + handled by the driver since the last time the device node was + read. + </para> +</listitem> +</itemizedlist> +<para> + These attributes appear under the + <filename>/sys/class/uio/uioX</filename> directory. Please + note that this directory might be a symlink, and not a real + directory. Any userspace code that accesses it must be able + to handle this. +</para> +<para> + Each UIO device can make one or more memory regions available for + memory mapping. This is necessary because some industrial I/O cards + require access to more than one PCI memory region in a driver. +</para> +<para> + Each mapping has its own directory in sysfs, the first mapping + appears as <filename>/sys/class/uio/uioX/maps/map0/</filename>. + Subsequent mappings create directories <filename>map1/</filename>, + <filename>map2/</filename>, and so on. These directories will only + appear if the size of the mapping is not 0. +</para> +<para> + Each <filename>mapX/</filename> directory contains two read-only files + that show start address and size of the memory: +</para> +<itemizedlist> +<listitem> + <para> + <filename>addr</filename>: The address of memory that can be mapped. + </para> +</listitem> +<listitem> + <para> + <filename>size</filename>: The size, in bytes, of the memory + pointed to by addr. + </para> +</listitem> +</itemizedlist> + +<para> + From userspace, the different mappings are distinguished by adjusting + the <varname>offset</varname> parameter of the + <function>mmap()</function> call. To map the memory of mapping N, you + have to use N times the page size as your offset: +</para> +<programlisting format="linespecific"> +offset = N * getpagesize(); +</programlisting> + +</sect1> +</chapter> + +<chapter id="using-uio_dummy" xreflabel="Using uio_dummy"> +<?dbhtml filename="using-uio_dummy.html"?> +<title>Using uio_dummy</title> + <para> + Well, there is no real use for uio_dummy. Its only purpose is + to test most parts of the UIO system (everything except + hardware interrupts), and to serve as an example for the + kernel module that you will have to write yourself. + </para> + +<sect1 id="what_uio_dummy_does"> +<title>What uio_dummy does</title> + <para> + The kernel module <filename>uio_dummy.ko</filename> creates a + device that uses a timer to generate periodic interrupts. The + interrupt handler does nothing but increment a counter. The + driver adds two custom attributes, <varname>count</varname> + and <varname>freq</varname>, that appear under + <filename>/sys/devices/platform/uio_dummy/</filename>. + </para> + + <para> + The attribute <varname>count</varname> can be read and + written. The associated file + <filename>/sys/devices/platform/uio_dummy/count</filename> + appears as a normal text file and contains the total number of + timer interrupts. If you look at it (e.g. using + <function>cat</function>), you'll notice it is slowly counting + up. + </para> + + <para> + The attribute <varname>freq</varname> can be read and written. + The content of + <filename>/sys/devices/platform/uio_dummy/freq</filename> + represents the number of system timer ticks between two timer + interrupts. The default value of <varname>freq</varname> is + the value of the kernel variable <varname>HZ</varname>, which + gives you an interval of one second. Lower values will + increase the frequency. Try the following: + </para> +<programlisting format="linespecific"> +cd /sys/devices/platform/uio_dummy/ +echo 100 > freq +</programlisting> + <para> + Use <function>cat count</function> to see how the interrupt + frequency changes. + </para> +</sect1> +</chapter> + +<chapter id="custom_kernel_module" xreflabel="Writing your own kernel module"> +<?dbhtml filename="custom_kernel_module.html"?> +<title>Writing your own kernel module</title> + <para> + Please have a look at <filename>uio_dummy.c</filename> as an + example. The following paragraphs explain the different + sections of this file. + </para> + +<sect1 id="uio_info"> +<title>struct uio_info</title> + <para> + This structure tells the framework the details of your driver, + Some of the members are required, others are optional. + </para> + +<itemizedlist> +<listitem><para> +<varname>char *name</varname>: Required. The name of your driver as +it will appear in sysfs. I recommend using the name of your module for this. +</para></listitem> + +<listitem><para> +<varname>char *version</varname>: Required. This string appears in +<filename>/sys/class/uio/uioX/version</filename>. +</para></listitem> + +<listitem><para> +<varname>struct uio_mem mem[ MAX_UIO_MAPS ]</varname>: Required if you +have memory that can be mapped with <function>mmap()</function>. For each +mapping you need to fill one of the <varname>uio_mem</varname> structures. +See the description below for details. +</para></listitem> + +<listitem><para> +<varname>long irq</varname>: Required. If your hardware generates an +interrupt, it's your modules task to determine the irq number during +initialization. If you don't have a hardware generated interrupt but +want to trigger the interrupt handler in some other way, set +<varname>irq</varname> to <varname>UIO_IRQ_CUSTOM</varname>. The +uio_dummy module does this as it triggers the event mechanism in a timer +routine. If you had no interrupt at all, you could set +<varname>irq</varname> to <varname>UIO_IRQ_NONE</varname>, though this +rarely makes sense. +</para></listitem> + +<listitem><para> +<varname>unsigned long irq_flags</varname>: Required if you've set +<varname>irq</varname> to a hardware interrupt number. The flags given +here will be used in the call to <function>request_irq()</function>. +</para></listitem> + +<listitem><para> +<varname>int (*mmap)(struct uio_info *info, struct vm_area_struct +*vma)</varname>: Optional. If you need a special +<function>mmap()</function> function, you can set it here. If this +pointer is not NULL, your <function>mmap()</function> will be called +instead of the built-in one. +</para></listitem> + +<listitem><para> +<varname>int (*open)(struct uio_info *info, struct inode *inode) +</varname>: Optional. You might want to have your own +<function>open()</function>, e.g. to enable interrupts only when your +device is actually used. +</para></listitem> + +<listitem><para> +<varname>int (*release)(struct uio_info *info, struct inode *inode) +</varname>: Optional. If you define your own +<function>open()</function>, you will probably also want a custom +<function>release()</function> function. +</para></listitem> +</itemizedlist> + +<para> +Usually, your device will have one or more memory regions that can be mapped +to user space. For each region, you have to set up a +<varname>struct uio_mem</varname> in the <varname>mem[]</varname> array. +Here's a description of the fields of <varname>struct uio_mem</varname>: +</para> + +<itemizedlist> +<listitem><para> +<varname>int memtype</varname>: Required if the mapping is used. Set this to +<varname>UIO_MEM_PHYS</varname> if you you have physical memory on your +card to be mapped. Use <varname>UIO_MEM_LOGICAL</varname> for logical +memory (e.g. allocated with <function>kmalloc()</function>). There's also +<varname>UIO_MEM_VIRTUAL</varname> for virtual memory. +</para></listitem> + +<listitem><para> +<varname>unsigned long addr</varname>: Required if the mapping is used. +Fill in the address of your memory block. This address is the one that +appears in sysfs. +</para></listitem> + +<listitem><para> +<varname>unsigned long size</varname>: Fill in the size of the +memory block that <varname>addr</varname> points to. If <varname>size</varname> +is zero, the mapping is considered unused. Note that you +<emphasis>must</emphasis> initialize <varname>size</varname> with zero for +all unused mappings. +</para></listitem> + +<listitem><para> +<varname>void *internal_addr</varname>: If you have to access this memory +region from within your kernel module, you will want to map it internally by +using something like <function>ioremap()</function>. Addresses +returned by this function cannot be mapped to user space, so you must not +store it in <varname>addr</varname>. Use <varname>internal_addr</varname> +instead to remember such an address. +</para></listitem> +</itemizedlist> + +<para> +Please do not touch the <varname>kobj</varname> element of +<varname>struct uio_mem</varname>! It is used by the UIO framework +to set up sysfs files for this mapping. Simply leave it alone. +</para> +</sect1> + +<sect1 id="adding_irq_handler"> +<title>Adding an interrupt handler</title> + <para> + What you need to do in your interrupt handler depends on your + hardware and on how you want to handle it. You should try to + keep the amount of code in your kernel interrupt handler low. + If your hardware requires no action that you + <emphasis>have</emphasis> to perform after each interrupt, + then your handler can be empty.</para> <para>If, on the other + hand, your hardware <emphasis>needs</emphasis> some action to + be performed after each interrupt, then you + <emphasis>must</emphasis> do it in your kernel module. Note + that you cannot rely on the userspace part of your driver. Your + userspace program can terminate at any time, possibly leaving + your hardware in a state where proper interrupt handling is + still required. + </para> + + <para> + There might also be applications where you want to read data + from your hardware at each interrupt and buffer it in a piece + of kernel memory you've allocated for that purpose. With this + technique you could avoid loss of data if your userspace + program misses an interrupt. + </para> + + <para> + A note on shared interrupts: Your driver should support + interrupt sharing whenever this is possible. It is possible if + and only if your driver can detect whether your hardware has + triggered the interrupt or not. This is usually done by looking + at an interrupt status register. If your driver sees that the + IRQ bit is actually set, it will perform its actions, and the + handler returns IRQ_HANDLED. If the driver detects that it was + not your hardware that caused the interrupt, it will do nothing + and return IRQ_NONE, allowing the kernel to call the next + possible interrupt handler. + </para> + + <para> + If you decide not to support shared interrupts, your card + won't work in computers with no free interrupts. As this + frequently happens on the PC platform, you can save yourself a + lot of trouble by supporting interrupt sharing. + </para> +</sect1> + +</chapter> + +<chapter id="userspace_driver" xreflabel="Writing a driver in user space"> +<?dbhtml filename="userspace_driver.html"?> +<title>Writing a driver in userspace</title> + <para> + Once you have a working kernel module for your hardware, you can + write the userspace part of your driver. You don't need any special + libraries, your driver can be written in any reasonable language, + you can use floating point numbers and so on. In short, you can + use all the tools and libraries you'd normally use for writing a + userspace application. + </para> + +<sect1 id="getting_uio_information"> +<title>Getting information about your UIO device</title> + <para> + Information about all UIO devices is available in sysfs. The + first thing you should do in your driver is check + <varname>name</varname> and <varname>version</varname> to + make sure your talking to the right device and that its kernel + driver has the version you expect. + </para> + <para> + You should also make sure that the memory mapping you need + exists and has the size you expect. + </para> + <para> + There is a tool called <varname>lsuio</varname> that lists + UIO devices and their attributes. It is available here: + </para> + <para> + <ulink url="http://www.osadl.org/projects/downloads/UIO/user/"> + http://www.osadl.org/projects/downloads/UIO/user/</ulink> + </para> + <para> + With <varname>lsuio</varname> you can quickly check if your + kernel module is loaded and which attributes it exports. + Have a look at the manpage for details. + </para> + <para> + The source code of <varname>lsuio</varname> can serve as an + example for getting information about an UIO device. + The file <filename>uio_helper.c</filename> contains a lot of + functions you could use in your userspace driver code. + </para> +</sect1> + +<sect1 id="mmap_device_memory"> +<title>mmap() device memory</title> + <para> + After you made sure you've got the right device with the + memory mappings you need, all you have to do is to call + <function>mmap()</function> to map the device's memory + to userspace. + </para> + <para> + The parameter <varname>offset</varname> of the + <function>mmap()</function> call has a special meaning + for UIO devices: It is used to select which mapping of + your device you want to map. To map the memory of + mapping N, you have to use N times the page size as + your offset: + </para> +<programlisting format="linespecific"> + offset = N * getpagesize(); +</programlisting> + <para> + N starts from zero, so if you've got only one memory + range to map, set <varname>offset = 0</varname>. + A drawback of this technique is that memory is always + mapped beginning with its start address. + </para> +</sect1> + +<sect1 id="wait_for_interrupts"> +<title>Waiting for interrupts</title> + <para> + After you successfully mapped your devices memory, you + can access it like an ordinary array. Usually, you will + perform some initialization. After that, your hardware + starts working and will generate an interrupt as soon + as it's finished, has some data available, or needs your + attention because an error occured. + </para> + <para> + <filename>/dev/uioX</filename> is a read-only file. A + <function>read()</function> will always block until an + interrupt occurs. There is only one legal value for the + <varname>count</varname> parameter of + <function>read()</function>, and that is the size of a + signed 32 bit integer (4). Any other value for + <varname>count</varname> causes <function>read()</function> + to fail. The signed 32 bit integer read is the interrupt + count of your device. If the value is one more than the value + you read the last time, everything is OK. If the difference + is greater than one, you missed interrupts. + </para> + <para> + You can also use <function>select()</function> on + <filename>/dev/uioX</filename>. + </para> +</sect1> + +</chapter> + +<appendix id="app1"> +<title>Further information</title> +<itemizedlist> + <listitem><para> + <ulink url="http://www.osadl.org"> + OSADL homepage.</ulink> + </para></listitem> + <listitem><para> + <ulink url="http://www.linutronix.de"> + Linutronix homepage.</ulink> + </para></listitem> +</itemizedlist> +</appendix> + +</book> diff --git a/Documentation/HOWTO b/Documentation/HOWTO index 98e2701c746f..f8cc3f8ed152 100644 --- a/Documentation/HOWTO +++ b/Documentation/HOWTO @@ -249,6 +249,9 @@ process is as follows: release a new -rc kernel every week. - Process continues until the kernel is considered "ready", the process should last around 6 weeks. + - A list of known regressions present in each -rc release is + tracked at the following URI: + http://kernelnewbies.org/known_regressions It is worth mentioning what Andrew Morton wrote on the linux-kernel mailing list about kernel releases: diff --git a/Documentation/connector/cn_test.c b/Documentation/connector/cn_test.c index 3e73231695b3..be7af146dd30 100644 --- a/Documentation/connector/cn_test.c +++ b/Documentation/connector/cn_test.c @@ -124,9 +124,8 @@ static void cn_test_timer_func(unsigned long __data) struct cn_msg *m; char data[32]; - m = kmalloc(sizeof(*m) + sizeof(data), GFP_ATOMIC); + m = kzalloc(sizeof(*m) + sizeof(data), GFP_ATOMIC); if (m) { - memset(m, 0, sizeof(*m) + sizeof(data)); memcpy(&m->id, &cn_test_id, sizeof(m->id)); m->seq = cn_test_timer_counter; diff --git a/Documentation/console/console.txt b/Documentation/console/console.txt index d3e17447321c..877a1b26cc3d 100644 --- a/Documentation/console/console.txt +++ b/Documentation/console/console.txt @@ -29,7 +29,7 @@ In newer kernels, the following are also available: If sysfs is enabled, the contents of /sys/class/vtconsole can be examined. This shows the console backends currently registered by the -system which are named vtcon<n> where <n> is an integer fro 0 to 15. Thus: +system which are named vtcon<n> where <n> is an integer from 0 to 15. Thus: ls /sys/class/vtconsole . .. vtcon0 vtcon1 diff --git a/Documentation/driver-model/devres.txt b/Documentation/driver-model/devres.txt index 6c8d8f27db34..8569072fa387 100644 --- a/Documentation/driver-model/devres.txt +++ b/Documentation/driver-model/devres.txt @@ -207,7 +207,7 @@ responsibility. This is usually non-issue because bus ops and resource allocations already do the job. For an example of single-instance devres type, read pcim_iomap_table() -in lib/iomap.c. +in lib/devres.c. All devres interface functions can be called without context if the right gfp mask is given. diff --git a/Documentation/drivers/edac/edac.txt b/Documentation/drivers/edac/edac.txt index 3c5a9e4297b4..a5c36842ecef 100644 --- a/Documentation/drivers/edac/edac.txt +++ b/Documentation/drivers/edac/edac.txt @@ -2,22 +2,42 @@ EDAC - Error Detection And Correction -Written by Doug Thompson <norsk5@xmission.com> +Written by Doug Thompson <dougthompson@xmission.com> 7 Dec 2005 +17 Jul 2007 Updated -EDAC was written by: - Thayne Harbaugh, - modified by Dave Peterson, Doug Thompson, et al, - from the bluesmoke.sourceforge.net project. +EDAC is maintained and written by: + Doug Thompson, Dave Jiang, Dave Peterson et al, + original author: Thayne Harbaugh, + +Contact: + website: bluesmoke.sourceforge.net + mailing list: bluesmoke-devel@lists.sourceforge.net + +"bluesmoke" was the name for this device driver when it was "out-of-tree" +and maintained at sourceforge.net. When it was pushed into 2.6.16 for the +first time, it was renamed to 'EDAC'. + +The bluesmoke project at sourceforge.net is now utilized as a 'staging area' +for EDAC development, before it is sent upstream to kernel.org + +At the bluesmoke/EDAC project site, is a series of quilt patches against +recent kernels, stored in a SVN respository. For easier downloading, there +is also a tarball snapshot available. ============================================================================ EDAC PURPOSE The 'edac' kernel module goal is to detect and report errors that occur -within the computer system. In the initial release, memory Correctable Errors -(CE) and Uncorrectable Errors (UE) are the primary errors being harvested. +within the computer system running under linux. + +MEMORY + +In the initial release, memory Correctable Errors (CE) and Uncorrectable +Errors (UE) are the primary errors being harvested. These types of errors +are harvested by the 'edac_mc' class of device. Detecting CE events, then harvesting those events and reporting them, CAN be a predictor of future UE events. With CE events, the system can @@ -25,9 +45,27 @@ continue to operate, but with less safety. Preventive maintenance and proactive part replacement of memory DIMMs exhibiting CEs can reduce the likelihood of the dreaded UE events and system 'panics'. +NON-MEMORY + +A new feature for EDAC, the edac_device class of device, was added in +the 2.6.23 version of the kernel. + +This new device type allows for non-memory type of ECC hardware detectors +to have their states harvested and presented to userspace via the sysfs +interface. + +Some architectures have ECC detectors for L1, L2 and L3 caches, along with DMA +engines, fabric switches, main data path switches, interconnections, +and various other hardware data paths. If the hardware reports it, then +a edac_device device probably can be constructed to harvest and present +that to userspace. + + +PCI BUS SCANNING In addition, PCI Bus Parity and SERR Errors are scanned for on PCI devices in order to determine if errors are occurring on data transfers. + The presence of PCI Parity errors must be examined with a grain of salt. There are several add-in adapters that do NOT follow the PCI specification with regards to Parity generation and reporting. The specification says @@ -35,11 +73,17 @@ the vendor should tie the parity status bits to 0 if they do not intend to generate parity. Some vendors do not do this, and thus the parity bit can "float" giving false positives. -[There are patches in the kernel queue which will allow for storage of -quirks of PCI devices reporting false parity positives. The 2.6.18 -kernel should have those patches included. When that becomes available, -then EDAC will be patched to utilize that information to "skip" such -devices.] +In the kernel there is a pci device attribute located in sysfs that is +checked by the EDAC PCI scanning code. If that attribute is set, +PCI parity/error scannining is skipped for that device. The attribute +is: + + broken_parity_status + +as is located in /sys/devices/pci<XXX>/0000:XX:YY.Z directorys for +PCI devices. + +FUTURE HARDWARE SCANNING EDAC will have future error detectors that will be integrated with EDAC or added to it, in the following list: @@ -57,13 +101,14 @@ and the like. ============================================================================ EDAC VERSIONING -EDAC is composed of a "core" module (edac_mc.ko) and several Memory +EDAC is composed of a "core" module (edac_core.ko) and several Memory Controller (MC) driver modules. On a given system, the CORE is loaded and one MC driver will be loaded. Both the CORE and -the MC driver have individual versions that reflect current release -level of their respective modules. Thus, to "report" on what version -a system is running, one must report both the CORE's and the -MC driver's versions. +the MC driver (or edac_device driver) have individual versions that reflect +current release level of their respective modules. + +Thus, to "report" on what version a system is running, one must report both +the CORE's and the MC driver's versions. LOADING @@ -88,8 +133,9 @@ EDAC sysfs INTERFACE EDAC presents a 'sysfs' interface for control, reporting and attribute reporting purposes. -EDAC lives in the /sys/devices/system/edac directory. Within this directory -there currently reside 2 'edac' components: +EDAC lives in the /sys/devices/system/edac directory. + +Within this directory there currently reside 2 'edac' components: mc memory controller(s) system pci PCI control and status system @@ -188,7 +234,7 @@ In directory 'mc' are EDAC system overall control and attribute files: Panic on UE control file: - 'panic_on_ue' + 'edac_mc_panic_on_ue' An uncorrectable error will cause a machine panic. This is usually desirable. It is a bad idea to continue when an uncorrectable error @@ -199,12 +245,12 @@ Panic on UE control file: LOAD TIME: module/kernel parameter: panic_on_ue=[0|1] - RUN TIME: echo "1" >/sys/devices/system/edac/mc/panic_on_ue + RUN TIME: echo "1" >/sys/devices/system/edac/mc/edac_mc_panic_on_ue Log UE control file: - 'log_ue' + 'edac_mc_log_ue' Generate kernel messages describing uncorrectable errors. These errors are reported through the system message log system. UE statistics @@ -212,12 +258,12 @@ Log UE control file: LOAD TIME: module/kernel parameter: log_ue=[0|1] - RUN TIME: echo "1" >/sys/devices/system/edac/mc/log_ue + RUN TIME: echo "1" >/sys/devices/system/edac/mc/edac_mc_log_ue Log CE control file: - 'log_ce' + 'edac_mc_log_ce' Generate kernel messages describing correctable errors. These errors are reported through the system message log system. @@ -225,12 +271,12 @@ Log CE control file: LOAD TIME: module/kernel parameter: log_ce=[0|1] - RUN TIME: echo "1" >/sys/devices/system/edac/mc/log_ce + RUN TIME: echo "1" >/sys/devices/system/edac/mc/edac_mc_log_ce Polling period control file: - 'poll_msec' + 'edac_mc_poll_msec' The time period, in milliseconds, for polling for error information. Too small a value wastes resources. Too large a value might delay @@ -241,7 +287,7 @@ Polling period control file: LOAD TIME: module/kernel parameter: poll_msec=[0|1] - RUN TIME: echo "1000" >/sys/devices/system/edac/mc/poll_msec + RUN TIME: echo "1000" >/sys/devices/system/edac/mc/edac_mc_poll_msec ============================================================================ @@ -587,3 +633,95 @@ Parity Count: ======================================================================= + + +EDAC_DEVICE type of device + +In the header file, edac_core.h, there is a series of edac_device structures +and APIs for the EDAC_DEVICE. + +User space access to an edac_device is through the sysfs interface. + +At the location /sys/devices/system/edac (sysfs) new edac_device devices will +appear. + +There is a three level tree beneath the above 'edac' directory. For example, +the 'test_device_edac' device (found at the bluesmoke.sourceforget.net website) +installs itself as: + + /sys/devices/systm/edac/test-instance + +in this directory are various controls, a symlink and one or more 'instance' +directorys. + +The standard default controls are: + + log_ce boolean to log CE events + log_ue boolean to log UE events + panic_on_ue boolean to 'panic' the system if an UE is encountered + (default off, can be set true via startup script) + poll_msec time period between POLL cycles for events + +The test_device_edac device adds at least one of its own custom control: + + test_bits which in the current test driver does nothing but + show how it is installed. A ported driver can + add one or more such controls and/or attributes + for specific uses. + One out-of-tree driver uses controls here to allow + for ERROR INJECTION operations to hardware + injection registers + +The symlink points to the 'struct dev' that is registered for this edac_device. + +INSTANCES + +One or more instance directories are present. For the 'test_device_edac' case: + + test-instance0 + + +In this directory there are two default counter attributes, which are totals of +counter in deeper subdirectories. + + ce_count total of CE events of subdirectories + ue_count total of UE events of subdirectories + +BLOCKS + +At the lowest directory level is the 'block' directory. There can be 0, 1 +or more blocks specified in each instance. + + test-block0 + + +In this directory the default attributes are: + + ce_count which is counter of CE events for this 'block' + of hardware being monitored + ue_count which is counter of UE events for this 'block' + of hardware being monitored + + +The 'test_device_edac' device adds 4 attributes and 1 control: + + test-block-bits-0 for every POLL cycle this counter + is incremented + test-block-bits-1 every 10 cycles, this counter is bumped once, + and test-block-bits-0 is set to 0 + test-block-bits-2 every 100 cycles, this counter is bumped once, + and test-block-bits-1 is set to 0 + test-block-bits-3 every 1000 cycles, this counter is bumped once, + and test-block-bits-2 is set to 0 + + + reset-counters writing ANY thing to this control will + reset all the above counters. + + +Use of the 'test_device_edac' driver should any others to create their own +unique drivers for their hardware systems. + +The 'test_device_edac' sample driver is located at the +bluesmoke.sourceforge.net project site for EDAC. + diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index d05e6243b4df..a9941544ed8e 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -26,9 +26,7 @@ Who: Hans Verkuil <hverkuil@xs4all.nl> and --------------------------- -What: /sys/devices/.../power/state - dev->power.power_state - dpm_runtime_{suspend,resume)() +What: dev->power.power_state When: July 2007 Why: Broken design for runtime control over driver power states, confusing driver-internal runtime power management with: mechanisms to support @@ -53,6 +51,7 @@ Who: David Miller <davem@davemloft.net> What: Video4Linux API 1 ioctls and video_decoder.h from Video devices. When: December 2006 Files: include/linux/video_decoder.h +Check: include/linux/video_decoder.h Why: V4L1 AP1 was replaced by V4L2 API. during migration from 2.4 to 2.6 series. The old API have lots of drawbacks and don't provide enough means to work with all video and audio standards. The newer API is @@ -86,7 +85,7 @@ Who: Dominik Brodowski <linux@brodo.de> What: remove EXPORT_SYMBOL(kernel_thread) When: August 2006 Files: arch/*/kernel/*_ksyms.c -Funcs: kernel_thread +Check: kernel_thread Why: kernel_thread is a low-level implementation detail. Drivers should use the <linux/kthread.h> API instead which shields them from implementation details and provides a higherlevel interface that @@ -137,6 +136,15 @@ Who: Greg Kroah-Hartman <gregkh@suse.de> --------------------------- +What: vm_ops.nopage +When: Soon, provided in-kernel callers have been converted +Why: This interface is replaced by vm_ops.fault, but it has been around + forever, is used by a lot of drivers, and doesn't cost much to + maintain. +Who: Nick Piggin <npiggin@suse.de> + +--------------------------- + What: Interrupt only SA_* flags When: September 2007 Why: The interrupt related SA_* flags are replaced by IRQF_* to move them @@ -310,3 +318,13 @@ Why: The arch/powerpc tree is the merged architecture for ppc32 and ppc64 Who: linuxppc-dev@ozlabs.org --------------------------- + +What: mthca driver's MSI support +When: January 2008 +Files: drivers/infiniband/hw/mthca/*.[ch] +Why: All mthca hardware also supports MSI-X, which provides + strictly more functionality than MSI. So there is no point in + having both MSI-X and MSI support in the driver. +Who: Roland Dreier <rolandd@cisco.com> + +--------------------------- diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index d866551be037..f0f825808ca4 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -510,13 +510,24 @@ More details about quota locking can be found in fs/dquot.c. prototypes: void (*open)(struct vm_area_struct*); void (*close)(struct vm_area_struct*); + int (*fault)(struct vm_area_struct*, struct vm_fault *); struct page *(*nopage)(struct vm_area_struct*, unsigned long, int *); + int (*page_mkwrite)(struct vm_area_struct *, struct page *); locking rules: - BKL mmap_sem + BKL mmap_sem PageLocked(page) open: no yes close: no yes +fault: no yes nopage: no yes +page_mkwrite: no yes no + + ->page_mkwrite() is called when a previously read-only page is +about to become writeable. The file system is responsible for +protecting against truncate races. Once appropriate action has been +taking to lock out truncate, the page range should be verified to be +within i_size. The page mapping should also be checked that it is not +NULL. ================================================================================ Dubious stuff diff --git a/Documentation/filesystems/configfs/configfs_example.c b/Documentation/filesystems/configfs/configfs_example.c index e56d49264b39..25151fd5c2c6 100644 --- a/Documentation/filesystems/configfs/configfs_example.c +++ b/Documentation/filesystems/configfs/configfs_example.c @@ -277,11 +277,10 @@ static struct config_item *simple_children_make_item(struct config_group *group, { struct simple_child *simple_child; - simple_child = kmalloc(sizeof(struct simple_child), GFP_KERNEL); + simple_child = kzalloc(sizeof(struct simple_child), GFP_KERNEL); if (!simple_child) return NULL; - memset(simple_child, 0, sizeof(struct simple_child)); config_item_init_type_name(&simple_child->item, name, &simple_child_type); @@ -364,12 +363,11 @@ static struct config_group *group_children_make_group(struct config_group *group { struct simple_children *simple_children; - simple_children = kmalloc(sizeof(struct simple_children), + simple_children = kzalloc(sizeof(struct simple_children), GFP_KERNEL); if (!simple_children) return NULL; - memset(simple_children, 0, sizeof(struct simple_children)); config_group_init_type_name(&simple_children->group, name, &simple_children_type); diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index ebffdffb3d99..4a37e25e694c 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -42,6 +42,7 @@ Table of Contents 2.12 /proc/<pid>/oom_adj - Adjust the oom-killer score 2.13 /proc/<pid>/oom_score - Display current oom-killer score 2.14 /proc/<pid>/io - Display the IO accounting fields + 2.15 /proc/<pid>/coredump_filter - Core dump filtering settings ------------------------------------------------------------------------------ Preface @@ -1065,6 +1066,13 @@ check the amount of free space (value is in seconds). Default settings are: 4, resume it if we have a value of 3 or more percent; consider information about the amount of free space valid for 30 seconds +audit_argv_kb +------------- + +The file contains a single value denoting the limit on the argv array size +for execve (in KiB). This limit is only applied when system call auditing for +execve is enabled, otherwise the value is ignored. + ctrl-alt-del ------------ @@ -2177,4 +2185,41 @@ those 64-bit counters, process A could see an intermediate result. More information about this can be found within the taskstats documentation in Documentation/accounting. +2.15 /proc/<pid>/coredump_filter - Core dump filtering settings +--------------------------------------------------------------- +When a process is dumped, all anonymous memory is written to a core file as +long as the size of the core file isn't limited. But sometimes we don't want +to dump some memory segments, for example, huge shared memory. Conversely, +sometimes we want to save file-backed memory segments into a core file, not +only the individual files. + +/proc/<pid>/coredump_filter allows you to customize which memory segments +will be dumped when the <pid> process is dumped. coredump_filter is a bitmask +of memory types. If a bit of the bitmask is set, memory segments of the +corresponding memory type are dumped, otherwise they are not dumped. + +The following 4 memory types are supported: + - (bit 0) anonymous private memory + - (bit 1) anonymous shared memory + - (bit 2) file-backed private memory + - (bit 3) file-backed shared memory + + Note that MMIO pages such as frame buffer are never dumped and vDSO pages + are always dumped regardless of the bitmask status. + +Default value of coredump_filter is 0x3; this means all anonymous memory +segments are dumped. + +If you don't want to dump all shared memory segments attached to pid 1234, +write 1 to the process's proc file. + + $ echo 0x1 > /proc/1234/coredump_filter + +When a new process is created, the process inherits the bitmask status from its +parent. It is useful to set up coredump_filter before the program runs. +For example: + + $ echo 0x7 > /proc/self/coredump_filter + $ ./some_program + ------------------------------------------------------------------------------ diff --git a/Documentation/gpio.txt b/Documentation/gpio.txt index 36af58eba136..218a8650f48d 100644 --- a/Documentation/gpio.txt +++ b/Documentation/gpio.txt @@ -75,6 +75,9 @@ using the include file: If you stick to this convention then it'll be easier for other developers to see what your code is doing, and help maintain it. +Note that these operations include I/O barriers on platforms which need to +use them; drivers don't need to add them explicitly. + Identifying GPIOs ----------------- diff --git a/Documentation/ja_JP/HOWTO b/Documentation/ja_JP/HOWTO new file mode 100644 index 000000000000..b2446a090870 --- /dev/null +++ b/Documentation/ja_JP/HOWTO @@ -0,0 +1,650 @@ +NOTE: +This is Japanese translated version of "Documentation/HOWTO". +This one is maintained by Tsugikazu Shibata <tshibata@ab.jp.nec.com> +and JF Project team <www.linux.or.jp/JF>. +If you find difference with original file or problem in translation, +please contact maintainer of this file or JF project. + +Please also note that purpose of this file is easier to read for non +English natives and not to be intended to fork. So, if you have any +comments or updates of this file, please try to update Original(English) +file at first. + +Last Updated: 2007/06/04 +================================== +これは、 +linux-2.6.21/Documentation/HOWTO +の和訳です。 + +翻訳団体: JF プロジェクト < http://www.linux.or.jp/JF/ > +翻訳日: 2007/06/04 +翻訳者: Tsugikazu Shibata <tshibata at ab dot jp dot nec dot com> +校正者: 松倉さん <nbh--mats at nifty dot com> + 小林 雅典さん (Masanori Kobayasi) <zap03216 at nifty dot ne dot jp> + 武井伸光さん、<takei at webmasters dot gr dot jp> + かねこさん (Seiji Kaneko) <skaneko at a2 dot mbn dot or dot jp> + 野口さん (Kenji Noguchi) <tokyo246 at gmail dot com> + 河内さん (Takayoshi Kochi) <t-kochi at bq dot jp dot nec dot com> + 岩本さん (iwamoto) <iwamoto.kn at ncos dot nec dot co dot jp> +================================== + +Linux カーネル開発のやり方 +------------------------------- + +これは上のトピック( Linux カーネル開発のやり方)の重要な事柄を網羅した +ドキュメントです。ここには Linux カーネル開発者になるための方法と +Linux カーネル開発コミュニティと共に活動するやり方を学ぶ方法が含まれて +います。カーネルプログラミングに関する技術的な項目に関することは何も含 +めないようにしていますが、カーネル開発者となるための正しい方向に向かう +手助けになります。 + +もし、このドキュメントのどこかが古くなっていた場合には、このドキュメン +トの最後にリストしたメンテナーにパッチを送ってください。 + +はじめに +--------- + +あなたは Linux カーネルの開発者になる方法を学びたいのでしょうか? そ +れともあなたは上司から「このデバイスの Linux ドライバを書くように」と +言われているのでしょうか? +この文書の目的は、あなたが踏むべき手順と、コミュニティと一緒にうまく働 +くヒントを書き下すことで、あなたが知るべき全てのことを教えることです。 +また、このコミュニティがなぜ今うまくまわっているのかという理由の一部も +説明しようと試みています。 + +カーネルは 少量のアーキテクチャ依存部分がアセンブリ言語で書かれている +以外は大部分は C 言語で書かれています。C言語をよく理解していることはカー +ネル開発者には必要です。アーキテクチャ向けの低レベル部分の開発をするの +でなければ、(どんなアーキテクチャでも)アセンブリ(訳注: 言語)は必要あり +ません。以下の本は、C 言語の十分な知識や何年もの経験に取って代わるもの +ではありませんが、少なくともリファレンスとしてはいい本です。 + - "The C Programming Language" by Kernighan and Ritchie [Prentice Hall] + -『プログラミング言語C第2版』(B.W. カーニハン/D.M. リッチー著 石田晴久訳) [共立出版] + - "Practical C Programming" by Steve Oualline [O'Reilly] + - 『C実践プログラミング第3版』(Steve Oualline著 望月康司監訳 谷口功訳) [オライリージャパン] + - "C: A Reference Manual" by Harbison and Steele [Prentice Hall] + - 『新・詳説 C 言語 H&S リファレンス』 + (サミュエル P ハービソン/ガイ L スティール共著 斉藤 信男監訳)[ソフトバンク] + +カーネルは GNU C と GNU ツールチェインを使って書かれています。カーネル +は ISO C89 仕様に準拠して書く一方で、標準には無い言語拡張を多く使って +います。カーネルは標準 C ライブラリとは関係がないといった、C 言語フリー +スタンディング環境です。そのため、C の標準で使えないものもあります。任 +意の long long の除算や浮動小数点は使えません。 +ときどき、カーネルがツールチェインや C 言語拡張に置いている前提がどう +なっているのかわかりにくいことがあり、また、残念なことに決定的なリファ +レンスは存在しません。情報を得るには、gcc の info ページ( info gcc )を +みてください。 + +あなたは既存の開発コミュニティと一緒に作業する方法を学ぼうとしているこ +とに留意してください。そのコミュニティは、コーディング、スタイル、 +開発手順について高度な標準を持つ、多様な人の集まりです。 +地理的に分散した大規模なチームに対してもっともうまくいくとわかったこと +をベースにしながら、これらの標準は長い時間をかけて築かれてきました。 +これらはきちんと文書化されていますから、事前にこれらの標準についてでき +るだけたくさん学んでください。また皆があなたやあなたの会社のやり方に合わ +せてくれると思わないでください。 + +法的問題 +------------ + +Linux カーネルのソースコードは GPL ライセンスの下でリリースされていま +す。ライセンスの詳細については、ソースツリーのメインディレクトリに存在 +する、COPYING のファイルをみてください。もしライセンスについてさらに質 +問があれば、Linux Kernel メーリングリストに質問するのではなく、どうぞ +法律家に相談してください。メーリングリストの人達は法律家ではなく、法的 +問題については彼らの声明はあてにするべきではありません。 + +GPL に関する共通の質問や回答については、以下を参照してください。 + http://www.gnu.org/licenses/gpl-faq.html + +ドキュメント +------------ + +Linux カーネルソースツリーは幅広い範囲のドキュメントを含んでおり、それ +らはカーネルコミュニティと会話する方法を学ぶのに非常に貴重なものです。 +新しい機能がカーネルに追加される場合、その機能の使い方について説明した +新しいドキュメントファイルも追加することを勧めます。 +カーネルの変更が、カーネルがユーザ空間に公開しているインターフェイスの +変更を引き起こす場合、その変更を説明するマニュアルページのパッチや情報 +をマニュアルページのメンテナ mtk-manpages@gmx.net に送ることを勧めます。 + +以下はカーネルソースツリーに含まれている読んでおくべきファイルの一覧で +す- + + README + このファイルは Linuxカーネルの簡単な背景とカーネルを設定(訳注 + configure )し、生成(訳注 build )するために必要なことは何かが書かれ + ています。カーネルに関して初めての人はここからスタートするとよいで + しょう。 + + Documentation/Changes + このファイルはカーネルをうまく生成(訳注 build )し、走らせるのに最 + 小限のレベルで必要な数々のソフトウェアパッケージの一覧を示してい + ます。 + + Documentation/CodingStyle + これは Linux カーネルのコーディングスタイルと背景にある理由を記述 + しています。全ての新しいコードはこのドキュメントにあるガイドライン + に従っていることを期待されています。大部分のメンテナーはこれらのルー + ルに従っているものだけを受け付け、多くの人は正しいスタイルのコード + だけをレビューします。 + + Documentation/SubmittingPatches + Documentation/SubmittingDrivers + これらのファイルには、どうやってうまくパッチを作って投稿するかに + ついて非常に詳しく書かれており、以下を含みます(これだけに限らない + けれども) + - Email に含むこと + - Email の形式 + - だれに送るか + これらのルールに従えばうまくいくことを保証することではありません + が (すべてのパッチは内容とスタイルについて精査を受けるので)、 + ルールに従わなければ間違いなくうまくいかないでしょう。 + この他にパッチを作る方法についてのよくできた記述は- + + "The Perfect Patch" + http://www.zip.com.au/~akpm/linux/patches/stuff/tpp.txt + "Linux kernel patch submission format" + http://linux.yyz.us/patch-format.html + + Documentation/stable_api_nonsense.txt + このファイルはカーネルの中に不変のAPIを持たないことにした意識的な + 決断の背景にある理由について書かれています。以下のようなことを含 + んでいます- + - サブシステムとの間に層を作ること(コンパチビリティのため?) + - オペレーティングシステム間のドライバの移植性 + - カーネルソースツリーの素早い変更を遅らせる(もしくは素早い変更 + を妨げる) + このドキュメントは Linux 開発の思想を理解するのに非常に重要です。 + そして、他のOSでの開発者が Linux に移る時にとても重要です。 + + Documentation/SecurityBugs + もし Linux カーネルでセキュリティ問題を発見したように思ったら、こ + のドキュメントのステップに従ってカーネル開発者に連絡し、問題解決を + 支援してください。 + + Documentation/ManagementStyle + このドキュメントは Linux カーネルのメンテナー達がどう行動するか、 + 彼らの手法の背景にある共有されている精神について記述しています。こ + れはカーネル開発の初心者なら(もしくは、単に興味があるだけの人でも) + 重要です。なぜならこのドキュメントは、カーネルメンテナー達の独特な + 行動についての多くの誤解や混乱を解消するからです。 + + Documentation/stable_kernel_rules.txt + このファイルはどのように stable カーネルのリリースが行われるかのルー + ルが記述されています。そしてこれらのリリースの中のどこかで変更を取 + り入れてもらいたい場合に何をすればいいかが示されています。 + + Documentation/kernel-docs.txt + カーネル開発に付随する外部ドキュメントのリストです。もしあなたが + 探しているものがカーネル内のドキュメントでみつからなかった場合、 + このリストをあたってみてください。 + + Documentation/applying-patches.txt + パッチとはなにか、パッチをどうやって様々なカーネルの開発ブランチに + 適用するのかについて正確に記述した良い入門書です。 + +カーネルはソースコードから自動的に生成可能な多数のドキュメントを自分自 +身でもっています。これにはカーネル内 API のすべての記述や、どう正しく +ロックをかけるかの規則が含まれます。このドキュメントは +Documentation/DocBook/ ディレクトリに作られ、以下のように + make pdfdocs + make psdocs + make htmldocs + make mandocs +コマンドを実行するとメインカーネルのソースディレクトリから +それぞれ、PDF, Postscript, HTML, man page の形式で生成されます。 + +カーネル開発者になるには +--------------------------- + +もしあなたが、Linux カーネル開発について何も知らないならば、 +KernelNewbies プロジェクトを見るべきです + http://kernelnewbies.org + +このサイトには役に立つメーリングリストがあり、基本的なカーネル開発に関 +するほとんどどんな種類の質問もできます (既に回答されているようなことを +聞く前にまずはアーカイブを調べてください)。 +またここには、リアルタイムで質問を聞くことができる IRC チャネルや、Linux +カーネルの開発に関して学ぶのに便利なたくさんの役に立つドキュメントがあ +ります。 + +web サイトには、コードの構成、サブシステム、現在存在するプロジェクト(ツ +リーにあるもの無いものの両方)の基本的な管理情報があります。 +ここには、また、カーネルのコンパイルのやり方やパッチの当て方などの間接 +的な基本情報も記述されています。 + +あなたがどこからスタートしてよいかわからないが、Linux カーネル開発コミュ +ニティに参加して何かすることをさがしている場合には、Linux kernel +Janitor's プロジェクトにいけばよいでしょう - + http://janitor.kernelnewbies.org/ +ここはそのようなスタートをするのにうってつけの場所です。ここには、 +Linux カーネルソースツリーの中に含まれる、きれいにし、修正しなければな +らない、単純な問題のリストが記述されています。このプロジェクトに関わる +開発者と一緒に作業することで、あなたのパッチを Linuxカーネルツリーに入 +れるための基礎を学ぶことができ、そしてもしあなたがまだアイディアを持っ +ていない場合には、次にやる仕事の方向性が見えてくるかもしれません。 + +もしあなたが、すでにひとまとまりコードを書いていて、カーネルツリーに入 +れたいと思っていたり、それに関する適切な支援を求めたい場合、カーネル +メンターズプロジェクトはそのような皆さんを助けるためにできました。 +ここにはメーリングリストがあり、以下から参照できます + http://selenic.com/mailman/listinfo/kernel-mentors + +実際に Linux カーネルのコードについて修正を加える前に、どうやってその +コードが動作するのかを理解することが必要です。そのためには、特別なツー +ルの助けを借りてでも、それを直接よく読むことが最良の方法です(ほとんど +のトリッキーな部分は十分にコメントしてありますから)。そういうツールで +特におすすめなのは、Linux クロスリファレンスプロジェクトです。これは、 +自己参照方式で、索引がついた web 形式で、ソースコードを参照することが +できます。この最新の素晴しいカーネルコードのリポジトリは以下で見つかり +ます- + http://sosdg.org/~coywolf/lxr/ + +開発プロセス +----------------------- + +Linux カーネルの開発プロセスは現在幾つかの異なるメインカーネル「ブラン +チ」と多数のサブシステム毎のカーネルブランチから構成されます。 +これらのブランチとは- + - メインの 2.6.x カーネルツリー + - 2.6.x.y -stable カーネルツリー + - 2.6.x -git カーネルパッチ + - 2.6.x -mm カーネルパッチ + - サブシステム毎のカーネルツリーとパッチ + +2.6.x カーネルツリー +----------------- + +2.6.x カーネルは Linus Torvalds によってメンテナンスされ、kernel.org +の pub/linux/kernel/v2.6/ ディレクトリに存在します。この開発プロセスは +以下のとおり- + + - 新しいカーネルがリリースされた直後に、2週間の特別期間が設けられ、 + この期間中に、メンテナー達は Linus に大きな差分を送ることができま + す。このような差分は通常 -mm カーネルに数週間含まれてきたパッチで + す。 大きな変更は git(カーネルのソース管理ツール、詳細は + http://git.or.cz/ 参照) を使って送るのが好ましいやり方ですが、パッ + チファイルの形式のまま送るのでも十分です。 + + - 2週間後、-rc1 カーネルがリリースされ、この後にはカーネル全体の安定 + 性に影響をあたえるような新機能は含まない類のパッチしか取り込むこと + はできません。新しいドライバ(もしくはファイルシステム)のパッチは + -rc1 の後で受け付けられることもあることを覚えておいてください。な + ぜなら、変更が独立していて、追加されたコードの外の領域に影響を与え + ない限り、退行のリスクは無いからです。-rc1 がリリースされた後、 + Linus へパッチを送付するのに git を使うこともできますが、パッチは + レビューのために、パブリックなメーリングリストへも同時に送る必要が + あります。 + + - 新しい -rc は Linus が、最新の git ツリーがテスト目的であれば十分 + に安定した状態にあると判断したときにリリースされます。目標は毎週新 + しい -rc カーネルをリリースすることです。 + + - このプロセスはカーネルが 「準備ができた」と考えられるまで継続しま + す。このプロセスはだいたい 6週間継続します。 + +Andrew Morton が Linux-kernel メーリングリストにカーネルリリースについ +て書いたことをここで言っておくことは価値があります- + 「カーネルがいつリリースされるかは誰も知りません。なぜなら、これは現 + 実に認識されたバグの状況によりリリースされるのであり、前もって決めら + れた計画によってリリースされるものではないからです。」 + +2.6.x.y -stable カーネルツリー +--------------------------- + +バージョンに4つ目の数字がついたカーネルは -stable カーネルです。これに +は、2.6.x カーネルで見つかったセキュリティ問題や重大な後戻りに対する比 +較的小さい重要な修正が含まれます。 + +これは、開発/実験的バージョンのテストに協力することに興味が無く、 +最新の安定したカーネルを使いたいユーザに推奨するブランチです。 + +もし、2.6.x.y カーネルが存在しない場合には、番号が一番大きい 2.6.x +が最新の安定版カーネルです。 + +2.6.x.y は "stable" チーム <stable@kernel.org> でメンテされており、だ +いたい隔週でリリースされています。 + +カーネルツリーに入っている、Documentation/stable_kernel_rules.txt ファ +イルにはどのような種類の変更が -stable ツリーに受け入れ可能か、またリ +リースプロセスがどう動くかが記述されています。 + +2.6.x -git パッチ +------------------ + +git リポジトリで管理されているLinus のカーネルツリーの毎日のスナップ +ショットがあります。(だから -git という名前がついています)。これらのパッ +チはおおむね毎日リリースされており、Linus のツリーの現状を表します。こ +れは -rc カーネルと比べて、パッチが大丈夫かどうかも確認しないで自動的 +に生成されるので、より実験的です。 + +2.6.x -mm カーネルパッチ +------------------------ + +Andrew Morton によってリリースされる実験的なカーネルパッチ群です。 +Andrew は個別のサブシステムカーネルツリーとパッチを全て集めてきて +linux-kernel メーリングリストで収集された多数のパッチと同時に一つにま +とめます。 +このツリーは新機能とパッチが検証される場となります。ある期間の間パッチ +が -mm に入って価値を証明されたら、Andrew やサブシステムメンテナが、メ +インラインへ入れるように Linus にプッシュします。 + +メインカーネルツリーに含めるために Linus に送る前に、すべての新しいパッ +チが -mm ツリーでテストされることが強く推奨されます。 + +これらのカーネルは安定して動作すべきシステムとして使うのには適切ではあ +りませんし、カーネルブランチの中でももっとも動作にリスクが高いものです。 + +もしあなたが、カーネル開発プロセスの支援をしたいと思っているのであれば、 +どうぞこれらのカーネルリリースをテストに使ってみて、そしてもし問題があ +れば、またもし全てが正しく動作したとしても、linux-kernel メーリングリ +ストにフィードバックを提供してください。 + +すべての他の実験的パッチに加えて、これらのカーネルは通常リリース時点で +メインラインの -git カーネルに含まれる全ての変更も含んでいます。 + +-mm カーネルは決まったスケジュールではリリースされません、しかし通常幾 +つかの -mm カーネル (1 から 3 が普通)が各-rc カーネルの間にリリースさ +れます。 + +サブシステム毎のカーネルツリーとパッチ +------------------------------------------- + +カーネルの様々な領域で何が起きているかを見られるようにするため、多くの +カーネルサブシステム開発者は彼らの開発ツリーを公開しています。これらの +ツリーは説明したように -mm カーネルリリースに入れ込まれます。 + +以下はさまざまなカーネルツリーの中のいくつかのリスト- + + git ツリー- + - Kbuild の開発ツリー、Sam Ravnborg <sam@ravnborg.org> + kernel.org:/pub/scm/linux/kernel/git/sam/kbuild.git + + - ACPI の開発ツリー、 Len Brown <len.brown@intel.com> + kernel.org:/pub/scm/linux/kernel/git/lenb/linux-acpi-2.6.git + + - Block の開発ツリー、Jens Axboe <axboe@suse.de> + kernel.org:/pub/scm/linux/kernel/git/axboe/linux-2.6-block.git + + - DRM の開発ツリー、Dave Airlie <airlied@linux.ie> + kernel.org:/pub/scm/linux/kernel/git/airlied/drm-2.6.git + + - ia64 の開発ツリー、Tony Luck <tony.luck@intel.com> + kernel.org:/pub/scm/linux/kernel/git/aegl/linux-2.6.git + + - ieee1394 の開発ツリー、Jody McIntyre <scjody@modernduck.com> + kernel.org:/pub/scm/linux/kernel/git/scjody/ieee1394.git + + - infiniband, Roland Dreier <rolandd@cisco.com> + kernel.org:/pub/scm/linux/kernel/git/roland/infiniband.git + + - libata, Jeff Garzik <jgarzik@pobox.com> + kernel.org:/pub/scm/linux/kernel/git/jgarzik/libata-dev.git + + - ネットワークドライバ, Jeff Garzik <jgarzik@pobox.com> + kernel.org:/pub/scm/linux/kernel/git/jgarzik/netdev-2.6.git + + - pcmcia, Dominik Brodowski <linux@dominikbrodowski.net> + kernel.org:/pub/scm/linux/kernel/git/brodo/pcmcia-2.6.git + + - SCSI, James Bottomley <James.Bottomley@SteelEye.com> + kernel.org:/pub/scm/linux/kernel/git/jejb/scsi-misc-2.6.git + + その他の git カーネルツリーは http://kernel.org/git に一覧表がありま + す。 + + quilt ツリー- + - USB, PCI ドライバコアと I2C, Greg Kroah-Hartman <gregkh@suse.de> + kernel.org/pub/linux/kernel/people/gregkh/gregkh-2.6/ + +バグレポート +------------- + +bugzilla.kernel.org は Linux カーネル開発者がカーネルのバグを追跡する +場所です。ユーザは見つけたバグの全てをこのツールで報告すべきです。 +どう kernel bugzilla を使うかの詳細は、以下を参照してください- + http://test.kernel.org/bugzilla/faq.html + +メインカーネルソースディレクトリにあるファイル REPORTING-BUGS はカーネ +ルバグらしいものについてどうレポートするかの良いテンプレートであり、問 +題の追跡を助けるためにカーネル開発者にとってどんな情報が必要なのかの詳 +細が書かれています。 + +メーリングリスト +------------- + +上のいくつかのドキュメントで述べていますが、コアカーネル開発者の大部分 +は Linux kernel メーリングリストに参加しています。このリストの登録/脱 +退の方法については以下を参照してください- + http://vger.kernel.org/vger-lists.html#linux-kernel + +このメーリングリストのアーカイブは web 上の多数の場所に存在します。こ +れらのアーカイブを探すにはサーチエンジンを使いましょう。例えば- + http://dir.gmane.org/gmane.linux.kernel + +リストに投稿する前にすでにその話題がアーカイブに存在するかどうかを検索 +することを是非やってください。多数の事がすでに詳細に渡って議論されて +おり、アーカイブにのみ記録されています。 + +大部分のカーネルサブシステムも自分の個別の開発を実施するメーリングリス +トを持っています。個々のグループがどんなリストを持っているかは、 +MAINTAINERS ファイルにリストがありますので参照してください。 + +多くのリストは kernel.org でホストされています。これらの情報は以下にあ +ります- + http://vger.kernel.org/vger-lists.html + +メーリングリストを使う場合、良い行動習慣に従うようにしましょう。 +少し安っぽいが、以下の URL は上のリスト(や他のリスト)で会話する場合の +シンプルなガイドラインを示しています- + http://www.albion.com/netiquette/ + +もし複数の人があなたのメールに返事をした場合、CC: で受ける人のリストは +だいぶ多くなるでしょう。良い理由がない場合、CC: リストから誰かを削除を +しないように、また、メーリングリストのアドレスだけにリプライすることの +ないようにしましょう。1つは送信者から、もう1つはリストからのように、メー +ルを2回受けることになってもそれに慣れ、しゃれたメールヘッダーを追加し +てこの状態を変えようとしないように。人々はそのようなことは好みません。 + +今までのメールでのやりとりとその間のあなたの発言はそのまま残し、 +"John Kernlehacker wrote ...:" の行をあなたのリプライの先頭行にして、 +メールの先頭でなく、各引用行の間にあなたの言いたいことを追加するべきで +す。 + +もしパッチをメールに付ける場合は、Documentaion/SubmittingPatches に提 +示されているように、それは プレーンな可読テキストにすることを忘れない +ようにしましょう。カーネル開発者は 添付や圧縮したパッチを扱いたがりま +せん- +彼らはあなたのパッチの行毎にコメントを入れたいので、そのためにはそうす +るしかありません。あなたのメールプログラムが空白やタブを圧縮しないよう +に確認した方がいいです。最初の良いテストとしては、自分にメールを送って +みて、そのパッチを自分で当ててみることです。もしそれがうまく行かないな +ら、あなたのメールプログラムを直してもらうか、正しく動くように変えるべ +きです。 + +とりわけ、他の登録者に対する尊敬を表すようにすることを覚えておいてくだ +さい。 + +コミュニティと共に働くこと +-------------------------- + +カーネルコミュニティのゴールは可能なかぎり最高のカーネルを提供すること +です。あなたがパッチを受け入れてもらうために投稿した場合、それは、技術 +的メリットだけがレビューされます。その際、あなたは何を予想すべきでしょ +うか? + - 批判 + - コメント + - 変更の要求 + - パッチの正当性の証明要求 + - 沈黙 + +思い出してください、ここはあなたのパッチをカーネルに入れる話です。あ +なたは、あなたのパッチに対する批判とコメントを受け入れるべきで、それら +を技術的レベルで評価して、パッチを再作成するか、なぜそれらの変更をすべ +きでないかを明確で簡潔な理由の説明を提供してください。 +もし、あなたのパッチに何も反応がない場合、たまにはメールの山に埋もれて +見逃され、あなたの投稿が忘れられてしまうこともあるので、数日待って再度 +投稿してください。 + +あなたがやるべきでないものは? + - 質問なしにあなたのパッチが受け入れられると想像すること + - 守りに入ること + - コメントを無視すること + - 要求された変更を何もしないでパッチを出し直すこと + +可能な限り最高の技術的解決を求めているコミュニティでは、パッチがどのく +らい有益なのかについては常に異なる意見があります。あなたは協調的である +べきですし、また、あなたのアイディアをカーネルに対してうまく合わせるよ +うにすることが望まれています。もしくは、最低限あなたのアイディアがそれ +だけの価値があるとすすんで証明するようにしなければなりません。 +正しい解決に向かって進もうという意志がある限り、間違うことがあっても許 +容されることを忘れないでください。 + +あなたの最初のパッチに単に 1ダースもの修正を求めるリストの返答になるこ +とも普通のことです。これはあなたのパッチが受け入れられないということで +は *ありません*、そしてあなた自身に反対することを意味するのでも *ありま +せん*。単に自分のパッチに対して指摘された問題を全て修正して再送すれば +いいのです。 + +カーネルコミュニティと企業組織のちがい +----------------------------------------------------------------- + +カーネルコミュニティは大部分の伝統的な会社の開発環境とは異ったやり方で +動いています。以下は問題を避けるためにできるとよいことののリストです- + + あなたの提案する変更について言うときのうまい言い方: + + - "これは複数の問題を解決します" + - "これは2000行のコードを削除します" + - "以下のパッチは、私が言おうとしていることを説明するものです" + - "私はこれを5つの異なるアーキテクチャでテストしたのですが..." + - "以下は一連の小さなパッチ群ですが..." + - "これは典型的なマシンでの性能を向上させます.." + + やめた方がいい悪い言い方: + + - このやり方で AIX/ptx/Solaris ではできたので、できるはずだ + - 私はこれを20年もの間やってきた、だから + - これは、私の会社が金儲けをするために必要だ + - これは我々のエンタープライズ向け商品ラインのためである + - これは 私が自分のアイディアを記述した、1000ページの設計資料である + - 私はこれについて、6ケ月作業している。 + - 以下は ... に関する5000行のパッチです + - 私は現在のぐちゃぐちゃを全部書き直した、それが以下です... + - 私は〆切がある、そのためこのパッチは今すぐ適用される必要がある + +カーネルコミュニティが大部分の伝統的なソフトウェアエンジニアリングの労 +働環境と異なるもう一つの点は、やりとりに顔を合わせないということです。 +email と irc を第一のコミュニケーションの形とする一つの利点は、性別や +民族の差別がないことです。Linux カーネルの職場環境は女性や少数民族を受 +容します。なぜなら、email アドレスによってのみあなたが認識されるからで +す。 +国際的な側面からも活動領域を均等にするようにします。なぜならば、あなた +は人の名前で性別を想像できないからです。ある男性が アンドレアという名 +前で、女性の名前は パット かもしれません (訳注 Andrea は米国では女性、 +それ以外(欧州など)では男性名として使われることが多い。同様に、Pat は +Patricia (主に女性名)や Patrick (主に男性名)の略称)。 +Linux カーネルの活動をして、意見を表明したことがある大部分の女性は、前 +向きな経験をもっています。 + +言葉の壁は英語が得意でない一部の人には問題になります。 +メーリングリストの中できちんとアイディアを交換するには、相当うまく英語 +を操れる必要があることもあります。そのため、あなたは自分のメール +を送る前に英語で意味が通じているかをチェックすることをお薦めします。 + +変更を分割する +--------------------- + +Linux カーネルコミュニティは、一度に大量のコードの塊を喜んで受容するこ +とはありません。変更は正確に説明される必要があり、議論され、小さい、個 +別の部分に分割する必要があります。これはこれまで多くの会社がやり慣れて +きたことと全く正反対のことです。あなたのプロポーザルは、開発プロセスのと +ても早い段階から紹介されるべきです。そうすれば あなたは自分のやってい +ることにフィードバックを得られます。これは、コミュニティからみれば、あ +なたが彼らと一緒にやっているように感じられ、単にあなたの提案する機能の +ゴミ捨て場として使っているのではない、と感じられるでしょう。 +しかし、一度に 50 もの email をメーリングリストに送りつけるようなことは +やってはいけません、あなたのパッチ群はいつもどんな時でもそれよりは小さ +くなければなりません。 + +パッチを分割する理由は以下です- + +1) 小さいパッチはあなたのパッチが適用される見込みを大きくします、カー + ネルの人達はパッチが正しいかどうかを確認する時間や労力をかけないか + らです。5行のパッチはメンテナがたった1秒見るだけで適用できます。し + かし、500行のパッチは、正しいことをレビューするのに数時間かかるかも + しれません(時間はパッチのサイズなどにより指数関数に比例してかかりま + す) + 小さいパッチは何かあったときにデバッグもとても簡単になります。パッ + チを1個1個取り除くのは、とても大きなパッチを当てた後に(かつ、何かお + かしくなった後で)解剖するのに比べればとても簡単です。 + +2) 小さいパッチを送るだけでなく、送るまえに、書き直して、シンプルにす + る(もしくは、単に順番を変えるだけでも)ことも、とても重要です。 + +以下はカーネル開発者の Al Viro のたとえ話しです: + + "生徒の数学の宿題を採点する先生のことを考えてみてください、先 + 生は生徒が解に到達するまでの試行錯誤をみたいとは思わないでしょ + う。先生は簡潔な最高の解をみたいのです。良い生徒はこれを知って + おり、そして最終解の前の中間作業を提出することは決してないので + す" + カーネル開発でもこれは同じです。メンテナー達とレビューア達は、 + 問題を解決する解の背後になる思考プロセスをみたいとは思いません。 + 彼らは単純であざやかな解決方法をみたいのです。 + +あざやかな解を説明するのと、コミュニティと共に仕事をし、未解決の仕事を +議論することのバランスをキープするのは難しいかもしれません。 +ですから、開発プロセスの早期段階で改善のためのフィードバックをもらうよ +うにするのもいいですが、変更点を小さい部分に分割して全体ではまだ完成し +ていない仕事を(部分的に)取り込んでもらえるようにすることもいいことです。 + +また、でき上がっていないものや、"将来直す" ようなパッチを、本流に含め +てもらうように送っても、それは受け付けられないことを理解してください。 + +あなたの変更を正当化する +------------------- + +あなたのパッチを分割するのと同時に、なぜその変更を追加しなければならな +いかを Linux コミュニティに知らせることはとても重要です。新機能は必要 +性と有用性で正当化されなければなりません。 + +あなたの変更の説明 +-------------------- + +あなたのパッチを送付する場合には、メールの中のテキストで何を言うかにつ +いて、特別に注意を払ってください。この情報はパッチの ChangeLog に使わ +れ、いつも皆がみられるように保管されます。これは次のような項目を含め、 +パッチを完全に記述するべきです- + + - なぜ変更が必要か + - パッチ全体の設計アプローチ + - 実装の詳細 + - テスト結果 + +これについて全てがどのようにあるべきかについての詳細は、以下のドキュメ +ントの ChangeLog セクションをみてください- + "The Perfect Patch" + http://www.zip.com.au/~akpm/linux/patches/stuff/tpp.txt + +これらのどれもが、時にはとても困難です。これらの慣例を完璧に実施するに +は数年かかるかもしれません。これは継続的な改善のプロセスであり、そのた +めには多数の忍耐と決意を必要とするものです。でも、諦めないで、これは可 +能なことです。多数の人がすでにできていますし、彼らも皆最初はあなたと同 +じところからスタートしたのですから。 + +Paolo Ciarrocchi に感謝、彼は彼の書いた "Development Process" +(http://linux.tar.bz/articles/2.6-development_process)セクショ +ンをこのテキストの原型にすることを許可してくれました。 +Rundy Dunlap と Gerrit Huizenga はメーリングリストでやるべきこととやっ +てはいけないことのリストを提供してくれました。 +以下の人々のレビュー、コメント、貢献に感謝。 +Pat Mochel, Hanna Linder, Randy Dunlap, Kay Sievers, +Vojtech Pavlik, Jan Kara, Josh Boyer, Kees Cook, Andrew Morton, Andi +Kleen, Vadim Lobanov, Jesper Juhl, Adrian Bunk, Keri Harris, Frans Pop, +David A. Wheeler, Junio Hamano, Michael Kerrisk, と Alex Shepard +彼らの支援なしでは、このドキュメントはできなかったでしょう。 + +Maintainer: Greg Kroah-Hartman <greg@kroah.com> diff --git a/Documentation/ja_JP/stable_api_nonsense.txt b/Documentation/ja_JP/stable_api_nonsense.txt new file mode 100644 index 000000000000..b3f2b27f0881 --- /dev/null +++ b/Documentation/ja_JP/stable_api_nonsense.txt @@ -0,0 +1,263 @@ +NOTE: +This is a Japanese translated version of +"Documentation/stable_api_nonsense.txt". +This one is maintained by +IKEDA, Munehiro <m-ikeda@ds.jp.nec.com> +and JF Project team <http://www.linux.or.jp/JF/>. +If you find difference with original file or problem in translation, +please contact the maintainer of this file or JF project. + +Please also note that purpose of this file is easier to read for non +English natives and not to be intended to fork. So, if you have any +comments or updates of this file, please try to update +Original(English) file at first. + +================================== +これは、 +linux-2.6.22-rc4/Documentation/stable_api_nonsense.txt の和訳 +です。 +翻訳団体: JF プロジェクト < http://www.linux.or.jp/JF/ > +翻訳日 : 2007/06/11 +原著作者: Greg Kroah-Hartman < greg at kroah dot com > +翻訳者 : 池田 宗広 < m-ikeda at ds dot jp dot nec dot com > +校正者 : Masanori Kobayashi さん < zap03216 at nifty dot ne dot jp > + Seiji Kaneko さん < skaneko at a2 dot mbn dot or dot jp > +================================== + + + +Linux カーネルのドライバインターフェース +(あなたの質問すべてに対する回答とその他諸々) + +Greg Kroah-Hartman <greg at kroah dot com> + + +この文書は、なぜ Linux ではバイナリカーネルインターフェースが定義 +されていないのか、またはなぜ不変のカーネルインターフェースを持たな +いのか、ということを説明するために書かれた。ここでの話題は「カーネ +ル内部の」インターフェースについてであり、ユーザー空間とのインター +フェースではないことを理解してほしい。カーネルとユーザー空間とのイ +ンターフェースとはアプリケーションプログラムが使用するものであり、 +つまりシステムコールのインターフェースがこれに当たる。これは今まで +長きに渡り、かつ今後も「まさしく」不変である。私は確か 0.9 か何か +より前のカーネルを使ってビルドした古いプログラムを持っているが、そ +れは最新の 2.6 カーネルでもきちんと動作する。ユーザー空間とのイン +ターフェースは、ユーザーとアプリケーションプログラマが不変性を信頼 +してよいものの一つである。 + + +要旨 +---- + +あなたは不変のカーネルインターフェースが必要だと考えているかもしれ +ないが、実際のところはそうではない。あなたは必要としているものが分 +かっていない。あなたが必要としているものは安定して動作するドライバ +であり、それはドライバがメインのカーネルツリーに含まれる場合のみ得 +ることができる。ドライバがメインのカーネルツリーに含まれていると、 +他にも多くの良いことがある。それは、Linux をより強固で、安定な、成 +熟したオペレーティングシステムにすることができるということだ。これ +こそ、そもそもあなたが Linux を使う理由のはずだ。 + + +はじめに +-------- + +カーネル内部のインターフェース変更を心配しなければならないドライバ +を書きたいなどというのは、変わり者だけだ。この世界のほとんどの人は、 +そのようなドライバがどんなインターフェースを使っているかなど知らな +いし、そんなドライバのことなど全く気にもかけていない。 + + +まず初めに、クローズソースとか、ソースコードの隠蔽とか、バイナリの +みが配布される使い物にならない代物[訳注(1)]とか、実体はバイナリ +コードでそれを読み込むためのラッパー部分のみソースコードが公開され +ているとか、その他用語は何であれ GPL の下にソースコードがリリース +されていないカーネルドライバに関する法的な問題について、私は「いか +なる議論も」行うつもりがない。法的な疑問があるのならば、プログラマ +である私ではなく、弁護士に相談して欲しい。ここでは単に、技術的な問 +題について述べることにする。(法的な問題を軽視しているわけではない。 +それらは実際に存在するし、あなたはそれをいつも気にかけておく必要が +ある) + +訳注(1) +「使い物にならない代物」の原文は "blob" + + +さてここでは、バイナリカーネルインターフェースについてと、ソースレ +ベルでのインターフェースの不変性について、という二つの話題を取り上 +げる。この二つは互いに依存する関係にあるが、まずはバイナリインター +フェースについて議論を行いやっつけてしまおう。 + + +バイナリカーネルインターフェース +-------------------------------- + +もしソースレベルでのインターフェースが不変ならば、バイナリインター +フェースも当然のように不変である、というのは正しいだろうか?正しく +ない。Linux カーネルに関する以下の事実を考えてみてほしい。 + - あなたが使用するCコンパイラのバージョンによって、カーネル内部 + の構造体の配置構造は異なったものになる。また、関数は異なった方 + 法でカーネルに含まれることになるかもしれない(例えばインライン + 関数として扱われたり、扱われなかったりする)。個々の関数がどの + ようにコンパイルされるかはそれほど重要ではないが、構造体のパデ + ィングが異なるというのは非常に重要である。 + - あなたがカーネルのビルドオプションをどのように設定するかによっ + て、カーネルには広い範囲で異なった事態が起こり得る。 + - データ構造は異なるデータフィールドを持つかもしれない + - いくつかの関数は全く実装されていない状態になり得る + (例:SMP向けではないビルドでは、いくつかのロックは中身が + カラにコンパイルされる) + - カーネル内のメモリは、異なった方法で配置され得る。これはビ + ルドオプションに依存している。 + - Linux は様々な異なるプロセッサアーキテクチャ上で動作する。 + あるアーキテクチャ用のバイナリドライバを、他のアーキテクチャで + 正常に動作させる方法はない。 + + +ある特定のカーネル設定を使用し、カーネルをビルドしたのと正確に同じ +Cコンパイラを使用して単にカーネルモジュールをコンパイルするだけで +も、あなたはこれらいくつもの問題に直面することになる。ある特定の +Linux ディストリビューションの、ある特定のリリースバージョン用にモ +ジュールを提供しようと思っただけでも、これらの問題を引き起こすには +十分である。にも関わらず Linux ディストリビューションの数と、サ +ポートするディストリビューションのリリース数を掛け算し、それら一つ +一つについてビルドを行ったとしたら、今度はリリースごとのビルドオプ +ションの違いという悪夢にすぐさま悩まされることになる。また、ディス +トリビューションの各リリースバージョンには、異なるハードウェア(プ +ロセッサタイプや種々のオプション)に対応するため、何種類かのカーネ +ルが含まれているということも理解して欲しい。従って、ある一つのリ +リースバージョンだけのためにモジュールを作成する場合でも、あなたは +何バージョンものモジュールを用意しなければならない。 + + +信じて欲しい。このような方法でサポートを続けようとするなら、あなた +はいずれ正気を失うだろう。遠い昔、私はそれがいかに困難なことか、身 +をもって学んだのだ・・・ + + +不変のカーネルソースレベルインターフェース +------------------------------------------ + +メインカーネルツリーに含まれていない Linux カーネルドライバを継続 +してサポートしていこうとしている人たちとの議論においては、これは極 +めて「引火性の高い」話題である。[訳注(2)] + +訳注(2) +「引火性の高い」の原文は "volatile"。 +volatile には「揮発性の」「爆発しやすい」という意味の他、「変わり +やすい」「移り気な」という意味がある。 +「(この話題は)爆発的に激しい論争を巻き起こしかねない」ということ +を、「(カーネルのソースレベルインターフェースは)移ろい行くもので +ある」ということを連想させる "volatile" という単語で表現している。 + + +Linux カーネルの開発は継続的に速いペースで行われ、決して歩みを緩め +ることがない。その中でカーネル開発者達は、現状のインターフェースに +あるバグを見つけ、より良い方法を考え出す。彼らはやがて、現状のイン +ターフェースがより正しく動作するように修正を行う。その過程で関数の +名前は変更されるかもしれず、構造体は大きく、または小さくなるかもし +れず、関数の引数は検討しなおされるかもしれない。そのような場合、引 +き続き全てが正常に動作するよう、カーネル内でこれらのインターフェー +スを使用している個所も全て同時に修正される。 + + +具体的な例として、カーネル内の USB インターフェースを挙げる。USB +サブシステムはこれまでに少なくとも3回の書き直しが行われ、その結果 +インターフェースが変更された。これらの書き直しはいくつかの異なった +問題を修正するために行われた。 + - 同期的データストリームが非同期に変更された。これにより多数のド + ライバを単純化でき、全てのドライバのスループットが向上した。今 + やほとんど全ての USB デバイスは、考えられる最高の速度で動作し + ている。 + - USB ドライバが USB サブシステムのコアから行う、データパケット + 用のメモリ確保方法が変更された。これに伴い、いくつもの文書化さ + れたデッドロック条件を回避するため、全ての USB ドライバはより + 多くの情報を USB コアに提供しなければならないようになっている。 + + +このできごとは、数多く存在するクローズソースのオペレーティングシス +テムとは全く対照的だ。それらは長期に渡り古い USB インターフェース +をメンテナンスしなければならない。古いインターフェースが残ることで、 +新たな開発者が偶然古いインターフェースを使い、正しくない方法で開発 +を行ってしまう可能性が生じる。これによりシステムの安定性は危険にさ +らされることになる。 + + +上に挙げたどちらの例においても、開発者達はその変更が重要かつ必要で +あることに合意し、比較的楽にそれを実行した。もし Linux がソースレ +ベルでインターフェースの不変性を保証しなければならないとしたら、新 +しいインターフェースを作ると同時に、古い、問題のある方を今後ともメ +ンテナンスするという余計な仕事を USB の開発者にさせなければならな +い。Linux の USB 開発者は、自分の時間を使って仕事をしている。よっ +て、価値のない余計な仕事を報酬もなしに実行しろと言うことはできない。 + + +セキュリティ問題も、Linux にとっては非常に重要である。ひとたびセキ +ュリティに関する問題が発見されれば、それは極めて短期間のうちに修正 +される。セキュリティ問題の発生を防ぐための修正は、カーネルの内部イ +ンターフェースの変更を何度も引き起こしてきた。その際同時に、変更さ +れたインターフェースを使用する全てのドライバもまた変更された。これ +により問題が解消し、将来偶然に問題が再発してしまわないことが保証さ +れる。もし内部インターフェースの変更が許されないとしたら、このよう +にセキュリティ問題を修正し、将来再発しないことを保証することなど不 +可能なのだ。 + + +カーネルのインターフェースは時が経つにつれクリーンナップを受ける。 +誰も使っていないインターフェースは削除される。これにより、可能な限 +りカーネルが小さく保たれ、現役の全てのインターフェースが可能な限り +テストされることを保証しているのだ。(使われていないインターフェー +スの妥当性をテストすることは不可能と言っていいだろう) + + + +これから何をすべきか +----------------------- + +では、もしメインのカーネルツリーに含まれない Linux カーネルドライ +バがあったとして、あなたは、つまり開発者は何をするべきだろうか?全 +てのディストリビューションの全てのカーネルバージョン向けにバイナリ +のドライバを供給することは悪夢であり、カーネルインターフェースの変 +更を追いかけ続けることもまた過酷な仕事だ。 + + +答えは簡単。そのドライバをメインのカーネルツリーに入れてしまえばよ +い。(ここで言及しているのは、GPL に従って公開されるドライバのこと +だということに注意してほしい。あなたのコードがそれに該当しないなら +ば、さよなら。幸運を祈ります。ご自分で何とかしてください。Andrew +と Linus からのコメント<Andrew と Linus のコメントへのリンクをこ +こに置く>をどうぞ)ドライバがメインツリーに入れば、カーネルのイン +ターフェースが変更された場合、変更を行った開発者によってドライバも +修正されることになるだろう。あなたはほとんど労力を払うことなしに、 +常にビルド可能できちんと動作するドライバを手に入れることができる。 + + +ドライバをメインのカーネルツリーに入れると、非常に好ましい以下の効 +果がある。 + - ドライバの品質が向上する一方で、(元の開発者にとっての)メンテ + ナンスコストは下がる。 + - あなたのドライバに他の開発者が機能を追加してくれる。 + - 誰かがあなたのドライバにあるバグを見つけ、修正してくれる。 + - 誰かがあなたのドライバにある改善点を見つけてくれる。 + - 外部インターフェースが変更されドライバの更新が必要になった場合、 + 誰かがあなたの代わりに更新してくれる。 + - ドライバを入れてくれとディストロに頼まなくても、そのドライバは + 全ての Linux ディストリビューションに自動的に含まれてリリース + される。 + + +Linux では、他のどのオペレーティングシステムよりも数多くのデバイス +が「そのまま」使用できるようになった。また Linux は、どのオペレー +ティングシステムよりも数多くのプロセッサアーキテクチャ上でそれらの +デバイスを使用することができるようにもなった。このように、Linux の +開発モデルは実証されており、今後も間違いなく正しい方向へと進んでい +くだろう。:) + + + +------ + +この文書の初期の草稿に対し、Randy Dunlap, Andrew Morton, David +Brownell, Hanna Linder, Robert Love, Nishanth Aravamudan から査読 +と助言を頂きました。感謝申し上げます。 + diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 68115d7b0a7a..5fbe07706ae9 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1887,6 +1887,9 @@ and is between 256 and 4096 characters. It is defined in the file vdso=1: enable VDSO (default) vdso=0: disable VDSO mapping + vector= [IA-64,SMP] + vector=percpu: enable percpu vector domain + video= [FB] Frame buffer configuration See Documentation/fb/modedb.txt. diff --git a/Documentation/kprobes.txt b/Documentation/kprobes.txt index da5404ab7569..cb12ae175aa2 100644 --- a/Documentation/kprobes.txt +++ b/Documentation/kprobes.txt @@ -247,12 +247,6 @@ control to Kprobes.) If the probed function is declared asmlinkage, fastcall, or anything else that affects how args are passed, the handler's declaration must match. -NOTE: A macro JPROBE_ENTRY is provided to handle architecture-specific -aliasing of jp->entry. In the interest of portability, it is advised -to use: - - jp->entry = JPROBE_ENTRY(handler); - register_jprobe() returns 0 on success, or a negative errno otherwise. 4.3 register_kretprobe @@ -518,7 +512,7 @@ long jdo_fork(unsigned long clone_flags, unsigned long stack_start, } static struct jprobe my_jprobe = { - .entry = JPROBE_ENTRY(jdo_fork) + .entry = jdo_fork }; static int __init jprobe_init(void) diff --git a/Documentation/lguest/Makefile b/Documentation/lguest/Makefile new file mode 100644 index 000000000000..b9b9427376e9 --- /dev/null +++ b/Documentation/lguest/Makefile @@ -0,0 +1,27 @@ +# This creates the demonstration utility "lguest" which runs a Linux guest. + +# For those people that have a separate object dir, look there for .config +KBUILD_OUTPUT := ../.. +ifdef O + ifeq ("$(origin O)", "command line") + KBUILD_OUTPUT := $(O) + endif +endif +# We rely on CONFIG_PAGE_OFFSET to know where to put lguest binary. +include $(KBUILD_OUTPUT)/.config +LGUEST_GUEST_TOP := ($(CONFIG_PAGE_OFFSET) - 0x08000000) + +CFLAGS:=-Wall -Wmissing-declarations -Wmissing-prototypes -O3 \ + -static -DLGUEST_GUEST_TOP="$(LGUEST_GUEST_TOP)" -Wl,-T,lguest.lds +LDLIBS:=-lz + +all: lguest.lds lguest + +# The linker script on x86 is so complex the only way of creating one +# which will link our binary in the right place is to mangle the +# default one. +lguest.lds: + $(LD) --verbose | awk '/^==========/ { PRINT=1; next; } /SIZEOF_HEADERS/ { gsub(/0x[0-9A-F]*/, "$(LGUEST_GUEST_TOP)") } { if (PRINT) print $$0; }' > $@ + +clean: + rm -f lguest.lds lguest diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c new file mode 100644 index 000000000000..1432b502a2d9 --- /dev/null +++ b/Documentation/lguest/lguest.c @@ -0,0 +1,1012 @@ +/* Simple program to layout "physical" memory for new lguest guest. + * Linked high to avoid likely physical memory. */ +#define _LARGEFILE64_SOURCE +#define _GNU_SOURCE +#include <stdio.h> +#include <string.h> +#include <unistd.h> +#include <err.h> +#include <stdint.h> +#include <stdlib.h> +#include <elf.h> +#include <sys/mman.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/wait.h> +#include <fcntl.h> +#include <stdbool.h> +#include <errno.h> +#include <ctype.h> +#include <sys/socket.h> +#include <sys/ioctl.h> +#include <sys/time.h> +#include <time.h> +#include <netinet/in.h> +#include <net/if.h> +#include <linux/sockios.h> +#include <linux/if_tun.h> +#include <sys/uio.h> +#include <termios.h> +#include <getopt.h> +#include <zlib.h> +typedef unsigned long long u64; +typedef uint32_t u32; +typedef uint16_t u16; +typedef uint8_t u8; +#include "../../include/linux/lguest_launcher.h" +#include "../../include/asm-i386/e820.h" + +#define PAGE_PRESENT 0x7 /* Present, RW, Execute */ +#define NET_PEERNUM 1 +#define BRIDGE_PFX "bridge:" +#ifndef SIOCBRADDIF +#define SIOCBRADDIF 0x89a2 /* add interface to bridge */ +#endif + +static bool verbose; +#define verbose(args...) \ + do { if (verbose) printf(args); } while(0) +static int waker_fd; + +struct device_list +{ + fd_set infds; + int max_infd; + + struct device *dev; + struct device **lastdev; +}; + +struct device +{ + struct device *next; + struct lguest_device_desc *desc; + void *mem; + + /* Watch this fd if handle_input non-NULL. */ + int fd; + bool (*handle_input)(int fd, struct device *me); + + /* Watch DMA to this key if handle_input non-NULL. */ + unsigned long watch_key; + u32 (*handle_output)(int fd, const struct iovec *iov, + unsigned int num, struct device *me); + + /* Device-specific data. */ + void *priv; +}; + +static int open_or_die(const char *name, int flags) +{ + int fd = open(name, flags); + if (fd < 0) + err(1, "Failed to open %s", name); + return fd; +} + +static void *map_zeroed_pages(unsigned long addr, unsigned int num) +{ + static int fd = -1; + + if (fd == -1) + fd = open_or_die("/dev/zero", O_RDONLY); + + if (mmap((void *)addr, getpagesize() * num, + PROT_READ|PROT_WRITE|PROT_EXEC, MAP_FIXED|MAP_PRIVATE, fd, 0) + != (void *)addr) + err(1, "Mmaping %u pages of /dev/zero @%p", num, (void *)addr); + return (void *)addr; +} + +/* Find magic string marking entry point, return entry point. */ +static unsigned long entry_point(void *start, void *end, + unsigned long page_offset) +{ + void *p; + + for (p = start; p < end; p++) + if (memcmp(p, "GenuineLguest", strlen("GenuineLguest")) == 0) + return (long)p + strlen("GenuineLguest") + page_offset; + + err(1, "Is this image a genuine lguest?"); +} + +/* Returns the entry point */ +static unsigned long map_elf(int elf_fd, const Elf32_Ehdr *ehdr, + unsigned long *page_offset) +{ + void *addr; + Elf32_Phdr phdr[ehdr->e_phnum]; + unsigned int i; + unsigned long start = -1UL, end = 0; + + /* Sanity checks. */ + if (ehdr->e_type != ET_EXEC + || ehdr->e_machine != EM_386 + || ehdr->e_phentsize != sizeof(Elf32_Phdr) + || ehdr->e_phnum < 1 || ehdr->e_phnum > 65536U/sizeof(Elf32_Phdr)) + errx(1, "Malformed elf header"); + + if (lseek(elf_fd, ehdr->e_phoff, SEEK_SET) < 0) + err(1, "Seeking to program headers"); + if (read(elf_fd, phdr, sizeof(phdr)) != sizeof(phdr)) + err(1, "Reading program headers"); + + *page_offset = 0; + /* We map the loadable segments at virtual addresses corresponding + * to their physical addresses (our virtual == guest physical). */ + for (i = 0; i < ehdr->e_phnum; i++) { + if (phdr[i].p_type != PT_LOAD) + continue; + + verbose("Section %i: size %i addr %p\n", + i, phdr[i].p_memsz, (void *)phdr[i].p_paddr); + + /* We expect linear address space. */ + if (!*page_offset) + *page_offset = phdr[i].p_vaddr - phdr[i].p_paddr; + else if (*page_offset != phdr[i].p_vaddr - phdr[i].p_paddr) + errx(1, "Page offset of section %i different", i); + + if (phdr[i].p_paddr < start) + start = phdr[i].p_paddr; + if (phdr[i].p_paddr + phdr[i].p_filesz > end) + end = phdr[i].p_paddr + phdr[i].p_filesz; + + /* We map everything private, writable. */ + addr = mmap((void *)phdr[i].p_paddr, + phdr[i].p_filesz, + PROT_READ|PROT_WRITE|PROT_EXEC, + MAP_FIXED|MAP_PRIVATE, + elf_fd, phdr[i].p_offset); + if (addr != (void *)phdr[i].p_paddr) + err(1, "Mmaping vmlinux seg %i gave %p not %p", + i, addr, (void *)phdr[i].p_paddr); + } + + return entry_point((void *)start, (void *)end, *page_offset); +} + +/* This is amazingly reliable. */ +static unsigned long intuit_page_offset(unsigned char *img, unsigned long len) +{ + unsigned int i, possibilities[256] = { 0 }; + + for (i = 0; i + 4 < len; i++) { + /* mov 0xXXXXXXXX,%eax */ + if (img[i] == 0xA1 && ++possibilities[img[i+4]] > 3) + return (unsigned long)img[i+4] << 24; + } + errx(1, "could not determine page offset"); +} + +static unsigned long unpack_bzimage(int fd, unsigned long *page_offset) +{ + gzFile f; + int ret, len = 0; + void *img = (void *)0x100000; + + f = gzdopen(fd, "rb"); + while ((ret = gzread(f, img + len, 65536)) > 0) + len += ret; + if (ret < 0) + err(1, "reading image from bzImage"); + + verbose("Unpacked size %i addr %p\n", len, img); + *page_offset = intuit_page_offset(img, len); + + return entry_point(img, img + len, *page_offset); +} + +static unsigned long load_bzimage(int fd, unsigned long *page_offset) +{ + unsigned char c; + int state = 0; + + /* Ugly brute force search for gzip header. */ + while (read(fd, &c, 1) == 1) { + switch (state) { + case 0: + if (c == 0x1F) + state++; + break; + case 1: + if (c == 0x8B) + state++; + else + state = 0; + break; + case 2 ... 8: + state++; + break; + case 9: + lseek(fd, -10, SEEK_CUR); + if (c != 0x03) /* Compressed under UNIX. */ + state = -1; + else + return unpack_bzimage(fd, page_offset); + } + } + errx(1, "Could not find kernel in bzImage"); +} + +static unsigned long load_kernel(int fd, unsigned long *page_offset) +{ + Elf32_Ehdr hdr; + + if (read(fd, &hdr, sizeof(hdr)) != sizeof(hdr)) + err(1, "Reading kernel"); + + if (memcmp(hdr.e_ident, ELFMAG, SELFMAG) == 0) + return map_elf(fd, &hdr, page_offset); + + return load_bzimage(fd, page_offset); +} + +static inline unsigned long page_align(unsigned long addr) +{ + return ((addr + getpagesize()-1) & ~(getpagesize()-1)); +} + +/* initrd gets loaded at top of memory: return length. */ +static unsigned long load_initrd(const char *name, unsigned long mem) +{ + int ifd; + struct stat st; + unsigned long len; + void *iaddr; + + ifd = open_or_die(name, O_RDONLY); + if (fstat(ifd, &st) < 0) + err(1, "fstat() on initrd '%s'", name); + + len = page_align(st.st_size); + iaddr = mmap((void *)mem - len, st.st_size, + PROT_READ|PROT_EXEC|PROT_WRITE, + MAP_FIXED|MAP_PRIVATE, ifd, 0); + if (iaddr != (void *)mem - len) + err(1, "Mmaping initrd '%s' returned %p not %p", + name, iaddr, (void *)mem - len); + close(ifd); + verbose("mapped initrd %s size=%lu @ %p\n", name, st.st_size, iaddr); + return len; +} + +static unsigned long setup_pagetables(unsigned long mem, + unsigned long initrd_size, + unsigned long page_offset) +{ + u32 *pgdir, *linear; + unsigned int mapped_pages, i, linear_pages; + unsigned int ptes_per_page = getpagesize()/sizeof(u32); + + /* If we can map all of memory above page_offset, we do so. */ + if (mem <= -page_offset) + mapped_pages = mem/getpagesize(); + else + mapped_pages = -page_offset/getpagesize(); + + /* Each linear PTE page can map ptes_per_page pages. */ + linear_pages = (mapped_pages + ptes_per_page-1)/ptes_per_page; + + /* We lay out top-level then linear mapping immediately below initrd */ + pgdir = (void *)mem - initrd_size - getpagesize(); + linear = (void *)pgdir - linear_pages*getpagesize(); + + for (i = 0; i < mapped_pages; i++) + linear[i] = ((i * getpagesize()) | PAGE_PRESENT); + + /* Now set up pgd so that this memory is at page_offset */ + for (i = 0; i < mapped_pages; i += ptes_per_page) { + pgdir[(i + page_offset/getpagesize())/ptes_per_page] + = (((u32)linear + i*sizeof(u32)) | PAGE_PRESENT); + } + + verbose("Linear mapping of %u pages in %u pte pages at %p\n", + mapped_pages, linear_pages, linear); + + return (unsigned long)pgdir; +} + +static void concat(char *dst, char *args[]) +{ + unsigned int i, len = 0; + + for (i = 0; args[i]; i++) { + strcpy(dst+len, args[i]); + strcat(dst+len, " "); + len += strlen(args[i]) + 1; + } + /* In case it's empty. */ + dst[len] = '\0'; +} + +static int tell_kernel(u32 pgdir, u32 start, u32 page_offset) +{ + u32 args[] = { LHREQ_INITIALIZE, + LGUEST_GUEST_TOP/getpagesize(), /* Just below us */ + pgdir, start, page_offset }; + int fd; + + fd = open_or_die("/dev/lguest", O_RDWR); + if (write(fd, args, sizeof(args)) < 0) + err(1, "Writing to /dev/lguest"); + return fd; +} + +static void set_fd(int fd, struct device_list *devices) +{ + FD_SET(fd, &devices->infds); + if (fd > devices->max_infd) + devices->max_infd = fd; +} + +/* When input arrives, we tell the kernel to kick lguest out with -EAGAIN. */ +static void wake_parent(int pipefd, int lguest_fd, struct device_list *devices) +{ + set_fd(pipefd, devices); + + for (;;) { + fd_set rfds = devices->infds; + u32 args[] = { LHREQ_BREAK, 1 }; + + select(devices->max_infd+1, &rfds, NULL, NULL, NULL); + if (FD_ISSET(pipefd, &rfds)) { + int ignorefd; + if (read(pipefd, &ignorefd, sizeof(ignorefd)) == 0) + exit(0); + FD_CLR(ignorefd, &devices->infds); + } else + write(lguest_fd, args, sizeof(args)); + } +} + +static int setup_waker(int lguest_fd, struct device_list *device_list) +{ + int pipefd[2], child; + + pipe(pipefd); + child = fork(); + if (child == -1) + err(1, "forking"); + + if (child == 0) { + close(pipefd[1]); + wake_parent(pipefd[0], lguest_fd, device_list); + } + close(pipefd[0]); + + return pipefd[1]; +} + +static void *_check_pointer(unsigned long addr, unsigned int size, + unsigned int line) +{ + if (addr >= LGUEST_GUEST_TOP || addr + size >= LGUEST_GUEST_TOP) + errx(1, "%s:%i: Invalid address %li", __FILE__, line, addr); + return (void *)addr; +} +#define check_pointer(addr,size) _check_pointer(addr, size, __LINE__) + +/* Returns pointer to dma->used_len */ +static u32 *dma2iov(unsigned long dma, struct iovec iov[], unsigned *num) +{ + unsigned int i; + struct lguest_dma *udma; + + udma = check_pointer(dma, sizeof(*udma)); + for (i = 0; i < LGUEST_MAX_DMA_SECTIONS; i++) { + if (!udma->len[i]) + break; + + iov[i].iov_base = check_pointer(udma->addr[i], udma->len[i]); + iov[i].iov_len = udma->len[i]; + } + *num = i; + return &udma->used_len; +} + +static u32 *get_dma_buffer(int fd, void *key, + struct iovec iov[], unsigned int *num, u32 *irq) +{ + u32 buf[] = { LHREQ_GETDMA, (u32)key }; + unsigned long udma; + u32 *res; + + udma = write(fd, buf, sizeof(buf)); + if (udma == (unsigned long)-1) + return NULL; + + /* Kernel stashes irq in ->used_len. */ + res = dma2iov(udma, iov, num); + *irq = *res; + return res; +} + +static void trigger_irq(int fd, u32 irq) +{ + u32 buf[] = { LHREQ_IRQ, irq }; + if (write(fd, buf, sizeof(buf)) != 0) + err(1, "Triggering irq %i", irq); +} + +static void discard_iovec(struct iovec *iov, unsigned int *num) +{ + static char discard_buf[1024]; + *num = 1; + iov->iov_base = discard_buf; + iov->iov_len = sizeof(discard_buf); +} + +static struct termios orig_term; +static void restore_term(void) +{ + tcsetattr(STDIN_FILENO, TCSANOW, &orig_term); +} + +struct console_abort +{ + int count; + struct timeval start; +}; + +/* We DMA input to buffer bound at start of console page. */ +static bool handle_console_input(int fd, struct device *dev) +{ + u32 irq = 0, *lenp; + int len; + unsigned int num; + struct iovec iov[LGUEST_MAX_DMA_SECTIONS]; + struct console_abort *abort = dev->priv; + + lenp = get_dma_buffer(fd, dev->mem, iov, &num, &irq); + if (!lenp) { + warn("console: no dma buffer!"); + discard_iovec(iov, &num); + } + + len = readv(dev->fd, iov, num); + if (len <= 0) { + warnx("Failed to get console input, ignoring console."); + len = 0; + } + + if (lenp) { + *lenp = len; + trigger_irq(fd, irq); + } + + /* Three ^C within one second? Exit. */ + if (len == 1 && ((char *)iov[0].iov_base)[0] == 3) { + if (!abort->count++) + gettimeofday(&abort->start, NULL); + else if (abort->count == 3) { + struct timeval now; + gettimeofday(&now, NULL); + if (now.tv_sec <= abort->start.tv_sec+1) { + /* Make sure waker is not blocked in BREAK */ + u32 args[] = { LHREQ_BREAK, 0 }; + close(waker_fd); + write(fd, args, sizeof(args)); + exit(2); + } + abort->count = 0; + } + } else + abort->count = 0; + + if (!len) { + restore_term(); + return false; + } + return true; +} + +static u32 handle_console_output(int fd, const struct iovec *iov, + unsigned num, struct device*dev) +{ + return writev(STDOUT_FILENO, iov, num); +} + +static u32 handle_tun_output(int fd, const struct iovec *iov, + unsigned num, struct device *dev) +{ + /* Now we've seen output, we should warn if we can't get buffers. */ + *(bool *)dev->priv = true; + return writev(dev->fd, iov, num); +} + +static unsigned long peer_offset(unsigned int peernum) +{ + return 4 * peernum; +} + +static bool handle_tun_input(int fd, struct device *dev) +{ + u32 irq = 0, *lenp; + int len; + unsigned num; + struct iovec iov[LGUEST_MAX_DMA_SECTIONS]; + + lenp = get_dma_buffer(fd, dev->mem+peer_offset(NET_PEERNUM), iov, &num, + &irq); + if (!lenp) { + if (*(bool *)dev->priv) + warn("network: no dma buffer!"); + discard_iovec(iov, &num); + } + + len = readv(dev->fd, iov, num); + if (len <= 0) + err(1, "reading network"); + if (lenp) { + *lenp = len; + trigger_irq(fd, irq); + } + verbose("tun input packet len %i [%02x %02x] (%s)\n", len, + ((u8 *)iov[0].iov_base)[0], ((u8 *)iov[0].iov_base)[1], + lenp ? "sent" : "discarded"); + return true; +} + +static u32 handle_block_output(int fd, const struct iovec *iov, + unsigned num, struct device *dev) +{ + struct lguest_block_page *p = dev->mem; + u32 irq, *lenp; + unsigned int len, reply_num; + struct iovec reply[LGUEST_MAX_DMA_SECTIONS]; + off64_t device_len, off = (off64_t)p->sector * 512; + + device_len = *(off64_t *)dev->priv; + + if (off >= device_len) + err(1, "Bad offset %llu vs %llu", off, device_len); + if (lseek64(dev->fd, off, SEEK_SET) != off) + err(1, "Bad seek to sector %i", p->sector); + + verbose("Block: %s at offset %llu\n", p->type ? "WRITE" : "READ", off); + + lenp = get_dma_buffer(fd, dev->mem, reply, &reply_num, &irq); + if (!lenp) + err(1, "Block request didn't give us a dma buffer"); + + if (p->type) { + len = writev(dev->fd, iov, num); + if (off + len > device_len) { + ftruncate(dev->fd, device_len); + errx(1, "Write past end %llu+%u", off, len); + } + *lenp = 0; + } else { + len = readv(dev->fd, reply, reply_num); + *lenp = len; + } + + p->result = 1 + (p->bytes != len); + trigger_irq(fd, irq); + return 0; +} + +static void handle_output(int fd, unsigned long dma, unsigned long key, + struct device_list *devices) +{ + struct device *i; + u32 *lenp; + struct iovec iov[LGUEST_MAX_DMA_SECTIONS]; + unsigned num = 0; + + lenp = dma2iov(dma, iov, &num); + for (i = devices->dev; i; i = i->next) { + if (i->handle_output && key == i->watch_key) { + *lenp = i->handle_output(fd, iov, num, i); + return; + } + } + warnx("Pending dma %p, key %p", (void *)dma, (void *)key); +} + +static void handle_input(int fd, struct device_list *devices) +{ + struct timeval poll = { .tv_sec = 0, .tv_usec = 0 }; + + for (;;) { + struct device *i; + fd_set fds = devices->infds; + + if (select(devices->max_infd+1, &fds, NULL, NULL, &poll) == 0) + break; + + for (i = devices->dev; i; i = i->next) { + if (i->handle_input && FD_ISSET(i->fd, &fds)) { + if (!i->handle_input(fd, i)) { + FD_CLR(i->fd, &devices->infds); + /* Tell waker to ignore it too... */ + write(waker_fd, &i->fd, sizeof(i->fd)); + } + } + } + } +} + +static struct lguest_device_desc *new_dev_desc(u16 type, u16 features, + u16 num_pages) +{ + static unsigned long top = LGUEST_GUEST_TOP; + struct lguest_device_desc *desc; + + desc = malloc(sizeof(*desc)); + desc->type = type; + desc->num_pages = num_pages; + desc->features = features; + desc->status = 0; + if (num_pages) { + top -= num_pages*getpagesize(); + map_zeroed_pages(top, num_pages); + desc->pfn = top / getpagesize(); + } else + desc->pfn = 0; + return desc; +} + +static struct device *new_device(struct device_list *devices, + u16 type, u16 num_pages, u16 features, + int fd, + bool (*handle_input)(int, struct device *), + unsigned long watch_off, + u32 (*handle_output)(int, + const struct iovec *, + unsigned, + struct device *)) +{ + struct device *dev = malloc(sizeof(*dev)); + + /* Append to device list. */ + *devices->lastdev = dev; + dev->next = NULL; + devices->lastdev = &dev->next; + + dev->fd = fd; + if (handle_input) + set_fd(dev->fd, devices); + dev->desc = new_dev_desc(type, features, num_pages); + dev->mem = (void *)(dev->desc->pfn * getpagesize()); + dev->handle_input = handle_input; + dev->watch_key = (unsigned long)dev->mem + watch_off; + dev->handle_output = handle_output; + return dev; +} + +static void setup_console(struct device_list *devices) +{ + struct device *dev; + + if (tcgetattr(STDIN_FILENO, &orig_term) == 0) { + struct termios term = orig_term; + term.c_lflag &= ~(ISIG|ICANON|ECHO); + tcsetattr(STDIN_FILENO, TCSANOW, &term); + atexit(restore_term); + } + + /* We don't currently require a page for the console. */ + dev = new_device(devices, LGUEST_DEVICE_T_CONSOLE, 0, 0, + STDIN_FILENO, handle_console_input, + LGUEST_CONSOLE_DMA_KEY, handle_console_output); + dev->priv = malloc(sizeof(struct console_abort)); + ((struct console_abort *)dev->priv)->count = 0; + verbose("device %p: console\n", + (void *)(dev->desc->pfn * getpagesize())); +} + +static void setup_block_file(const char *filename, struct device_list *devices) +{ + int fd; + struct device *dev; + off64_t *device_len; + struct lguest_block_page *p; + + fd = open_or_die(filename, O_RDWR|O_LARGEFILE|O_DIRECT); + dev = new_device(devices, LGUEST_DEVICE_T_BLOCK, 1, + LGUEST_DEVICE_F_RANDOMNESS, + fd, NULL, 0, handle_block_output); + device_len = dev->priv = malloc(sizeof(*device_len)); + *device_len = lseek64(fd, 0, SEEK_END); + p = dev->mem; + + p->num_sectors = *device_len/512; + verbose("device %p: block %i sectors\n", + (void *)(dev->desc->pfn * getpagesize()), p->num_sectors); +} + +/* We use fnctl locks to reserve network slots (autocleanup!) */ +static unsigned int find_slot(int netfd, const char *filename) +{ + struct flock fl; + + fl.l_type = F_WRLCK; + fl.l_whence = SEEK_SET; + fl.l_len = 1; + for (fl.l_start = 0; + fl.l_start < getpagesize()/sizeof(struct lguest_net); + fl.l_start++) { + if (fcntl(netfd, F_SETLK, &fl) == 0) + return fl.l_start; + } + errx(1, "No free slots in network file %s", filename); +} + +static void setup_net_file(const char *filename, + struct device_list *devices) +{ + int netfd; + struct device *dev; + + netfd = open(filename, O_RDWR, 0); + if (netfd < 0) { + if (errno == ENOENT) { + netfd = open(filename, O_RDWR|O_CREAT, 0600); + if (netfd >= 0) { + char page[getpagesize()]; + memset(page, 0, sizeof(page)); + write(netfd, page, sizeof(page)); + } + } + if (netfd < 0) + err(1, "cannot open net file '%s'", filename); + } + + dev = new_device(devices, LGUEST_DEVICE_T_NET, 1, + find_slot(netfd, filename)|LGUEST_NET_F_NOCSUM, + -1, NULL, 0, NULL); + + /* We overwrite the /dev/zero mapping with the actual file. */ + if (mmap(dev->mem, getpagesize(), PROT_READ|PROT_WRITE, + MAP_FIXED|MAP_SHARED, netfd, 0) != dev->mem) + err(1, "could not mmap '%s'", filename); + verbose("device %p: shared net %s, peer %i\n", + (void *)(dev->desc->pfn * getpagesize()), filename, + dev->desc->features & ~LGUEST_NET_F_NOCSUM); +} + +static u32 str2ip(const char *ipaddr) +{ + unsigned int byte[4]; + + sscanf(ipaddr, "%u.%u.%u.%u", &byte[0], &byte[1], &byte[2], &byte[3]); + return (byte[0] << 24) | (byte[1] << 16) | (byte[2] << 8) | byte[3]; +} + +/* adapted from libbridge */ +static void add_to_bridge(int fd, const char *if_name, const char *br_name) +{ + int ifidx; + struct ifreq ifr; + + if (!*br_name) + errx(1, "must specify bridge name"); + + ifidx = if_nametoindex(if_name); + if (!ifidx) + errx(1, "interface %s does not exist!", if_name); + + strncpy(ifr.ifr_name, br_name, IFNAMSIZ); + ifr.ifr_ifindex = ifidx; + if (ioctl(fd, SIOCBRADDIF, &ifr) < 0) + err(1, "can't add %s to bridge %s", if_name, br_name); +} + +static void configure_device(int fd, const char *devname, u32 ipaddr, + unsigned char hwaddr[6]) +{ + struct ifreq ifr; + struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr; + + memset(&ifr, 0, sizeof(ifr)); + strcpy(ifr.ifr_name, devname); + sin->sin_family = AF_INET; + sin->sin_addr.s_addr = htonl(ipaddr); + if (ioctl(fd, SIOCSIFADDR, &ifr) != 0) + err(1, "Setting %s interface address", devname); + ifr.ifr_flags = IFF_UP; + if (ioctl(fd, SIOCSIFFLAGS, &ifr) != 0) + err(1, "Bringing interface %s up", devname); + + if (ioctl(fd, SIOCGIFHWADDR, &ifr) != 0) + err(1, "getting hw address for %s", devname); + + memcpy(hwaddr, ifr.ifr_hwaddr.sa_data, 6); +} + +static void setup_tun_net(const char *arg, struct device_list *devices) +{ + struct device *dev; + struct ifreq ifr; + int netfd, ipfd; + u32 ip; + const char *br_name = NULL; + + netfd = open_or_die("/dev/net/tun", O_RDWR); + memset(&ifr, 0, sizeof(ifr)); + ifr.ifr_flags = IFF_TAP | IFF_NO_PI; + strcpy(ifr.ifr_name, "tap%d"); + if (ioctl(netfd, TUNSETIFF, &ifr) != 0) + err(1, "configuring /dev/net/tun"); + ioctl(netfd, TUNSETNOCSUM, 1); + + /* You will be peer 1: we should create enough jitter to randomize */ + dev = new_device(devices, LGUEST_DEVICE_T_NET, 1, + NET_PEERNUM|LGUEST_DEVICE_F_RANDOMNESS, netfd, + handle_tun_input, peer_offset(0), handle_tun_output); + dev->priv = malloc(sizeof(bool)); + *(bool *)dev->priv = false; + + ipfd = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP); + if (ipfd < 0) + err(1, "opening IP socket"); + + if (!strncmp(BRIDGE_PFX, arg, strlen(BRIDGE_PFX))) { + ip = INADDR_ANY; + br_name = arg + strlen(BRIDGE_PFX); + add_to_bridge(ipfd, ifr.ifr_name, br_name); + } else + ip = str2ip(arg); + + /* We are peer 0, ie. first slot. */ + configure_device(ipfd, ifr.ifr_name, ip, dev->mem); + + /* Set "promisc" bit: we want every single packet. */ + *((u8 *)dev->mem) |= 0x1; + + close(ipfd); + + verbose("device %p: tun net %u.%u.%u.%u\n", + (void *)(dev->desc->pfn * getpagesize()), + (u8)(ip>>24), (u8)(ip>>16), (u8)(ip>>8), (u8)ip); + if (br_name) + verbose("attached to bridge: %s\n", br_name); +} + +/* Now we know how much memory we have, we copy in device descriptors */ +static void map_device_descriptors(struct device_list *devs, unsigned long mem) +{ + struct device *i; + unsigned int num; + struct lguest_device_desc *descs; + + /* Device descriptor array sits just above top of normal memory */ + descs = map_zeroed_pages(mem, 1); + + for (i = devs->dev, num = 0; i; i = i->next, num++) { + if (num == LGUEST_MAX_DEVICES) + errx(1, "too many devices"); + verbose("Device %i: %s\n", num, + i->desc->type == LGUEST_DEVICE_T_NET ? "net" + : i->desc->type == LGUEST_DEVICE_T_CONSOLE ? "console" + : i->desc->type == LGUEST_DEVICE_T_BLOCK ? "block" + : "unknown"); + descs[num] = *i->desc; + free(i->desc); + i->desc = &descs[num]; + } +} + +static void __attribute__((noreturn)) +run_guest(int lguest_fd, struct device_list *device_list) +{ + for (;;) { + u32 args[] = { LHREQ_BREAK, 0 }; + unsigned long arr[2]; + int readval; + + /* We read from the /dev/lguest device to run the Guest. */ + readval = read(lguest_fd, arr, sizeof(arr)); + + if (readval == sizeof(arr)) { + handle_output(lguest_fd, arr[0], arr[1], device_list); + continue; + } else if (errno == ENOENT) { + char reason[1024] = { 0 }; + read(lguest_fd, reason, sizeof(reason)-1); + errx(1, "%s", reason); + } else if (errno != EAGAIN) + err(1, "Running guest failed"); + handle_input(lguest_fd, device_list); + if (write(lguest_fd, args, sizeof(args)) < 0) + err(1, "Resetting break"); + } +} + +static struct option opts[] = { + { "verbose", 0, NULL, 'v' }, + { "sharenet", 1, NULL, 's' }, + { "tunnet", 1, NULL, 't' }, + { "block", 1, NULL, 'b' }, + { "initrd", 1, NULL, 'i' }, + { NULL }, +}; +static void usage(void) +{ + errx(1, "Usage: lguest [--verbose] " + "[--sharenet=<filename>|--tunnet=(<ipaddr>|bridge:<bridgename>)\n" + "|--block=<filename>|--initrd=<filename>]...\n" + "<mem-in-mb> vmlinux [args...]"); +} + +int main(int argc, char *argv[]) +{ + unsigned long mem, pgdir, start, page_offset, initrd_size = 0; + int c, lguest_fd; + struct device_list device_list; + void *boot = (void *)0; + const char *initrd_name = NULL; + + device_list.max_infd = -1; + device_list.dev = NULL; + device_list.lastdev = &device_list.dev; + FD_ZERO(&device_list.infds); + + while ((c = getopt_long(argc, argv, "v", opts, NULL)) != EOF) { + switch (c) { + case 'v': + verbose = true; + break; + case 's': + setup_net_file(optarg, &device_list); + break; + case 't': + setup_tun_net(optarg, &device_list); + break; + case 'b': + setup_block_file(optarg, &device_list); + break; + case 'i': + initrd_name = optarg; + break; + default: + warnx("Unknown argument %s", argv[optind]); + usage(); + } + } + if (optind + 2 > argc) + usage(); + + /* We need a console device */ + setup_console(&device_list); + + /* First we map /dev/zero over all of guest-physical memory. */ + mem = atoi(argv[optind]) * 1024 * 1024; + map_zeroed_pages(0, mem / getpagesize()); + + /* Now we load the kernel */ + start = load_kernel(open_or_die(argv[optind+1], O_RDONLY), + &page_offset); + + /* Write the device descriptors into memory. */ + map_device_descriptors(&device_list, mem); + + /* Map the initrd image if requested */ + if (initrd_name) { + initrd_size = load_initrd(initrd_name, mem); + *(unsigned long *)(boot+0x218) = mem - initrd_size; + *(unsigned long *)(boot+0x21c) = initrd_size; + *(unsigned char *)(boot+0x210) = 0xFF; + } + + /* Set up the initial linar pagetables. */ + pgdir = setup_pagetables(mem, initrd_size, page_offset); + + /* E820 memory map: ours is a simple, single region. */ + *(char*)(boot+E820NR) = 1; + *((struct e820entry *)(boot+E820MAP)) + = ((struct e820entry) { 0, mem, E820_RAM }); + /* Command line pointer and command line (at 4096) */ + *(void **)(boot + 0x228) = boot + 4096; + concat(boot + 4096, argv+optind+2); + /* Paravirt type: 1 == lguest */ + *(int *)(boot + 0x23c) = 1; + + lguest_fd = tell_kernel(pgdir, start, page_offset); + waker_fd = setup_waker(lguest_fd, &device_list); + + run_guest(lguest_fd, &device_list); +} diff --git a/Documentation/lguest/lguest.txt b/Documentation/lguest/lguest.txt new file mode 100644 index 000000000000..821617bd6c04 --- /dev/null +++ b/Documentation/lguest/lguest.txt @@ -0,0 +1,129 @@ +Rusty's Remarkably Unreliable Guide to Lguest + - or, A Young Coder's Illustrated Hypervisor +http://lguest.ozlabs.org + +Lguest is designed to be a minimal hypervisor for the Linux kernel, for +Linux developers and users to experiment with virtualization with the +minimum of complexity. Nonetheless, it should have sufficient +features to make it useful for specific tasks, and, of course, you are +encouraged to fork and enhance it. + +Features: + +- Kernel module which runs in a normal kernel. +- Simple I/O model for communication. +- Simple program to create new guests. +- Logo contains cute puppies: http://lguest.ozlabs.org + +Developer features: + +- Fun to hack on. +- No ABI: being tied to a specific kernel anyway, you can change anything. +- Many opportunities for improvement or feature implementation. + +Running Lguest: + +- Lguest runs the same kernel as guest and host. You can configure + them differently, but usually it's easiest not to. + + You will need to configure your kernel with the following options: + + CONFIG_HIGHMEM64G=n ("High Memory Support" "64GB")[1] + CONFIG_TUN=y/m ("Universal TUN/TAP device driver support") + CONFIG_EXPERIMENTAL=y ("Prompt for development and/or incomplete code/drivers") + CONFIG_PARAVIRT=y ("Paravirtualization support (EXPERIMENTAL)") + CONFIG_LGUEST=y/m ("Linux hypervisor example code") + + and I recommend: + CONFIG_HZ=100 ("Timer frequency")[2] + +- A tool called "lguest" is available in this directory: type "make" + to build it. If you didn't build your kernel in-tree, use "make + O=<builddir>". + +- Create or find a root disk image. There are several useful ones + around, such as the xm-test tiny root image at + http://xm-test.xensource.com/ramdisks/initrd-1.1-i386.img + + For more serious work, I usually use a distribution ISO image and + install it under qemu, then make multiple copies: + + dd if=/dev/zero of=rootfile bs=1M count=2048 + qemu -cdrom image.iso -hda rootfile -net user -net nic -boot d + +- "modprobe lg" if you built it as a module. + +- Run an lguest as root: + + Documentation/lguest/lguest 64m vmlinux --tunnet=192.168.19.1 --block=rootfile root=/dev/lgba + + Explanation: + 64m: the amount of memory to use. + + vmlinux: the kernel image found in the top of your build directory. You + can also use a standard bzImage. + + --tunnet=192.168.19.1: configures a "tap" device for networking with this + IP address. + + --block=rootfile: a file or block device which becomes /dev/lgba + inside the guest. + + root=/dev/lgba: this (and anything else on the command line) are + kernel boot parameters. + +- Configuring networking. I usually have the host masquerade, using + "iptables -t nat -A POSTROUTING -o eth0 -j MASQUERADE" and "echo 1 > + /proc/sys/net/ipv4/ip_forward". In this example, I would configure + eth0 inside the guest at 192.168.19.2. + + Another method is to bridge the tap device to an external interface + using --tunnet=bridge:<bridgename>, and perhaps run dhcp on the guest + to obtain an IP address. The bridge needs to be configured first: + this option simply adds the tap interface to it. + + A simple example on my system: + + ifconfig eth0 0.0.0.0 + brctl addbr lg0 + ifconfig lg0 up + brctl addif lg0 eth0 + dhclient lg0 + + Then use --tunnet=bridge:lg0 when launching the guest. + + See http://linux-net.osdl.org/index.php/Bridge for general information + on how to get bridging working. + +- You can also create an inter-guest network using + "--sharenet=<filename>": any two guests using the same file are on + the same network. This file is created if it does not exist. + +Lguest I/O model: + +Lguest uses a simplified DMA model plus shared memory for I/O. Guests +can communicate with each other if they share underlying memory +(usually by the lguest program mmaping the same file), but they can +use any non-shared memory to communicate with the lguest process. + +Guests can register DMA buffers at any key (must be a valid physical +address) using the LHCALL_BIND_DMA(key, dmabufs, num<<8|irq) +hypercall. "dmabufs" is the physical address of an array of "num" +"struct lguest_dma": each contains a used_len, and an array of +physical addresses and lengths. When a transfer occurs, the +"used_len" field of one of the buffers which has used_len 0 will be +set to the length transferred and the irq will fire. + +Using an irq value of 0 unbinds the dma buffers. + +To send DMA, the LHCALL_SEND_DMA(key, dma_physaddr) hypercall is used, +and the bytes used is written to the used_len field. This can be 0 if +noone else has bound a DMA buffer to that key or some other error. +DMA buffers bound by the same guest are ignored. + +Cheers! +Rusty Russell rusty@rustcorp.com.au. + +[1] These are on various places on the TODO list, waiting for you to + get annoyed enough at the limitation to fix it. +[2] Lguest is not yet tickless when idle. See [1]. diff --git a/Documentation/power/notifiers.txt b/Documentation/power/notifiers.txt new file mode 100644 index 000000000000..9293e4bc857c --- /dev/null +++ b/Documentation/power/notifiers.txt @@ -0,0 +1,50 @@ +Suspend notifiers + (C) 2007 Rafael J. Wysocki <rjw@sisk.pl>, GPL + +There are some operations that device drivers may want to carry out in their +.suspend() routines, but shouldn't, because they can cause the hibernation or +suspend to fail. For example, a driver may want to allocate a substantial amount +of memory (like 50 MB) in .suspend(), but that shouldn't be done after the +swsusp's memory shrinker has run. + +Also, there may be some operations, that subsystems want to carry out before a +hibernation/suspend or after a restore/resume, requiring the system to be fully +functional, so the drivers' .suspend() and .resume() routines are not suitable +for this purpose. For example, device drivers may want to upload firmware to +their devices after a restore from a hibernation image, but they cannot do it by +calling request_firmware() from their .resume() routines (user land processes +are frozen at this point). The solution may be to load the firmware into +memory before processes are frozen and upload it from there in the .resume() +routine. Of course, a hibernation notifier may be used for this purpose. + +The subsystems that have such needs can register suspend notifiers that will be +called upon the following events by the suspend core: + +PM_HIBERNATION_PREPARE The system is going to hibernate or suspend, tasks will + be frozen immediately. + +PM_POST_HIBERNATION The system memory state has been restored from a + hibernation image or an error occured during the + hibernation. Device drivers' .resume() callbacks have + been executed and tasks have been thawed. + +PM_SUSPEND_PREPARE The system is preparing for a suspend. + +PM_POST_SUSPEND The system has just resumed or an error occured during + the suspend. Device drivers' .resume() callbacks have + been executed and tasks have been thawed. + +It is generally assumed that whatever the notifiers do for +PM_HIBERNATION_PREPARE, should be undone for PM_POST_HIBERNATION. Analogously, +operations performed for PM_SUSPEND_PREPARE should be reversed for +PM_POST_SUSPEND. Additionally, all of the notifiers are called for +PM_POST_HIBERNATION if one of them fails for PM_HIBERNATION_PREPARE, and +all of the notifiers are called for PM_POST_SUSPEND if one of them fails for +PM_SUSPEND_PREPARE. + +The hibernation and suspend notifiers are called with pm_mutex held. They are +defined in the usual way, but their last argument is meaningless (it is always +NULL). To register and/or unregister a suspend notifier use the functions +register_pm_notifier() and unregister_pm_notifier(), respectively, defined in +include/linux/suspend.h . If you don't need to unregister the notifier, you can +also use the pm_notifier() macro defined in include/linux/suspend.h . diff --git a/Documentation/powerpc/booting-without-of.txt b/Documentation/powerpc/booting-without-of.txt index 0c2434822094..76733a3962f0 100644 --- a/Documentation/powerpc/booting-without-of.txt +++ b/Documentation/powerpc/booting-without-of.txt @@ -1250,6 +1250,12 @@ platforms are moved over to use the flattened-device-tree model. network device. This is used by the bootwrapper to interpret MAC addresses passed by the firmware when no information other than indices is available to associate an address with a device. + - phy-connection-type : a string naming the controller/PHY interface type, + i.e., "mii" (default), "rmii", "gmii", "rgmii", "rgmii-id", "sgmii", + "tbi", or "rtbi". This property is only really needed if the connection + is of type "rgmii-id", as all other connection types are detected by + hardware. + Example: diff --git a/Documentation/zh_CN/HOWTO b/Documentation/zh_CN/HOWTO new file mode 100644 index 000000000000..48fc67bfbe3d --- /dev/null +++ b/Documentation/zh_CN/HOWTO @@ -0,0 +1,536 @@ +Chinese translated version of Documentation/HOWTO + +If you have any comment or update to the content, please contact the +original document maintainer directly. However, if you have problem +communicating in English you can also ask the Chinese maintainer for +help. Contact the Chinese maintainer, if this translation is outdated +or there is problem with translation. + +Maintainer: Greg Kroah-Hartman <greg@kroah.com> +Chinese maintainer: Li Yang <leoli@freescale.com> +--------------------------------------------------------------------- +Documentation/HOWTO 的中文翻译 + +如果想评论或更新本文的内容,请直接联系原文档的维护者。如果你使用英文 +交流有困难的话,也可以向中文版维护者求助。如果本翻译更新不及时或者翻 +译存在问题,请联系中文版维护者。 + +英文版维护者: Greg Kroah-Hartman <greg@kroah.com> +中文版维护者: 李阳 Li Yang <leoli@freescale.com> +中文版翻译者: 李阳 Li Yang <leoli@freescale.com> +中文版校译者: 钟宇 TripleX Chung <xxx.phy@gmail.com> + 陈琦 Maggie Chen <chenqi@beyondsoft.com> + 王聪 Wang Cong <xiyou.wangcong@gmail.com> + +以下为正文 +--------------------------------------------------------------------- + +如何参与Linux内核开发 +--------------------- + +这是一篇将如何参与Linux内核开发的相关问题一网打尽的终极秘笈。它将指导你 +成为一名Linux内核开发者,并且学会如何同Linux内核开发社区合作。它尽可能不 +包括任何关于内核编程的技术细节,但会给你指引一条获得这些知识的正确途径。 + +如果这篇文章中的任何内容不再适用,请给文末列出的文件维护者发送补丁。 + + +入门 +---- + +你想了解如何成为一名Linux内核开发者?或者老板吩咐你“给这个设备写个Linux +驱动程序”?这篇文章的目的就是教会你达成这些目标的全部诀窍,它将描述你需 +要经过的流程以及给出如何同内核社区合作的一些提示。它还将试图解释内核社区 +为何这样运作。 + +Linux内核大部分是由C语言写成的,一些体系结构相关的代码用到了汇编语言。要 +参与内核开发,你必须精通C语言。除非你想为某个架构开发底层代码,否则你并 +不需要了解(任何体系结构的)汇编语言。下面列举的书籍虽然不能替代扎实的C +语言教育和多年的开发经验,但如果需要的话,做为参考还是不错的: + - "The C Programming Language" by Kernighan and Ritchie [Prentice Hall] + 《C程序设计语言(第2版·新版)》(徐宝文 李志 译)[机械工业出版社] + - "Practical C Programming" by Steve Oualline [O'Reilly] + 《实用C语言编程(第三版)》(郭大海 译)[中国电力出版社] + - "C: A Reference Manual" by Harbison and Steele [Prentice Hall] + 《C语言参考手册(原书第5版)》(邱仲潘 等译)[机械工业出版社] + +Linux内核使用GNU C和GNU工具链开发。虽然它遵循ISO C89标准,但也用到了一些 +标准中没有定义的扩展。内核是自给自足的C环境,不依赖于标准C库的支持,所以 +并不支持C标准中的部分定义。比如long long类型的大数除法和浮点运算就不允许 +使用。有时候确实很难弄清楚内核对工具链的要求和它所使用的扩展,不幸的是目 +前还没有明确的参考资料可以解释它们。请查阅gcc信息页(使用“info gcc”命令 +显示)获得一些这方面信息。 + +请记住你是在学习怎么和已经存在的开发社区打交道。它由一群形形色色的人组成, +他们对代码、风格和过程有着很高的标准。这些标准是在长期实践中总结出来的, +适应于地理上分散的大型开发团队。它们已经被很好得整理成档,建议你在开发 +之前尽可能多的学习这些标准,而不要期望别人来适应你或者你公司的行为方式。 + + +法律问题 +-------- + +Linux内核源代码都是在GPL(通用公共许可证)的保护下发布的。要了解这种许可 +的细节请查看源代码主目录下的COPYING文件。如果你对它还有更深入问题请联系 +律师,而不要在Linux内核邮件组上提问。因为邮件组里的人并不是律师,不要期 +望他们的话有法律效力。 + +对于GPL的常见问题和解答,请访问以下链接: + http://www.gnu.org/licenses/gpl-faq.html + + +文档 +---- + +Linux内核代码中包含有大量的文档。这些文档对于学习如何与内核社区互动有着 +不可估量的价值。当一个新的功能被加入内核,最好把解释如何使用这个功能的文 +档也放进内核。当内核的改动导致面向用户空间的接口发生变化时,最好将相关信 +息或手册页(manpages)的补丁发到mtk-manpages@gmx.net,以向手册页(manpages) +的维护者解释这些变化。 + +以下是内核代码中需要阅读的文档: + README + 文件简要介绍了Linux内核的背景,并且描述了如何配置和编译内核。内核的 + 新用户应该从这里开始。 + + Documentation/Changes + 文件给出了用来编译和使用内核所需要的最小软件包列表。 + + Documentation/CodingStyle + 描述Linux内核的代码风格和理由。所有新代码需要遵守这篇文档中定义的规 + 范。大多数维护者只会接收符合规定的补丁,很多人也只会帮忙检查符合风格 + 的代码。 + + Documentation/SubmittingPatches + Documentation/SubmittingDrivers + 这两份文档明确描述如何创建和发送补丁,其中包括(但不仅限于): + - 邮件内容 + - 邮件格式 + - 选择收件人 + 遵守这些规定并不能保证提交成功(因为所有补丁需要通过严格的内容和风格 + 审查),但是忽视他们几乎就意味着失败。 + + 其他关于如何正确地生成补丁的优秀文档包括: + "The Perfect Patch" + http://www.zip.com.au/~akpm/linux/patches/stuff/tpp.txt + "Linux kernel patch submission format" + http://linux.yyz.us/patch-format.html + + Documentation/stable_api_nonsense.txt + 论证内核为什么特意不包括稳定的内核内部API,也就是说不包括像这样的特 + 性: + - 子系统中间层(为了兼容性?) + - 在不同操作系统间易于移植的驱动程序 + - 减缓(甚至阻止)内核代码的快速变化 + 这篇文档对于理解Linux的开发哲学至关重要。对于将开发平台从其他操作系 + 统转移到Linux的人来说也很重要。 + + Documentation/SecurityBugs + 如果你认为自己发现了Linux内核的安全性问题,请根据这篇文档中的步骤来 + 提醒其他内核开发者并帮助解决这个问题。 + + Documentation/ManagementStyle + 描述内核维护者的工作方法及其共有特点。这对于刚刚接触内核开发(或者对 + 它感到好奇)的人来说很重要,因为它解释了很多对于内核维护者独特行为的 + 普遍误解与迷惑。 + + Documentation/stable_kernel_rules.txt + 解释了稳定版内核发布的规则,以及如何将改动放入这些版本的步骤。 + + Documentation/kernel-docs.txt + 有助于内核开发的外部文档列表。如果你在内核自带的文档中没有找到你想找 + 的内容,可以查看这些文档。 + + Documentation/applying-patches.txt + 关于补丁是什么以及如何将它打在不同内核开发分支上的好介绍 + +内核还拥有大量从代码自动生成的文档。它包含内核内部API的全面介绍以及如何 +妥善处理加锁的规则。生成的文档会放在 Documentation/DocBook/目录下。在内 +核源码的主目录中使用以下不同命令将会分别生成PDF、Postscript、HTML和手册 +页等不同格式的文档: + make pdfdocs + make psdocs + make htmldocs + make mandocs + + +如何成为内核开发者 +------------------ +如果你对Linux内核开发一无所知,你应该访问“Linux内核新手”计划: + http://kernelnewbies.org +它拥有一个可以问各种最基本的内核开发问题的邮件列表(在提问之前一定要记得 +查找已往的邮件,确认是否有人已经回答过相同的问题)。它还拥有一个可以获得 +实时反馈的IRC聊天频道,以及大量对于学习Linux内核开发相当有帮助的文档。 + +网站简要介绍了源代码组织结构、子系统划分以及目前正在进行的项目(包括内核 +中的和单独维护的)。它还提供了一些基本的帮助信息,比如如何编译内核和打补 +丁。 + +如果你想加入内核开发社区并协助完成一些任务,却找不到从哪里开始,可以访问 +“Linux内核房管员”计划: + http://janitor.kernelnewbies.org/ +这是极佳的起点。它提供一个相对简单的任务列表,列出内核代码中需要被重新 +整理或者改正的地方。通过和负责这个计划的开发者们一同工作,你会学到将补丁 +集成进内核的基本原理。如果还没有决定下一步要做什么的话,你还可能会得到方 +向性的指点。 + +如果你已经有一些现成的代码想要放到内核中,但是需要一些帮助来使它们拥有正 +确的格式。请访问“内核导师”计划。这个计划就是用来帮助你完成这个目标的。它 +是一个邮件列表,地址如下: + http://selenic.com/mailman/listinfo/kernel-mentors + +在真正动手修改内核代码之前,理解要修改的代码如何运作是必需的。要达到这个 +目的,没什么办法比直接读代码更有效了(大多数花招都会有相应的注释),而且 +一些特制的工具还可以提供帮助。例如,“Linux代码交叉引用”项目就是一个值得 +特别推荐的帮助工具,它将源代码显示在有编目和索引的网页上。其中一个更新及 +时的内核源码库,可以通过以下地址访问: + http://sosdg.org/~coywolf/lxr/ + + +开发流程 +-------- + +目前Linux内核开发流程包括几个“主内核分支”和很多子系统相关的内核分支。这 +些分支包括: + - 2.6.x主内核源码树 + - 2.6.x.y -stable内核源码树 + - 2.6.x -git内核补丁集 + - 2.6.x -mm内核补丁集 + - 子系统相关的内核源码树和补丁集 + + +2.6.x内核主源码树 +----------------- +2.6.x内核是由Linus Torvalds(Linux的创造者)亲自维护的。你可以在 +kernel.org网站的pub/linux/kernel/v2.6/目录下找到它。它的开发遵循以下步 +骤: + - 每当一个新版本的内核被发布,为期两周的集成窗口将被打开。在这段时间里 + 维护者可以向Linus提交大段的修改,通常这些修改已经被放到-mm内核中几个 + 星期了。提交大量修改的首选方式是使用git工具(内核的代码版本管理工具 + ,更多的信息可以在http://git.or.cz/获取),不过使用普通补丁也是可以 + 的。 + - 两个星期以后-rc1版本内核发布。之后只有不包含可能影响整个内核稳定性的 + 新功能的补丁才可能被接受。请注意一个全新的驱动程序(或者文件系统)有 + 可能在-rc1后被接受是因为这样的修改完全独立,不会影响其他的代码,所以 + 没有造成内核退步的风险。在-rc1以后也可以用git向Linus提交补丁,不过所 + 有的补丁需要同时被发送到相应的公众邮件列表以征询意见。 + - 当Linus认为当前的git源码树已经达到一个合理健全的状态足以发布供人测试 + 时,一个新的-rc版本就会被发布。计划是每周都发布新的-rc版本。 + - 这个过程一直持续下去直到内核被认为达到足够稳定的状态,持续时间大概是 + 6个星期。 + +关于内核发布,值得一提的是Andrew Morton在linux-kernel邮件列表中如是说: + “没有人知道新内核何时会被发布,因为发布是根据已知bug的情况来决定 + 的,而不是根据一个事先制定好的时间表。” + + +2.6.x.y -stable(稳定版)内核源码树 +----------------------------------- +由4个数字组成的内核版本号说明此内核是-stable版本。它们包含基于2.6.x版本 +内核的相对较小且至关重要的修补,这些修补针对安全性问题或者严重的内核退步。 + +这种版本的内核适用于那些期望获得最新的稳定版内核并且不想参与测试开发版或 +者实验版的用户。 + +如果没有2.6.x.y版本内核存在,那么最新的2.6.x版本内核就相当于是当前的稳定 +版内核。 + +2.6.x.y版本由“稳定版”小组(邮件地址<stable@kernel.org>)维护,一般隔周发 +布新版本。 + +内核源码中的Documentation/stable_kernel_rules.txt文件具体描述了可被稳定 +版内核接受的修改类型以及发布的流程。 + + +2.6.x -git补丁集 +---------------- +Linus的内核源码树的每日快照,这个源码树是由git工具管理的(由此得名)。这 +些补丁通常每天更新以反映Linus的源码树的最新状态。它们比-rc版本的内核源码 +树更具试验性质,因为这个补丁集是全自动生成的,没有任何人来确认其是否真正 +健全。 + + +2.6.x -mm补丁集 +--------------- +这是由Andrew Morton维护的试验性内核补丁集。Andrew将所有子系统的内核源码 +和补丁拼凑到一起,并且加入了大量从linux-kernel邮件列表中采集的补丁。这个 +源码树是新功能和补丁的试炼场。当补丁在-mm补丁集里证明了其价值以后Andrew +或者相应子系统的维护者会将补丁发给Linus以便集成进主内核源码树。 + +在将所有新补丁发给Linus以集成到主内核源码树之前,我们非常鼓励先把这些补 +丁放在-mm版内核源码树中进行测试。 + +这些内核版本不适合在需要稳定运行的系统上运行,因为运行它们比运行任何其他 +内核分支都更具有风险。 + +如果你想为内核开发进程提供帮助,请尝试并使用这些内核版本,并在 +linux-kernel邮件列表中提供反馈,告诉大家你遇到了问题还是一切正常。 + +通常-mm版补丁集不光包括这些额外的试验性补丁,还包括发布时-git版主源码树 +中的改动。 + +-mm版内核没有固定的发布周期,但是通常在每两个-rc版内核发布之间都会有若干 +个-mm版内核发布(一般是1至3个)。 + + +子系统相关内核源码树和补丁集 +---------------------------- +相当一部分内核子系统开发者会公开他们自己的开发源码树,以便其他人能了解内 +核的不同领域正在发生的事情。如上所述,这些源码树会被集成到-mm版本内核中。 + +下面是目前可用的一些内核源码树的列表: + 通过git管理的源码树: + - Kbuild开发源码树, Sam Ravnborg <sam@ravnborg.org> + git.kernel.org:/pub/scm/linux/kernel/git/sam/kbuild.git + + - ACPI开发源码树, Len Brown <len.brown@intel.com> + git.kernel.org:/pub/scm/linux/kernel/git/lenb/linux-acpi-2.6.git + + - 块设备开发源码树, Jens Axboe <axboe@suse.de> + git.kernel.org:/pub/scm/linux/kernel/git/axboe/linux-2.6-block.git + + - DRM开发源码树, Dave Airlie <airlied@linux.ie> + git.kernel.org:/pub/scm/linux/kernel/git/airlied/drm-2.6.git + + - ia64开发源码树, Tony Luck <tony.luck@intel.com> + git.kernel.org:/pub/scm/linux/kernel/git/aegl/linux-2.6.git + + - ieee1394开发源码树, Jody McIntyre <scjody@modernduck.com> + git.kernel.org:/pub/scm/linux/kernel/git/scjody/ieee1394.git + + - infiniband开发源码树, Roland Dreier <rolandd@cisco.com> + git.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband.git + + - libata开发源码树, Jeff Garzik <jgarzik@pobox.com> + git.kernel.org:/pub/scm/linux/kernel/git/jgarzik/libata-dev.git + + - 网络驱动程序开发源码树, Jeff Garzik <jgarzik@pobox.com> + git.kernel.org:/pub/scm/linux/kernel/git/jgarzik/netdev-2.6.git + + - pcmcia开发源码树, Dominik Brodowski <linux@dominikbrodowski.net> + git.kernel.org:/pub/scm/linux/kernel/git/brodo/pcmcia-2.6.git + + - SCSI开发源码树, James Bottomley <James.Bottomley@SteelEye.com> + git.kernel.org:/pub/scm/linux/kernel/git/jejb/scsi-misc-2.6.git + + 使用quilt管理的补丁集: + - USB, PCI, 驱动程序核心和I2C, Greg Kroah-Hartman <gregkh@suse.de> + kernel.org/pub/linux/kernel/people/gregkh/gregkh-2.6/ + - x86-64, 部分i386, Andi Kleen <ak@suse.de> + ftp.firstfloor.org:/pub/ak/x86_64/quilt/ + + 其他内核源码树可以在http://git.kernel.org的列表中和MAINTAINERS文件里 + 找到。 + +报告bug +------- + +bugzilla.kernel.org是Linux内核开发者们用来跟踪内核Bug的网站。我们鼓励用 +户在这个工具中报告找到的所有bug。如何使用内核bugzilla的细节请访问: + http://test.kernel.org/bugzilla/faq.html + +内核源码主目录中的REPORTING-BUGS文件里有一个很好的模板。它指导用户如何报 +告可能的内核bug以及需要提供哪些信息来帮助内核开发者们找到问题的根源。 + + +利用bug报告 +----------- + +练习内核开发技能的最好办法就是修改其他人报告的bug。你不光可以帮助内核变 +得更加稳定,还可以学会如何解决实际问题从而提高自己的技能,并且让其他开发 +者感受到你的存在。修改bug是赢得其他开发者赞誉的最好办法,因为并不是很多 +人都喜欢浪费时间去修改别人报告的bug。 + +要尝试修改已知的bug,请访问http://bugzilla.kernel.org网址。如果你想获得 +最新bug的通知,可以订阅bugme-new邮件列表(只有新的bug报告会被寄到这里) +或者订阅bugme-janitor邮件列表(所有bugzilla的变动都会被寄到这里)。 + + http://lists.osdl.org/mailman/listinfo/bugme-new + http://lists.osdl.org/mailman/listinfo/bugme-janitors + + +邮件列表 +-------- + +正如上面的文档所描述,大多数的骨干内核开发者都加入了Linux Kernel邮件列 +表。如何订阅和退订列表的细节可以在这里找到: + http://vger.kernel.org/vger-lists.html#linux-kernel +网上很多地方都有这个邮件列表的存档(archive)。可以使用搜索引擎来找到这些 +存档。比如: + http://dir.gmane.org/gmane.linux.kernel +在发信之前,我们强烈建议你先在存档中搜索你想要讨论的问题。很多已经被详细 +讨论过的问题只在邮件列表的存档中可以找到。 + +大多数内核子系统也有自己独立的邮件列表来协调各自的开发工作。从 +MAINTAINERS文件中可以找到不同话题对应的邮件列表。 + +很多邮件列表架设在kernel.org服务器上。这些列表的信息可以在这里找到: + http://vger.kernel.org/vger-lists.html + +在使用这些邮件列表时,请记住保持良好的行为习惯。下面的链接提供了与这些列 +表(或任何其它邮件列表)交流的一些简单规则,虽然内容有点滥竽充数。 + http://www.albion.com/netiquette/ + +当有很多人回复你的邮件时,邮件的抄送列表会变得很长。请不要将任何人从抄送 +列表中删除,除非你有足够的理由这么做。也不要只回复到邮件列表。请习惯于同 +一封邮件接收两次(一封来自发送者一封来自邮件列表),而不要试图通过添加一 +些奇特的邮件头来解决这个问题,人们不会喜欢的。 + +记住保留你所回复内容的上下文和源头。在你回复邮件的顶部保留“某某某说到……” +这几行。将你的评论加在被引用的段落之间而不要放在邮件的顶部。 + +如果你在邮件中附带补丁,请确认它们是可以直接阅读的纯文本(如 +Documentation/SubmittingPatches文档中所述)。内核开发者们不希望遇到附件 +或者被压缩了的补丁。只有这样才能保证他们可以直接评论你的每行代码。请确保 +你使用的邮件发送程序不会修改空格和制表符。一个防范性的测试方法是先将邮件 +发送给自己,然后自己尝试是否可以顺利地打上收到的补丁。如果测试不成功,请 +调整或者更换你的邮件发送程序直到它正确工作为止。 + +总而言之,请尊重其他的邮件列表订阅者。 + + +同内核社区合作 +---------------- + +内核社区的目标就是提供尽善尽美的内核。所以当你提交补丁期望被接受进内核的 +时候,它的技术价值以及其他方面都将被评审。那么你可能会得到什么呢? + - 批评 + - 评论 + - 要求修改 + - 要求证明修改的必要性 + - 沉默 + +要记住,这些是把补丁放进内核的正常情况。你必须学会听取对补丁的批评和评论, +从技术层面评估它们,然后要么重写你的补丁要么简明扼要地论证修改是不必要 +的。如果你发的邮件没有得到任何回应,请过几天后再试一次,因为有时信件会湮 +没在茫茫信海中。 + +你不应该做的事情: + - 期望自己的补丁不受任何质疑就直接被接受 + - 翻脸 + - 忽略别人的评论 + - 没有按照别人的要求做任何修改就重新提交 + +在一个努力追寻最好技术方案的社区里,对于一个补丁有多少好处总会有不同的见 +解。你必须要抱着合作的态度,愿意改变自己的观点来适应内核的风格。或者至少 +愿意去证明你的想法是有价值的。记住,犯错误是允许的,只要你愿意朝着正确的 +方案去努力。 + +如果你的第一个补丁换来的是一堆修改建议,这是很正常的。这并不代表你的补丁 +不会被接受,也不意味着有人和你作对。你只需要改正所有提出的问题然后重新发 +送你的补丁。 + +内核社区和公司文化的差异 +------------------------ + +内核社区的工作模式同大多数传统公司开发队伍的工作模式并不相同。下面这些例 +子,可以帮助你避免某些可能发生问题: + 用这些话介绍你的修改提案会有好处: + - 它同时解决了多个问题 + - 它删除了2000行代码 + - 这是补丁,它已经解释了我想要说明的 + - 我在5种不同的体系结构上测试过它…… + - 这是一系列小补丁用来…… + - 这个修改提高了普通机器的性能…… + + 应该避免如下的说法: + - 我们在AIX/ptx/Solaris就是这么做的,所以这么做肯定是好的…… + - 我做这行已经20年了,所以…… + - 为了我们公司赚钱考虑必须这么做 + - 这是我们的企业产品线所需要的 + - 这里是描述我观点的1000页设计文档 + - 这是一个5000行的补丁用来…… + - 我重写了现在乱七八糟的代码,这就是…… + - 我被规定了最后期限,所以这个补丁需要立刻被接受 + +另外一个内核社区与大部分传统公司的软件开发队伍不同的地方是无法面对面地交 +流。使用电子邮件和IRC聊天工具做为主要沟通工具的一个好处是性别和种族歧视 +将会更少。Linux内核的工作环境更能接受妇女和少数族群,因为每个人在别人眼 +里只是一个邮件地址。国际化也帮助了公平的实现,因为你无法通过姓名来判断人 +的性别。男人有可能叫李丽,女人也有可能叫王刚。大多数在Linux内核上工作过 +并表达过看法的女性对在linux上工作的经历都给出了正面的评价。 + +对于一些不习惯使用英语的人来说,语言可能是一个引起问题的障碍。在邮件列表 +中要正确地表达想法必需良好地掌握语言,所以建议你在发送邮件之前最好检查一 +下英文写得是否正确。 + + +拆分修改 +-------- + +Linux内核社区并不喜欢一下接收大段的代码。修改需要被恰当地介绍、讨论并且 +拆分成独立的小段。这几乎完全和公司中的习惯背道而驰。你的想法应该在开发最 +开始的阶段就让大家知道,这样你就可以及时获得对你正在进行的开发的反馈。这 +样也会让社区觉得你是在和他们协作,而不是仅仅把他们当作倾销新功能的对象。 +无论如何,你不要一次性地向邮件列表发送50封信,你的补丁序列应该永远用不到 +这么多。 + +将补丁拆开的原因如下: + +1) 小的补丁更有可能被接受,因为它们不需要太多的时间和精力去验证其正确性。 + 一个5行的补丁,可能在维护者看了一眼以后就会被接受。而500行的补丁则 + 需要数个小时来审查其正确性(所需时间随补丁大小增加大约呈指数级增长)。 + + 当出了问题的时候,小的补丁也会让调试变得非常容易。一个一个补丁地回溯 + 将会比仔细剖析一个被打上的大补丁(这个补丁破坏了其他东西)容易得多。 + +2)不光发送小的补丁很重要,在提交之前重新编排、化简(或者仅仅重新排列) + 补丁也是很重要的。 + +这里有内核开发者Al Viro打的一个比方: + “想象一个老师正在给学生批改数学作业。老师并不希望看到学生为了得 + 到正确解法所进行的尝试和产生的错误。他希望看到的是最干净最优雅的 + 解答。好学生了解这点,绝不会把最终解决之前的中间方案提交上去。” + + 内核开发也是这样。维护者和评审者不希望看到一个人在解决问题时的思 + 考过程。他们只希望看到简单和优雅的解决方案。 + +直接给出一流的解决方案,和社区一起协作讨论尚未完成的工作,这两者之间似乎 +很难找到一个平衡点。所以最好尽早开始收集有利于你进行改进的反馈;同时也要 +保证修改分成很多小块,这样在整个项目都准备好被包含进内核之前,其中的一部 +分可能会先被接收。 + +必须了解这样做是不可接受的:试图将未完成的工作提交进内核,然后再找时间修 +复。 + + +证明修改的必要性 +---------------- +除了将补丁拆成小块,很重要的一点是让Linux社区了解他们为什么需要这样修改。 +你必须证明新功能是有人需要的并且是有用的。 + + +记录修改 +-------- + +当你发送补丁的时候,需要特别留意邮件正文的内容。因为这里的信息将会做为补 +丁的修改记录(ChangeLog),会被一直保留以备大家查阅。它需要完全地描述补丁, +包括: + - 为什么需要这个修改 + - 补丁的总体设计 + - 实现细节 + - 测试结果 + +想了解它具体应该看起来像什么,请查阅以下文档中的“ChangeLog”章节: + “The Perfect Patch” + http://www.zip.com.au/~akpm/linux/patches/stuff/tpp.txt + + +这些事情有时候做起来很难。要在任何方面都做到完美可能需要好几年时间。这是 +一个持续提高的过程,它需要大量的耐心和决心。只要不放弃,你一定可以做到。 +很多人已经做到了,而他们都曾经和现在的你站在同样的起点上。 + + +--------------- +感谢Paolo Ciarrocchi允许“开发流程”部分基于他所写的文章 +(http://linux.tar.bz/articles/2.6-development_process),感谢Randy +Dunlap和Gerrit Huizenga完善了应该说和不该说的列表。感谢Pat Mochel, Hanna +Linder, Randy Dunlap, Kay Sievers, Vojtech Pavlik, Jan Kara, Josh Boyer, +Kees Cook, Andrew Morton, Andi Kleen, Vadim Lobanov, Jesper Juhl, Adrian +Bunk, Keri Harris, Frans Pop, David A. Wheeler, Junio Hamano, Michael +Kerrisk和Alex Shepard的评审、建议和贡献。没有他们的帮助,这篇文档是不可 +能完成的。 + + + +英文版维护者: Greg Kroah-Hartman <greg@kroah.com> diff --git a/Documentation/zh_CN/stable_api_nonsense.txt b/Documentation/zh_CN/stable_api_nonsense.txt new file mode 100644 index 000000000000..c26a27d1ee7d --- /dev/null +++ b/Documentation/zh_CN/stable_api_nonsense.txt @@ -0,0 +1,157 @@ +Chinese translated version of Documentation/stable_api_nonsense.txt + +If you have any comment or update to the content, please contact the +original document maintainer directly. However, if you have problem +communicating in English you can also ask the Chinese maintainer for help. +Contact the Chinese maintainer, if this translation is outdated or there +is problem with translation. + +Maintainer: Greg Kroah-Hartman <greg@kroah.com> +Chinese maintainer: TripleX Chung <zhongyu@18mail.cn> +--------------------------------------------------------------------- +Documentation/stable_api_nonsense.txt 的中文翻译 + +如果想评论或更新本文的内容,请直接联系原文档的维护者。如果你使用英文 +交流有困难的话,也可以向中文版维护者求助。如果本翻译更新不及时或者翻 +译存在问题,请联系中文版维护者。 + +英文版维护者: Greg Kroah-Hartman <greg@kroah.com> +中文版维护者: 钟宇 TripleX Chung <zhongyu@18mail.cn> +中文版翻译者: 钟宇 TripleX Chung <zhongyu@18mail.cn> +中文版校译者: 李阳 Li Yang <leoli@freescale.com> +以下为正文 +--------------------------------------------------------------------- + +写作本文档的目的,是为了解释为什么Linux既没有二进制内核接口,也没有稳定 +的内核接口。这里所说的内核接口,是指内核里的接口,而不是内核和用户空间 +的接口。内核到用户空间的接口,是提供给应用程序使用的系统调用,系统调用 +在历史上几乎没有过变化,将来也不会有变化。我有一些老应用程序是在0.9版本 +或者更早版本的内核上编译的,在使用2.6版本内核的Linux发布上依然用得很好 +。用户和应用程序作者可以将这个接口看成是稳定的。 + + +执行纲要 +-------- + +你也许以为自己想要稳定的内核接口,但是你不清楚你要的实际上不是它。你需 +要的其实是稳定的驱动程序,而你只有将驱动程序放到公版内核的源代码树里, +才有可能达到这个目的。而且这样做还有很多其它好处,正是因为这些好处使得 +Linux能成为强壮,稳定,成熟的操作系统,这也是你最开始选择Linux的原因。 + + +入门 +----- + +只有那些写驱动程序的“怪人”才会担心内核接口的改变,对广大用户来说,既 +看不到内核接口,也不需要去关心它。 + +首先,我不打算讨论关于任何非GPL许可的内核驱动的法律问题,这些非GPL许可 +的驱动程序包括不公开源代码,隐藏源代码,二进制或者是用源代码包装,或者 +是其它任何形式的不能以GPL许可公开源代码的驱动程序。如果有法律问题,请咨 +询律师,我只是一个程序员,所以我只打算探讨技术问题(不是小看法律问题, +法律问题很实际,并且需要一直关注)。 + +既然只谈技术问题,我们就有了下面两个主题:二进制内核接口和稳定的内核源 +代码接口。这两个问题是互相关联的,让我们先解决掉二进制接口的问题。 + + +二进制内核接口 +-------------- +假如我们有一个稳定的内核源代码接口,那么自然而然的,我们就拥有了稳定的 +二进制接口,是这样的吗?错。让我们看看关于Linux内核的几点事实: + - 取决于所用的C编译器的版本,不同的内核数据结构里的结构体的对齐方 +式会有差别,代码中不同函数的表现形式也不一样(函数是不是被inline编译取 +决于编译器行为)。不同的函数的表现形式并不重要,但是数据结构内部的对齐 +方式很关键。 + - 取决于内核的配置选项,不同的选项会让内核的很多东西发生改变: + - 同一个结构体可能包含不同的成员变量 + - 有的函数可能根本不会被实现(比如编译的时候没有选择SMP支持 +,一些锁函数就会被定义成空函数)。 + - 内核使用的内存会以不同的方式对齐,这取决于不同的内核配置选 +项。 + - Linux可以在很多的不同体系结构的处理器上运行。在某个体系结构上编 +译好的二进制驱动程序,不可能在另外一个体系结构上正确的运行。 + +对于一个特定的内核,满足这些条件并不难,使用同一个C编译器和同样的内核配 +置选项来编译驱动程序模块就可以了。这对于给一个特定Linux发布的特定版本提 +供驱动程序,是完全可以满足需求的。但是如果你要给不同发布的不同版本都发 +布一个驱动程序,就需要在每个发布上用不同的内核设置参数都编译一次内核, +这简直跟噩梦一样。而且还要注意到,每个Linux发布还提供不同的Linux内核, +这些内核都针对不同的硬件类型进行了优化(有很多种不同的处理器,还有不同 +的内核设置选项)。所以每发布一次驱动程序,都需要提供很多不同版本的内核 +模块。 + +相信我,如果你真的要采取这种发布方式,一定会慢慢疯掉,我很久以前就有过 +深刻的教训... + + +稳定的内核源代码接口 +-------------------- + +如果有人不将他的内核驱动程序,放入公版内核的源代码树,而又想让驱动程序 +一直保持在最新的内核中可用,那么这个话题将会变得没完没了。 + 内核开发是持续而且快节奏的,从来都不会慢下来。内核开发人员在当前接口中 +找到bug,或者找到更好的实现方式。一旦发现这些,他们就很快会去修改当前的 +接口。修改接口意味着,函数名可能会改变,结构体可能被扩充或者删减,函数 +的参数也可能发生改变。一旦接口被修改,内核中使用这些接口的地方需要同时 +修正,这样才能保证所有的东西继续工作。 + +举一个例子,内核的USB驱动程序接口在USB子系统的整个生命周期中,至少经历 +了三次重写。这些重写解决以下问题: + - 把数据流从同步模式改成非同步模式,这个改动减少了一些驱动程序的 +复杂度,提高了所有USB驱动程序的吞吐率,这样几乎所有的USB设备都能以最大 +速率工作了。 + - 修改了USB核心代码中为USB驱动分配数据包内存的方式,所有的驱动都 +需要提供更多的参数给USB核心,以修正了很多已经被记录在案的死锁。 + +这和一些封闭源代码的操作系统形成鲜明的对比,在那些操作系统上,不得不额 +外的维护旧的USB接口。这导致了一个可能性,新的开发者依然会不小心使用旧的 +接口,以不恰当的方式编写代码,进而影响到操作系统的稳定性。 + 在上面的例子中,所有的开发者都同意这些重要的改动,在这样的情况下修改代 +价很低。如果Linux保持一个稳定的内核源代码接口,那么就得创建一个新的接口 +;旧的,有问题的接口必须一直维护,给Linux USB开发者带来额外的工作。既然 +所有的Linux USB驱动的作者都是利用自己的时间工作,那么要求他们去做毫无意 +义的免费额外工作,是不可能的。 + 安全问题对Linux来说十分重要。一个安全问题被发现,就会在短时间内得到修 +正。在很多情况下,这将导致Linux内核中的一些接口被重写,以从根本上避免安 +全问题。一旦接口被重写,所有使用这些接口的驱动程序,必须同时得到修正, +以确定安全问题已经得到修复并且不可能在未来还有同样的安全问题。如果内核 +内部接口不允许改变,那么就不可能修复这样的安全问题,也不可能确认这样的 +安全问题以后不会发生。 +开发者一直在清理内核接口。如果一个接口没有人在使用了,它就会被删除。这 +样可以确保内核尽可能的小,而且所有潜在的接口都会得到尽可能完整的测试 +(没有人使用的接口是不可能得到良好的测试的)。 + + +要做什么 +------- + +如果你写了一个Linux内核驱动,但是它还不在Linux源代码树里,作为一个开发 +者,你应该怎么做?为每个发布的每个版本提供一个二进制驱动,那简直是一个 +噩梦,要跟上永远处于变化之中的内核接口,也是一件辛苦活。 +很简单,让你的驱动进入内核源代码树(要记得我们在谈论的是以GPL许可发行 +的驱动,如果你的代码不符合GPL,那么祝你好运,你只能自己解决这个问题了, +你这个吸血鬼<把Andrew和Linus对吸血鬼的定义链接到这里>)。当你的代码加入 +公版内核源代码树之后,如果一个内核接口改变,你的驱动会直接被修改接口的 +那个人修改。保证你的驱动永远都可以编译通过,并且一直工作,你几乎不需要 +做什么事情。 + +把驱动放到内核源代码树里会有很多的好处: + - 驱动的质量会提升,而维护成本(对原始作者来说)会下降。 + - 其他人会给驱动添加新特性。 + - 其他人会找到驱动中的bug并修复。 + - 其他人会在驱动中找到性能优化的机会。 + - 当外部的接口的改变需要修改驱动程序的时候,其他人会修改驱动程序 +。 + - 不需要联系任何发行商,这个驱动会自动的随着所有的Linux发布一起发 +布。 + +和别的操作系统相比,Linux为更多不同的设备提供现成的驱动,而且能在更多不 +同体系结构的处理器上支持这些设备。这个经过考验的开发模式,必然是错不了 +的 :) + +------------- +感谢 Randy Dunlap, Andrew Morton, David Brownell, Hanna Linder, +Robert Love, and Nishanth Aravamudan 对于本文档早期版本的评审和建议。 + +英文版维护者: Greg Kroah-Hartman <greg@kroah.com> diff --git a/MAINTAINERS b/MAINTAINERS index e78f62f13bac..0c83823f14c3 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -732,6 +732,13 @@ L: uclinux-dist-devel@blackfin.uclinux.org (subscribers-only) W: http://blackfin.uclinux.org S: Supported +BLACKFIN EMAC DRIVER +P: Bryan Wu +M: bryan.wu@analog.com +L: uclinux-dist-devel@blackfin.uclinux.org (subscribers-only) +W: http://blackfin.uclinux.org +S: Supported + BLACKFIN RTC DRIVER P: Mike Frysinger M: michael.frysinger@analog.com @@ -1345,21 +1352,60 @@ S: Supported EDAC-CORE P: Doug Thompson -M: norsk5@xmission.com +M: dougthompson@xmission.com L: bluesmoke-devel@lists.sourceforge.net W: bluesmoke.sourceforge.net S: Supported EDAC-E752X P: Mark Gross +P: Doug Thompson M: mark.gross@intel.com +M: dougthompson@xmission.com L: bluesmoke-devel@lists.sourceforge.net W: bluesmoke.sourceforge.net S: Maintained EDAC-E7XXX P: Doug Thompson -M: norsk5@xmission.com +M: dougthompson@xmission.com +L: bluesmoke-devel@lists.sourceforge.net +W: bluesmoke.sourceforge.net +S: Maintained + +EDAC-I82443BXGX +P: Tim Small +M: tim@buttersideup.com +L: bluesmoke-devel@lists.sourceforge.net +W: bluesmoke.sourceforge.net +S: Maintained + +EDAC-I3000 +P: Jason Uhlenkott +M: juhlenko@akamai.com +L: bluesmoke-devel@lists.sourceforge.net +W: bluesmoke.sourceforge.net +S: Maintained + +EDAC-I5000 +P: Doug Thompson +M: dougthompson@xmission.com +L: bluesmoke-devel@lists.sourceforge.net +W: bluesmoke.sourceforge.net +S: Maintained + +EDAC-I82975X +P: Ranganathan Desikan +P: Arvind R. +M: rdesikan@jetzbroadband.com +M: arvind@acarlab.com +L: bluesmoke-devel@lists.sourceforge.net +W: bluesmoke.sourceforge.net +S: Maintained + +EDAC-PASEMI +P: Egor Martovetsky +M: egor@pasemi.com L: bluesmoke-devel@lists.sourceforge.net W: bluesmoke.sourceforge.net S: Maintained @@ -2850,8 +2896,8 @@ L: linux-kernel@vger.kernel.org S: Maintained POSIX CLOCKS and TIMERS -P: George Anzinger -M: george@mvista.com +P: Thomas Gleixner +M: tglx@linutronix.de L: linux-kernel@vger.kernel.org S: Supported @@ -3282,9 +3328,19 @@ M: neilb@suse.de L: linux-raid@vger.kernel.org S: Supported -SOFTWARE SUSPEND: +HIBERNATION (aka Software Suspend, aka swsusp): +P: Pavel Machek +M: pavel@suse.cz +P: Rafael J. Wysocki +M: rjw@sisk.pl +L: linux-pm@lists.linux-foundation.org +S: Supported + +SUSPEND TO RAM: P: Pavel Machek M: pavel@suse.cz +P: Rafael J. Wysocki +M: rjw@sisk.pl L: linux-pm@lists.linux-foundation.org S: Maintained @@ -514,6 +514,12 @@ CFLAGS += $(call cc-option,-Wdeclaration-after-statement,) # disable pointer signed / unsigned warnings in gcc 4.0 CFLAGS += $(call cc-option,-Wno-pointer-sign,) +# Use --build-id when available. +LDFLAGS_BUILD_ID = $(patsubst -Wl$(comma)%,%,\ + $(call ld-option, -Wl$(comma)--build-id,)) +LDFLAGS_MODULE += $(LDFLAGS_BUILD_ID) +LDFLAGS_vmlinux += $(LDFLAGS_BUILD_ID) + # Default kernel image to build when no specific target is given. # KBUILD_IMAGE may be overruled on the command line or # set in the environment diff --git a/arch/alpha/kernel/module.c b/arch/alpha/kernel/module.c index bd03dc94c72b..026ba9af6d6a 100644 --- a/arch/alpha/kernel/module.c +++ b/arch/alpha/kernel/module.c @@ -119,8 +119,7 @@ module_frob_arch_sections(Elf64_Ehdr *hdr, Elf64_Shdr *sechdrs, } nsyms = symtab->sh_size / sizeof(Elf64_Sym); - chains = kmalloc(nsyms * sizeof(struct got_entry), GFP_KERNEL); - memset(chains, 0, nsyms * sizeof(struct got_entry)); + chains = kcalloc(nsyms, sizeof(struct got_entry), GFP_KERNEL); got->sh_size = 0; got->sh_addralign = 8; diff --git a/arch/alpha/kernel/vmlinux.lds.S b/arch/alpha/kernel/vmlinux.lds.S index 449e76f118d3..fe13daa5cb2c 100644 --- a/arch/alpha/kernel/vmlinux.lds.S +++ b/arch/alpha/kernel/vmlinux.lds.S @@ -3,7 +3,7 @@ OUTPUT_FORMAT("elf64-alpha") OUTPUT_ARCH(alpha) ENTRY(__start) -PHDRS { kernel PT_LOAD ; } +PHDRS { kernel PT_LOAD; note PT_NOTE; } jiffies = jiffies_64; SECTIONS { @@ -28,6 +28,9 @@ SECTIONS __ex_table : { *(__ex_table) } __stop___ex_table = .; + NOTES :kernel :note + .dummy : { *(.dummy) } :kernel + RODATA /* Will be freed after init */ @@ -69,10 +72,7 @@ SECTIONS . = ALIGN(8); SECURITY_INIT - . = ALIGN(8192); - __per_cpu_start = .; - .data.percpu : { *(.data.percpu) } - __per_cpu_end = .; + PERCPU(8192) . = ALIGN(2*8192); __init_end = .; diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c index f5862792a167..a0e18da594d9 100644 --- a/arch/alpha/mm/fault.c +++ b/arch/alpha/mm/fault.c @@ -148,21 +148,17 @@ do_page_fault(unsigned long address, unsigned long mmcsr, the fault. */ fault = handle_mm_fault(mm, vma, address, cause > 0); up_read(&mm->mmap_sem); - - switch (fault) { - case VM_FAULT_MINOR: - current->min_flt++; - break; - case VM_FAULT_MAJOR: - current->maj_flt++; - break; - case VM_FAULT_SIGBUS: - goto do_sigbus; - case VM_FAULT_OOM: - goto out_of_memory; - default: + if (unlikely(fault & VM_FAULT_ERROR)) { + if (fault & VM_FAULT_OOM) + goto out_of_memory; + else if (fault & VM_FAULT_SIGBUS) + goto do_sigbus; BUG(); } + if (fault & VM_FAULT_MAJOR) + current->maj_flt++; + else + current->min_flt++; return; /* Something tried to access memory that isn't in our memory map. diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index 2b7a8f5d8cf2..5ff5406666b4 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -66,6 +66,7 @@ SECTIONS . = ALIGN(4096); __per_cpu_start = .; *(.data.percpu) + *(.data.percpu.shared_aligned) __per_cpu_end = .; #ifndef CONFIG_XIP_KERNEL __init_begin = _stext; diff --git a/arch/arm/mach-iop13xx/pci.c b/arch/arm/mach-iop13xx/pci.c index 9d63d7f260ca..99d94cb1bafd 100644 --- a/arch/arm/mach-iop13xx/pci.c +++ b/arch/arm/mach-iop13xx/pci.c @@ -1002,11 +1002,10 @@ int iop13xx_pci_setup(int nr, struct pci_sys_data *sys) if (nr > 1) return 0; - res = kmalloc(sizeof(struct resource) * 2, GFP_KERNEL); + res = kcalloc(2, sizeof(struct resource), GFP_KERNEL); if (!res) panic("PCI: unable to alloc resources"); - memset(res, 0, sizeof(struct resource) * 2); /* 'nr' assumptions: * ATUX is always 0 diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index 75d491448e45..c04124a095cf 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -183,20 +183,20 @@ good_area: */ survive: fault = handle_mm_fault(mm, vma, addr & PAGE_MASK, fsr & (1 << 11)); - - /* - * Handle the "normal" cases first - successful and sigbus - */ - switch (fault) { - case VM_FAULT_MAJOR: + if (unlikely(fault & VM_FAULT_ERROR)) { + if (fault & VM_FAULT_OOM) + goto out_of_memory; + else if (fault & VM_FAULT_SIGBUS) + return fault; + BUG(); + } + if (fault & VM_FAULT_MAJOR) tsk->maj_flt++; - return fault; - case VM_FAULT_MINOR: + else tsk->min_flt++; - case VM_FAULT_SIGBUS: - return fault; - } + return fault; +out_of_memory: if (!is_init(tsk)) goto out; @@ -249,7 +249,7 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) /* * Handle the "normal" case first - VM_FAULT_MAJOR / VM_FAULT_MINOR */ - if (fault >= VM_FAULT_MINOR) + if (likely(!(fault & VM_FAULT_ERROR))) return 0; /* @@ -259,8 +259,7 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) if (!user_mode(regs)) goto no_context; - switch (fault) { - case VM_FAULT_OOM: + if (fault & VM_FAULT_OOM) { /* * We ran out of memory, or some other thing * happened to us that made us unable to handle @@ -269,17 +268,15 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) printk("VM: killing process %s\n", tsk->comm); do_exit(SIGKILL); return 0; - - case VM_FAULT_SIGBUS: + } + if (fault & VM_FAULT_SIGBUS) { /* * We had some memory, but were unable to * successfully fix up this page fault. */ sig = SIGBUS; code = BUS_ADRERR; - break; - - default: + } else { /* * Something tried to access memory that * isn't in our memory map.. @@ -287,7 +284,6 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) sig = SIGSEGV; code = fault == VM_FAULT_BADACCESS ? SEGV_ACCERR : SEGV_MAPERR; - break; } __do_user_fault(tsk, addr, fsr, sig, code, regs); diff --git a/arch/arm26/mm/fault.c b/arch/arm26/mm/fault.c index 93c0cee0fb5e..dec638a0c8d9 100644 --- a/arch/arm26/mm/fault.c +++ b/arch/arm26/mm/fault.c @@ -170,20 +170,20 @@ good_area: */ survive: fault = handle_mm_fault(mm, vma, addr & PAGE_MASK, DO_COW(fsr)); - - /* - * Handle the "normal" cases first - successful and sigbus - */ - switch (fault) { - case VM_FAULT_MAJOR: + if (unlikely(fault & VM_FAULT_ERROR)) { + if (fault & VM_FAULT_OOM) + goto out_of_memory; + else if (fault & VM_FAULT_SIGBUS) + return fault; + BUG(); + } + if (fault & VM_FAULT_MAJOR) tsk->maj_flt++; - return fault; - case VM_FAULT_MINOR: + else tsk->min_flt++; - case VM_FAULT_SIGBUS: - return fault; - } + return fault; +out_of_memory: fault = -3; /* out of memory */ if (!is_init(tsk)) goto out; @@ -225,13 +225,11 @@ int do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) /* * Handle the "normal" case first */ - switch (fault) { - case VM_FAULT_MINOR: - case VM_FAULT_MAJOR: + if (likely(!(fault & VM_FAULT_ERROR))) return 0; - case VM_FAULT_SIGBUS: + if (fault & VM_FAULT_SIGBUS) goto do_sigbus; - } + /* else VM_FAULT_OOM */ /* * If we are in kernel mode at this point, we diff --git a/arch/avr32/mm/fault.c b/arch/avr32/mm/fault.c index 4b2495285d94..ae2d2c593b2b 100644 --- a/arch/avr32/mm/fault.c +++ b/arch/avr32/mm/fault.c @@ -64,6 +64,7 @@ asmlinkage void do_page_fault(unsigned long ecr, struct pt_regs *regs) int writeaccess; long signr; int code; + int fault; if (notify_page_fault(regs, ecr)) return; @@ -132,20 +133,18 @@ good_area: * fault. */ survive: - switch (handle_mm_fault(mm, vma, address, writeaccess)) { - case VM_FAULT_MINOR: - tsk->min_flt++; - break; - case VM_FAULT_MAJOR: - tsk->maj_flt++; - break; - case VM_FAULT_SIGBUS: - goto do_sigbus; - case VM_FAULT_OOM: - goto out_of_memory; - default: + fault = handle_mm_fault(mm, vma, address, writeaccess); + if (unlikely(fault & VM_FAULT_ERROR)) { + if (fault & VM_FAULT_OOM) + goto out_of_memory; + else if (fault & VM_FAULT_SIGBUS) + goto do_sigbus; BUG(); } + if (fault & VM_FAULT_MAJOR) + tsk->maj_flt++; + else + tsk->min_flt++; up_read(&mm->mmap_sem); return; diff --git a/arch/blackfin/mm/blackfin_sram.c b/arch/blackfin/mm/blackfin_sram.c index 16c6169ed01b..b99ea883cd22 100644 --- a/arch/blackfin/mm/blackfin_sram.c +++ b/arch/blackfin/mm/blackfin_sram.c @@ -521,10 +521,9 @@ void *sram_alloc_with_lsl(size_t size, unsigned long flags) struct sram_list_struct *lsl = NULL; struct mm_struct *mm = current->mm; - lsl = kmalloc(sizeof(struct sram_list_struct), GFP_KERNEL); + lsl = kzalloc(sizeof(struct sram_list_struct), GFP_KERNEL); if (!lsl) return NULL; - memset(lsl, 0, sizeof(*lsl)); if (flags & L1_INST_SRAM) addr = l1_inst_sram_alloc(size); diff --git a/arch/cris/arch-v10/drivers/pcf8563.c b/arch/cris/arch-v10/drivers/pcf8563.c index d47cfbf98d6e..1de0026bb94e 100644 --- a/arch/cris/arch-v10/drivers/pcf8563.c +++ b/arch/cris/arch-v10/drivers/pcf8563.c @@ -180,9 +180,7 @@ err: void __exit pcf8563_exit(void) { - if (unregister_chrdev(PCF8563_MAJOR, DEVICE_NAME) < 0) { - printk(KERN_INFO "%s: Unable to unregister device.\n", PCF8563_NAME); - } + unregister_chrdev(PCF8563_MAJOR, DEVICE_NAME); } /* diff --git a/arch/cris/arch-v32/drivers/pcf8563.c b/arch/cris/arch-v32/drivers/pcf8563.c index fa8d50007e4c..da479a14f836 100644 --- a/arch/cris/arch-v32/drivers/pcf8563.c +++ b/arch/cris/arch-v32/drivers/pcf8563.c @@ -193,9 +193,7 @@ err: void __exit pcf8563_exit(void) { - if (unregister_chrdev(PCF8563_MAJOR, DEVICE_NAME) < 0) { - printk(KERN_INFO "%s: Unable to unregister device.\n", PCF8563_NAME); - } + unregister_chrdev(PCF8563_MAJOR, DEVICE_NAME); } /* diff --git a/arch/cris/arch-v32/drivers/pci/dma.c b/arch/cris/arch-v32/drivers/pci/dma.c index 832fc63504d4..66f9500fbc02 100644 --- a/arch/cris/arch-v32/drivers/pci/dma.c +++ b/arch/cris/arch-v32/drivers/pci/dma.c @@ -91,14 +91,12 @@ int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr, if (!mem_base) goto out; - dev->dma_mem = kmalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL); + dev->dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL); if (!dev->dma_mem) goto out; - memset(dev->dma_mem, 0, sizeof(struct dma_coherent_mem)); - dev->dma_mem->bitmap = kmalloc(bitmap_size, GFP_KERNEL); + dev->dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL); if (!dev->dma_mem->bitmap) goto free1_out; - memset(dev->dma_mem->bitmap, 0, bitmap_size); dev->dma_mem->virt_base = mem_base; dev->dma_mem->device_base = device_addr; diff --git a/arch/cris/arch-v32/vmlinux.lds.S b/arch/cris/arch-v32/vmlinux.lds.S index dfa25e1542b9..651a77f2ccc4 100644 --- a/arch/cris/arch-v32/vmlinux.lds.S +++ b/arch/cris/arch-v32/vmlinux.lds.S @@ -91,10 +91,7 @@ SECTIONS } SECURITY_INIT - . = ALIGN (8192); - __per_cpu_start = .; - .data.percpu : { *(.data.percpu) } - __per_cpu_end = .; + PERCPU(8192) #ifdef CONFIG_BLK_DEV_INITRD .init.ramfs : { diff --git a/arch/cris/mm/fault.c b/arch/cris/mm/fault.c index c73e91f1299a..8672ab7d7978 100644 --- a/arch/cris/mm/fault.c +++ b/arch/cris/mm/fault.c @@ -179,6 +179,7 @@ do_page_fault(unsigned long address, struct pt_regs *regs, struct mm_struct *mm; struct vm_area_struct * vma; siginfo_t info; + int fault; D(printk("Page fault for %lX on %X at %lX, prot %d write %d\n", address, smp_processor_id(), instruction_pointer(regs), @@ -283,18 +284,18 @@ do_page_fault(unsigned long address, struct pt_regs *regs, * the fault. */ - switch (handle_mm_fault(mm, vma, address, writeaccess & 1)) { - case VM_FAULT_MINOR: - tsk->min_flt++; - break; - case VM_FAULT_MAJOR: - tsk->maj_flt++; - break; - case VM_FAULT_SIGBUS: - goto do_sigbus; - default: - goto out_of_memory; + fault = handle_mm_fault(mm, vma, address, writeaccess & 1); + if (unlikely(fault & VM_FAULT_ERROR)) { + if (fault & VM_FAULT_OOM) + goto out_of_memory; + else if (fault & VM_FAULT_SIGBUS) + goto do_sigbus; + BUG(); } + if (fault & VM_FAULT_MAJOR) + tsk->maj_flt++; + else + tsk->min_flt++; up_read(&mm->mmap_sem); return; diff --git a/arch/frv/Makefile b/arch/frv/Makefile index 038e3a8457e0..9bf7345c5cc9 100644 --- a/arch/frv/Makefile +++ b/arch/frv/Makefile @@ -88,7 +88,7 @@ ASFLAGS += -mno-fdpic # make sure the .S files get compiled with debug info # and disable optimisations that are unhelpful whilst debugging ifdef CONFIG_DEBUG_INFO -CFLAGS += -O1 +#CFLAGS += -O1 AFLAGS += -Wa,--gdwarf2 ASFLAGS += -Wa,--gdwarf2 endif diff --git a/arch/frv/kernel/vmlinux.lds.S b/arch/frv/kernel/vmlinux.lds.S index 481dc1374640..3b71e0c86399 100644 --- a/arch/frv/kernel/vmlinux.lds.S +++ b/arch/frv/kernel/vmlinux.lds.S @@ -57,10 +57,7 @@ SECTIONS __alt_instructions_end = .; .altinstr_replacement : { *(.altinstr_replacement) } - . = ALIGN(4096); - __per_cpu_start = .; - .data.percpu : { *(.data.percpu) } - __per_cpu_end = .; + PERCPU(4096) #ifdef CONFIG_BLK_DEV_INITRD . = ALIGN(4096); diff --git a/arch/frv/mm/fault.c b/arch/frv/mm/fault.c index 3f12296c3688..6798fa0257b1 100644 --- a/arch/frv/mm/fault.c +++ b/arch/frv/mm/fault.c @@ -40,6 +40,7 @@ asmlinkage void do_page_fault(int datammu, unsigned long esr0, unsigned long ear pud_t *pue; pte_t *pte; int write; + int fault; #if 0 const char *atxc[16] = { @@ -162,18 +163,18 @@ asmlinkage void do_page_fault(int datammu, unsigned long esr0, unsigned long ear * make sure we exit gracefully rather than endlessly redo * the fault. */ - switch (handle_mm_fault(mm, vma, ear0, write)) { - case VM_FAULT_MINOR: - current->min_flt++; - break; - case VM_FAULT_MAJOR: - current->maj_flt++; - break; - case VM_FAULT_SIGBUS: - goto do_sigbus; - default: - goto out_of_memory; + fault = handle_mm_fault(mm, vma, ear0, write); + if (unlikely(fault & VM_FAULT_ERROR)) { + if (fault & VM_FAULT_OOM) + goto out_of_memory; + else if (fault & VM_FAULT_SIGBUS) + goto do_sigbus; + BUG(); } + if (fault & VM_FAULT_MAJOR) + current->maj_flt++; + else + current->min_flt++; up_read(&mm->mmap_sem); return; diff --git a/arch/i386/kernel/acpi/sleep.c b/arch/i386/kernel/acpi/sleep.c index 4ee83577bf61..c42b5ab49deb 100644 --- a/arch/i386/kernel/acpi/sleep.c +++ b/arch/i386/kernel/acpi/sleep.c @@ -14,7 +14,7 @@ /* address in low memory of the wakeup routine. */ unsigned long acpi_wakeup_address = 0; -unsigned long acpi_video_flags; +unsigned long acpi_realmode_flags; extern char wakeup_start, wakeup_end; extern unsigned long FASTCALL(acpi_copy_wakeup_routine(unsigned long)); @@ -68,9 +68,11 @@ static int __init acpi_sleep_setup(char *str) { while ((str != NULL) && (*str != '\0')) { if (strncmp(str, "s3_bios", 7) == 0) - acpi_video_flags = 1; + acpi_realmode_flags |= 1; if (strncmp(str, "s3_mode", 7) == 0) - acpi_video_flags |= 2; + acpi_realmode_flags |= 2; + if (strncmp(str, "s3_beep", 7) == 0) + acpi_realmode_flags |= 4; str = strchr(str, ','); if (str != NULL) str += strspn(str, ", \t"); @@ -80,9 +82,11 @@ static int __init acpi_sleep_setup(char *str) __setup("acpi_sleep=", acpi_sleep_setup); +/* Ouch, we want to delete this. We already have better version in userspace, in + s2ram from suspend.sf.net project */ static __init int reset_videomode_after_s3(struct dmi_system_id *d) { - acpi_video_flags |= 2; + acpi_realmode_flags |= 2; return 0; } diff --git a/arch/i386/kernel/acpi/wakeup.S b/arch/i386/kernel/acpi/wakeup.S index a2295a34b2c7..ed0a0f2c1597 100644 --- a/arch/i386/kernel/acpi/wakeup.S +++ b/arch/i386/kernel/acpi/wakeup.S @@ -13,6 +13,21 @@ # cs = 0x1234, eip = 0x05 # +#define BEEP \ + inb $97, %al; \ + outb %al, $0x80; \ + movb $3, %al; \ + outb %al, $97; \ + outb %al, $0x80; \ + movb $-74, %al; \ + outb %al, $67; \ + outb %al, $0x80; \ + movb $-119, %al; \ + outb %al, $66; \ + outb %al, $0x80; \ + movb $15, %al; \ + outb %al, $66; + ALIGN .align 4096 ENTRY(wakeup_start) @@ -31,6 +46,11 @@ wakeup_code: movw %cs, %ax movw %ax, %ds # Make ds:0 point to wakeup_start movw %ax, %ss + + testl $4, realmode_flags - wakeup_code + jz 1f + BEEP +1: mov $(wakeup_stack - wakeup_code), %sp # Private stack is needed for ASUS board movw $0x0e00 + 'S', %fs:(0x12) @@ -41,7 +61,7 @@ wakeup_code: cmpl $0x12345678, %eax jne bogus_real_magic - testl $1, video_flags - wakeup_code + testl $1, realmode_flags - wakeup_code jz 1f lcall $0xc000,$3 movw %cs, %ax @@ -49,7 +69,7 @@ wakeup_code: movw %ax, %ss 1: - testl $2, video_flags - wakeup_code + testl $2, realmode_flags - wakeup_code jz 1f mov video_mode - wakeup_code, %ax call mode_set @@ -88,7 +108,11 @@ wakeup_code: cmpl $0x12345678, %eax jne bogus_real_magic - ljmpl $__KERNEL_CS,$wakeup_pmode_return + testl $8, realmode_flags - wakeup_code + jz 1f + BEEP +1: + ljmpl $__KERNEL_CS, $wakeup_pmode_return real_save_gdt: .word 0 .long 0 @@ -97,7 +121,8 @@ real_save_cr3: .long 0 real_save_cr4: .long 0 real_magic: .long 0 video_mode: .long 0 -video_flags: .long 0 +realmode_flags: .long 0 +beep_flags: .long 0 real_efer_save_restore: .long 0 real_save_efer_edx: .long 0 real_save_efer_eax: .long 0 @@ -260,8 +285,8 @@ ENTRY(acpi_copy_wakeup_routine) movl saved_videomode, %edx movl %edx, video_mode - wakeup_start (%eax) - movl acpi_video_flags, %edx - movl %edx, video_flags - wakeup_start (%eax) + movl acpi_realmode_flags, %edx + movl %edx, realmode_flags - wakeup_start (%eax) movl $0x12345678, real_magic - wakeup_start (%eax) movl $0x12345678, saved_magic popl %ebx diff --git a/arch/i386/kernel/asm-offsets.c b/arch/i386/kernel/asm-offsets.c index 25f7eb513928..7288ac88d746 100644 --- a/arch/i386/kernel/asm-offsets.c +++ b/arch/i386/kernel/asm-offsets.c @@ -19,6 +19,11 @@ #include <xen/interface/xen.h> +#ifdef CONFIG_LGUEST_GUEST +#include <linux/lguest.h> +#include "../../../drivers/lguest/lg.h" +#endif + #define DEFINE(sym, val) \ asm volatile("\n->" #sym " %0 " #val : : "i" (val)) @@ -124,4 +129,19 @@ void foo(void) OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask); OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending); #endif + +#ifdef CONFIG_LGUEST_GUEST + BLANK(); + OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled); + OFFSET(LGUEST_PAGES_host_gdt_desc, lguest_pages, state.host_gdt_desc); + OFFSET(LGUEST_PAGES_host_idt_desc, lguest_pages, state.host_idt_desc); + OFFSET(LGUEST_PAGES_host_cr3, lguest_pages, state.host_cr3); + OFFSET(LGUEST_PAGES_host_sp, lguest_pages, state.host_sp); + OFFSET(LGUEST_PAGES_guest_gdt_desc, lguest_pages,state.guest_gdt_desc); + OFFSET(LGUEST_PAGES_guest_idt_desc, lguest_pages,state.guest_idt_desc); + OFFSET(LGUEST_PAGES_guest_gdt, lguest_pages, state.guest_gdt); + OFFSET(LGUEST_PAGES_regs_trapnum, lguest_pages, regs.trapnum); + OFFSET(LGUEST_PAGES_regs_errcode, lguest_pages, regs.errcode); + OFFSET(LGUEST_PAGES_regs, lguest_pages, regs); +#endif } diff --git a/arch/i386/kernel/init_task.c b/arch/i386/kernel/init_task.c index cff95d10a4d8..d26fc063a760 100644 --- a/arch/i386/kernel/init_task.c +++ b/arch/i386/kernel/init_task.c @@ -42,5 +42,5 @@ EXPORT_SYMBOL(init_task); * per-CPU TSS segments. Threads are completely 'soft' on Linux, * no more per-task TSS's. */ -DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_internodealigned_in_smp = INIT_TSS; +DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS; diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c index d2daf672f4a2..ba44d40b066d 100644 --- a/arch/i386/kernel/irq.c +++ b/arch/i386/kernel/irq.c @@ -21,7 +21,7 @@ #include <asm/apic.h> #include <asm/uaccess.h> -DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_internodealigned_in_smp; +DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); EXPORT_PER_CPU_SYMBOL(irq_stat); DEFINE_PER_CPU(struct pt_regs *, irq_regs); diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index d32fd4b6f78e..3e7753c78b9b 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -41,6 +41,10 @@ #include <linux/mca.h> #endif +#if defined(CONFIG_EDAC) +#include <linux/edac.h> +#endif + #include <asm/processor.h> #include <asm/system.h> #include <asm/io.h> @@ -638,6 +642,14 @@ mem_parity_error(unsigned char reason, struct pt_regs * regs) printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on " "CPU %d.\n", reason, smp_processor_id()); printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n"); + +#if defined(CONFIG_EDAC) + if(edac_handler_set()) { + edac_atomic_assert_error(); + return; + } +#endif + if (panic_on_unrecovered_nmi) panic("NMI: Not continuing"); @@ -1056,6 +1068,7 @@ asmlinkage void math_state_restore(void) thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */ tsk->fpu_counter++; } +EXPORT_SYMBOL_GPL(math_state_restore); #ifndef CONFIG_MATH_EMULATION diff --git a/arch/i386/kernel/tsc.c b/arch/i386/kernel/tsc.c index 252f9010f283..debd7dbb4158 100644 --- a/arch/i386/kernel/tsc.c +++ b/arch/i386/kernel/tsc.c @@ -27,6 +27,7 @@ static int tsc_enabled; * an extra value to store the TSC freq */ unsigned int tsc_khz; +EXPORT_SYMBOL_GPL(tsc_khz); int tsc_disable; @@ -58,10 +59,11 @@ __setup("notsc", tsc_setup); */ static int tsc_unstable; -static inline int check_tsc_unstable(void) +int check_tsc_unstable(void) { return tsc_unstable; } +EXPORT_SYMBOL_GPL(check_tsc_unstable); /* Accellerators for sched_clock() * convert from cycles(64bits) => nanoseconds (64bits) diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S index 00f1bc47d3a2..7d72cce00529 100644 --- a/arch/i386/kernel/vmlinux.lds.S +++ b/arch/i386/kernel/vmlinux.lds.S @@ -60,7 +60,9 @@ SECTIONS __stop___ex_table = .; } - BUG_TABLE + NOTES :text :note + + BUG_TABLE :text . = ALIGN(4); .tracedata : AT(ADDR(.tracedata) - LOAD_OFFSET) { @@ -181,6 +183,7 @@ SECTIONS .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { __per_cpu_start = .; *(.data.percpu) + *(.data.percpu.shared_aligned) __per_cpu_end = .; } . = ALIGN(4096); @@ -207,6 +210,4 @@ SECTIONS STABS_DEBUG DWARF_DEBUG - - NOTES } diff --git a/arch/i386/mm/fault.c b/arch/i386/mm/fault.c index 1ecb3e43b523..e92a10124935 100644 --- a/arch/i386/mm/fault.c +++ b/arch/i386/mm/fault.c @@ -303,6 +303,7 @@ fastcall void __kprobes do_page_fault(struct pt_regs *regs, struct vm_area_struct * vma; unsigned long address; int write, si_code; + int fault; /* get the address */ address = read_cr2(); @@ -422,20 +423,18 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. */ - switch (handle_mm_fault(mm, vma, address, write)) { - case VM_FAULT_MINOR: - tsk->min_flt++; - break; - case VM_FAULT_MAJOR: - tsk->maj_flt++; - break; - case VM_FAULT_SIGBUS: - goto do_sigbus; - case VM_FAULT_OOM: + fault = handle_mm_fault(mm, vma, address, write); + if (unlikely(fault & VM_FAULT_ERROR)) { + if (fault & VM_FAULT_OOM) goto out_of_memory; - default: - BUG(); + else if (fault & VM_FAULT_SIGBUS) + goto do_sigbus; + BUG(); } + if (fault & VM_FAULT_MAJOR) + tsk->maj_flt++; + else + tsk->min_flt++; /* * Did it hit the DOS screen memory VA from vm86 mode? diff --git a/arch/ia64/configs/tiger_defconfig b/arch/ia64/configs/tiger_defconfig index e208747d5de4..3dbb3987df27 100644 --- a/arch/ia64/configs/tiger_defconfig +++ b/arch/ia64/configs/tiger_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.21-rc3 -# Thu Mar 8 11:07:09 2007 +# Linux kernel version: 2.6.22 +# Thu Jul 19 13:54:47 2007 # CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" @@ -19,15 +19,15 @@ CONFIG_LOCALVERSION="" CONFIG_LOCALVERSION_AUTO=y CONFIG_SWAP=y CONFIG_SYSVIPC=y -# CONFIG_IPC_NS is not set CONFIG_SYSVIPC_SYSCTL=y CONFIG_POSIX_MQUEUE=y # CONFIG_BSD_PROCESS_ACCT is not set # CONFIG_TASKSTATS is not set -# CONFIG_UTS_NS is not set +# CONFIG_USER_NS is not set # CONFIG_AUDIT is not set CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y +CONFIG_LOG_BUF_SHIFT=20 # CONFIG_CPUSETS is not set CONFIG_SYSFS_DEPRECATED=y # CONFIG_RELAY is not set @@ -46,18 +46,19 @@ CONFIG_BUG=y CONFIG_ELF_CORE=y CONFIG_BASE_FULL=y CONFIG_FUTEX=y +CONFIG_ANON_INODES=y CONFIG_EPOLL=y +CONFIG_SIGNALFD=y +CONFIG_TIMERFD=y +CONFIG_EVENTFD=y CONFIG_SHMEM=y -CONFIG_SLAB=y CONFIG_VM_EVENT_COUNTERS=y +CONFIG_SLAB=y +# CONFIG_SLUB is not set +# CONFIG_SLOB is not set CONFIG_RT_MUTEXES=y # CONFIG_TINY_SHMEM is not set CONFIG_BASE_SMALL=0 -# CONFIG_SLOB is not set - -# -# Loadable module support -# CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_MODULE_FORCE_UNLOAD is not set @@ -65,12 +66,9 @@ CONFIG_MODVERSIONS=y CONFIG_MODULE_SRCVERSION_ALL=y CONFIG_KMOD=y CONFIG_STOP_MACHINE=y - -# -# Block layer -# CONFIG_BLOCK=y # CONFIG_BLK_DEV_IO_TRACE is not set +# CONFIG_BLK_DEV_BSG is not set # # IO Schedulers @@ -91,6 +89,7 @@ CONFIG_DEFAULT_IOSCHED="anticipatory" CONFIG_IA64=y CONFIG_64BIT=y CONFIG_ZONE_DMA=y +CONFIG_QUICKLIST=y CONFIG_MMU=y CONFIG_SWIOTLB=y CONFIG_RWSEM_XCHGADD_ALGORITHM=y @@ -114,8 +113,8 @@ CONFIG_IA64_DIG=y CONFIG_MCKINLEY=y # CONFIG_IA64_PAGE_SIZE_4KB is not set # CONFIG_IA64_PAGE_SIZE_8KB is not set -CONFIG_IA64_PAGE_SIZE_16KB=y -# CONFIG_IA64_PAGE_SIZE_64KB is not set +# CONFIG_IA64_PAGE_SIZE_16KB is not set +CONFIG_IA64_PAGE_SIZE_64KB=y CONFIG_PGTABLE_3=y # CONFIG_PGTABLE_4 is not set # CONFIG_HZ_100 is not set @@ -145,6 +144,9 @@ CONFIG_FLAT_NODE_MEM_MAP=y CONFIG_SPLIT_PTLOCK_CPUS=4 CONFIG_RESOURCES_64BIT=y CONFIG_ZONE_DMA_FLAG=1 +CONFIG_BOUNCE=y +CONFIG_NR_QUICK=1 +CONFIG_VIRT_TO_BUS=y CONFIG_ARCH_SELECT_MEMORY_MODEL=y CONFIG_ARCH_DISCONTIGMEM_ENABLE=y CONFIG_ARCH_FLATMEM_ENABLE=y @@ -152,11 +154,11 @@ CONFIG_ARCH_SPARSEMEM_ENABLE=y CONFIG_ARCH_POPULATES_NODE_MAP=y CONFIG_VIRTUAL_MEM_MAP=y CONFIG_HOLES_IN_ZONE=y -CONFIG_IA32_SUPPORT=y -CONFIG_COMPAT=y +# CONFIG_IA32_SUPPORT is not set CONFIG_IA64_MCA_RECOVERY=y CONFIG_PERFMON=y CONFIG_IA64_PALINFO=y +# CONFIG_IA64_MC_ERR_INJECT is not set # CONFIG_IA64_ESI is not set CONFIG_KEXEC=y # CONFIG_CRASH_DUMP is not set @@ -166,6 +168,7 @@ CONFIG_KEXEC=y # CONFIG_EFI_VARS=y CONFIG_EFI_PCDP=y +CONFIG_DMIID=y CONFIG_BINFMT_ELF=y CONFIG_BINFMT_MISC=m @@ -175,7 +178,6 @@ CONFIG_BINFMT_MISC=m CONFIG_PM=y CONFIG_PM_LEGACY=y # CONFIG_PM_DEBUG is not set -# CONFIG_PM_SYSFS_DEPRECATED is not set # # ACPI (Advanced Configuration and Power Interface) Support @@ -205,13 +207,11 @@ CONFIG_ACPI_CONTAINER=m # CONFIG_PCI=y CONFIG_PCI_DOMAINS=y +CONFIG_PCI_SYSCALL=y # CONFIG_PCIEPORTBUS is not set +CONFIG_ARCH_SUPPORTS_MSI=y # CONFIG_PCI_MSI is not set # CONFIG_PCI_DEBUG is not set - -# -# PCI Hotplug Support -# CONFIG_HOTPLUG_PCI=m # CONFIG_HOTPLUG_PCI_FAKE is not set CONFIG_HOTPLUG_PCI_ACPI=m @@ -232,7 +232,6 @@ CONFIG_NET=y # # Networking options # -# CONFIG_NETDEBUG is not set CONFIG_PACKET=y # CONFIG_PACKET_MMAP is not set CONFIG_UNIX=y @@ -270,20 +269,8 @@ CONFIG_DEFAULT_TCP_CONG="cubic" # CONFIG_INET6_TUNNEL is not set # CONFIG_NETWORK_SECMARK is not set # CONFIG_NETFILTER is not set - -# -# DCCP Configuration (EXPERIMENTAL) -# # CONFIG_IP_DCCP is not set - -# -# SCTP Configuration (EXPERIMENTAL) -# # CONFIG_IP_SCTP is not set - -# -# TIPC Configuration (EXPERIMENTAL) -# # CONFIG_TIPC is not set # CONFIG_ATM is not set # CONFIG_BRIDGE is not set @@ -309,7 +296,17 @@ CONFIG_DEFAULT_TCP_CONG="cubic" # CONFIG_HAMRADIO is not set # CONFIG_IRDA is not set # CONFIG_BT is not set +# CONFIG_AF_RXRPC is not set + +# +# Wireless +# +# CONFIG_CFG80211 is not set +# CONFIG_WIRELESS_EXT is not set +# CONFIG_MAC80211 is not set # CONFIG_IEEE80211 is not set +# CONFIG_RFKILL is not set +# CONFIG_NET_9P is not set # # Device Drivers @@ -324,25 +321,9 @@ CONFIG_FW_LOADER=m # CONFIG_DEBUG_DRIVER is not set # CONFIG_DEBUG_DEVRES is not set # CONFIG_SYS_HYPERVISOR is not set - -# -# Connector - unified userspace <-> kernelspace linker -# # CONFIG_CONNECTOR is not set - -# -# Memory Technology Devices (MTD) -# # CONFIG_MTD is not set - -# -# Parallel port support -# # CONFIG_PARPORT is not set - -# -# Plug and Play support -# CONFIG_PNP=y # CONFIG_PNP_DEBUG is not set @@ -350,10 +331,7 @@ CONFIG_PNP=y # Protocols # CONFIG_PNPACPI=y - -# -# Block devices -# +CONFIG_BLK_DEV=y # CONFIG_BLK_CPQ_DA is not set # CONFIG_BLK_CPQ_CISS_DA is not set # CONFIG_BLK_DEV_DAC960 is not set @@ -370,16 +348,11 @@ CONFIG_BLK_DEV_RAM_SIZE=4096 CONFIG_BLK_DEV_RAM_BLOCKSIZE=1024 # CONFIG_CDROM_PKTCDVD is not set # CONFIG_ATA_OVER_ETH is not set - -# -# Misc devices -# +CONFIG_MISC_DEVICES=y +# CONFIG_PHANTOM is not set +# CONFIG_EEPROM_93CX6 is not set # CONFIG_SGI_IOC4 is not set # CONFIG_TIFM_CORE is not set - -# -# ATA/ATAPI/MFM/RLL support -# CONFIG_IDE=y CONFIG_IDE_MAX_HWIFS=4 CONFIG_BLK_DEV_IDE=y @@ -396,6 +369,7 @@ CONFIG_BLK_DEV_IDEFLOPPY=y CONFIG_BLK_DEV_IDESCSI=m # CONFIG_BLK_DEV_IDEACPI is not set # CONFIG_IDE_TASK_IOCTL is not set +CONFIG_IDE_PROC_FS=y # # IDE chipset support/bugfixes @@ -404,12 +378,12 @@ CONFIG_BLK_DEV_IDESCSI=m # CONFIG_BLK_DEV_IDEPNP is not set CONFIG_BLK_DEV_IDEPCI=y # CONFIG_IDEPCI_SHARE_IRQ is not set +CONFIG_IDEPCI_PCIBUS_ORDER=y # CONFIG_BLK_DEV_OFFBOARD is not set CONFIG_BLK_DEV_GENERIC=y # CONFIG_BLK_DEV_OPTI621 is not set CONFIG_BLK_DEV_IDEDMA_PCI=y # CONFIG_BLK_DEV_IDEDMA_FORCED is not set -CONFIG_IDEDMA_PCI_AUTO=y # CONFIG_IDEDMA_ONLYDISK is not set # CONFIG_BLK_DEV_AEC62XX is not set # CONFIG_BLK_DEV_ALI15X3 is not set @@ -438,7 +412,6 @@ CONFIG_BLK_DEV_PIIX=y # CONFIG_IDE_ARM is not set CONFIG_BLK_DEV_IDEDMA=y # CONFIG_IDEDMA_IVB is not set -CONFIG_IDEDMA_AUTO=y # CONFIG_BLK_DEV_HD is not set # @@ -446,6 +419,7 @@ CONFIG_IDEDMA_AUTO=y # # CONFIG_RAID_ATTRS is not set CONFIG_SCSI=y +CONFIG_SCSI_DMA=y # CONFIG_SCSI_TGT is not set CONFIG_SCSI_NETLINK=y CONFIG_SCSI_PROC_FS=y @@ -468,6 +442,7 @@ CONFIG_CHR_DEV_SG=m # CONFIG_SCSI_CONSTANTS is not set # CONFIG_SCSI_LOGGING is not set # CONFIG_SCSI_SCAN_ASYNC is not set +CONFIG_SCSI_WAIT_SCAN=m # # SCSI Transports @@ -514,15 +489,7 @@ CONFIG_SCSI_QLOGIC_1280=y # CONFIG_SCSI_DC390T is not set # CONFIG_SCSI_DEBUG is not set # CONFIG_SCSI_SRP is not set - -# -# Serial ATA (prod) and Parallel ATA (experimental) drivers -# # CONFIG_ATA is not set - -# -# Multi-device support (RAID and LVM) -# CONFIG_MD=y CONFIG_BLK_DEV_MD=m CONFIG_MD_LINEAR=m @@ -539,6 +506,7 @@ CONFIG_DM_SNAPSHOT=m CONFIG_DM_MIRROR=m CONFIG_DM_ZERO=m # CONFIG_DM_MULTIPATH is not set +# CONFIG_DM_DELAY is not set # # Fusion MPT device support @@ -553,46 +521,25 @@ CONFIG_FUSION_CTL=y # # IEEE 1394 (FireWire) support # +# CONFIG_FIREWIRE is not set # CONFIG_IEEE1394 is not set - -# -# I2O device support -# # CONFIG_I2O is not set - -# -# Network device support -# CONFIG_NETDEVICES=y +# CONFIG_NETDEVICES_MULTIQUEUE is not set CONFIG_DUMMY=m # CONFIG_BONDING is not set +# CONFIG_MACVLAN is not set # CONFIG_EQUALIZER is not set # CONFIG_TUN is not set # CONFIG_NET_SB1000 is not set - -# -# ARCnet devices -# # CONFIG_ARCNET is not set - -# -# PHY device support -# # CONFIG_PHYLIB is not set - -# -# Ethernet (10 or 100Mbit) -# CONFIG_NET_ETHERNET=y CONFIG_MII=m # CONFIG_HAPPYMEAL is not set # CONFIG_SUNGEM is not set # CONFIG_CASSINI is not set # CONFIG_NET_VENDOR_3COM is not set - -# -# Tulip family network device support -# CONFIG_NET_TULIP=y # CONFIG_DE2104X is not set CONFIG_TULIP=m @@ -623,10 +570,7 @@ CONFIG_E100=m # CONFIG_SUNDANCE is not set # CONFIG_VIA_RHINE is not set # CONFIG_SC92031 is not set - -# -# Ethernet (1000 Mbit) -# +CONFIG_NETDEV_1000=y # CONFIG_ACENIC is not set # CONFIG_DL2K is not set CONFIG_E1000=y @@ -639,36 +583,36 @@ CONFIG_E1000=y # CONFIG_SIS190 is not set # CONFIG_SKGE is not set # CONFIG_SKY2 is not set -# CONFIG_SK98LIN is not set # CONFIG_VIA_VELOCITY is not set CONFIG_TIGON3=y # CONFIG_BNX2 is not set # CONFIG_QLA3XXX is not set # CONFIG_ATL1 is not set - -# -# Ethernet (10000 Mbit) -# +CONFIG_NETDEV_10000=y # CONFIG_CHELSIO_T1 is not set # CONFIG_CHELSIO_T3 is not set # CONFIG_IXGB is not set # CONFIG_S2IO is not set # CONFIG_MYRI10GE is not set # CONFIG_NETXEN_NIC is not set - -# -# Token Ring devices -# +# CONFIG_MLX4_CORE is not set # CONFIG_TR is not set # -# Wireless LAN (non-hamradio) +# Wireless LAN # -# CONFIG_NET_RADIO is not set +# CONFIG_WLAN_PRE80211 is not set +# CONFIG_WLAN_80211 is not set # -# Wan interfaces +# USB Network Adapters # +# CONFIG_USB_CATC is not set +# CONFIG_USB_KAWETH is not set +# CONFIG_USB_PEGASUS is not set +# CONFIG_USB_RTL8150 is not set +# CONFIG_USB_USBNET_MII is not set +# CONFIG_USB_USBNET is not set # CONFIG_WAN is not set # CONFIG_FDDI is not set # CONFIG_HIPPI is not set @@ -678,18 +622,9 @@ CONFIG_TIGON3=y # CONFIG_SHAPER is not set CONFIG_NETCONSOLE=y CONFIG_NETPOLL=y -# CONFIG_NETPOLL_RX is not set # CONFIG_NETPOLL_TRAP is not set CONFIG_NET_POLL_CONTROLLER=y - -# -# ISDN subsystem -# # CONFIG_ISDN is not set - -# -# Telephony Support -# # CONFIG_PHONE is not set # @@ -697,6 +632,7 @@ CONFIG_NET_POLL_CONTROLLER=y # CONFIG_INPUT=y # CONFIG_INPUT_FF_MEMLESS is not set +# CONFIG_INPUT_POLLDEV is not set # # Userland interfaces @@ -722,9 +658,17 @@ CONFIG_KEYBOARD_ATKBD=y # CONFIG_KEYBOARD_STOWAWAY is not set CONFIG_INPUT_MOUSE=y CONFIG_MOUSE_PS2=y +CONFIG_MOUSE_PS2_ALPS=y +CONFIG_MOUSE_PS2_LOGIPS2PP=y +CONFIG_MOUSE_PS2_SYNAPTICS=y +CONFIG_MOUSE_PS2_LIFEBOOK=y +CONFIG_MOUSE_PS2_TRACKPOINT=y +# CONFIG_MOUSE_PS2_TOUCHKIT is not set # CONFIG_MOUSE_SERIAL is not set +# CONFIG_MOUSE_APPLETOUCH is not set # CONFIG_MOUSE_VSXXXAA is not set # CONFIG_INPUT_JOYSTICK is not set +# CONFIG_INPUT_TABLET is not set # CONFIG_INPUT_TOUCHSCREEN is not set # CONFIG_INPUT_MISC is not set @@ -790,19 +734,10 @@ CONFIG_SERIAL_CORE_CONSOLE=y CONFIG_UNIX98_PTYS=y CONFIG_LEGACY_PTYS=y CONFIG_LEGACY_PTY_COUNT=256 - -# -# IPMI -# # CONFIG_IPMI_HANDLER is not set - -# -# Watchdog Cards -# # CONFIG_WATCHDOG is not set # CONFIG_HW_RANDOM is not set CONFIG_EFI_RTC=y -# CONFIG_DTLK is not set # CONFIG_R3964 is not set # CONFIG_APPLICOM is not set CONFIG_AGP=m @@ -821,15 +756,8 @@ CONFIG_HPET=y # CONFIG_HPET_RTC_IRQ is not set CONFIG_HPET_MMAP=y # CONFIG_HANGCHECK_TIMER is not set - -# -# TPM devices -# # CONFIG_TCG_TPM is not set - -# -# I2C support -# +CONFIG_DEVPORT=y # CONFIG_I2C is not set # @@ -837,21 +765,17 @@ CONFIG_HPET_MMAP=y # # CONFIG_SPI is not set # CONFIG_SPI_MASTER is not set - -# -# Dallas's 1-wire bus -# # CONFIG_W1 is not set - -# -# Hardware Monitoring support -# +# CONFIG_POWER_SUPPLY is not set CONFIG_HWMON=y # CONFIG_HWMON_VID is not set # CONFIG_SENSORS_ABITUGURU is not set # CONFIG_SENSORS_F71805F is not set # CONFIG_SENSORS_PC87427 is not set +# CONFIG_SENSORS_SMSC47M1 is not set +# CONFIG_SENSORS_SMSC47B397 is not set # CONFIG_SENSORS_VT1211 is not set +# CONFIG_SENSORS_W83627HF is not set # CONFIG_HWMON_DEBUG_CHIP is not set # @@ -863,17 +787,20 @@ CONFIG_HWMON=y # Multimedia devices # # CONFIG_VIDEO_DEV is not set - -# -# Digital Video Broadcasting Devices -# -# CONFIG_DVB is not set +# CONFIG_DVB_CORE is not set +CONFIG_DAB=y # CONFIG_USB_DABUSB is not set # # Graphics support # # CONFIG_BACKLIGHT_LCD_SUPPORT is not set + +# +# Display device support +# +# CONFIG_DISPLAY_SUPPORT is not set +# CONFIG_VGASTATE is not set # CONFIG_FB is not set # @@ -887,16 +814,18 @@ CONFIG_DUMMY_CONSOLE=y # Sound # # CONFIG_SOUND is not set - -# -# HID Devices -# +CONFIG_HID_SUPPORT=y CONFIG_HID=y # CONFIG_HID_DEBUG is not set # -# USB support +# USB Input Devices # +CONFIG_USB_HID=y +# CONFIG_USB_HIDINPUT_POWERBOOK is not set +# CONFIG_HID_FF is not set +# CONFIG_USB_HIDDEV is not set +CONFIG_USB_SUPPORT=y CONFIG_USB_ARCH_HAS_HCD=y CONFIG_USB_ARCH_HAS_OHCI=y CONFIG_USB_ARCH_HAS_EHCI=y @@ -907,8 +836,10 @@ CONFIG_USB=y # Miscellaneous USB options # CONFIG_USB_DEVICEFS=y +CONFIG_USB_DEVICE_CLASS=y # CONFIG_USB_DYNAMIC_MINORS is not set # CONFIG_USB_SUSPEND is not set +# CONFIG_USB_PERSIST is not set # CONFIG_USB_OTG is not set # @@ -918,7 +849,6 @@ CONFIG_USB_EHCI_HCD=m # CONFIG_USB_EHCI_SPLIT_ISO is not set # CONFIG_USB_EHCI_ROOT_HUB_TT is not set # CONFIG_USB_EHCI_TT_NEWSCHED is not set -# CONFIG_USB_EHCI_BIG_ENDIAN_MMIO is not set # CONFIG_USB_ISP116X_HCD is not set CONFIG_USB_OHCI_HCD=m # CONFIG_USB_OHCI_BIG_ENDIAN_DESC is not set @@ -926,6 +856,7 @@ CONFIG_USB_OHCI_HCD=m CONFIG_USB_OHCI_LITTLE_ENDIAN=y CONFIG_USB_UHCI_HCD=y # CONFIG_USB_SL811_HCD is not set +# CONFIG_USB_R8A66597_HCD is not set # # USB Device Class drivers @@ -955,41 +886,10 @@ CONFIG_USB_STORAGE=m # CONFIG_USB_LIBUSUAL is not set # -# USB Input Devices -# -CONFIG_USB_HID=y -# CONFIG_USB_HIDINPUT_POWERBOOK is not set -# CONFIG_HID_FF is not set -# CONFIG_USB_HIDDEV is not set -# CONFIG_USB_AIPTEK is not set -# CONFIG_USB_WACOM is not set -# CONFIG_USB_ACECAD is not set -# CONFIG_USB_KBTAB is not set -# CONFIG_USB_POWERMATE is not set -# CONFIG_USB_TOUCHSCREEN is not set -# CONFIG_USB_YEALINK is not set -# CONFIG_USB_XPAD is not set -# CONFIG_USB_ATI_REMOTE is not set -# CONFIG_USB_ATI_REMOTE2 is not set -# CONFIG_USB_KEYSPAN_REMOTE is not set -# CONFIG_USB_APPLETOUCH is not set -# CONFIG_USB_GTCO is not set - -# # USB Imaging devices # # CONFIG_USB_MDC800 is not set # CONFIG_USB_MICROTEK is not set - -# -# USB Network Adapters -# -# CONFIG_USB_CATC is not set -# CONFIG_USB_KAWETH is not set -# CONFIG_USB_PEGASUS is not set -# CONFIG_USB_RTL8150 is not set -# CONFIG_USB_USBNET_MII is not set -# CONFIG_USB_USBNET is not set # CONFIG_USB_MON is not set # @@ -1033,10 +933,6 @@ CONFIG_USB_HID=y # USB Gadget Support # # CONFIG_USB_GADGET is not set - -# -# MMC/SD Card support -# # CONFIG_MMC is not set # @@ -1051,17 +947,9 @@ CONFIG_USB_HID=y # # LED Triggers # - -# -# InfiniBand support -# # CONFIG_INFINIBAND is not set # -# EDAC - error detection and reporting (RAS) (EXPERIMENTAL) -# - -# # Real Time Clock # # CONFIG_RTC_CLASS is not set @@ -1080,12 +968,9 @@ CONFIG_USB_HID=y # # -# Auxiliary Display support -# - -# -# Virtualization +# Userspace I/O # +# CONFIG_UIO is not set # CONFIG_MSPEC is not set # @@ -1200,7 +1085,8 @@ CONFIG_EXPORTFS=m CONFIG_NFS_COMMON=y CONFIG_SUNRPC=m CONFIG_SUNRPC_GSS=m -CONFIG_RPCSEC_GSS_KRB5=m +# CONFIG_SUNRPC_BIND34 is not set +CONFIG_RPCSEC_GSS_KRB5=y # CONFIG_RPCSEC_GSS_SPKM3 is not set CONFIG_SMB_FS=m CONFIG_SMB_NLS_DEFAULT=y @@ -1214,7 +1100,6 @@ CONFIG_CIFS=m # CONFIG_NCP_FS is not set # CONFIG_CODA_FS is not set # CONFIG_AFS_FS is not set -# CONFIG_9P_FS is not set # # Partition Types @@ -1236,6 +1121,7 @@ CONFIG_SGI_PARTITION=y # CONFIG_SUN_PARTITION is not set # CONFIG_KARMA_PARTITION is not set CONFIG_EFI_PARTITION=y +# CONFIG_SYSV68_PARTITION is not set # # Native Language Support @@ -1292,11 +1178,14 @@ CONFIG_NLS_UTF8=m CONFIG_BITREVERSE=y # CONFIG_CRC_CCITT is not set # CONFIG_CRC16 is not set +# CONFIG_CRC_ITU_T is not set CONFIG_CRC32=y +# CONFIG_CRC7 is not set # CONFIG_LIBCRC32C is not set CONFIG_PLIST=y CONFIG_HAS_IOMEM=y CONFIG_HAS_IOPORT=y +CONFIG_HAS_DMA=y CONFIG_GENERIC_HARDIRQS=y CONFIG_GENERIC_IRQ_PROBE=y CONFIG_GENERIC_PENDING_IRQ=y @@ -1319,8 +1208,8 @@ CONFIG_MAGIC_SYSRQ=y # CONFIG_HEADERS_CHECK is not set CONFIG_DEBUG_KERNEL=y # CONFIG_DEBUG_SHIRQ is not set -CONFIG_LOG_BUF_SHIFT=20 CONFIG_DETECT_SOFTLOCKUP=y +CONFIG_SCHED_DEBUG=y # CONFIG_SCHEDSTATS is not set # CONFIG_TIMER_STATS is not set # CONFIG_DEBUG_SLAB is not set @@ -1343,17 +1232,12 @@ CONFIG_IA64_GRANULE_16MB=y # CONFIG_DISABLE_VHPT is not set # CONFIG_IA64_DEBUG_CMPXCHG is not set # CONFIG_IA64_DEBUG_IRQ is not set -CONFIG_SYSVIPC_COMPAT=y # # Security options # # CONFIG_KEYS is not set # CONFIG_SECURITY is not set - -# -# Cryptographic options -# CONFIG_CRYPTO=y CONFIG_CRYPTO_ALGAPI=y CONFIG_CRYPTO_BLKCIPHER=m @@ -1373,6 +1257,7 @@ CONFIG_CRYPTO_ECB=m CONFIG_CRYPTO_CBC=m CONFIG_CRYPTO_PCBC=m # CONFIG_CRYPTO_LRW is not set +# CONFIG_CRYPTO_CRYPTD is not set CONFIG_CRYPTO_DES=m # CONFIG_CRYPTO_FCRYPT is not set # CONFIG_CRYPTO_BLOWFISH is not set @@ -1390,7 +1275,4 @@ CONFIG_CRYPTO_DES=m # CONFIG_CRYPTO_CRC32C is not set # CONFIG_CRYPTO_CAMELLIA is not set # CONFIG_CRYPTO_TEST is not set - -# -# Hardware crypto devices -# +CONFIG_CRYPTO_HW=y diff --git a/arch/ia64/defconfig b/arch/ia64/defconfig index 461f8ee738fb..03172dc8c403 100644 --- a/arch/ia64/defconfig +++ b/arch/ia64/defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.21-rc3 -# Thu Mar 8 11:01:03 2007 +# Linux kernel version: 2.6.22 +# Thu Jul 19 13:55:32 2007 # CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" @@ -19,15 +19,15 @@ CONFIG_LOCALVERSION="" CONFIG_LOCALVERSION_AUTO=y CONFIG_SWAP=y CONFIG_SYSVIPC=y -# CONFIG_IPC_NS is not set CONFIG_SYSVIPC_SYSCTL=y CONFIG_POSIX_MQUEUE=y # CONFIG_BSD_PROCESS_ACCT is not set # CONFIG_TASKSTATS is not set -# CONFIG_UTS_NS is not set +# CONFIG_USER_NS is not set # CONFIG_AUDIT is not set CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y +CONFIG_LOG_BUF_SHIFT=20 # CONFIG_CPUSETS is not set CONFIG_SYSFS_DEPRECATED=y # CONFIG_RELAY is not set @@ -46,18 +46,19 @@ CONFIG_BUG=y CONFIG_ELF_CORE=y CONFIG_BASE_FULL=y CONFIG_FUTEX=y +CONFIG_ANON_INODES=y CONFIG_EPOLL=y +CONFIG_SIGNALFD=y +CONFIG_TIMERFD=y +CONFIG_EVENTFD=y CONFIG_SHMEM=y -CONFIG_SLAB=y CONFIG_VM_EVENT_COUNTERS=y +CONFIG_SLAB=y +# CONFIG_SLUB is not set +# CONFIG_SLOB is not set CONFIG_RT_MUTEXES=y # CONFIG_TINY_SHMEM is not set CONFIG_BASE_SMALL=0 -# CONFIG_SLOB is not set - -# -# Loadable module support -# CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_MODULE_FORCE_UNLOAD is not set @@ -65,12 +66,9 @@ CONFIG_MODVERSIONS=y # CONFIG_MODULE_SRCVERSION_ALL is not set CONFIG_KMOD=y CONFIG_STOP_MACHINE=y - -# -# Block layer -# CONFIG_BLOCK=y # CONFIG_BLK_DEV_IO_TRACE is not set +# CONFIG_BLK_DEV_BSG is not set # # IO Schedulers @@ -91,6 +89,7 @@ CONFIG_DEFAULT_IOSCHED="anticipatory" CONFIG_IA64=y CONFIG_64BIT=y CONFIG_ZONE_DMA=y +CONFIG_QUICKLIST=y CONFIG_MMU=y CONFIG_SWIOTLB=y CONFIG_RWSEM_XCHGADD_ALGORITHM=y @@ -114,8 +113,8 @@ CONFIG_IA64_GENERIC=y CONFIG_MCKINLEY=y # CONFIG_IA64_PAGE_SIZE_4KB is not set # CONFIG_IA64_PAGE_SIZE_8KB is not set -CONFIG_IA64_PAGE_SIZE_16KB=y -# CONFIG_IA64_PAGE_SIZE_64KB is not set +# CONFIG_IA64_PAGE_SIZE_16KB is not set +CONFIG_IA64_PAGE_SIZE_64KB=y CONFIG_PGTABLE_3=y # CONFIG_PGTABLE_4 is not set # CONFIG_HZ_100 is not set @@ -147,6 +146,9 @@ CONFIG_SPLIT_PTLOCK_CPUS=4 CONFIG_MIGRATION=y CONFIG_RESOURCES_64BIT=y CONFIG_ZONE_DMA_FLAG=1 +CONFIG_BOUNCE=y +CONFIG_NR_QUICK=1 +CONFIG_VIRT_TO_BUS=y CONFIG_ARCH_SELECT_MEMORY_MODEL=y CONFIG_ARCH_DISCONTIGMEM_ENABLE=y CONFIG_ARCH_FLATMEM_ENABLE=y @@ -164,7 +166,7 @@ CONFIG_COMPAT=y CONFIG_IA64_MCA_RECOVERY=y CONFIG_PERFMON=y CONFIG_IA64_PALINFO=y -# CONFIG_MC_ERR_INJECT is not set +# CONFIG_IA64_MC_ERR_INJECT is not set CONFIG_SGI_SN=y # CONFIG_IA64_ESI is not set @@ -180,6 +182,7 @@ CONFIG_CRASH_DUMP=y # CONFIG_EFI_VARS=y CONFIG_EFI_PCDP=y +CONFIG_DMIID=y CONFIG_BINFMT_ELF=y CONFIG_BINFMT_MISC=m @@ -189,7 +192,6 @@ CONFIG_BINFMT_MISC=m CONFIG_PM=y CONFIG_PM_LEGACY=y # CONFIG_PM_DEBUG is not set -# CONFIG_PM_SYSFS_DEPRECATED is not set # # ACPI (Advanced Configuration and Power Interface) Support @@ -220,13 +222,11 @@ CONFIG_ACPI_CONTAINER=m # CONFIG_PCI=y CONFIG_PCI_DOMAINS=y +CONFIG_PCI_SYSCALL=y # CONFIG_PCIEPORTBUS is not set +CONFIG_ARCH_SUPPORTS_MSI=y # CONFIG_PCI_MSI is not set # CONFIG_PCI_DEBUG is not set - -# -# PCI Hotplug Support -# CONFIG_HOTPLUG_PCI=m # CONFIG_HOTPLUG_PCI_FAKE is not set CONFIG_HOTPLUG_PCI_ACPI=m @@ -248,7 +248,6 @@ CONFIG_NET=y # # Networking options # -# CONFIG_NETDEBUG is not set CONFIG_PACKET=y # CONFIG_PACKET_MMAP is not set CONFIG_UNIX=y @@ -286,20 +285,8 @@ CONFIG_DEFAULT_TCP_CONG="cubic" # CONFIG_INET6_TUNNEL is not set # CONFIG_NETWORK_SECMARK is not set # CONFIG_NETFILTER is not set - -# -# DCCP Configuration (EXPERIMENTAL) -# # CONFIG_IP_DCCP is not set - -# -# SCTP Configuration (EXPERIMENTAL) -# # CONFIG_IP_SCTP is not set - -# -# TIPC Configuration (EXPERIMENTAL) -# # CONFIG_TIPC is not set # CONFIG_ATM is not set # CONFIG_BRIDGE is not set @@ -325,7 +312,17 @@ CONFIG_DEFAULT_TCP_CONG="cubic" # CONFIG_HAMRADIO is not set # CONFIG_IRDA is not set # CONFIG_BT is not set +# CONFIG_AF_RXRPC is not set + +# +# Wireless +# +# CONFIG_CFG80211 is not set +# CONFIG_WIRELESS_EXT is not set +# CONFIG_MAC80211 is not set # CONFIG_IEEE80211 is not set +# CONFIG_RFKILL is not set +# CONFIG_NET_9P is not set # # Device Drivers @@ -340,25 +337,9 @@ CONFIG_FW_LOADER=m # CONFIG_DEBUG_DRIVER is not set # CONFIG_DEBUG_DEVRES is not set # CONFIG_SYS_HYPERVISOR is not set - -# -# Connector - unified userspace <-> kernelspace linker -# # CONFIG_CONNECTOR is not set - -# -# Memory Technology Devices (MTD) -# # CONFIG_MTD is not set - -# -# Parallel port support -# # CONFIG_PARPORT is not set - -# -# Plug and Play support -# CONFIG_PNP=y # CONFIG_PNP_DEBUG is not set @@ -366,10 +347,7 @@ CONFIG_PNP=y # Protocols # CONFIG_PNPACPI=y - -# -# Block devices -# +CONFIG_BLK_DEV=y # CONFIG_BLK_CPQ_DA is not set # CONFIG_BLK_CPQ_CISS_DA is not set # CONFIG_BLK_DEV_DAC960 is not set @@ -386,16 +364,11 @@ CONFIG_BLK_DEV_RAM_SIZE=4096 CONFIG_BLK_DEV_RAM_BLOCKSIZE=1024 # CONFIG_CDROM_PKTCDVD is not set # CONFIG_ATA_OVER_ETH is not set - -# -# Misc devices -# +CONFIG_MISC_DEVICES=y +# CONFIG_PHANTOM is not set +# CONFIG_EEPROM_93CX6 is not set CONFIG_SGI_IOC4=y # CONFIG_TIFM_CORE is not set - -# -# ATA/ATAPI/MFM/RLL support -# CONFIG_IDE=y CONFIG_IDE_MAX_HWIFS=4 CONFIG_BLK_DEV_IDE=y @@ -412,6 +385,7 @@ CONFIG_BLK_DEV_IDEFLOPPY=y CONFIG_BLK_DEV_IDESCSI=m # CONFIG_BLK_DEV_IDEACPI is not set # CONFIG_IDE_TASK_IOCTL is not set +CONFIG_IDE_PROC_FS=y # # IDE chipset support/bugfixes @@ -420,12 +394,12 @@ CONFIG_BLK_DEV_IDESCSI=m # CONFIG_BLK_DEV_IDEPNP is not set CONFIG_BLK_DEV_IDEPCI=y CONFIG_IDEPCI_SHARE_IRQ=y +CONFIG_IDEPCI_PCIBUS_ORDER=y # CONFIG_BLK_DEV_OFFBOARD is not set CONFIG_BLK_DEV_GENERIC=y # CONFIG_BLK_DEV_OPTI621 is not set CONFIG_BLK_DEV_IDEDMA_PCI=y # CONFIG_BLK_DEV_IDEDMA_FORCED is not set -CONFIG_IDEDMA_PCI_AUTO=y # CONFIG_IDEDMA_ONLYDISK is not set # CONFIG_BLK_DEV_AEC62XX is not set # CONFIG_BLK_DEV_ALI15X3 is not set @@ -455,7 +429,6 @@ CONFIG_BLK_DEV_SGIIOC4=y # CONFIG_IDE_ARM is not set CONFIG_BLK_DEV_IDEDMA=y # CONFIG_IDEDMA_IVB is not set -CONFIG_IDEDMA_AUTO=y # CONFIG_BLK_DEV_HD is not set # @@ -463,6 +436,7 @@ CONFIG_IDEDMA_AUTO=y # # CONFIG_RAID_ATTRS is not set CONFIG_SCSI=y +CONFIG_SCSI_DMA=y # CONFIG_SCSI_TGT is not set CONFIG_SCSI_NETLINK=y CONFIG_SCSI_PROC_FS=y @@ -485,6 +459,7 @@ CONFIG_CHR_DEV_SG=m # CONFIG_SCSI_CONSTANTS is not set # CONFIG_SCSI_LOGGING is not set # CONFIG_SCSI_SCAN_ASYNC is not set +CONFIG_SCSI_WAIT_SCAN=m # # SCSI Transports @@ -492,7 +467,7 @@ CONFIG_CHR_DEV_SG=m CONFIG_SCSI_SPI_ATTRS=y CONFIG_SCSI_FC_ATTRS=y # CONFIG_SCSI_ISCSI_ATTRS is not set -# CONFIG_SCSI_SAS_ATTRS is not set +CONFIG_SCSI_SAS_ATTRS=y # CONFIG_SCSI_SAS_LIBSAS is not set # @@ -531,15 +506,7 @@ CONFIG_SCSI_QLOGIC_1280=y # CONFIG_SCSI_DC390T is not set # CONFIG_SCSI_DEBUG is not set # CONFIG_SCSI_SRP is not set - -# -# Serial ATA (prod) and Parallel ATA (experimental) drivers -# # CONFIG_ATA is not set - -# -# Multi-device support (RAID and LVM) -# CONFIG_MD=y CONFIG_BLK_DEV_MD=m CONFIG_MD_LINEAR=m @@ -557,6 +524,8 @@ CONFIG_DM_MIRROR=m CONFIG_DM_ZERO=m CONFIG_DM_MULTIPATH=m # CONFIG_DM_MULTIPATH_EMC is not set +# CONFIG_DM_MULTIPATH_RDAC is not set +# CONFIG_DM_DELAY is not set # # Fusion MPT device support @@ -564,53 +533,32 @@ CONFIG_DM_MULTIPATH=m CONFIG_FUSION=y CONFIG_FUSION_SPI=y CONFIG_FUSION_FC=m -# CONFIG_FUSION_SAS is not set +CONFIG_FUSION_SAS=y CONFIG_FUSION_MAX_SGE=128 # CONFIG_FUSION_CTL is not set # # IEEE 1394 (FireWire) support # +# CONFIG_FIREWIRE is not set # CONFIG_IEEE1394 is not set - -# -# I2O device support -# # CONFIG_I2O is not set - -# -# Network device support -# CONFIG_NETDEVICES=y +# CONFIG_NETDEVICES_MULTIQUEUE is not set CONFIG_DUMMY=m # CONFIG_BONDING is not set +# CONFIG_MACVLAN is not set # CONFIG_EQUALIZER is not set # CONFIG_TUN is not set # CONFIG_NET_SB1000 is not set - -# -# ARCnet devices -# # CONFIG_ARCNET is not set - -# -# PHY device support -# # CONFIG_PHYLIB is not set - -# -# Ethernet (10 or 100Mbit) -# CONFIG_NET_ETHERNET=y CONFIG_MII=m # CONFIG_HAPPYMEAL is not set # CONFIG_SUNGEM is not set # CONFIG_CASSINI is not set # CONFIG_NET_VENDOR_3COM is not set - -# -# Tulip family network device support -# CONFIG_NET_TULIP=y # CONFIG_DE2104X is not set CONFIG_TULIP=m @@ -641,10 +589,7 @@ CONFIG_E100=m # CONFIG_SUNDANCE is not set # CONFIG_VIA_RHINE is not set # CONFIG_SC92031 is not set - -# -# Ethernet (1000 Mbit) -# +CONFIG_NETDEV_1000=y # CONFIG_ACENIC is not set # CONFIG_DL2K is not set CONFIG_E1000=y @@ -657,36 +602,36 @@ CONFIG_E1000=y # CONFIG_SIS190 is not set # CONFIG_SKGE is not set # CONFIG_SKY2 is not set -# CONFIG_SK98LIN is not set # CONFIG_VIA_VELOCITY is not set CONFIG_TIGON3=y # CONFIG_BNX2 is not set # CONFIG_QLA3XXX is not set # CONFIG_ATL1 is not set - -# -# Ethernet (10000 Mbit) -# +CONFIG_NETDEV_10000=y # CONFIG_CHELSIO_T1 is not set # CONFIG_CHELSIO_T3 is not set # CONFIG_IXGB is not set # CONFIG_S2IO is not set # CONFIG_MYRI10GE is not set # CONFIG_NETXEN_NIC is not set - -# -# Token Ring devices -# +# CONFIG_MLX4_CORE is not set # CONFIG_TR is not set # -# Wireless LAN (non-hamradio) +# Wireless LAN # -# CONFIG_NET_RADIO is not set +# CONFIG_WLAN_PRE80211 is not set +# CONFIG_WLAN_80211 is not set # -# Wan interfaces +# USB Network Adapters # +# CONFIG_USB_CATC is not set +# CONFIG_USB_KAWETH is not set +# CONFIG_USB_PEGASUS is not set +# CONFIG_USB_RTL8150 is not set +# CONFIG_USB_USBNET_MII is not set +# CONFIG_USB_USBNET is not set # CONFIG_WAN is not set # CONFIG_FDDI is not set # CONFIG_HIPPI is not set @@ -696,18 +641,9 @@ CONFIG_TIGON3=y # CONFIG_SHAPER is not set CONFIG_NETCONSOLE=y CONFIG_NETPOLL=y -# CONFIG_NETPOLL_RX is not set # CONFIG_NETPOLL_TRAP is not set CONFIG_NET_POLL_CONTROLLER=y - -# -# ISDN subsystem -# # CONFIG_ISDN is not set - -# -# Telephony Support -# # CONFIG_PHONE is not set # @@ -715,6 +651,7 @@ CONFIG_NET_POLL_CONTROLLER=y # CONFIG_INPUT=y # CONFIG_INPUT_FF_MEMLESS is not set +# CONFIG_INPUT_POLLDEV is not set # # Userland interfaces @@ -740,9 +677,17 @@ CONFIG_KEYBOARD_ATKBD=y # CONFIG_KEYBOARD_STOWAWAY is not set CONFIG_INPUT_MOUSE=y CONFIG_MOUSE_PS2=y +CONFIG_MOUSE_PS2_ALPS=y +CONFIG_MOUSE_PS2_LOGIPS2PP=y +CONFIG_MOUSE_PS2_SYNAPTICS=y +CONFIG_MOUSE_PS2_LIFEBOOK=y +CONFIG_MOUSE_PS2_TRACKPOINT=y +# CONFIG_MOUSE_PS2_TOUCHKIT is not set # CONFIG_MOUSE_SERIAL is not set +# CONFIG_MOUSE_APPLETOUCH is not set # CONFIG_MOUSE_VSXXXAA is not set # CONFIG_INPUT_JOYSTICK is not set +# CONFIG_INPUT_TABLET is not set # CONFIG_INPUT_TOUCHSCREEN is not set # CONFIG_INPUT_MISC is not set @@ -814,19 +759,10 @@ CONFIG_SERIAL_SGI_IOC4=y CONFIG_UNIX98_PTYS=y CONFIG_LEGACY_PTYS=y CONFIG_LEGACY_PTY_COUNT=256 - -# -# IPMI -# # CONFIG_IPMI_HANDLER is not set - -# -# Watchdog Cards -# # CONFIG_WATCHDOG is not set # CONFIG_HW_RANDOM is not set CONFIG_EFI_RTC=y -# CONFIG_DTLK is not set # CONFIG_R3964 is not set # CONFIG_APPLICOM is not set CONFIG_AGP=m @@ -848,15 +784,8 @@ CONFIG_HPET=y CONFIG_HPET_MMAP=y # CONFIG_HANGCHECK_TIMER is not set CONFIG_MMTIMER=y - -# -# TPM devices -# # CONFIG_TCG_TPM is not set - -# -# I2C support -# +CONFIG_DEVPORT=y # CONFIG_I2C is not set # @@ -864,21 +793,17 @@ CONFIG_MMTIMER=y # # CONFIG_SPI is not set # CONFIG_SPI_MASTER is not set - -# -# Dallas's 1-wire bus -# # CONFIG_W1 is not set - -# -# Hardware Monitoring support -# +# CONFIG_POWER_SUPPLY is not set CONFIG_HWMON=y # CONFIG_HWMON_VID is not set # CONFIG_SENSORS_ABITUGURU is not set # CONFIG_SENSORS_F71805F is not set # CONFIG_SENSORS_PC87427 is not set +# CONFIG_SENSORS_SMSC47M1 is not set +# CONFIG_SENSORS_SMSC47B397 is not set # CONFIG_SENSORS_VT1211 is not set +# CONFIG_SENSORS_W83627HF is not set # CONFIG_HWMON_DEBUG_CHIP is not set # @@ -890,17 +815,20 @@ CONFIG_HWMON=y # Multimedia devices # # CONFIG_VIDEO_DEV is not set - -# -# Digital Video Broadcasting Devices -# -# CONFIG_DVB is not set +# CONFIG_DVB_CORE is not set +CONFIG_DAB=y # CONFIG_USB_DABUSB is not set # # Graphics support # # CONFIG_BACKLIGHT_LCD_SUPPORT is not set + +# +# Display device support +# +# CONFIG_DISPLAY_SUPPORT is not set +# CONFIG_VGASTATE is not set # CONFIG_FB is not set # @@ -1014,9 +942,10 @@ CONFIG_SND_FM801=m # USB devices # # CONFIG_SND_USB_AUDIO is not set +# CONFIG_SND_USB_CAIAQ is not set # -# SoC audio support +# System on Chip audio support # # CONFIG_SND_SOC is not set @@ -1025,16 +954,24 @@ CONFIG_SND_FM801=m # # CONFIG_SOUND_PRIME is not set CONFIG_AC97_BUS=m +CONFIG_HID_SUPPORT=y +CONFIG_HID=y +# CONFIG_HID_DEBUG is not set # -# HID Devices +# USB Input Devices # -CONFIG_HID=y -# CONFIG_HID_DEBUG is not set +CONFIG_USB_HID=m +# CONFIG_USB_HIDINPUT_POWERBOOK is not set +# CONFIG_HID_FF is not set +# CONFIG_USB_HIDDEV is not set # -# USB support +# USB HID Boot Protocol drivers # +# CONFIG_USB_KBD is not set +# CONFIG_USB_MOUSE is not set +CONFIG_USB_SUPPORT=y CONFIG_USB_ARCH_HAS_HCD=y CONFIG_USB_ARCH_HAS_OHCI=y CONFIG_USB_ARCH_HAS_EHCI=y @@ -1045,8 +982,10 @@ CONFIG_USB=m # Miscellaneous USB options # CONFIG_USB_DEVICEFS=y +CONFIG_USB_DEVICE_CLASS=y # CONFIG_USB_DYNAMIC_MINORS is not set # CONFIG_USB_SUSPEND is not set +# CONFIG_USB_PERSIST is not set # CONFIG_USB_OTG is not set # @@ -1056,7 +995,6 @@ CONFIG_USB_EHCI_HCD=m # CONFIG_USB_EHCI_SPLIT_ISO is not set # CONFIG_USB_EHCI_ROOT_HUB_TT is not set # CONFIG_USB_EHCI_TT_NEWSCHED is not set -# CONFIG_USB_EHCI_BIG_ENDIAN_MMIO is not set # CONFIG_USB_ISP116X_HCD is not set CONFIG_USB_OHCI_HCD=m # CONFIG_USB_OHCI_BIG_ENDIAN_DESC is not set @@ -1064,6 +1002,7 @@ CONFIG_USB_OHCI_HCD=m CONFIG_USB_OHCI_LITTLE_ENDIAN=y CONFIG_USB_UHCI_HCD=m # CONFIG_USB_SL811_HCD is not set +# CONFIG_USB_R8A66597_HCD is not set # # USB Device Class drivers @@ -1093,47 +1032,10 @@ CONFIG_USB_STORAGE=m # CONFIG_USB_LIBUSUAL is not set # -# USB Input Devices -# -CONFIG_USB_HID=m -# CONFIG_USB_HIDINPUT_POWERBOOK is not set -# CONFIG_HID_FF is not set -# CONFIG_USB_HIDDEV is not set - -# -# USB HID Boot Protocol drivers -# -# CONFIG_USB_KBD is not set -# CONFIG_USB_MOUSE is not set -# CONFIG_USB_AIPTEK is not set -# CONFIG_USB_WACOM is not set -# CONFIG_USB_ACECAD is not set -# CONFIG_USB_KBTAB is not set -# CONFIG_USB_POWERMATE is not set -# CONFIG_USB_TOUCHSCREEN is not set -# CONFIG_USB_YEALINK is not set -# CONFIG_USB_XPAD is not set -# CONFIG_USB_ATI_REMOTE is not set -# CONFIG_USB_ATI_REMOTE2 is not set -# CONFIG_USB_KEYSPAN_REMOTE is not set -# CONFIG_USB_APPLETOUCH is not set -# CONFIG_USB_GTCO is not set - -# # USB Imaging devices # # CONFIG_USB_MDC800 is not set # CONFIG_USB_MICROTEK is not set - -# -# USB Network Adapters -# -# CONFIG_USB_CATC is not set -# CONFIG_USB_KAWETH is not set -# CONFIG_USB_PEGASUS is not set -# CONFIG_USB_RTL8150 is not set -# CONFIG_USB_USBNET_MII is not set -# CONFIG_USB_USBNET is not set CONFIG_USB_MON=y # @@ -1177,10 +1079,6 @@ CONFIG_USB_MON=y # USB Gadget Support # # CONFIG_USB_GADGET is not set - -# -# MMC/SD Card support -# # CONFIG_MMC is not set # @@ -1195,10 +1093,6 @@ CONFIG_USB_MON=y # # LED Triggers # - -# -# InfiniBand support -# CONFIG_INFINIBAND=m # CONFIG_INFINIBAND_USER_MAD is not set # CONFIG_INFINIBAND_USER_ACCESS is not set @@ -1206,6 +1100,7 @@ CONFIG_INFINIBAND_ADDR_TRANS=y CONFIG_INFINIBAND_MTHCA=m CONFIG_INFINIBAND_MTHCA_DEBUG=y # CONFIG_INFINIBAND_AMSO1100 is not set +# CONFIG_MLX4_INFINIBAND is not set CONFIG_INFINIBAND_IPOIB=m # CONFIG_INFINIBAND_IPOIB_CM is not set CONFIG_INFINIBAND_IPOIB_DEBUG=y @@ -1214,10 +1109,6 @@ CONFIG_INFINIBAND_IPOIB_DEBUG=y # CONFIG_INFINIBAND_ISER is not set # -# EDAC - error detection and reporting (RAS) (EXPERIMENTAL) -# - -# # Real Time Clock # # CONFIG_RTC_CLASS is not set @@ -1236,12 +1127,9 @@ CONFIG_INFINIBAND_IPOIB_DEBUG=y # # -# Auxiliary Display support -# - -# -# Virtualization +# Userspace I/O # +# CONFIG_UIO is not set # CONFIG_MSPEC is not set # @@ -1357,7 +1245,8 @@ CONFIG_EXPORTFS=m CONFIG_NFS_COMMON=y CONFIG_SUNRPC=m CONFIG_SUNRPC_GSS=m -CONFIG_RPCSEC_GSS_KRB5=m +# CONFIG_SUNRPC_BIND34 is not set +CONFIG_RPCSEC_GSS_KRB5=y # CONFIG_RPCSEC_GSS_SPKM3 is not set CONFIG_SMB_FS=m CONFIG_SMB_NLS_DEFAULT=y @@ -1371,7 +1260,6 @@ CONFIG_CIFS=m # CONFIG_NCP_FS is not set # CONFIG_CODA_FS is not set # CONFIG_AFS_FS is not set -# CONFIG_9P_FS is not set # # Partition Types @@ -1393,6 +1281,7 @@ CONFIG_SGI_PARTITION=y # CONFIG_SUN_PARTITION is not set # CONFIG_KARMA_PARTITION is not set CONFIG_EFI_PARTITION=y +# CONFIG_SYSV68_PARTITION is not set # # Native Language Support @@ -1449,11 +1338,14 @@ CONFIG_NLS_UTF8=m CONFIG_BITREVERSE=y # CONFIG_CRC_CCITT is not set # CONFIG_CRC16 is not set +# CONFIG_CRC_ITU_T is not set CONFIG_CRC32=y +# CONFIG_CRC7 is not set # CONFIG_LIBCRC32C is not set CONFIG_PLIST=y CONFIG_HAS_IOMEM=y CONFIG_HAS_IOPORT=y +CONFIG_HAS_DMA=y CONFIG_GENERIC_HARDIRQS=y CONFIG_GENERIC_IRQ_PROBE=y CONFIG_GENERIC_PENDING_IRQ=y @@ -1483,8 +1375,8 @@ CONFIG_MAGIC_SYSRQ=y # CONFIG_HEADERS_CHECK is not set CONFIG_DEBUG_KERNEL=y # CONFIG_DEBUG_SHIRQ is not set -CONFIG_LOG_BUF_SHIFT=20 CONFIG_DETECT_SOFTLOCKUP=y +CONFIG_SCHED_DEBUG=y # CONFIG_SCHEDSTATS is not set # CONFIG_TIMER_STATS is not set # CONFIG_DEBUG_SLAB is not set @@ -1514,10 +1406,6 @@ CONFIG_SYSVIPC_COMPAT=y # # CONFIG_KEYS is not set # CONFIG_SECURITY is not set - -# -# Cryptographic options -# CONFIG_CRYPTO=y CONFIG_CRYPTO_ALGAPI=y CONFIG_CRYPTO_BLKCIPHER=m @@ -1537,6 +1425,7 @@ CONFIG_CRYPTO_ECB=m CONFIG_CRYPTO_CBC=m CONFIG_CRYPTO_PCBC=m # CONFIG_CRYPTO_LRW is not set +# CONFIG_CRYPTO_CRYPTD is not set CONFIG_CRYPTO_DES=m # CONFIG_CRYPTO_FCRYPT is not set # CONFIG_CRYPTO_BLOWFISH is not set @@ -1554,7 +1443,4 @@ CONFIG_CRYPTO_DES=m # CONFIG_CRYPTO_CRC32C is not set # CONFIG_CRYPTO_CAMELLIA is not set # CONFIG_CRYPTO_TEST is not set - -# -# Hardware crypto devices -# +CONFIG_CRYPTO_HW=y diff --git a/arch/ia64/ia32/binfmt_elf32.c b/arch/ia64/ia32/binfmt_elf32.c index 6f4d3d06f0ed..e1189ba1ca5e 100644 --- a/arch/ia64/ia32/binfmt_elf32.c +++ b/arch/ia64/ia32/binfmt_elf32.c @@ -195,62 +195,27 @@ ia64_elf32_init (struct pt_regs *regs) ia32_load_state(current); } +/* + * Undo the override of setup_arg_pages() without this ia32_setup_arg_pages() + * will suffer infinite self recursion. + */ +#undef setup_arg_pages + int ia32_setup_arg_pages (struct linux_binprm *bprm, int executable_stack) { - unsigned long stack_base; - struct vm_area_struct *mpnt; - struct mm_struct *mm = current->mm; - int i, ret; - - stack_base = IA32_STACK_TOP - MAX_ARG_PAGES*PAGE_SIZE; - mm->arg_start = bprm->p + stack_base; - - bprm->p += stack_base; - if (bprm->loader) - bprm->loader += stack_base; - bprm->exec += stack_base; - - mpnt = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); - if (!mpnt) - return -ENOMEM; - - down_write(¤t->mm->mmap_sem); - { - mpnt->vm_mm = current->mm; - mpnt->vm_start = PAGE_MASK & (unsigned long) bprm->p; - mpnt->vm_end = IA32_STACK_TOP; - if (executable_stack == EXSTACK_ENABLE_X) - mpnt->vm_flags = VM_STACK_FLAGS | VM_EXEC; - else if (executable_stack == EXSTACK_DISABLE_X) - mpnt->vm_flags = VM_STACK_FLAGS & ~VM_EXEC; - else - mpnt->vm_flags = VM_STACK_FLAGS; - mpnt->vm_page_prot = (mpnt->vm_flags & VM_EXEC)? - PAGE_COPY_EXEC: PAGE_COPY; - if ((ret = insert_vm_struct(current->mm, mpnt))) { - up_write(¤t->mm->mmap_sem); - kmem_cache_free(vm_area_cachep, mpnt); - return ret; - } - current->mm->stack_vm = current->mm->total_vm = vma_pages(mpnt); + int ret; + + ret = setup_arg_pages(bprm, IA32_STACK_TOP, executable_stack); + if (!ret) { + /* + * Can't do it in ia64_elf32_init(). Needs to be done before + * calls to elf32_map() + */ + current->thread.ppl = ia32_init_pp_list(); } - for (i = 0 ; i < MAX_ARG_PAGES ; i++) { - struct page *page = bprm->page[i]; - if (page) { - bprm->page[i] = NULL; - install_arg_page(mpnt, page, stack_base); - } - stack_base += PAGE_SIZE; - } - up_write(¤t->mm->mmap_sem); - - /* Can't do it in ia64_elf32_init(). Needs to be done before calls to - elf32_map() */ - current->thread.ppl = ia32_init_pp_list(); - - return 0; + return ret; } static void diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index 95f517515235..c36f43c94600 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -1581,7 +1581,7 @@ sys_call_table: data8 sys_sync_file_range // 1300 data8 sys_tee data8 sys_vmsplice - data8 sys_ni_syscall // reserved for move_pages + data8 sys_fallocate data8 sys_getcpu data8 sys_epoll_pwait // 1305 data8 sys_utimensat diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c index 37f46527d233..91e6dc1e7baf 100644 --- a/arch/ia64/kernel/iosapic.c +++ b/arch/ia64/kernel/iosapic.c @@ -118,15 +118,25 @@ static DEFINE_SPINLOCK(iosapic_lock); * vector. */ -struct iosapic_rte_info { - struct list_head rte_list; /* node in list of RTEs sharing the - * same vector */ +#define NO_REF_RTE 0 + +static struct iosapic { char __iomem *addr; /* base address of IOSAPIC */ - unsigned int gsi_base; /* first GSI assigned to this - * IOSAPIC */ + unsigned int gsi_base; /* GSI base */ + unsigned short num_rte; /* # of RTEs on this IOSAPIC */ + int rtes_inuse; /* # of RTEs in use on this IOSAPIC */ +#ifdef CONFIG_NUMA + unsigned short node; /* numa node association via pxm */ +#endif + spinlock_t lock; /* lock for indirect reg access */ +} iosapic_lists[NR_IOSAPICS]; + +struct iosapic_rte_info { + struct list_head rte_list; /* RTEs sharing the same vector */ char rte_index; /* IOSAPIC RTE index */ int refcnt; /* reference counter */ unsigned int flags; /* flags */ + struct iosapic *iosapic; } ____cacheline_aligned; static struct iosapic_intr_info { @@ -140,24 +150,23 @@ static struct iosapic_intr_info { unsigned char polarity: 1; /* interrupt polarity * (see iosapic.h) */ unsigned char trigger : 1; /* trigger mode (see iosapic.h) */ -} iosapic_intr_info[IA64_NUM_VECTORS]; - -static struct iosapic { - char __iomem *addr; /* base address of IOSAPIC */ - unsigned int gsi_base; /* first GSI assigned to this - * IOSAPIC */ - unsigned short num_rte; /* # of RTEs on this IOSAPIC */ - int rtes_inuse; /* # of RTEs in use on this IOSAPIC */ -#ifdef CONFIG_NUMA - unsigned short node; /* numa node association via pxm */ -#endif -} iosapic_lists[NR_IOSAPICS]; +} iosapic_intr_info[NR_IRQS]; static unsigned char pcat_compat __devinitdata; /* 8259 compatibility flag */ static int iosapic_kmalloc_ok; static LIST_HEAD(free_rte_list); +static inline void +iosapic_write(struct iosapic *iosapic, unsigned int reg, u32 val) +{ + unsigned long flags; + + spin_lock_irqsave(&iosapic->lock, flags); + __iosapic_write(iosapic->addr, reg, val); + spin_unlock_irqrestore(&iosapic->lock, flags); +} + /* * Find an IOSAPIC associated with a GSI */ @@ -175,17 +184,18 @@ find_iosapic (unsigned int gsi) return -1; } -static inline int -_gsi_to_vector (unsigned int gsi) +static inline int __gsi_to_irq(unsigned int gsi) { + int irq; struct iosapic_intr_info *info; struct iosapic_rte_info *rte; - for (info = iosapic_intr_info; info < - iosapic_intr_info + IA64_NUM_VECTORS; ++info) + for (irq = 0; irq < NR_IRQS; irq++) { + info = &iosapic_intr_info[irq]; list_for_each_entry(rte, &info->rtes, rte_list) - if (rte->gsi_base + rte->rte_index == gsi) - return info - iosapic_intr_info; + if (rte->iosapic->gsi_base + rte->rte_index == gsi) + return irq; + } return -1; } @@ -196,7 +206,10 @@ _gsi_to_vector (unsigned int gsi) inline int gsi_to_vector (unsigned int gsi) { - return _gsi_to_vector(gsi); + int irq = __gsi_to_irq(gsi); + if (check_irq_used(irq) < 0) + return -1; + return irq_to_vector(irq); } int @@ -204,66 +217,48 @@ gsi_to_irq (unsigned int gsi) { unsigned long flags; int irq; - /* - * XXX fix me: this assumes an identity mapping between IA-64 vector - * and Linux irq numbers... - */ + spin_lock_irqsave(&iosapic_lock, flags); - { - irq = _gsi_to_vector(gsi); - } + irq = __gsi_to_irq(gsi); spin_unlock_irqrestore(&iosapic_lock, flags); - return irq; } -static struct iosapic_rte_info *gsi_vector_to_rte(unsigned int gsi, - unsigned int vec) +static struct iosapic_rte_info *find_rte(unsigned int irq, unsigned int gsi) { struct iosapic_rte_info *rte; - list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, rte_list) - if (rte->gsi_base + rte->rte_index == gsi) + list_for_each_entry(rte, &iosapic_intr_info[irq].rtes, rte_list) + if (rte->iosapic->gsi_base + rte->rte_index == gsi) return rte; return NULL; } static void -set_rte (unsigned int gsi, unsigned int vector, unsigned int dest, int mask) +set_rte (unsigned int gsi, unsigned int irq, unsigned int dest, int mask) { unsigned long pol, trigger, dmode; u32 low32, high32; - char __iomem *addr; int rte_index; char redir; struct iosapic_rte_info *rte; + ia64_vector vector = irq_to_vector(irq); DBG(KERN_DEBUG"IOSAPIC: routing vector %d to 0x%x\n", vector, dest); - rte = gsi_vector_to_rte(gsi, vector); + rte = find_rte(irq, gsi); if (!rte) return; /* not an IOSAPIC interrupt */ rte_index = rte->rte_index; - addr = rte->addr; - pol = iosapic_intr_info[vector].polarity; - trigger = iosapic_intr_info[vector].trigger; - dmode = iosapic_intr_info[vector].dmode; + pol = iosapic_intr_info[irq].polarity; + trigger = iosapic_intr_info[irq].trigger; + dmode = iosapic_intr_info[irq].dmode; redir = (dmode == IOSAPIC_LOWEST_PRIORITY) ? 1 : 0; #ifdef CONFIG_SMP - { - unsigned int irq; - - for (irq = 0; irq < NR_IRQS; ++irq) - if (irq_to_vector(irq) == vector) { - set_irq_affinity_info(irq, - (int)(dest & 0xffff), - redir); - break; - } - } + set_irq_affinity_info(irq, (int)(dest & 0xffff), redir); #endif low32 = ((pol << IOSAPIC_POLARITY_SHIFT) | @@ -275,10 +270,10 @@ set_rte (unsigned int gsi, unsigned int vector, unsigned int dest, int mask) /* dest contains both id and eid */ high32 = (dest << IOSAPIC_DEST_SHIFT); - iosapic_write(addr, IOSAPIC_RTE_HIGH(rte_index), high32); - iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32); - iosapic_intr_info[vector].low32 = low32; - iosapic_intr_info[vector].dest = dest; + iosapic_write(rte->iosapic, IOSAPIC_RTE_HIGH(rte_index), high32); + iosapic_write(rte->iosapic, IOSAPIC_RTE_LOW(rte_index), low32); + iosapic_intr_info[irq].low32 = low32; + iosapic_intr_info[irq].dest = dest; } static void @@ -294,15 +289,18 @@ kexec_disable_iosapic(void) { struct iosapic_intr_info *info; struct iosapic_rte_info *rte; - u8 vec = 0; - for (info = iosapic_intr_info; info < - iosapic_intr_info + IA64_NUM_VECTORS; ++info, ++vec) { + ia64_vector vec; + int irq; + + for (irq = 0; irq < NR_IRQS; irq++) { + info = &iosapic_intr_info[irq]; + vec = irq_to_vector(irq); list_for_each_entry(rte, &info->rtes, rte_list) { - iosapic_write(rte->addr, + iosapic_write(rte->iosapic, IOSAPIC_RTE_LOW(rte->rte_index), IOSAPIC_MASK|vec); - iosapic_eoi(rte->addr, vec); + iosapic_eoi(rte->iosapic->addr, vec); } } } @@ -311,54 +309,36 @@ kexec_disable_iosapic(void) static void mask_irq (unsigned int irq) { - unsigned long flags; - char __iomem *addr; u32 low32; int rte_index; - ia64_vector vec = irq_to_vector(irq); struct iosapic_rte_info *rte; - if (list_empty(&iosapic_intr_info[vec].rtes)) + if (list_empty(&iosapic_intr_info[irq].rtes)) return; /* not an IOSAPIC interrupt! */ - spin_lock_irqsave(&iosapic_lock, flags); - { - /* set only the mask bit */ - low32 = iosapic_intr_info[vec].low32 |= IOSAPIC_MASK; - list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, - rte_list) { - addr = rte->addr; - rte_index = rte->rte_index; - iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32); - } + /* set only the mask bit */ + low32 = iosapic_intr_info[irq].low32 |= IOSAPIC_MASK; + list_for_each_entry(rte, &iosapic_intr_info[irq].rtes, rte_list) { + rte_index = rte->rte_index; + iosapic_write(rte->iosapic, IOSAPIC_RTE_LOW(rte_index), low32); } - spin_unlock_irqrestore(&iosapic_lock, flags); } static void unmask_irq (unsigned int irq) { - unsigned long flags; - char __iomem *addr; u32 low32; int rte_index; - ia64_vector vec = irq_to_vector(irq); struct iosapic_rte_info *rte; - if (list_empty(&iosapic_intr_info[vec].rtes)) + if (list_empty(&iosapic_intr_info[irq].rtes)) return; /* not an IOSAPIC interrupt! */ - spin_lock_irqsave(&iosapic_lock, flags); - { - low32 = iosapic_intr_info[vec].low32 &= ~IOSAPIC_MASK; - list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, - rte_list) { - addr = rte->addr; - rte_index = rte->rte_index; - iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32); - } + low32 = iosapic_intr_info[irq].low32 &= ~IOSAPIC_MASK; + list_for_each_entry(rte, &iosapic_intr_info[irq].rtes, rte_list) { + rte_index = rte->rte_index; + iosapic_write(rte->iosapic, IOSAPIC_RTE_LOW(rte_index), low32); } - spin_unlock_irqrestore(&iosapic_lock, flags); } @@ -366,23 +346,24 @@ static void iosapic_set_affinity (unsigned int irq, cpumask_t mask) { #ifdef CONFIG_SMP - unsigned long flags; u32 high32, low32; int dest, rte_index; - char __iomem *addr; int redir = (irq & IA64_IRQ_REDIRECTED) ? 1 : 0; - ia64_vector vec; struct iosapic_rte_info *rte; + struct iosapic *iosapic; irq &= (~IA64_IRQ_REDIRECTED); - vec = irq_to_vector(irq); + cpus_and(mask, mask, cpu_online_map); if (cpus_empty(mask)) return; + if (reassign_irq_vector(irq, first_cpu(mask))) + return; + dest = cpu_physical_id(first_cpu(mask)); - if (list_empty(&iosapic_intr_info[vec].rtes)) + if (list_empty(&iosapic_intr_info[irq].rtes)) return; /* not an IOSAPIC interrupt */ set_irq_affinity_info(irq, dest, redir); @@ -390,31 +371,24 @@ iosapic_set_affinity (unsigned int irq, cpumask_t mask) /* dest contains both id and eid */ high32 = dest << IOSAPIC_DEST_SHIFT; - spin_lock_irqsave(&iosapic_lock, flags); - { - low32 = iosapic_intr_info[vec].low32 & - ~(7 << IOSAPIC_DELIVERY_SHIFT); - - if (redir) - /* change delivery mode to lowest priority */ - low32 |= (IOSAPIC_LOWEST_PRIORITY << - IOSAPIC_DELIVERY_SHIFT); - else - /* change delivery mode to fixed */ - low32 |= (IOSAPIC_FIXED << IOSAPIC_DELIVERY_SHIFT); - - iosapic_intr_info[vec].low32 = low32; - iosapic_intr_info[vec].dest = dest; - list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, - rte_list) { - addr = rte->addr; - rte_index = rte->rte_index; - iosapic_write(addr, IOSAPIC_RTE_HIGH(rte_index), - high32); - iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32); - } + low32 = iosapic_intr_info[irq].low32 & ~(7 << IOSAPIC_DELIVERY_SHIFT); + if (redir) + /* change delivery mode to lowest priority */ + low32 |= (IOSAPIC_LOWEST_PRIORITY << IOSAPIC_DELIVERY_SHIFT); + else + /* change delivery mode to fixed */ + low32 |= (IOSAPIC_FIXED << IOSAPIC_DELIVERY_SHIFT); + low32 &= IOSAPIC_VECTOR_MASK; + low32 |= irq_to_vector(irq); + + iosapic_intr_info[irq].low32 = low32; + iosapic_intr_info[irq].dest = dest; + list_for_each_entry(rte, &iosapic_intr_info[irq].rtes, rte_list) { + iosapic = rte->iosapic; + rte_index = rte->rte_index; + iosapic_write(iosapic, IOSAPIC_RTE_HIGH(rte_index), high32); + iosapic_write(iosapic, IOSAPIC_RTE_LOW(rte_index), low32); } - spin_unlock_irqrestore(&iosapic_lock, flags); #endif } @@ -434,10 +408,20 @@ iosapic_end_level_irq (unsigned int irq) { ia64_vector vec = irq_to_vector(irq); struct iosapic_rte_info *rte; + int do_unmask_irq = 0; - move_native_irq(irq); - list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, rte_list) - iosapic_eoi(rte->addr, vec); + if (unlikely(irq_desc[irq].status & IRQ_MOVE_PENDING)) { + do_unmask_irq = 1; + mask_irq(irq); + } + + list_for_each_entry(rte, &iosapic_intr_info[irq].rtes, rte_list) + iosapic_eoi(rte->iosapic->addr, vec); + + if (unlikely(do_unmask_irq)) { + move_masked_irq(irq); + unmask_irq(irq); + } } #define iosapic_shutdown_level_irq mask_irq @@ -519,13 +503,12 @@ iosapic_version (char __iomem *addr) * unsigned int reserved2 : 8; * } */ - return iosapic_read(addr, IOSAPIC_VERSION); + return __iosapic_read(addr, IOSAPIC_VERSION); } -static int iosapic_find_sharable_vector (unsigned long trigger, - unsigned long pol) +static int iosapic_find_sharable_irq(unsigned long trigger, unsigned long pol) { - int i, vector = -1, min_count = -1; + int i, irq = -ENOSPC, min_count = -1; struct iosapic_intr_info *info; /* @@ -533,21 +516,21 @@ static int iosapic_find_sharable_vector (unsigned long trigger, * supported yet */ if (trigger == IOSAPIC_EDGE) - return -1; + return -EINVAL; - for (i = IA64_FIRST_DEVICE_VECTOR; i <= IA64_LAST_DEVICE_VECTOR; i++) { + for (i = 0; i <= NR_IRQS; i++) { info = &iosapic_intr_info[i]; if (info->trigger == trigger && info->polarity == pol && - (info->dmode == IOSAPIC_FIXED || info->dmode == - IOSAPIC_LOWEST_PRIORITY)) { + (info->dmode == IOSAPIC_FIXED || + info->dmode == IOSAPIC_LOWEST_PRIORITY) && + can_request_irq(i, IRQF_SHARED)) { if (min_count == -1 || info->count < min_count) { - vector = i; + irq = i; min_count = info->count; } } } - - return vector; + return irq; } /* @@ -555,25 +538,25 @@ static int iosapic_find_sharable_vector (unsigned long trigger, * assign a new vector for the other and make the vector available */ static void __init -iosapic_reassign_vector (int vector) +iosapic_reassign_vector (int irq) { - int new_vector; + int new_irq; - if (!list_empty(&iosapic_intr_info[vector].rtes)) { - new_vector = assign_irq_vector(AUTO_ASSIGN); - if (new_vector < 0) + if (!list_empty(&iosapic_intr_info[irq].rtes)) { + new_irq = create_irq(); + if (new_irq < 0) panic("%s: out of interrupt vectors!\n", __FUNCTION__); printk(KERN_INFO "Reassigning vector %d to %d\n", - vector, new_vector); - memcpy(&iosapic_intr_info[new_vector], &iosapic_intr_info[vector], + irq_to_vector(irq), irq_to_vector(new_irq)); + memcpy(&iosapic_intr_info[new_irq], &iosapic_intr_info[irq], sizeof(struct iosapic_intr_info)); - INIT_LIST_HEAD(&iosapic_intr_info[new_vector].rtes); - list_move(iosapic_intr_info[vector].rtes.next, - &iosapic_intr_info[new_vector].rtes); - memset(&iosapic_intr_info[vector], 0, + INIT_LIST_HEAD(&iosapic_intr_info[new_irq].rtes); + list_move(iosapic_intr_info[irq].rtes.next, + &iosapic_intr_info[new_irq].rtes); + memset(&iosapic_intr_info[irq], 0, sizeof(struct iosapic_intr_info)); - iosapic_intr_info[vector].low32 = IOSAPIC_MASK; - INIT_LIST_HEAD(&iosapic_intr_info[vector].rtes); + iosapic_intr_info[irq].low32 = IOSAPIC_MASK; + INIT_LIST_HEAD(&iosapic_intr_info[irq].rtes); } } @@ -610,29 +593,18 @@ static struct iosapic_rte_info *iosapic_alloc_rte (void) return rte; } -static void iosapic_free_rte (struct iosapic_rte_info *rte) +static inline int irq_is_shared (int irq) { - if (rte->flags & RTE_PREALLOCATED) - list_add_tail(&rte->rte_list, &free_rte_list); - else - kfree(rte); -} - -static inline int vector_is_shared (int vector) -{ - return (iosapic_intr_info[vector].count > 1); + return (iosapic_intr_info[irq].count > 1); } static int -register_intr (unsigned int gsi, int vector, unsigned char delivery, +register_intr (unsigned int gsi, int irq, unsigned char delivery, unsigned long polarity, unsigned long trigger) { irq_desc_t *idesc; struct hw_interrupt_type *irq_type; - int rte_index; int index; - unsigned long gsi_base; - void __iomem *iosapic_address; struct iosapic_rte_info *rte; index = find_iosapic(gsi); @@ -642,10 +614,7 @@ register_intr (unsigned int gsi, int vector, unsigned char delivery, return -ENODEV; } - iosapic_address = iosapic_lists[index].addr; - gsi_base = iosapic_lists[index].gsi_base; - - rte = gsi_vector_to_rte(gsi, vector); + rte = find_rte(irq, gsi); if (!rte) { rte = iosapic_alloc_rte(); if (!rte) { @@ -654,40 +623,42 @@ register_intr (unsigned int gsi, int vector, unsigned char delivery, return -ENOMEM; } - rte_index = gsi - gsi_base; - rte->rte_index = rte_index; - rte->addr = iosapic_address; - rte->gsi_base = gsi_base; + rte->iosapic = &iosapic_lists[index]; + rte->rte_index = gsi - rte->iosapic->gsi_base; rte->refcnt++; - list_add_tail(&rte->rte_list, &iosapic_intr_info[vector].rtes); - iosapic_intr_info[vector].count++; + list_add_tail(&rte->rte_list, &iosapic_intr_info[irq].rtes); + iosapic_intr_info[irq].count++; iosapic_lists[index].rtes_inuse++; } - else if (vector_is_shared(vector)) { - struct iosapic_intr_info *info = &iosapic_intr_info[vector]; - if (info->trigger != trigger || info->polarity != polarity) { + else if (rte->refcnt == NO_REF_RTE) { + struct iosapic_intr_info *info = &iosapic_intr_info[irq]; + if (info->count > 0 && + (info->trigger != trigger || info->polarity != polarity)){ printk (KERN_WARNING "%s: cannot override the interrupt\n", __FUNCTION__); return -EINVAL; } + rte->refcnt++; + iosapic_intr_info[irq].count++; + iosapic_lists[index].rtes_inuse++; } - iosapic_intr_info[vector].polarity = polarity; - iosapic_intr_info[vector].dmode = delivery; - iosapic_intr_info[vector].trigger = trigger; + iosapic_intr_info[irq].polarity = polarity; + iosapic_intr_info[irq].dmode = delivery; + iosapic_intr_info[irq].trigger = trigger; if (trigger == IOSAPIC_EDGE) irq_type = &irq_type_iosapic_edge; else irq_type = &irq_type_iosapic_level; - idesc = irq_desc + vector; + idesc = irq_desc + irq; if (idesc->chip != irq_type) { if (idesc->chip != &no_irq_type) printk(KERN_WARNING "%s: changing vector %d from %s to %s\n", - __FUNCTION__, vector, + __FUNCTION__, irq_to_vector(irq), idesc->chip->name, irq_type->name); idesc->chip = irq_type; } @@ -695,18 +666,19 @@ register_intr (unsigned int gsi, int vector, unsigned char delivery, } static unsigned int -get_target_cpu (unsigned int gsi, int vector) +get_target_cpu (unsigned int gsi, int irq) { #ifdef CONFIG_SMP static int cpu = -1; extern int cpe_vector; + cpumask_t domain = irq_to_domain(irq); /* * In case of vector shared by multiple RTEs, all RTEs that * share the vector need to use the same destination CPU. */ - if (!list_empty(&iosapic_intr_info[vector].rtes)) - return iosapic_intr_info[vector].dest; + if (!list_empty(&iosapic_intr_info[irq].rtes)) + return iosapic_intr_info[irq].dest; /* * If the platform supports redirection via XTP, let it @@ -723,7 +695,7 @@ get_target_cpu (unsigned int gsi, int vector) return cpu_physical_id(smp_processor_id()); #ifdef CONFIG_ACPI - if (cpe_vector > 0 && vector == IA64_CPEP_VECTOR) + if (cpe_vector > 0 && irq_to_vector(irq) == IA64_CPEP_VECTOR) return get_cpei_target_cpu(); #endif @@ -738,7 +710,7 @@ get_target_cpu (unsigned int gsi, int vector) goto skip_numa_setup; cpu_mask = node_to_cpumask(iosapic_lists[iosapic_index].node); - + cpus_and(cpu_mask, cpu_mask, domain); for_each_cpu_mask(numa_cpu, cpu_mask) { if (!cpu_online(numa_cpu)) cpu_clear(numa_cpu, cpu_mask); @@ -749,8 +721,8 @@ get_target_cpu (unsigned int gsi, int vector) if (!num_cpus) goto skip_numa_setup; - /* Use vector assignment to distribute across cpus in node */ - cpu_index = vector % num_cpus; + /* Use irq assignment to distribute across cpus in node */ + cpu_index = irq % num_cpus; for (numa_cpu = first_cpu(cpu_mask) ; i < cpu_index ; i++) numa_cpu = next_cpu(numa_cpu, cpu_mask); @@ -768,7 +740,7 @@ skip_numa_setup: do { if (++cpu >= NR_CPUS) cpu = 0; - } while (!cpu_online(cpu)); + } while (!cpu_online(cpu) || !cpu_isset(cpu, domain)); return cpu_physical_id(cpu); #else /* CONFIG_SMP */ @@ -785,84 +757,72 @@ int iosapic_register_intr (unsigned int gsi, unsigned long polarity, unsigned long trigger) { - int vector, mask = 1, err; + int irq, mask = 1, err; unsigned int dest; unsigned long flags; struct iosapic_rte_info *rte; u32 low32; -again: + /* * If this GSI has already been registered (i.e., it's a * shared interrupt, or we lost a race to register it), * don't touch the RTE. */ spin_lock_irqsave(&iosapic_lock, flags); - { - vector = gsi_to_vector(gsi); - if (vector > 0) { - rte = gsi_vector_to_rte(gsi, vector); + irq = __gsi_to_irq(gsi); + if (irq > 0) { + rte = find_rte(irq, gsi); + if(iosapic_intr_info[irq].count == 0) { + assign_irq_vector(irq); + dynamic_irq_init(irq); + } else if (rte->refcnt != NO_REF_RTE) { rte->refcnt++; - spin_unlock_irqrestore(&iosapic_lock, flags); - return vector; + goto unlock_iosapic_lock; } - } - spin_unlock_irqrestore(&iosapic_lock, flags); + } else + irq = create_irq(); /* If vector is running out, we try to find a sharable vector */ - vector = assign_irq_vector(AUTO_ASSIGN); - if (vector < 0) { - vector = iosapic_find_sharable_vector(trigger, polarity); - if (vector < 0) - return -ENOSPC; + if (irq < 0) { + irq = iosapic_find_sharable_irq(trigger, polarity); + if (irq < 0) + goto unlock_iosapic_lock; } - spin_lock_irqsave(&irq_desc[vector].lock, flags); - spin_lock(&iosapic_lock); - { - if (gsi_to_vector(gsi) > 0) { - if (list_empty(&iosapic_intr_info[vector].rtes)) - free_irq_vector(vector); - spin_unlock(&iosapic_lock); - spin_unlock_irqrestore(&irq_desc[vector].lock, - flags); - goto again; - } - - dest = get_target_cpu(gsi, vector); - err = register_intr(gsi, vector, IOSAPIC_LOWEST_PRIORITY, - polarity, trigger); - if (err < 0) { - spin_unlock(&iosapic_lock); - spin_unlock_irqrestore(&irq_desc[vector].lock, - flags); - return err; - } - - /* - * If the vector is shared and already unmasked for - * other interrupt sources, don't mask it. - */ - low32 = iosapic_intr_info[vector].low32; - if (vector_is_shared(vector) && !(low32 & IOSAPIC_MASK)) - mask = 0; - set_rte(gsi, vector, dest, mask); + spin_lock(&irq_desc[irq].lock); + dest = get_target_cpu(gsi, irq); + err = register_intr(gsi, irq, IOSAPIC_LOWEST_PRIORITY, + polarity, trigger); + if (err < 0) { + irq = err; + goto unlock_all; } - spin_unlock(&iosapic_lock); - spin_unlock_irqrestore(&irq_desc[vector].lock, flags); + + /* + * If the vector is shared and already unmasked for other + * interrupt sources, don't mask it. + */ + low32 = iosapic_intr_info[irq].low32; + if (irq_is_shared(irq) && !(low32 & IOSAPIC_MASK)) + mask = 0; + set_rte(gsi, irq, dest, mask); printk(KERN_INFO "GSI %u (%s, %s) -> CPU %d (0x%04x) vector %d\n", gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"), (polarity == IOSAPIC_POL_HIGH ? "high" : "low"), - cpu_logical_id(dest), dest, vector); - - return vector; + cpu_logical_id(dest), dest, irq_to_vector(irq)); + unlock_all: + spin_unlock(&irq_desc[irq].lock); + unlock_iosapic_lock: + spin_unlock_irqrestore(&iosapic_lock, flags); + return irq; } void iosapic_unregister_intr (unsigned int gsi) { unsigned long flags; - int irq, vector, index; + int irq, index; irq_desc_t *idesc; u32 low32; unsigned long trigger, polarity; @@ -881,78 +841,56 @@ iosapic_unregister_intr (unsigned int gsi) WARN_ON(1); return; } - vector = irq_to_vector(irq); - idesc = irq_desc + irq; - spin_lock_irqsave(&idesc->lock, flags); - spin_lock(&iosapic_lock); - { - if ((rte = gsi_vector_to_rte(gsi, vector)) == NULL) { - printk(KERN_ERR - "iosapic_unregister_intr(%u) unbalanced\n", - gsi); - WARN_ON(1); - goto out; - } + spin_lock_irqsave(&iosapic_lock, flags); + if ((rte = find_rte(irq, gsi)) == NULL) { + printk(KERN_ERR "iosapic_unregister_intr(%u) unbalanced\n", + gsi); + WARN_ON(1); + goto out; + } - if (--rte->refcnt > 0) - goto out; + if (--rte->refcnt > 0) + goto out; - /* Mask the interrupt */ - low32 = iosapic_intr_info[vector].low32 | IOSAPIC_MASK; - iosapic_write(rte->addr, IOSAPIC_RTE_LOW(rte->rte_index), - low32); + idesc = irq_desc + irq; + rte->refcnt = NO_REF_RTE; - /* Remove the rte entry from the list */ - list_del(&rte->rte_list); - iosapic_intr_info[vector].count--; - iosapic_free_rte(rte); - index = find_iosapic(gsi); - iosapic_lists[index].rtes_inuse--; - WARN_ON(iosapic_lists[index].rtes_inuse < 0); - - trigger = iosapic_intr_info[vector].trigger; - polarity = iosapic_intr_info[vector].polarity; - dest = iosapic_intr_info[vector].dest; - printk(KERN_INFO - "GSI %u (%s, %s) -> CPU %d (0x%04x)" - " vector %d unregistered\n", - gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"), - (polarity == IOSAPIC_POL_HIGH ? "high" : "low"), - cpu_logical_id(dest), dest, vector); + /* Mask the interrupt */ + low32 = iosapic_intr_info[irq].low32 | IOSAPIC_MASK; + iosapic_write(rte->iosapic, IOSAPIC_RTE_LOW(rte->rte_index), low32); - if (list_empty(&iosapic_intr_info[vector].rtes)) { - /* Sanity check */ - BUG_ON(iosapic_intr_info[vector].count); + iosapic_intr_info[irq].count--; + index = find_iosapic(gsi); + iosapic_lists[index].rtes_inuse--; + WARN_ON(iosapic_lists[index].rtes_inuse < 0); - /* Clear the interrupt controller descriptor */ - idesc->chip = &no_irq_type; + trigger = iosapic_intr_info[irq].trigger; + polarity = iosapic_intr_info[irq].polarity; + dest = iosapic_intr_info[irq].dest; + printk(KERN_INFO + "GSI %u (%s, %s) -> CPU %d (0x%04x) vector %d unregistered\n", + gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"), + (polarity == IOSAPIC_POL_HIGH ? "high" : "low"), + cpu_logical_id(dest), dest, irq_to_vector(irq)); + if (iosapic_intr_info[irq].count == 0) { #ifdef CONFIG_SMP - /* Clear affinity */ - cpus_setall(idesc->affinity); + /* Clear affinity */ + cpus_setall(idesc->affinity); #endif - - /* Clear the interrupt information */ - memset(&iosapic_intr_info[vector], 0, - sizeof(struct iosapic_intr_info)); - iosapic_intr_info[vector].low32 |= IOSAPIC_MASK; - INIT_LIST_HEAD(&iosapic_intr_info[vector].rtes); - - if (idesc->action) { - printk(KERN_ERR - "interrupt handlers still exist on" - "IRQ %u\n", irq); - WARN_ON(1); - } - - /* Free the interrupt vector */ - free_irq_vector(vector); - } + /* Clear the interrupt information */ + iosapic_intr_info[irq].dest = 0; + iosapic_intr_info[irq].dmode = 0; + iosapic_intr_info[irq].polarity = 0; + iosapic_intr_info[irq].trigger = 0; + iosapic_intr_info[irq].low32 |= IOSAPIC_MASK; + + /* Destroy and reserve IRQ */ + destroy_and_reserve_irq(irq); } out: - spin_unlock(&iosapic_lock); - spin_unlock_irqrestore(&idesc->lock, flags); + spin_unlock_irqrestore(&iosapic_lock, flags); } /* @@ -965,27 +903,30 @@ iosapic_register_platform_intr (u32 int_type, unsigned int gsi, { static const char * const name[] = {"unknown", "PMI", "INIT", "CPEI"}; unsigned char delivery; - int vector, mask = 0; + int irq, vector, mask = 0; unsigned int dest = ((id << 8) | eid) & 0xffff; switch (int_type) { case ACPI_INTERRUPT_PMI: - vector = iosapic_vector; + irq = vector = iosapic_vector; + bind_irq_vector(irq, vector, CPU_MASK_ALL); /* * since PMI vector is alloc'd by FW(ACPI) not by kernel, * we need to make sure the vector is available */ - iosapic_reassign_vector(vector); + iosapic_reassign_vector(irq); delivery = IOSAPIC_PMI; break; case ACPI_INTERRUPT_INIT: - vector = assign_irq_vector(AUTO_ASSIGN); - if (vector < 0) + irq = create_irq(); + if (irq < 0) panic("%s: out of interrupt vectors!\n", __FUNCTION__); + vector = irq_to_vector(irq); delivery = IOSAPIC_INIT; break; case ACPI_INTERRUPT_CPEI: - vector = IA64_CPE_VECTOR; + irq = vector = IA64_CPE_VECTOR; + BUG_ON(bind_irq_vector(irq, vector, CPU_MASK_ALL)); delivery = IOSAPIC_LOWEST_PRIORITY; mask = 1; break; @@ -995,7 +936,7 @@ iosapic_register_platform_intr (u32 int_type, unsigned int gsi, return -1; } - register_intr(gsi, vector, delivery, polarity, trigger); + register_intr(gsi, irq, delivery, polarity, trigger); printk(KERN_INFO "PLATFORM int %s (0x%x): GSI %u (%s, %s) -> CPU %d (0x%04x)" @@ -1005,7 +946,7 @@ iosapic_register_platform_intr (u32 int_type, unsigned int gsi, (polarity == IOSAPIC_POL_HIGH ? "high" : "low"), cpu_logical_id(dest), dest, vector); - set_rte(gsi, vector, dest, mask); + set_rte(gsi, irq, dest, mask); return vector; } @@ -1017,30 +958,32 @@ iosapic_override_isa_irq (unsigned int isa_irq, unsigned int gsi, unsigned long polarity, unsigned long trigger) { - int vector; + int vector, irq; unsigned int dest = cpu_physical_id(smp_processor_id()); - vector = isa_irq_to_vector(isa_irq); - - register_intr(gsi, vector, IOSAPIC_LOWEST_PRIORITY, polarity, trigger); + irq = vector = isa_irq_to_vector(isa_irq); + BUG_ON(bind_irq_vector(irq, vector, CPU_MASK_ALL)); + register_intr(gsi, irq, IOSAPIC_LOWEST_PRIORITY, polarity, trigger); DBG("ISA: IRQ %u -> GSI %u (%s,%s) -> CPU %d (0x%04x) vector %d\n", isa_irq, gsi, trigger == IOSAPIC_EDGE ? "edge" : "level", polarity == IOSAPIC_POL_HIGH ? "high" : "low", cpu_logical_id(dest), dest, vector); - set_rte(gsi, vector, dest, 1); + set_rte(gsi, irq, dest, 1); } void __init iosapic_system_init (int system_pcat_compat) { - int vector; + int irq; - for (vector = 0; vector < IA64_NUM_VECTORS; ++vector) { - iosapic_intr_info[vector].low32 = IOSAPIC_MASK; + for (irq = 0; irq < NR_IRQS; ++irq) { + iosapic_intr_info[irq].low32 = IOSAPIC_MASK; /* mark as unused */ - INIT_LIST_HEAD(&iosapic_intr_info[vector].rtes); + INIT_LIST_HEAD(&iosapic_intr_info[irq].rtes); + + iosapic_intr_info[irq].count = 0; } pcat_compat = system_pcat_compat; @@ -1108,31 +1051,35 @@ iosapic_init (unsigned long phys_addr, unsigned int gsi_base) unsigned long flags; spin_lock_irqsave(&iosapic_lock, flags); - { - addr = ioremap(phys_addr, 0); - ver = iosapic_version(addr); + index = find_iosapic(gsi_base); + if (index >= 0) { + spin_unlock_irqrestore(&iosapic_lock, flags); + return -EBUSY; + } - if ((err = iosapic_check_gsi_range(gsi_base, ver))) { - iounmap(addr); - spin_unlock_irqrestore(&iosapic_lock, flags); - return err; - } + addr = ioremap(phys_addr, 0); + ver = iosapic_version(addr); + if ((err = iosapic_check_gsi_range(gsi_base, ver))) { + iounmap(addr); + spin_unlock_irqrestore(&iosapic_lock, flags); + return err; + } - /* - * The MAX_REDIR register holds the highest input pin - * number (starting from 0). - * We add 1 so that we can use it for number of pins (= RTEs) - */ - num_rte = ((ver >> 16) & 0xff) + 1; + /* + * The MAX_REDIR register holds the highest input pin number + * (starting from 0). We add 1 so that we can use it for + * number of pins (= RTEs) + */ + num_rte = ((ver >> 16) & 0xff) + 1; - index = iosapic_alloc(); - iosapic_lists[index].addr = addr; - iosapic_lists[index].gsi_base = gsi_base; - iosapic_lists[index].num_rte = num_rte; + index = iosapic_alloc(); + iosapic_lists[index].addr = addr; + iosapic_lists[index].gsi_base = gsi_base; + iosapic_lists[index].num_rte = num_rte; #ifdef CONFIG_NUMA - iosapic_lists[index].node = MAX_NUMNODES; + iosapic_lists[index].node = MAX_NUMNODES; #endif - } + spin_lock_init(&iosapic_lists[index].lock); spin_unlock_irqrestore(&iosapic_lock, flags); if ((gsi_base == 0) && pcat_compat) { @@ -1157,25 +1104,22 @@ iosapic_remove (unsigned int gsi_base) unsigned long flags; spin_lock_irqsave(&iosapic_lock, flags); - { - index = find_iosapic(gsi_base); - if (index < 0) { - printk(KERN_WARNING "%s: No IOSAPIC for GSI base %u\n", - __FUNCTION__, gsi_base); - goto out; - } - - if (iosapic_lists[index].rtes_inuse) { - err = -EBUSY; - printk(KERN_WARNING - "%s: IOSAPIC for GSI base %u is busy\n", - __FUNCTION__, gsi_base); - goto out; - } + index = find_iosapic(gsi_base); + if (index < 0) { + printk(KERN_WARNING "%s: No IOSAPIC for GSI base %u\n", + __FUNCTION__, gsi_base); + goto out; + } - iounmap(iosapic_lists[index].addr); - iosapic_free(index); + if (iosapic_lists[index].rtes_inuse) { + err = -EBUSY; + printk(KERN_WARNING "%s: IOSAPIC for GSI base %u is busy\n", + __FUNCTION__, gsi_base); + goto out; } + + iounmap(iosapic_lists[index].addr); + iosapic_free(index); out: spin_unlock_irqrestore(&iosapic_lock, flags); return err; diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c index 407b45870489..cc3ee4ef37af 100644 --- a/arch/ia64/kernel/irq.c +++ b/arch/ia64/kernel/irq.c @@ -35,7 +35,7 @@ void ack_bad_irq(unsigned int irq) #ifdef CONFIG_IA64_GENERIC unsigned int __ia64_local_vector_to_irq (ia64_vector vec) { - return (unsigned int) vec; + return __get_cpu_var(vector_irq)[vec]; } #endif diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c index bc47049f060f..91797c111162 100644 --- a/arch/ia64/kernel/irq_ia64.c +++ b/arch/ia64/kernel/irq_ia64.c @@ -46,6 +46,12 @@ #define IRQ_DEBUG 0 +#define IRQ_VECTOR_UNASSIGNED (0) + +#define IRQ_UNUSED (0) +#define IRQ_USED (1) +#define IRQ_RSVD (2) + /* These can be overridden in platform_irq_init */ int ia64_first_device_vector = IA64_DEF_FIRST_DEVICE_VECTOR; int ia64_last_device_vector = IA64_DEF_LAST_DEVICE_VECTOR; @@ -54,6 +60,8 @@ int ia64_last_device_vector = IA64_DEF_LAST_DEVICE_VECTOR; void __iomem *ipi_base_addr = ((void __iomem *) (__IA64_UNCACHED_OFFSET | IA64_IPI_DEFAULT_BASE_ADDR)); +static cpumask_t vector_allocation_domain(int cpu); + /* * Legacy IRQ to IA-64 vector translation table. */ @@ -64,46 +72,269 @@ __u8 isa_irq_to_vector_map[16] = { }; EXPORT_SYMBOL(isa_irq_to_vector_map); -static unsigned long ia64_vector_mask[BITS_TO_LONGS(IA64_MAX_DEVICE_VECTORS)]; +DEFINE_SPINLOCK(vector_lock); + +struct irq_cfg irq_cfg[NR_IRQS] __read_mostly = { + [0 ... NR_IRQS - 1] = { + .vector = IRQ_VECTOR_UNASSIGNED, + .domain = CPU_MASK_NONE + } +}; + +DEFINE_PER_CPU(int[IA64_NUM_VECTORS], vector_irq) = { + [0 ... IA64_NUM_VECTORS - 1] = IA64_SPURIOUS_INT_VECTOR +}; + +static cpumask_t vector_table[IA64_MAX_DEVICE_VECTORS] = { + [0 ... IA64_MAX_DEVICE_VECTORS - 1] = CPU_MASK_NONE +}; + +static int irq_status[NR_IRQS] = { + [0 ... NR_IRQS -1] = IRQ_UNUSED +}; + +int check_irq_used(int irq) +{ + if (irq_status[irq] == IRQ_USED) + return 1; + + return -1; +} + +static void reserve_irq(unsigned int irq) +{ + unsigned long flags; + + spin_lock_irqsave(&vector_lock, flags); + irq_status[irq] = IRQ_RSVD; + spin_unlock_irqrestore(&vector_lock, flags); +} + +static inline int find_unassigned_irq(void) +{ + int irq; + + for (irq = IA64_FIRST_DEVICE_VECTOR; irq < NR_IRQS; irq++) + if (irq_status[irq] == IRQ_UNUSED) + return irq; + return -ENOSPC; +} + +static inline int find_unassigned_vector(cpumask_t domain) +{ + cpumask_t mask; + int pos; + + cpus_and(mask, domain, cpu_online_map); + if (cpus_empty(mask)) + return -EINVAL; + + for (pos = 0; pos < IA64_NUM_DEVICE_VECTORS; pos++) { + cpus_and(mask, domain, vector_table[pos]); + if (!cpus_empty(mask)) + continue; + return IA64_FIRST_DEVICE_VECTOR + pos; + } + return -ENOSPC; +} + +static int __bind_irq_vector(int irq, int vector, cpumask_t domain) +{ + cpumask_t mask; + int cpu, pos; + struct irq_cfg *cfg = &irq_cfg[irq]; + + cpus_and(mask, domain, cpu_online_map); + if (cpus_empty(mask)) + return -EINVAL; + if ((cfg->vector == vector) && cpus_equal(cfg->domain, domain)) + return 0; + if (cfg->vector != IRQ_VECTOR_UNASSIGNED) + return -EBUSY; + for_each_cpu_mask(cpu, mask) + per_cpu(vector_irq, cpu)[vector] = irq; + cfg->vector = vector; + cfg->domain = domain; + irq_status[irq] = IRQ_USED; + pos = vector - IA64_FIRST_DEVICE_VECTOR; + cpus_or(vector_table[pos], vector_table[pos], domain); + return 0; +} + +int bind_irq_vector(int irq, int vector, cpumask_t domain) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&vector_lock, flags); + ret = __bind_irq_vector(irq, vector, domain); + spin_unlock_irqrestore(&vector_lock, flags); + return ret; +} + +static void __clear_irq_vector(int irq) +{ + int vector, cpu, pos; + cpumask_t mask; + cpumask_t domain; + struct irq_cfg *cfg = &irq_cfg[irq]; + + BUG_ON((unsigned)irq >= NR_IRQS); + BUG_ON(cfg->vector == IRQ_VECTOR_UNASSIGNED); + vector = cfg->vector; + domain = cfg->domain; + cpus_and(mask, cfg->domain, cpu_online_map); + for_each_cpu_mask(cpu, mask) + per_cpu(vector_irq, cpu)[vector] = IA64_SPURIOUS_INT_VECTOR; + cfg->vector = IRQ_VECTOR_UNASSIGNED; + cfg->domain = CPU_MASK_NONE; + irq_status[irq] = IRQ_UNUSED; + pos = vector - IA64_FIRST_DEVICE_VECTOR; + cpus_andnot(vector_table[pos], vector_table[pos], domain); +} + +static void clear_irq_vector(int irq) +{ + unsigned long flags; + + spin_lock_irqsave(&vector_lock, flags); + __clear_irq_vector(irq); + spin_unlock_irqrestore(&vector_lock, flags); +} int assign_irq_vector (int irq) { - int pos, vector; - again: - pos = find_first_zero_bit(ia64_vector_mask, IA64_NUM_DEVICE_VECTORS); - vector = IA64_FIRST_DEVICE_VECTOR + pos; - if (vector > IA64_LAST_DEVICE_VECTOR) - return -ENOSPC; - if (test_and_set_bit(pos, ia64_vector_mask)) - goto again; + unsigned long flags; + int vector, cpu; + cpumask_t domain; + + vector = -ENOSPC; + + spin_lock_irqsave(&vector_lock, flags); + if (irq < 0) { + goto out; + } + for_each_online_cpu(cpu) { + domain = vector_allocation_domain(cpu); + vector = find_unassigned_vector(domain); + if (vector >= 0) + break; + } + if (vector < 0) + goto out; + BUG_ON(__bind_irq_vector(irq, vector, domain)); + out: + spin_unlock_irqrestore(&vector_lock, flags); return vector; } void free_irq_vector (int vector) { - int pos; - - if (vector < IA64_FIRST_DEVICE_VECTOR || vector > IA64_LAST_DEVICE_VECTOR) + if (vector < IA64_FIRST_DEVICE_VECTOR || + vector > IA64_LAST_DEVICE_VECTOR) return; - - pos = vector - IA64_FIRST_DEVICE_VECTOR; - if (!test_and_clear_bit(pos, ia64_vector_mask)) - printk(KERN_WARNING "%s: double free!\n", __FUNCTION__); + clear_irq_vector(vector); } int reserve_irq_vector (int vector) { - int pos; - if (vector < IA64_FIRST_DEVICE_VECTOR || vector > IA64_LAST_DEVICE_VECTOR) return -EINVAL; + return !!bind_irq_vector(vector, vector, CPU_MASK_ALL); +} - pos = vector - IA64_FIRST_DEVICE_VECTOR; - return test_and_set_bit(pos, ia64_vector_mask); +/* + * Initialize vector_irq on a new cpu. This function must be called + * with vector_lock held. + */ +void __setup_vector_irq(int cpu) +{ + int irq, vector; + + /* Clear vector_irq */ + for (vector = 0; vector < IA64_NUM_VECTORS; ++vector) + per_cpu(vector_irq, cpu)[vector] = IA64_SPURIOUS_INT_VECTOR; + /* Mark the inuse vectors */ + for (irq = 0; irq < NR_IRQS; ++irq) { + if (!cpu_isset(cpu, irq_cfg[irq].domain)) + continue; + vector = irq_to_vector(irq); + per_cpu(vector_irq, cpu)[vector] = irq; + } +} + +#if defined(CONFIG_SMP) && (defined(CONFIG_IA64_GENERIC) || defined(CONFIG_IA64_DIG)) +static enum vector_domain_type { + VECTOR_DOMAIN_NONE, + VECTOR_DOMAIN_PERCPU +} vector_domain_type = VECTOR_DOMAIN_NONE; + +static cpumask_t vector_allocation_domain(int cpu) +{ + if (vector_domain_type == VECTOR_DOMAIN_PERCPU) + return cpumask_of_cpu(cpu); + return CPU_MASK_ALL; +} + +static int __init parse_vector_domain(char *arg) +{ + if (!arg) + return -EINVAL; + if (!strcmp(arg, "percpu")) { + vector_domain_type = VECTOR_DOMAIN_PERCPU; + no_int_routing = 1; + } + return 1; +} +early_param("vector", parse_vector_domain); +#else +static cpumask_t vector_allocation_domain(int cpu) +{ + return CPU_MASK_ALL; +} +#endif + + +void destroy_and_reserve_irq(unsigned int irq) +{ + dynamic_irq_cleanup(irq); + + clear_irq_vector(irq); + reserve_irq(irq); +} + +static int __reassign_irq_vector(int irq, int cpu) +{ + struct irq_cfg *cfg = &irq_cfg[irq]; + int vector; + cpumask_t domain; + + if (cfg->vector == IRQ_VECTOR_UNASSIGNED || !cpu_online(cpu)) + return -EINVAL; + if (cpu_isset(cpu, cfg->domain)) + return 0; + domain = vector_allocation_domain(cpu); + vector = find_unassigned_vector(domain); + if (vector < 0) + return -ENOSPC; + __clear_irq_vector(irq); + BUG_ON(__bind_irq_vector(irq, vector, domain)); + return 0; +} + +int reassign_irq_vector(int irq, int cpu) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&vector_lock, flags); + ret = __reassign_irq_vector(irq, cpu); + spin_unlock_irqrestore(&vector_lock, flags); + return ret; } /* @@ -111,18 +342,35 @@ reserve_irq_vector (int vector) */ int create_irq(void) { - int vector = assign_irq_vector(AUTO_ASSIGN); - - if (vector >= 0) - dynamic_irq_init(vector); - - return vector; + unsigned long flags; + int irq, vector, cpu; + cpumask_t domain; + + irq = vector = -ENOSPC; + spin_lock_irqsave(&vector_lock, flags); + for_each_online_cpu(cpu) { + domain = vector_allocation_domain(cpu); + vector = find_unassigned_vector(domain); + if (vector >= 0) + break; + } + if (vector < 0) + goto out; + irq = find_unassigned_irq(); + if (irq < 0) + goto out; + BUG_ON(__bind_irq_vector(irq, vector, domain)); + out: + spin_unlock_irqrestore(&vector_lock, flags); + if (irq >= 0) + dynamic_irq_init(irq); + return irq; } void destroy_irq(unsigned int irq) { dynamic_irq_cleanup(irq); - free_irq_vector(irq); + clear_irq_vector(irq); } #ifdef CONFIG_SMP @@ -301,14 +549,13 @@ register_percpu_irq (ia64_vector vec, struct irqaction *action) irq_desc_t *desc; unsigned int irq; - for (irq = 0; irq < NR_IRQS; ++irq) - if (irq_to_vector(irq) == vec) { - desc = irq_desc + irq; - desc->status |= IRQ_PER_CPU; - desc->chip = &irq_type_ia64_lsapic; - if (action) - setup_irq(irq, action); - } + irq = vec; + BUG_ON(bind_irq_vector(irq, vec, CPU_MASK_ALL)); + desc = irq_desc + irq; + desc->status |= IRQ_PER_CPU; + desc->chip = &irq_type_ia64_lsapic; + if (action) + setup_irq(irq, action); } void __init diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c index 5bc46f151344..5dc98b5abcfb 100644 --- a/arch/ia64/kernel/kprobes.c +++ b/arch/ia64/kernel/kprobes.c @@ -936,10 +936,15 @@ static void ia64_get_bsp_cfm(struct unw_frame_info *info, void *arg) return; } +unsigned long arch_deref_entry_point(void *entry) +{ + return ((struct fnptr *)entry)->ip; +} + int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) { struct jprobe *jp = container_of(p, struct jprobe, kp); - unsigned long addr = ((struct fnptr *)(jp->entry))->ip; + unsigned long addr = arch_deref_entry_point(jp->entry); struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); struct param_bsp_cfm pa; int bytes; diff --git a/arch/ia64/kernel/msi_ia64.c b/arch/ia64/kernel/msi_ia64.c index c81080df70df..2fdbd5c3f213 100644 --- a/arch/ia64/kernel/msi_ia64.c +++ b/arch/ia64/kernel/msi_ia64.c @@ -13,6 +13,7 @@ #define MSI_DATA_VECTOR_SHIFT 0 #define MSI_DATA_VECTOR(v) (((u8)v) << MSI_DATA_VECTOR_SHIFT) +#define MSI_DATA_VECTOR_MASK 0xffffff00 #define MSI_DATA_DELIVERY_SHIFT 8 #define MSI_DATA_DELIVERY_FIXED (0 << MSI_DATA_DELIVERY_SHIFT) @@ -50,17 +51,29 @@ static struct irq_chip ia64_msi_chip; static void ia64_set_msi_irq_affinity(unsigned int irq, cpumask_t cpu_mask) { struct msi_msg msg; - u32 addr; + u32 addr, data; + int cpu = first_cpu(cpu_mask); + + if (!cpu_online(cpu)) + return; + + if (reassign_irq_vector(irq, cpu)) + return; read_msi_msg(irq, &msg); addr = msg.address_lo; addr &= MSI_ADDR_DESTID_MASK; - addr |= MSI_ADDR_DESTID_CPU(cpu_physical_id(first_cpu(cpu_mask))); + addr |= MSI_ADDR_DESTID_CPU(cpu_physical_id(cpu)); msg.address_lo = addr; + data = msg.data; + data &= MSI_DATA_VECTOR_MASK; + data |= MSI_DATA_VECTOR(irq_to_vector(irq)); + msg.data = data; + write_msi_msg(irq, &msg); - irq_desc[irq].affinity = cpu_mask; + irq_desc[irq].affinity = cpumask_of_cpu(cpu); } #endif /* CONFIG_SMP */ @@ -69,13 +82,15 @@ int ia64_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *desc) struct msi_msg msg; unsigned long dest_phys_id; int irq, vector; + cpumask_t mask; irq = create_irq(); if (irq < 0) return irq; set_irq_msi(irq, desc); - dest_phys_id = cpu_physical_id(first_cpu(cpu_online_map)); + cpus_and(mask, irq_to_domain(irq), cpu_online_map); + dest_phys_id = cpu_physical_id(first_cpu(mask)); vector = irq_to_vector(irq); msg.address_hi = 0; diff --git a/arch/ia64/kernel/smp.c b/arch/ia64/kernel/smp.c index b3a47f986e1e..9f72838db26e 100644 --- a/arch/ia64/kernel/smp.c +++ b/arch/ia64/kernel/smp.c @@ -82,7 +82,7 @@ static volatile struct call_data_struct *call_data; #define IPI_KDUMP_CPU_STOP 3 /* This needs to be cacheline aligned because it is written to by *other* CPUs. */ -static DEFINE_PER_CPU(u64, ipi_operation) ____cacheline_aligned; +static DEFINE_PER_CPU_SHARED_ALIGNED(u64, ipi_operation); extern void cpu_halt (void); diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c index 3c9d8e6089cf..9f5c90b594b9 100644 --- a/arch/ia64/kernel/smpboot.c +++ b/arch/ia64/kernel/smpboot.c @@ -395,9 +395,13 @@ smp_callin (void) fix_b0_for_bsp(); lock_ipi_calllock(); + spin_lock(&vector_lock); + /* Setup the per cpu irq handling data structures */ + __setup_vector_irq(cpuid); cpu_set(cpuid, cpu_online_map); unlock_ipi_calllock(); per_cpu(cpu_state, cpuid) = CPU_ONLINE; + spin_unlock(&vector_lock); smp_setup_percpu_timer(); diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S index 5a65965c8b53..860f251d2fc2 100644 --- a/arch/ia64/kernel/vmlinux.lds.S +++ b/arch/ia64/kernel/vmlinux.lds.S @@ -206,6 +206,7 @@ SECTIONS { __per_cpu_start = .; *(.data.percpu) + *(.data.percpu.shared_aligned) __per_cpu_end = .; } . = __phys_per_cpu_start + PERCPU_PAGE_SIZE; /* ensure percpu data fits diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c index b87f785c2416..73ccb6010c05 100644 --- a/arch/ia64/mm/fault.c +++ b/arch/ia64/mm/fault.c @@ -80,6 +80,7 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re struct mm_struct *mm = current->mm; struct siginfo si; unsigned long mask; + int fault; /* mmap_sem is performance critical.... */ prefetchw(&mm->mmap_sem); @@ -147,26 +148,25 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re * sure we exit gracefully rather than endlessly redo the * fault. */ - switch (handle_mm_fault(mm, vma, address, (mask & VM_WRITE) != 0)) { - case VM_FAULT_MINOR: - ++current->min_flt; - break; - case VM_FAULT_MAJOR: - ++current->maj_flt; - break; - case VM_FAULT_SIGBUS: + fault = handle_mm_fault(mm, vma, address, (mask & VM_WRITE) != 0); + if (unlikely(fault & VM_FAULT_ERROR)) { /* * We ran out of memory, or some other thing happened * to us that made us unable to handle the page fault * gracefully. */ - signal = SIGBUS; - goto bad_area; - case VM_FAULT_OOM: - goto out_of_memory; - default: + if (fault & VM_FAULT_OOM) { + goto out_of_memory; + } else if (fault & VM_FAULT_SIGBUS) { + signal = SIGBUS; + goto bad_area; + } BUG(); } + if (fault & VM_FAULT_MAJOR) + current->maj_flt++; + else + current->min_flt++; up_read(&mm->mmap_sem); return; diff --git a/arch/m32r/kernel/vmlinux.lds.S b/arch/m32r/kernel/vmlinux.lds.S index 4e2d5b9f0a9a..942a8c7a4417 100644 --- a/arch/m32r/kernel/vmlinux.lds.S +++ b/arch/m32r/kernel/vmlinux.lds.S @@ -110,10 +110,7 @@ SECTIONS __initramfs_end = .; #endif - . = ALIGN(4096); - __per_cpu_start = .; - .data.percpu : { *(.data.percpu) } - __per_cpu_end = .; + PERCPU(4096) . = ALIGN(4096); __init_end = .; /* freed after init ends here */ diff --git a/arch/m32r/mm/fault.c b/arch/m32r/mm/fault.c index f3935ba24946..676a1c443d28 100644 --- a/arch/m32r/mm/fault.c +++ b/arch/m32r/mm/fault.c @@ -80,6 +80,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code, struct vm_area_struct * vma; unsigned long page, addr; int write; + int fault; siginfo_t info; /* @@ -195,20 +196,18 @@ survive: */ addr = (address & PAGE_MASK); set_thread_fault_code(error_code); - switch (handle_mm_fault(mm, vma, addr, write)) { - case VM_FAULT_MINOR: - tsk->min_flt++; - break; - case VM_FAULT_MAJOR: - tsk->maj_flt++; - break; - case VM_FAULT_SIGBUS: - goto do_sigbus; - case VM_FAULT_OOM: + fault = handle_mm_fault(mm, vma, addr, write); + if (unlikely(fault & VM_FAULT_ERROR)) { + if (fault & VM_FAULT_OOM) goto out_of_memory; - default: - BUG(); + else if (fault & VM_FAULT_SIGBUS) + goto do_sigbus; + BUG(); } + if (fault & VM_FAULT_MAJOR) + tsk->maj_flt++; + else + tsk->min_flt++; set_thread_fault_code(0); up_read(&mm->mmap_sem); return; diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c index 2adbeb16e1b8..578b48f47b9e 100644 --- a/arch/m68k/mm/fault.c +++ b/arch/m68k/mm/fault.c @@ -159,18 +159,17 @@ good_area: #ifdef DEBUG printk("handle_mm_fault returns %d\n",fault); #endif - switch (fault) { - case VM_FAULT_MINOR: - current->min_flt++; - break; - case VM_FAULT_MAJOR: - current->maj_flt++; - break; - case VM_FAULT_SIGBUS: - goto bus_err; - default: - goto out_of_memory; + if (unlikely(fault & VM_FAULT_ERROR)) { + if (fault & VM_FAULT_OOM) + goto out_of_memory; + else if (fault & VM_FAULT_SIGBUS) + goto bus_err; + BUG(); } + if (fault & VM_FAULT_MAJOR) + current->maj_flt++; + else + current->min_flt++; up_read(&mm->mmap_sem); return 0; diff --git a/arch/m68knommu/Kconfig b/arch/m68knommu/Kconfig index adc64a2bafbb..1175ceff8b2a 100644 --- a/arch/m68knommu/Kconfig +++ b/arch/m68knommu/Kconfig @@ -45,6 +45,10 @@ config GENERIC_HWEIGHT bool default y +config GENERIC_HARDIRQS + bool + default y + config GENERIC_CALIBRATE_DELAY bool default y diff --git a/arch/m68knommu/kernel/Makefile b/arch/m68knommu/kernel/Makefile index 1c6cd1ab571e..1524b39ad63f 100644 --- a/arch/m68knommu/kernel/Makefile +++ b/arch/m68knommu/kernel/Makefile @@ -4,8 +4,8 @@ extra-y := vmlinux.lds -obj-y += dma.o entry.o init_task.o m68k_ksyms.o process.o ptrace.o semaphore.o \ - setup.o signal.o syscalltable.o sys_m68k.o time.o traps.o +obj-y += dma.o entry.o init_task.o irq.o m68k_ksyms.o process.o ptrace.o \ + semaphore.o setup.o signal.o syscalltable.o sys_m68k.o time.o traps.o obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_COMEMPCI) += comempci.o diff --git a/arch/m68knommu/kernel/asm-offsets.c b/arch/m68knommu/kernel/asm-offsets.c index 7cd183d346ef..d97b89bae53c 100644 --- a/arch/m68knommu/kernel/asm-offsets.c +++ b/arch/m68knommu/kernel/asm-offsets.c @@ -15,7 +15,6 @@ #include <linux/hardirq.h> #include <asm/bootinfo.h> #include <asm/irq.h> -#include <asm/irqnode.h> #include <asm/thread_info.h> #define DEFINE(sym, val) \ @@ -72,10 +71,6 @@ int main(void) #else /* bitfields are a bit difficult */ DEFINE(PT_VECTOR, offsetof(struct pt_regs, pc) + 4); - /* offsets into the irq_handler struct */ - DEFINE(IRQ_HANDLER, offsetof(struct irq_node, handler)); - DEFINE(IRQ_DEVID, offsetof(struct irq_node, dev_id)); - DEFINE(IRQ_NEXT, offsetof(struct irq_node, next)); #endif /* offsets into the kernel_stat struct */ diff --git a/arch/m68knommu/kernel/irq.c b/arch/m68knommu/kernel/irq.c new file mode 100644 index 000000000000..bba1bb48a21f --- /dev/null +++ b/arch/m68knommu/kernel/irq.c @@ -0,0 +1,82 @@ +/* + * irq.c + * + * (C) Copyright 2007, Greg Ungerer <gerg@snapgear.com> + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file COPYING in the main directory of this archive + * for more details. + */ + +#include <linux/types.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/kernel_stat.h> +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/seq_file.h> +#include <asm/system.h> +#include <asm/traps.h> + +asmlinkage void do_IRQ(int irq, struct pt_regs *regs) +{ + struct pt_regs *oldregs = set_irq_regs(regs); + + irq_enter(); + __do_IRQ(irq); + irq_exit(); + + set_irq_regs(oldregs); +} + +void ack_bad_irq(unsigned int irq) +{ + printk(KERN_ERR "IRQ: unexpected irq=%d\n", irq); +} + +static struct irq_chip m_irq_chip = { + .name = "M68K-INTC", + .enable = enable_vector, + .disable = disable_vector, + .ack = ack_vector, +}; + +void __init init_IRQ(void) +{ + int irq; + + init_vectors(); + + for (irq = 0; (irq < NR_IRQS); irq++) { + irq_desc[irq].status = IRQ_DISABLED; + irq_desc[irq].action = NULL; + irq_desc[irq].depth = 1; + irq_desc[irq].chip = &m_irq_chip; + } +} + +int show_interrupts(struct seq_file *p, void *v) +{ + struct irqaction *ap; + int irq = *((loff_t *) v); + + if (irq == 0) + seq_puts(p, " CPU0\n"); + + if (irq < NR_IRQS) { + ap = irq_desc[irq].action; + if (ap) { + seq_printf(p, "%3d: ", irq); + seq_printf(p, "%10u ", kstat_irqs(irq)); + seq_printf(p, "%14s ", irq_desc[irq].chip->name); + + seq_printf(p, "%s", ap->name); + for (ap = ap->next; ap; ap = ap->next) + seq_printf(p, ", %s", ap->name); + seq_putc(p, '\n'); + } + } + + return 0; +} + diff --git a/arch/m68knommu/kernel/m68k_ksyms.c b/arch/m68knommu/kernel/m68k_ksyms.c index 25327c9eadd7..f795062aba1e 100644 --- a/arch/m68knommu/kernel/m68k_ksyms.c +++ b/arch/m68knommu/kernel/m68k_ksyms.c @@ -81,8 +81,6 @@ EXPORT_SYMBOL(__mulsi3); EXPORT_SYMBOL(__udivsi3); EXPORT_SYMBOL(__umodsi3); -EXPORT_SYMBOL(is_in_rom); - #ifdef CONFIG_COLDFIRE extern unsigned int *dma_device_address; extern unsigned long dma_base_addr, _ramend; diff --git a/arch/m68knommu/kernel/traps.c b/arch/m68knommu/kernel/traps.c index fde04e1757f7..437a061d8b94 100644 --- a/arch/m68knommu/kernel/traps.c +++ b/arch/m68knommu/kernel/traps.c @@ -62,8 +62,6 @@ static char const * const vec_names[] = { void __init trap_init(void) { - if (mach_trap_init) - mach_trap_init(); } void die_if_kernel(char *str, struct pt_regs *fp, int nr) @@ -82,7 +80,7 @@ void die_if_kernel(char *str, struct pt_regs *fp, int nr) printk(KERN_EMERG "Process %s (pid: %d, stackpage=%08lx)\n", current->comm, current->pid, PAGE_SIZE+(unsigned long)current); - show_stack(NULL, (unsigned long *)fp); + show_stack(NULL, (unsigned long *)(fp + 1)); add_taint(TAINT_DIE); do_exit(SIGSEGV); } diff --git a/arch/m68knommu/mm/memory.c b/arch/m68knommu/mm/memory.c index 1a66b71035a4..f93b88b51f9f 100644 --- a/arch/m68knommu/mm/memory.c +++ b/arch/m68knommu/mm/memory.c @@ -33,23 +33,3 @@ unsigned long kernel_map(unsigned long paddr, unsigned long size, return paddr; } - -int is_in_rom(unsigned long addr) -{ - extern unsigned long _ramstart, _ramend; - - /* - * What we are really trying to do is determine if addr is - * in an allocated kernel memory region. If not then assume - * we cannot free it or otherwise de-allocate it. Ideally - * we could restrict this to really being in a ROM or flash, - * but that would need to be done on a board by board basis, - * not globally. - */ - if ((addr < _ramstart) || (addr >= _ramend)) - return(1); - - /* Default case, not in ROM */ - return(0); -} - diff --git a/arch/m68knommu/platform/5307/Makefile b/arch/m68knommu/platform/5307/Makefile index 2fd37dcc309b..719a313494bc 100644 --- a/arch/m68knommu/platform/5307/Makefile +++ b/arch/m68knommu/platform/5307/Makefile @@ -16,7 +16,7 @@ ifdef CONFIG_FULLDEBUG AFLAGS += -DDEBUGGER_COMPATIBLE_CACHE=1 endif -obj-$(CONFIG_COLDFIRE) += entry.o vectors.o ints.o +obj-$(CONFIG_COLDFIRE) += entry.o vectors.o obj-$(CONFIG_M5206) += timers.o obj-$(CONFIG_M5206e) += timers.o obj-$(CONFIG_M520x) += pit.o diff --git a/arch/m68knommu/platform/5307/entry.S b/arch/m68knommu/platform/5307/entry.S index f0dba84d9101..c358aebe0af3 100644 --- a/arch/m68knommu/platform/5307/entry.S +++ b/arch/m68knommu/platform/5307/entry.S @@ -1,7 +1,7 @@ /* * linux/arch/m68knommu/platform/5307/entry.S * - * Copyright (C) 1999-2002, Greg Ungerer (gerg@snapgear.com) + * Copyright (C) 1999-2007, Greg Ungerer (gerg@snapgear.com) * Copyright (C) 1998 D. Jeff Dionne <jeff@lineo.ca>, * Kenneth Albanowski <kjahds@kjahds.com>, * Copyright (C) 2000 Lineo Inc. (www.lineo.com) @@ -155,34 +155,21 @@ Lsignal_return: /* * This is the generic interrupt handler (for all hardware interrupt - * sources). It figures out the vector number and calls the appropriate - * interrupt service routine directly. + * sources). Calls upto high level code to do all the work. */ ENTRY(inthandler) SAVE_ALL moveq #-1,%d0 movel %d0,%sp@(PT_ORIG_D0) - addql #1,local_irq_count movew %sp@(PT_FORMATVEC),%d0 /* put exception # in d0 */ andl #0x03fc,%d0 /* mask out vector only */ - leal per_cpu__kstat+STAT_IRQ,%a0 - addql #1,%a0@(%d0) - + movel %sp,%sp@- /* push regs arg */ lsrl #2,%d0 /* calculate real vector # */ - movel %d0,%d1 /* calculate array offset */ - lsll #4,%d1 - lea irq_list,%a0 - addl %d1,%a0 /* pointer to array struct */ - - movel %sp,%sp@- /* push regs arg onto stack */ - movel %a0@(8),%sp@- /* push devid arg */ - movel %d0,%sp@- /* push vector # on stack */ - - movel %a0@,%a0 /* get function to call */ - jbsr %a0@ /* call vector handler */ - lea %sp@(12),%sp /* pop parameters off stack */ + movel %d0,%sp@- /* push vector number */ + jbsr do_IRQ /* call high level irq handler */ + lea %sp@(8),%sp /* pop args off stack */ bra ret_from_interrupt /* this was fallthrough */ @@ -198,24 +185,15 @@ ENTRY(fasthandler) movew %sp@(PT_FORMATVEC),%d0 andl #0x03fc,%d0 /* mask out vector only */ - leal per_cpu__kstat+STAT_IRQ,%a0 - addql #1,%a0@(%d0) - - movel %sp,%sp@- /* push regs arg onto stack */ - clrl %sp@- /* push devid arg */ + movel %sp,%sp@- /* push regs arg */ lsrl #2,%d0 /* calculate real vector # */ - movel %d0,%sp@- /* push vector # on stack */ - - lsll #4,%d0 /* adjust for array offset */ - lea irq_list,%a0 - movel %a0@(%d0),%a0 /* get function to call */ - jbsr %a0@ /* call vector handler */ - lea %sp@(12),%sp /* pop parameters off stack */ + movel %d0,%sp@- /* push vector number */ + jbsr do_IRQ /* call high level irq handler */ + lea %sp@(8),%sp /* pop args off stack */ RESTORE_LOCAL ENTRY(ret_from_interrupt) - subql #1,local_irq_count jeq 2f 1: RESTORE_ALL diff --git a/arch/m68knommu/platform/5307/ints.c b/arch/m68knommu/platform/5307/ints.c deleted file mode 100644 index 751633038c4b..000000000000 --- a/arch/m68knommu/platform/5307/ints.c +++ /dev/null @@ -1,279 +0,0 @@ -/* - * linux/arch/m68knommu/kernel/ints.c -- General interrupt handling code - * - * Copyright (C) 1999-2002 Greg Ungerer (gerg@snapgear.com) - * Copyright (C) 1998 D. Jeff Dionne <jeff@lineo.ca>, - * Kenneth Albanowski <kjahds@kjahds.com>, - * Copyright (C) 2000 Lineo Inc. (www.lineo.com) - * - * Based on: - * - * linux/arch/m68k/kernel/ints.c -- Linux/m68k general interrupt handling code - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file COPYING in the main directory of this archive - * for more details. - */ - -#include <linux/module.h> -#include <linux/types.h> -#include <linux/init.h> -#include <linux/sched.h> -#include <linux/interrupt.h> -#include <linux/kernel_stat.h> -#include <linux/errno.h> -#include <linux/seq_file.h> - -#include <asm/system.h> -#include <asm/irq.h> -#include <asm/irqnode.h> -#include <asm/traps.h> -#include <asm/page.h> -#include <asm/machdep.h> - -/* - * This table stores the address info for each vector handler. - */ -struct irq_entry irq_list[SYS_IRQS]; - -#define NUM_IRQ_NODES 16 -static irq_node_t nodes[NUM_IRQ_NODES]; - -/* The number of spurious interrupts */ -volatile unsigned int num_spurious; - -unsigned int local_irq_count[NR_CPUS]; - -static irqreturn_t default_irq_handler(int irq, void *ptr) -{ -#if 1 - printk(KERN_INFO "%s(%d): default irq handler vec=%d [0x%x]\n", - __FILE__, __LINE__, irq, irq); -#endif - return(IRQ_HANDLED); -} - -/* - * void init_IRQ(void) - * - * Parameters: None - * - * Returns: Nothing - * - * This function should be called during kernel startup to initialize - * the IRQ handling routines. - */ - -void __init init_IRQ(void) -{ - int i; - - for (i = 0; i < SYS_IRQS; i++) { - if (mach_default_handler) - irq_list[i].handler = mach_default_handler; - else - irq_list[i].handler = default_irq_handler; - irq_list[i].flags = IRQ_FLG_STD; - irq_list[i].dev_id = NULL; - irq_list[i].devname = NULL; - } - - for (i = 0; i < NUM_IRQ_NODES; i++) - nodes[i].handler = NULL; - - if (mach_init_IRQ) - mach_init_IRQ(); -} - -irq_node_t *new_irq_node(void) -{ - irq_node_t *node; - short i; - - for (node = nodes, i = NUM_IRQ_NODES-1; i >= 0; node++, i--) - if (!node->handler) - return node; - - printk(KERN_INFO "new_irq_node: out of nodes\n"); - return NULL; -} - -int request_irq( - unsigned int irq, - irq_handler_t handler, - unsigned long flags, - const char *devname, - void *dev_id) -{ - if (irq < 0 || irq >= NR_IRQS) { - printk(KERN_WARNING "%s: Incorrect IRQ %d from %s\n", __FUNCTION__, - irq, devname); - return -ENXIO; - } - - if (!(irq_list[irq].flags & IRQ_FLG_STD)) { - if (irq_list[irq].flags & IRQ_FLG_LOCK) { - printk(KERN_WARNING "%s: IRQ %d from %s is not replaceable\n", - __FUNCTION__, irq, irq_list[irq].devname); - return -EBUSY; - } - if (flags & IRQ_FLG_REPLACE) { - printk(KERN_WARNING "%s: %s can't replace IRQ %d from %s\n", - __FUNCTION__, devname, irq, irq_list[irq].devname); - return -EBUSY; - } - } - - if (flags & IRQ_FLG_FAST) { - extern asmlinkage void fasthandler(void); - extern void set_evector(int vecnum, void (*handler)(void)); - set_evector(irq, fasthandler); - } - - irq_list[irq].handler = handler; - irq_list[irq].flags = flags; - irq_list[irq].dev_id = dev_id; - irq_list[irq].devname = devname; - return 0; -} - -EXPORT_SYMBOL(request_irq); - -void free_irq(unsigned int irq, void *dev_id) -{ - if (irq >= NR_IRQS) { - printk(KERN_WARNING "%s: Incorrect IRQ %d\n", __FUNCTION__, irq); - return; - } - - if (irq_list[irq].dev_id != dev_id) - printk(KERN_WARNING "%s: Removing probably wrong IRQ %d from %s\n", - __FUNCTION__, irq, irq_list[irq].devname); - - if (irq_list[irq].flags & IRQ_FLG_FAST) { - extern asmlinkage void inthandler(void); - extern void set_evector(int vecnum, void (*handler)(void)); - set_evector(irq, inthandler); - } - - if (mach_default_handler) - irq_list[irq].handler = mach_default_handler; - else - irq_list[irq].handler = default_irq_handler; - irq_list[irq].flags = IRQ_FLG_STD; - irq_list[irq].dev_id = NULL; - irq_list[irq].devname = NULL; -} - -EXPORT_SYMBOL(free_irq); - - -int sys_request_irq(unsigned int irq, irq_handler_t handler, - unsigned long flags, const char *devname, void *dev_id) -{ - if (irq > IRQ7) { - printk(KERN_WARNING "%s: Incorrect IRQ %d from %s\n", - __FUNCTION__, irq, devname); - return -ENXIO; - } - -#if 0 - if (!(irq_list[irq].flags & IRQ_FLG_STD)) { - if (irq_list[irq].flags & IRQ_FLG_LOCK) { - printk(KERN_WARNING "%s: IRQ %d from %s is not replaceable\n", - __FUNCTION__, irq, irq_list[irq].devname); - return -EBUSY; - } - if (!(flags & IRQ_FLG_REPLACE)) { - printk(KERN_WARNING "%s: %s can't replace IRQ %d from %s\n", - __FUNCTION__, devname, irq, irq_list[irq].devname); - return -EBUSY; - } - } -#endif - - irq_list[irq].handler = handler; - irq_list[irq].flags = flags; - irq_list[irq].dev_id = dev_id; - irq_list[irq].devname = devname; - return 0; -} - -void sys_free_irq(unsigned int irq, void *dev_id) -{ - if (irq > IRQ7) { - printk(KERN_WARNING "%s: Incorrect IRQ %d\n", __FUNCTION__, irq); - return; - } - - if (irq_list[irq].dev_id != dev_id) - printk(KERN_WARNING "%s: Removing probably wrong IRQ %d from %s\n", - __FUNCTION__, irq, irq_list[irq].devname); - - irq_list[irq].handler = mach_default_handler; - irq_list[irq].flags = 0; - irq_list[irq].dev_id = NULL; - irq_list[irq].devname = NULL; -} - -/* - * Do we need these probe functions on the m68k? - * - * ... may be useful with ISA devices - */ -unsigned long probe_irq_on (void) -{ - return 0; -} - -EXPORT_SYMBOL(probe_irq_on); - -int probe_irq_off (unsigned long irqs) -{ - return 0; -} - -EXPORT_SYMBOL(probe_irq_off); - -asmlinkage void process_int(unsigned long vec, struct pt_regs *fp) -{ - if (vec >= VEC_INT1 && vec <= VEC_INT7) { - vec -= VEC_SPUR; - kstat_cpu(0).irqs[vec]++; - irq_list[vec].handler(vec, irq_list[vec].dev_id); - } else { - if (mach_process_int) - mach_process_int(vec, fp); - else - panic("Can't process interrupt vector %ld\n", vec); - return; - } -} - - -int show_interrupts(struct seq_file *p, void *v) -{ - int i = *(loff_t *) v; - - if (i < NR_IRQS) { - if (! (irq_list[i].flags & IRQ_FLG_STD)) { - seq_printf(p, "%3d: %10u ", i, - (i ? kstat_cpu(0).irqs[i] : num_spurious)); - if (irq_list[i].flags & IRQ_FLG_LOCK) - seq_printf(p, "L "); - else - seq_printf(p, " "); - seq_printf(p, "%s\n", irq_list[i].devname); - } - } - - if (i == NR_IRQS && mach_get_irq_list) - mach_get_irq_list(p, v); - return 0; -} - -void init_irq_proc(void) -{ - /* Insert /proc/irq driver here */ -} - diff --git a/arch/m68knommu/platform/5307/vectors.c b/arch/m68knommu/platform/5307/vectors.c index 2a8b0d044ce5..6cf894620234 100644 --- a/arch/m68knommu/platform/5307/vectors.c +++ b/arch/m68knommu/platform/5307/vectors.c @@ -3,23 +3,17 @@ /* * linux/arch/m68knommu/platform/5307/vectors.c * - * Copyright (C) 1999-2003, Greg Ungerer <gerg@snapgear.com> + * Copyright (C) 1999-2007, Greg Ungerer <gerg@snapgear.com> */ /***************************************************************************/ #include <linux/kernel.h> -#include <linux/sched.h> -#include <linux/param.h> #include <linux/init.h> -#include <linux/unistd.h> -#include <linux/delay.h> -#include <asm/irq.h> -#include <asm/dma.h> +#include <linux/irq.h> #include <asm/traps.h> #include <asm/machdep.h> #include <asm/coldfire.h> -#include <asm/mcftimer.h> #include <asm/mcfsim.h> #include <asm/mcfdma.h> #include <asm/mcfwdebug.h> @@ -56,7 +50,7 @@ asmlinkage void trap(void); asmlinkage void system_call(void); asmlinkage void inthandler(void); -void __init coldfire_trap_init(void) +void __init init_vectors(void) { int i; @@ -86,6 +80,23 @@ void __init coldfire_trap_init(void) /***************************************************************************/ +void enable_vector(unsigned int irq) +{ + /* Currently no action on ColdFire */ +} + +void disable_vector(unsigned int irq) +{ + /* Currently no action on ColdFire */ +} + +void ack_vector(unsigned int irq) +{ + /* Currently no action on ColdFire */ +} + +/***************************************************************************/ + void coldfire_reset(void) { HARD_RESET_NOW(); diff --git a/arch/m68knommu/platform/68328/entry.S b/arch/m68knommu/platform/68328/entry.S index f97862715450..b1aef72f3baf 100644 --- a/arch/m68knommu/platform/68328/entry.S +++ b/arch/m68knommu/platform/68328/entry.S @@ -133,7 +133,6 @@ Lreturn: */ inthandler1: SAVE_ALL - addql #1,local_irq_count /* put exception # in d0*/ movew %sp@(PT_VECTOR), %d0 and #0x3ff, %d0 @@ -145,7 +144,6 @@ inthandler1: inthandler2: SAVE_ALL - addql #1,local_irq_count /* put exception # in d0*/ movew %sp@(PT_VECTOR), %d0 and #0x3ff, %d0 @@ -157,7 +155,6 @@ inthandler2: inthandler3: SAVE_ALL - addql #1,local_irq_count /* put exception # in d0*/ movew %sp@(PT_VECTOR), %d0 and #0x3ff, %d0 @@ -169,7 +166,6 @@ inthandler3: inthandler4: SAVE_ALL - addql #1,local_irq_count /* put exception # in d0*/ movew %sp@(PT_VECTOR), %d0 and #0x3ff, %d0 @@ -181,7 +177,6 @@ inthandler4: inthandler5: SAVE_ALL - addql #1,local_irq_count /* put exception # in d0*/ movew %sp@(PT_VECTOR), %d0 and #0x3ff, %d0 @@ -193,7 +188,6 @@ inthandler5: inthandler6: SAVE_ALL - addql #1,local_irq_count /* put exception # in d0*/ movew %sp@(PT_VECTOR), %d0 and #0x3ff, %d0 @@ -205,7 +199,6 @@ inthandler6: inthandler7: SAVE_ALL - addql #1,local_irq_count /* put exception # in d0*/ movew %sp@(PT_VECTOR), %d0 and #0x3ff, %d0 @@ -217,7 +210,6 @@ inthandler7: inthandler: SAVE_ALL - addql #1,local_irq_count /* put exception # in d0*/ movew %sp@(PT_VECTOR), %d0 and #0x3ff, %d0 @@ -228,7 +220,6 @@ inthandler: bra ret_from_interrupt ret_from_interrupt: - subql #1,local_irq_count jeq 1f 2: RESTORE_ALL @@ -238,7 +229,6 @@ ret_from_interrupt: jhi 2b /* check if we need to do software interrupts */ - movel local_irq_count,%d0 jeq ret_from_exception pea ret_from_exception diff --git a/arch/m68knommu/platform/68328/ints.c b/arch/m68knommu/platform/68328/ints.c index 3de6e337554e..72e56d554f4f 100644 --- a/arch/m68knommu/platform/68328/ints.c +++ b/arch/m68knommu/platform/68328/ints.c @@ -9,21 +9,14 @@ * Copyright 1999 D. Jeff Dionne <jeff@rt-control.com> */ -#include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> -#include <linux/sched.h> -#include <linux/kernel_stat.h> -#include <linux/errno.h> +#include <linux/init.h> #include <linux/interrupt.h> - -#include <asm/system.h> -#include <asm/irq.h> -#include <asm/irqnode.h> +#include <linux/irq.h> #include <asm/traps.h> #include <asm/io.h> #include <asm/machdep.h> -#include <asm/setup.h> #if defined(CONFIG_M68328) #include <asm/MC68328.h> @@ -79,16 +72,12 @@ extern e_vector *_ramvec; /* The number of spurious interrupts */ volatile unsigned int num_spurious; -unsigned int local_irq_count[NR_CPUS]; - -/* irq node variables for the 32 (potential) on chip sources */ -static irq_node_t int_irq_list[NR_IRQS]; /* * This function should be called during kernel startup to initialize - * the IRQ handling routines. + * the machine vector table. */ -void init_IRQ(void) +void __init init_vectors(void) { int i; @@ -108,96 +97,10 @@ void init_IRQ(void) IVR = 0x40; /* Set DragonBall IVR (interrupt base) to 64 */ - /* initialize handlers */ - for (i = 0; i < NR_IRQS; i++) { - int_irq_list[i].handler = bad_interrupt; - int_irq_list[i].flags = IRQ_FLG_STD; - int_irq_list[i].dev_id = NULL; - int_irq_list[i].devname = NULL; - } - /* turn off all interrupts */ IMR = ~0; } -int request_irq( - unsigned int irq, - irq_handler_t handler, - unsigned long flags, - const char *devname, - void *dev_id) -{ - if (irq >= NR_IRQS) { - printk (KERN_ERR "%s: Unknown IRQ %d from %s\n", __FUNCTION__, irq, devname); - return -ENXIO; - } - - if (!(int_irq_list[irq].flags & IRQ_FLG_STD)) { - if (int_irq_list[irq].flags & IRQ_FLG_LOCK) { - printk(KERN_ERR "%s: IRQ %d from %s is not replaceable\n", - __FUNCTION__, irq, int_irq_list[irq].devname); - return -EBUSY; - } - if (flags & IRQ_FLG_REPLACE) { - printk(KERN_ERR "%s: %s can't replace IRQ %d from %s\n", - __FUNCTION__, devname, irq, int_irq_list[irq].devname); - return -EBUSY; - } - } - - int_irq_list[irq].handler = handler; - int_irq_list[irq].flags = flags; - int_irq_list[irq].dev_id = dev_id; - int_irq_list[irq].devname = devname; - - IMR &= ~(1<<irq); - - return 0; -} - -EXPORT_SYMBOL(request_irq); - -void free_irq(unsigned int irq, void *dev_id) -{ - if (irq >= NR_IRQS) { - printk (KERN_ERR "%s: Unknown IRQ %d\n", __FUNCTION__, irq); - return; - } - - if (int_irq_list[irq].dev_id != dev_id) - printk(KERN_INFO "%s: removing probably wrong IRQ %d from %s\n", - __FUNCTION__, irq, int_irq_list[irq].devname); - - int_irq_list[irq].handler = bad_interrupt; - int_irq_list[irq].flags = IRQ_FLG_STD; - int_irq_list[irq].dev_id = NULL; - int_irq_list[irq].devname = NULL; - - IMR |= 1<<irq; -} - -EXPORT_SYMBOL(free_irq); - -int show_interrupts(struct seq_file *p, void *v) -{ - int i = *(loff_t *) v; - - if (i < NR_IRQS) { - if (int_irq_list[i].devname) { - seq_printf(p, "%3d: %10u ", i, kstat_cpu(0).irqs[i]); - if (int_irq_list[i].flags & IRQ_FLG_LOCK) - seq_printf(p, "L "); - else - seq_printf(p, " "); - seq_printf(p, "%s\n", int_irq_list[i].devname); - } - } - if (i == NR_IRQS) - seq_printf(p, " : %10u spurious\n", num_spurious); - - return 0; -} - /* The 68k family did not have a good way to determine the source * of interrupts until later in the family. The EC000 core does * not provide the vector number on the stack, we vector everything @@ -255,14 +158,23 @@ void process_int(int vec, struct pt_regs *fp) irq++; } - kstat_cpu(0).irqs[irq]++; - - if (int_irq_list[irq].handler) { - int_irq_list[irq].handler(irq, int_irq_list[irq].dev_id, fp); - } else { - printk(KERN_ERR "unregistered interrupt %d!\nTurning it off in the IMR...\n", irq); - IMR |= mask; - } + do_IRQ(irq, fp); pend &= ~mask; } } + +void enable_vector(unsigned int irq) +{ + IMR &= ~(1<<irq); +} + +void disable_vector(unsigned int irq) +{ + IMR |= (1<<irq); +} + +void ack_vector(unsigned int irq) +{ + /* Nothing needed */ +} + diff --git a/arch/m68knommu/platform/68360/entry.S b/arch/m68knommu/platform/68360/entry.S index f1af8977f294..55dfefe38642 100644 --- a/arch/m68knommu/platform/68360/entry.S +++ b/arch/m68knommu/platform/68360/entry.S @@ -120,23 +120,21 @@ Lreturn: RESTORE_ALL /* - * This is the main interrupt handler, responsible for calling process_int() + * This is the main interrupt handler, responsible for calling do_IRQ() */ inthandler: SAVE_ALL - addql #1,local_irq_count /* put exception # in d0*/ movew %sp@(PT_VECTOR), %d0 and.l #0x3ff, %d0 lsr.l #0x02, %d0 movel %sp,%sp@- movel %d0,%sp@- /* put vector # on stack*/ - jbsr process_int /* process the IRQ*/ + jbsr do_IRQ /* process the IRQ*/ 3: addql #8,%sp /* pop parameters off stack*/ bra ret_from_interrupt ret_from_interrupt: - subql #1,local_irq_count jeq 1f 2: RESTORE_ALL diff --git a/arch/m68knommu/platform/68360/ints.c b/arch/m68knommu/platform/68360/ints.c index 4df3c146eb74..c36781157e09 100644 --- a/arch/m68knommu/platform/68360/ints.c +++ b/arch/m68knommu/platform/68360/ints.c @@ -10,20 +10,13 @@ * Copyright (c) 1999 D. Jeff Dionne <jeff@uclinux.org> */ -#include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> -#include <linux/sched.h> -#include <linux/kernel_stat.h> -#include <linux/errno.h> - -#include <asm/system.h> -#include <asm/irq.h> -#include <asm/irqnode.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/irq.h> #include <asm/traps.h> -#include <asm/io.h> #include <asm/machdep.h> -#include <asm/setup.h> #include <asm/m68360.h> /* from quicc/commproc.c: */ @@ -36,26 +29,19 @@ extern void cpm_interrupt_init(void); asmlinkage void system_call(void); asmlinkage void buserr(void); asmlinkage void trap(void); -asmlinkage irqreturn_t bad_interrupt(void); -asmlinkage irqreturn_t inthandler(void); +asmlinkage void bad_interrupt(void); +asmlinkage void inthandler(void); extern void *_ramvec[]; /* The number of spurious interrupts */ volatile unsigned int num_spurious; -unsigned int local_irq_count[NR_CPUS]; - -/* irq node variables for the 32 (potential) on chip sources */ -static irq_node_t int_irq_list[INTERNAL_IRQS]; - -static short int_irq_ablecount[INTERNAL_IRQS]; /* * This function should be called during kernel startup to initialize - * IRQ handling routines. + * the vector table. */ - -void init_IRQ(void) +void init_vectors(void) { int i; int vba = (CPM_VECTOR_BASE<<4); @@ -79,7 +65,6 @@ void init_IRQ(void) _ramvec[32] = system_call; _ramvec[33] = trap; - cpm_interrupt_init(); /* set up CICR for vector base address and irq level */ @@ -124,212 +109,20 @@ void init_IRQ(void) /* turn off all CPM interrupts */ pquicc->intr_cimr = 0x00000000; - - /* initialize handlers */ - for (i = 0; i < INTERNAL_IRQS; i++) { - int_irq_list[i].handler = NULL; - int_irq_list[i].flags = IRQ_FLG_STD; - int_irq_list[i].dev_id = NULL; - int_irq_list[i].devname = NULL; - } -} - -#if 0 -void M68360_insert_irq(irq_node_t **list, irq_node_t *node) -{ - unsigned long flags; - irq_node_t *cur; - - if (!node->dev_id) - printk(KERN_INFO "%s: Warning: dev_id of %s is zero\n", - __FUNCTION__, node->devname); - - local_irq_save(flags); - - cur = *list; - - while (cur) { - list = &cur->next; - cur = cur->next; - } - - node->next = cur; - *list = node; - - local_irq_restore(flags); } -void M68360_delete_irq(irq_node_t **list, void *dev_id) +void enable_vector(unsigned int irq) { - unsigned long flags; - irq_node_t *node; - - local_irq_save(flags); - - for (node = *list; node; list = &node->next, node = *list) { - if (node->dev_id == dev_id) { - *list = node->next; - /* Mark it as free. */ - node->handler = NULL; - local_irq_restore(flags); - return; - } - } - local_irq_restore(flags); - printk (KERN_INFO "%s: tried to remove invalid irq\n", __FUNCTION__); + pquicc->intr_cimr |= (1 << irq); } -#endif -int request_irq( - unsigned int irq, - irqreturn_t (*handler)(int, void *, struct pt_regs *), - unsigned long flags, - const char *devname, - void *dev_id) +void disable_vector(unsigned int irq) { - int mask = (1<<irq); - - irq += (CPM_VECTOR_BASE<<4); - - if (irq >= INTERNAL_IRQS) { - printk (KERN_ERR "%s: Unknown IRQ %d from %s\n", __FUNCTION__, irq, devname); - return -ENXIO; - } - - if (!(int_irq_list[irq].flags & IRQ_FLG_STD)) { - if (int_irq_list[irq].flags & IRQ_FLG_LOCK) { - printk(KERN_ERR "%s: IRQ %d from %s is not replaceable\n", - __FUNCTION__, irq, int_irq_list[irq].devname); - return -EBUSY; - } - if (flags & IRQ_FLG_REPLACE) { - printk(KERN_ERR "%s: %s can't replace IRQ %d from %s\n", - __FUNCTION__, devname, irq, int_irq_list[irq].devname); - return -EBUSY; - } - } - int_irq_list[irq].handler = handler; - int_irq_list[irq].flags = flags; - int_irq_list[irq].dev_id = dev_id; - int_irq_list[irq].devname = devname; - - /* enable in the CIMR */ - if (!int_irq_ablecount[irq]) - pquicc->intr_cimr |= mask; - /* *(volatile unsigned long *)0xfffff304 &= ~(1<<irq); */ - - return 0; + pquicc->intr_cimr &= ~(1 << irq); } -EXPORT_SYMBOL(request_irq); - -void free_irq(unsigned int irq, void *dev_id) +void ack_vector(unsigned int irq) { - if (irq >= INTERNAL_IRQS) { - printk (KERN_ERR "%s: Unknown IRQ %d\n", __FUNCTION__, irq); - return; - } - - if (int_irq_list[irq].dev_id != dev_id) - printk(KERN_INFO "%s: removing probably wrong IRQ %d from %s\n", - __FUNCTION__, irq, int_irq_list[irq].devname); - int_irq_list[irq].handler = NULL; - int_irq_list[irq].flags = IRQ_FLG_STD; - int_irq_list[irq].dev_id = NULL; - int_irq_list[irq].devname = NULL; - - *(volatile unsigned long *)0xfffff304 |= 1<<irq; + pquicc->intr_cisr = (1 << irq); } -EXPORT_SYMBOL(free_irq); - -#if 0 -/* - * Enable/disable a particular machine specific interrupt source. - * Note that this may affect other interrupts in case of a shared interrupt. - * This function should only be called for a _very_ short time to change some - * internal data, that may not be changed by the interrupt at the same time. - * int_(enable|disable)_irq calls may also be nested. - */ -void M68360_enable_irq(unsigned int irq) -{ - if (irq >= INTERNAL_IRQS) { - printk(KERN_ERR "%s: Unknown IRQ %d\n", __FUNCTION__, irq); - return; - } - - if (--int_irq_ablecount[irq]) - return; - - /* enable the interrupt */ - *(volatile unsigned long *)0xfffff304 &= ~(1<<irq); -} - -void M68360_disable_irq(unsigned int irq) -{ - if (irq >= INTERNAL_IRQS) { - printk(KERN_ERR "%s: Unknown IRQ %d\n", __FUNCTION__, irq); - return; - } - - if (int_irq_ablecount[irq]++) - return; - - /* disable the interrupt */ - *(volatile unsigned long *)0xfffff304 |= 1<<irq; -} -#endif - -int show_interrupts(struct seq_file *p, void *v) -{ - int i = *(loff_t *) v; - - if (i < NR_IRQS) { - if (int_irq_list[i].devname) { - seq_printf(p, "%3d: %10u ", i, kstat_cpu(0).irqs[i]); - if (int_irq_list[i].flags & IRQ_FLG_LOCK) - seq_printf(p, "L "); - else - seq_printf(p, " "); - seq_printf(p, "%s\n", int_irq_list[i].devname); - } - } - if (i == NR_IRQS) - seq_printf(p, " : %10u spurious\n", num_spurious); - - return 0; -} - -/* The 68k family did not have a good way to determine the source - * of interrupts until later in the family. The EC000 core does - * not provide the vector number on the stack, we vector everything - * into one vector and look in the blasted mask register... - * This code is designed to be fast, almost constant time, not clean! - */ -void process_int(int vec, struct pt_regs *fp) -{ - int irq; - int mask; - - /* unsigned long pend = *(volatile unsigned long *)0xfffff30c; */ - - /* irq = vec + (CPM_VECTOR_BASE<<4); */ - irq = vec; - - /* unsigned long pend = *(volatile unsigned long *)pquicc->intr_cipr; */ - - /* Bugger all that weirdness. For the moment, I seem to know where I came from; - * vec is passed from a specific ISR, so I'll use it. */ - - if (int_irq_list[irq].handler) { - int_irq_list[irq].handler(irq , int_irq_list[irq].dev_id, fp); - kstat_cpu(0).irqs[irq]++; - pquicc->intr_cisr = (1 << vec); /* indicate that irq has been serviced */ - } else { - printk(KERN_ERR "unregistered interrupt %d!\nTurning it off in the CIMR...\n", irq); - /* *(volatile unsigned long *)0xfffff304 |= mask; */ - pquicc->intr_cimr &= ~(1 << vec); - num_spurious += 1; - } - return(IRQ_HANDLED); -} diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S index 9b9992cd562a..bc9bae2a73f4 100644 --- a/arch/mips/kernel/vmlinux.lds.S +++ b/arch/mips/kernel/vmlinux.lds.S @@ -119,10 +119,7 @@ SECTIONS .init.ramfs : { *(.init.ramfs) } __initramfs_end = .; #endif - . = ALIGN(_PAGE_SIZE); - __per_cpu_start = .; - .data.percpu : { *(.data.percpu) } - __per_cpu_end = .; + PERCPU(_PAGE_SIZE) . = ALIGN(_PAGE_SIZE); __init_end = .; /* freed after init ends here */ diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c index 7ebea331edb8..521771b373de 100644 --- a/arch/mips/mm/fault.c +++ b/arch/mips/mm/fault.c @@ -39,6 +39,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write, struct mm_struct *mm = tsk->mm; const int field = sizeof(unsigned long) * 2; siginfo_t info; + int fault; #if 0 printk("Cpu%d[%s:%d:%0*lx:%ld:%0*lx]\n", raw_smp_processor_id(), @@ -102,20 +103,18 @@ survive: * make sure we exit gracefully rather than endlessly redo * the fault. */ - switch (handle_mm_fault(mm, vma, address, write)) { - case VM_FAULT_MINOR: - tsk->min_flt++; - break; - case VM_FAULT_MAJOR: - tsk->maj_flt++; - break; - case VM_FAULT_SIGBUS: - goto do_sigbus; - case VM_FAULT_OOM: - goto out_of_memory; - default: + fault = handle_mm_fault(mm, vma, address, write); + if (unlikely(fault & VM_FAULT_ERROR)) { + if (fault & VM_FAULT_OOM) + goto out_of_memory; + else if (fault & VM_FAULT_SIGBUS) + goto do_sigbus; BUG(); } + if (fault & VM_FAULT_MAJOR) + tsk->maj_flt++; + else + tsk->min_flt++; up_read(&mm->mmap_sem); return; diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S index 4d96ba4b9849..d4e6a93c8d9a 100644 --- a/arch/parisc/kernel/vmlinux.lds.S +++ b/arch/parisc/kernel/vmlinux.lds.S @@ -181,10 +181,9 @@ SECTIONS .init.ramfs : { *(.init.ramfs) } __initramfs_end = .; #endif - . = ALIGN(ASM_PAGE_SIZE); - __per_cpu_start = .; - .data.percpu : { *(.data.percpu) } - __per_cpu_end = .; + + PERCPU(ASM_PAGE_SIZE) + . = ALIGN(ASM_PAGE_SIZE); __init_end = .; /* freed after init ends here */ diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c index f6f67554c623..7899ab87785a 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -147,6 +147,7 @@ void do_page_fault(struct pt_regs *regs, unsigned long code, struct mm_struct *mm = tsk->mm; const struct exception_table_entry *fix; unsigned long acc_type; + int fault; if (in_atomic() || !mm) goto no_context; @@ -173,23 +174,23 @@ good_area: * fault. */ - switch (handle_mm_fault(mm, vma, address, (acc_type & VM_WRITE) != 0)) { - case VM_FAULT_MINOR: - ++current->min_flt; - break; - case VM_FAULT_MAJOR: - ++current->maj_flt; - break; - case VM_FAULT_SIGBUS: + fault = handle_mm_fault(mm, vma, address, (acc_type & VM_WRITE) != 0); + if (unlikely(fault & VM_FAULT_ERROR)) { /* * We hit a shared mapping outside of the file, or some * other thing happened to us that made us unable to * handle the page fault gracefully. */ - goto bad_area; - default: - goto out_of_memory; + if (fault & VM_FAULT_OOM) + goto out_of_memory; + else if (fault & VM_FAULT_SIGBUS) + goto bad_area; + BUG(); } + if (fault & VM_FAULT_MAJOR) + current->maj_flt++; + else + current->min_flt++; up_read(&mm->mmap_sem); return; diff --git a/arch/powerpc/boot/dts/mpc8641_hpcn.dts b/arch/powerpc/boot/dts/mpc8641_hpcn.dts index db56a02b748f..6a78a2b37c08 100644 --- a/arch/powerpc/boot/dts/mpc8641_hpcn.dts +++ b/arch/powerpc/boot/dts/mpc8641_hpcn.dts @@ -131,6 +131,7 @@ interrupts = <1d 2 1e 2 22 2>; interrupt-parent = <&mpic>; phy-handle = <&phy0>; + phy-connection-type = "rgmii-id"; }; ethernet@25000 { @@ -150,6 +151,7 @@ interrupts = <23 2 24 2 28 2>; interrupt-parent = <&mpic>; phy-handle = <&phy1>; + phy-connection-type = "rgmii-id"; }; ethernet@26000 { @@ -169,6 +171,7 @@ interrupts = <1F 2 20 2 21 2>; interrupt-parent = <&mpic>; phy-handle = <&phy2>; + phy-connection-type = "rgmii-id"; }; ethernet@27000 { @@ -188,6 +191,7 @@ interrupts = <25 2 26 2 27 2>; interrupt-parent = <&mpic>; phy-handle = <&phy3>; + phy-connection-type = "rgmii-id"; }; serial@4500 { device_type = "serial"; diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 0c96611f02f4..440f5a87271f 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -492,6 +492,13 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, return ret; } +#ifdef CONFIG_PPC64 +unsigned long arch_deref_entry_point(void *entry) +{ + return (unsigned long)(((func_descr_t *)entry)->entry); +} +#endif + int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) { struct jprobe *jp = container_of(p, struct jprobe, kp); @@ -500,11 +507,9 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) memcpy(&kcb->jprobe_saved_regs, regs, sizeof(struct pt_regs)); /* setup return addr to the jprobe handler routine */ + regs->nip = arch_deref_entry_point(jp->entry); #ifdef CONFIG_PPC64 - regs->nip = (unsigned long)(((func_descr_t *)jp->entry)->entry); regs->gpr[2] = (unsigned long)(((func_descr_t *)jp->entry)->toc); -#else - regs->nip = (unsigned long)jp->entry; #endif return 1; diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c index c492cee90e0f..6444eaa30a2f 100644 --- a/arch/powerpc/kernel/lparcfg.c +++ b/arch/powerpc/kernel/lparcfg.c @@ -248,7 +248,7 @@ static void parse_system_parameter_string(struct seq_file *m) } else { int splpar_strlen; int idx, w_idx; - char *workbuffer = kmalloc(SPLPAR_MAXLENGTH, GFP_KERNEL); + char *workbuffer = kzalloc(SPLPAR_MAXLENGTH, GFP_KERNEL); if (!workbuffer) { printk(KERN_ERR "%s %s kmalloc failure at line %d \n", __FILE__, __FUNCTION__, __LINE__); @@ -261,7 +261,6 @@ static void parse_system_parameter_string(struct seq_file *m) splpar_strlen = local_buffer[0] * 256 + local_buffer[1]; local_buffer += 2; /* step over strlen value */ - memset(workbuffer, 0, SPLPAR_MAXLENGTH); w_idx = 0; idx = 0; while ((*local_buffer) && (idx < splpar_strlen)) { diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c index 9536ed7f247c..8ded4e7dc87e 100644 --- a/arch/powerpc/kernel/of_platform.c +++ b/arch/powerpc/kernel/of_platform.c @@ -222,10 +222,9 @@ struct of_device* of_platform_device_create(struct device_node *np, { struct of_device *dev; - dev = kmalloc(sizeof(*dev), GFP_KERNEL); + dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) return NULL; - memset(dev, 0, sizeof(*dev)); dev->node = of_node_get(np); dev->dma_mask = 0xffffffffUL; diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index ae4acd84143d..7a1f5a0964de 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -63,6 +63,8 @@ SECTIONS __stop___ex_table = .; } + NOTES + BUG_TABLE /* @@ -144,6 +146,7 @@ SECTIONS .data.percpu : { __per_cpu_start = .; *(.data.percpu) + *(.data.percpu.shared_aligned) __per_cpu_end = .; } diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 0ece51310bfe..3767211b3d0f 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -145,7 +145,7 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, struct mm_struct *mm = current->mm; siginfo_t info; int code = SEGV_MAPERR; - int is_write = 0; + int is_write = 0, ret; int trap = TRAP(regs); int is_exec = trap == 0x400; @@ -330,22 +330,18 @@ good_area: * the fault. */ survive: - switch (handle_mm_fault(mm, vma, address, is_write)) { - - case VM_FAULT_MINOR: - current->min_flt++; - break; - case VM_FAULT_MAJOR: - current->maj_flt++; - break; - case VM_FAULT_SIGBUS: - goto do_sigbus; - case VM_FAULT_OOM: - goto out_of_memory; - default: + ret = handle_mm_fault(mm, vma, address, is_write); + if (unlikely(ret & VM_FAULT_ERROR)) { + if (ret & VM_FAULT_OOM) + goto out_of_memory; + else if (ret & VM_FAULT_SIGBUS) + goto do_sigbus; BUG(); } - + if (ret & VM_FAULT_MAJOR) + current->maj_flt++; + else + current->min_flt++; up_read(&mm->mmap_sem); return 0; diff --git a/arch/powerpc/platforms/cell/spufs/fault.c b/arch/powerpc/platforms/cell/spufs/fault.c index e064d0c0d80e..07f88de0544d 100644 --- a/arch/powerpc/platforms/cell/spufs/fault.c +++ b/arch/powerpc/platforms/cell/spufs/fault.c @@ -74,23 +74,21 @@ good_area: goto bad_area; } ret = 0; - *flt = handle_mm_fault(mm, vma, ea, is_write); - switch (*flt) { - case VM_FAULT_MINOR: - current->min_flt++; - break; - case VM_FAULT_MAJOR: - current->maj_flt++; - break; - case VM_FAULT_SIGBUS: - ret = -EFAULT; - goto bad_area; - case VM_FAULT_OOM: - ret = -ENOMEM; - goto bad_area; - default: + fault = handle_mm_fault(mm, vma, ea, is_write); + if (unlikely(fault & VM_FAULT_ERROR)) { + if (fault & VM_FAULT_OOM) { + ret = -ENOMEM; + goto bad_area; + } else if (fault & VM_FAULT_SIGBUS) { + ret = -EFAULT; + goto bad_area; + } BUG(); } + if (fault & VM_FAULT_MAJOR) + current->maj_flt++; + else + current->min_flt++; up_read(&mm->mmap_sem); return ret; diff --git a/arch/powerpc/sysdev/fsl_soc.c b/arch/powerpc/sysdev/fsl_soc.c index c0ddc80d8160..3289fab01e92 100644 --- a/arch/powerpc/sysdev/fsl_soc.c +++ b/arch/powerpc/sysdev/fsl_soc.c @@ -197,6 +197,7 @@ static int __init gfar_of_init(void) struct gianfar_platform_data gfar_data; const unsigned int *id; const char *model; + const char *ctype; const void *mac_addr; const phandle *ph; int n_res = 2; @@ -254,6 +255,14 @@ static int __init gfar_of_init(void) FSL_GIANFAR_DEV_HAS_VLAN | FSL_GIANFAR_DEV_HAS_EXTENDED_HASH; + ctype = of_get_property(np, "phy-connection-type", NULL); + + /* We only care about rgmii-id. The rest are autodetected */ + if (ctype && !strcmp(ctype, "rgmii-id")) + gfar_data.interface = PHY_INTERFACE_MODE_RGMII_ID; + else + gfar_data.interface = PHY_INTERFACE_MODE_MII; + ph = of_get_property(np, "phy-handle", NULL); phy = of_find_node_by_phandle(*ph); diff --git a/arch/ppc/kernel/vmlinux.lds.S b/arch/ppc/kernel/vmlinux.lds.S index 19db8746ff14..c0aac3ff9e91 100644 --- a/arch/ppc/kernel/vmlinux.lds.S +++ b/arch/ppc/kernel/vmlinux.lds.S @@ -130,10 +130,7 @@ SECTIONS __ftr_fixup : { *(__ftr_fixup) } __stop___ftr_fixup = .; - . = ALIGN(4096); - __per_cpu_start = .; - .data.percpu : { *(.data.percpu) } - __per_cpu_end = .; + PERCPU(4096) #ifdef CONFIG_BLK_DEV_INITRD . = ALIGN(4096); diff --git a/arch/ppc/mm/fault.c b/arch/ppc/mm/fault.c index 465f451f3bc3..b98244e277fb 100644 --- a/arch/ppc/mm/fault.c +++ b/arch/ppc/mm/fault.c @@ -96,6 +96,7 @@ int do_page_fault(struct pt_regs *regs, unsigned long address, struct mm_struct *mm = current->mm; siginfo_t info; int code = SEGV_MAPERR; + int fault; #if defined(CONFIG_4xx) || defined (CONFIG_BOOKE) int is_write = error_code & ESR_DST; #else @@ -249,20 +250,18 @@ good_area: * the fault. */ survive: - switch (handle_mm_fault(mm, vma, address, is_write)) { - case VM_FAULT_MINOR: - current->min_flt++; - break; - case VM_FAULT_MAJOR: - current->maj_flt++; - break; - case VM_FAULT_SIGBUS: - goto do_sigbus; - case VM_FAULT_OOM: - goto out_of_memory; - default: + fault = handle_mm_fault(mm, vma, address, is_write); + if (unlikely(fault & VM_FAULT_ERROR)) { + if (fault & VM_FAULT_OOM) + goto out_of_memory; + else if (fault & VM_FAULT_SIGBUS) + goto do_sigbus; BUG(); } + if (fault & VM_FAULT_MAJOR) + current->maj_flt++; + else + current->min_flt++; up_read(&mm->mmap_sem); /* diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 7158a804a5e4..6ab7d4ee13a4 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -45,6 +45,8 @@ SECTIONS __ex_table : { *(__ex_table) } __stop___ex_table = .; + NOTES + BUG_TABLE .data : { /* Data */ @@ -107,10 +109,7 @@ SECTIONS . = ALIGN(2); __initramfs_end = .; #endif - . = ALIGN(4096); - __per_cpu_start = .; - .data.percpu : { *(.data.percpu) } - __per_cpu_end = .; + PERCPU(4096) . = ALIGN(4096); __init_end = .; /* freed after init ends here */ diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c index 63181671e3e3..60604b2819b2 100644 --- a/arch/s390/lib/uaccess_pt.c +++ b/arch/s390/lib/uaccess_pt.c @@ -20,6 +20,7 @@ static int __handle_fault(struct mm_struct *mm, unsigned long address, { struct vm_area_struct *vma; int ret = -EFAULT; + int fault; if (in_atomic()) return ret; @@ -44,20 +45,18 @@ static int __handle_fault(struct mm_struct *mm, unsigned long address, } survive: - switch (handle_mm_fault(mm, vma, address, write_access)) { - case VM_FAULT_MINOR: - current->min_flt++; - break; - case VM_FAULT_MAJOR: - current->maj_flt++; - break; - case VM_FAULT_SIGBUS: - goto out_sigbus; - case VM_FAULT_OOM: - goto out_of_memory; - default: + fault = handle_mm_fault(mm, vma, address, write_access); + if (unlikely(fault & VM_FAULT_ERROR)) { + if (fault & VM_FAULT_OOM) + goto out_of_memory; + else if (fault & VM_FAULT_SIGBUS) + goto out_sigbus; BUG(); } + if (fault & VM_FAULT_MAJOR) + current->maj_flt++; + else + current->min_flt++; ret = 0; out: up_read(&mm->mmap_sem); diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index d855cdbf8fb8..54055194e9af 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -307,6 +307,7 @@ do_exception(struct pt_regs *regs, unsigned long error_code, int write) unsigned long address; int space; int si_code; + int fault; if (notify_page_fault(regs, error_code)) return; @@ -377,23 +378,22 @@ survive: * make sure we exit gracefully rather than endlessly redo * the fault. */ - switch (handle_mm_fault(mm, vma, address, write)) { - case VM_FAULT_MINOR: - tsk->min_flt++; - break; - case VM_FAULT_MAJOR: - tsk->maj_flt++; - break; - case VM_FAULT_SIGBUS: - do_sigbus(regs, error_code, address); - return; - case VM_FAULT_OOM: - if (do_out_of_memory(regs, error_code, address)) - goto survive; - return; - default: + fault = handle_mm_fault(mm, vma, address, write); + if (unlikely(fault & VM_FAULT_ERROR)) { + if (fault & VM_FAULT_OOM) { + if (do_out_of_memory(regs, error_code, address)) + goto survive; + return; + } else if (fault & VM_FAULT_SIGBUS) { + do_sigbus(regs, error_code, address); + return; + } BUG(); } + if (fault & VM_FAULT_MAJOR) + tsk->maj_flt++; + else + tsk->min_flt++; up_read(&mm->mmap_sem); /* diff --git a/arch/sh/kernel/vmlinux.lds.S b/arch/sh/kernel/vmlinux.lds.S index 0696402f446a..5ba216180b30 100644 --- a/arch/sh/kernel/vmlinux.lds.S +++ b/arch/sh/kernel/vmlinux.lds.S @@ -60,10 +60,7 @@ SECTIONS . = ALIGN(PAGE_SIZE); __nosave_end = .; - . = ALIGN(PAGE_SIZE); - __per_cpu_start = .; - .data.percpu : { *(.data.percpu) } - __per_cpu_end = .; + PERCPU(PAGE_SIZE) .data.cacheline_aligned : { *(.data.cacheline_aligned) } _edata = .; /* End of data section */ diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c index 0b3eaf6fbb28..964c6767dc73 100644 --- a/arch/sh/mm/fault.c +++ b/arch/sh/mm/fault.c @@ -33,6 +33,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, struct mm_struct *mm; struct vm_area_struct * vma; int si_code; + int fault; siginfo_t info; trace_hardirqs_on(); @@ -124,20 +125,18 @@ good_area: * the fault. */ survive: - switch (handle_mm_fault(mm, vma, address, writeaccess)) { - case VM_FAULT_MINOR: - tsk->min_flt++; - break; - case VM_FAULT_MAJOR: - tsk->maj_flt++; - break; - case VM_FAULT_SIGBUS: - goto do_sigbus; - case VM_FAULT_OOM: + fault = handle_mm_fault(mm, vma, address, writeaccess); + if (unlikely(fault & VM_FAULT_ERROR)) { + if (fault & VM_FAULT_OOM) goto out_of_memory; - default: - BUG(); + else if (fault & VM_FAULT_SIGBUS) + goto do_sigbus; + BUG(); } + if (fault & VM_FAULT_MAJOR) + tsk->maj_flt++; + else + tsk->min_flt++; up_read(&mm->mmap_sem); return; diff --git a/arch/sh64/kernel/vmlinux.lds.S b/arch/sh64/kernel/vmlinux.lds.S index 02aea86c5907..8ac9c7c5f848 100644 --- a/arch/sh64/kernel/vmlinux.lds.S +++ b/arch/sh64/kernel/vmlinux.lds.S @@ -87,7 +87,10 @@ SECTIONS . = ALIGN(PAGE_SIZE); __per_cpu_start = .; - .data.percpu : C_PHYS(.data.percpu) { *(.data.percpu) } + .data.percpu : C_PHYS(.data.percpu) { + *(.data.percpu) + *(.data.percpu.shared_aligned) + } __per_cpu_end = . ; .data.cacheline_aligned : C_PHYS(.data.cacheline_aligned) { *(.data.cacheline_aligned) } diff --git a/arch/sh64/mm/fault.c b/arch/sh64/mm/fault.c index 3cd93ba5d826..0d069d82141f 100644 --- a/arch/sh64/mm/fault.c +++ b/arch/sh64/mm/fault.c @@ -127,6 +127,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long writeaccess, struct vm_area_struct * vma; const struct exception_table_entry *fixup; pte_t *pte; + int fault; #if defined(CONFIG_SH64_PROC_TLB) ++calls_to_do_slow_page_fault; @@ -221,18 +222,19 @@ good_area: * the fault. */ survive: - switch (handle_mm_fault(mm, vma, address, writeaccess)) { - case VM_FAULT_MINOR: - tsk->min_flt++; - break; - case VM_FAULT_MAJOR: - tsk->maj_flt++; - break; - case VM_FAULT_SIGBUS: - goto do_sigbus; - default: - goto out_of_memory; + fault = handle_mm_fault(mm, vma, address, writeaccess); + if (unlikely(fault & VM_FAULT_ERROR)) { + if (fault & VM_FAULT_OOM) + goto out_of_memory; + else if (fault & VM_FAULT_SIGBUS) + goto do_sigbus; + BUG(); } + if (fault & VM_FAULT_MAJOR) + tsk->maj_flt++; + else + tsk->min_flt++; + /* If we get here, the page fault has been handled. Do the TLB refill now from the newly-setup PTE, to avoid having to fault again right away on the same instruction. */ diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S index f75a1b822789..47583887abc6 100644 --- a/arch/sparc/kernel/vmlinux.lds.S +++ b/arch/sparc/kernel/vmlinux.lds.S @@ -65,10 +65,7 @@ SECTIONS __initramfs_end = .; #endif - . = ALIGN(4096); - __per_cpu_start = .; - .data.percpu : { *(.data.percpu) } - __per_cpu_end = .; + PERCPU(4096) . = ALIGN(4096); __init_end = .; . = ALIGN(32); diff --git a/arch/sparc/mm/fault.c b/arch/sparc/mm/fault.c index c3483365db4b..50747fe44356 100644 --- a/arch/sparc/mm/fault.c +++ b/arch/sparc/mm/fault.c @@ -226,6 +226,7 @@ asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write, unsigned long g2; siginfo_t info; int from_user = !(regs->psr & PSR_PS); + int fault; if(text_fault) address = regs->pc; @@ -289,19 +290,18 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. */ - switch (handle_mm_fault(mm, vma, address, write)) { - case VM_FAULT_SIGBUS: - goto do_sigbus; - case VM_FAULT_OOM: - goto out_of_memory; - case VM_FAULT_MAJOR: + fault = handle_mm_fault(mm, vma, address, write); + if (unlikely(fault & VM_FAULT_ERROR)) { + if (fault & VM_FAULT_OOM) + goto out_of_memory; + else if (fault & VM_FAULT_SIGBUS) + goto do_sigbus; + BUG(); + } + if (fault & VM_FAULT_MAJOR) current->maj_flt++; - break; - case VM_FAULT_MINOR: - default: + else current->min_flt++; - break; - } up_read(&mm->mmap_sem); return; diff --git a/arch/sparc64/kernel/vmlinux.lds.S b/arch/sparc64/kernel/vmlinux.lds.S index 3ad10f3027e4..481861764deb 100644 --- a/arch/sparc64/kernel/vmlinux.lds.S +++ b/arch/sparc64/kernel/vmlinux.lds.S @@ -90,10 +90,8 @@ SECTIONS __initramfs_end = .; #endif - . = ALIGN(PAGE_SIZE); - __per_cpu_start = .; - .data.percpu : { *(.data.percpu) } - __per_cpu_end = .; + PERCPU(PAGE_SIZE) + . = ALIGN(PAGE_SIZE); __init_end = .; __bss_start = .; diff --git a/arch/sparc64/mm/fault.c b/arch/sparc64/mm/fault.c index b582024d2199..17123e9ecf78 100644 --- a/arch/sparc64/mm/fault.c +++ b/arch/sparc64/mm/fault.c @@ -278,7 +278,7 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs) struct mm_struct *mm = current->mm; struct vm_area_struct *vma; unsigned int insn = 0; - int si_code, fault_code; + int si_code, fault_code, fault; unsigned long address, mm_rss; fault_code = get_thread_fault_code(); @@ -415,20 +415,18 @@ good_area: goto bad_area; } - switch (handle_mm_fault(mm, vma, address, (fault_code & FAULT_CODE_WRITE))) { - case VM_FAULT_MINOR: - current->min_flt++; - break; - case VM_FAULT_MAJOR: - current->maj_flt++; - break; - case VM_FAULT_SIGBUS: - goto do_sigbus; - case VM_FAULT_OOM: - goto out_of_memory; - default: + fault = handle_mm_fault(mm, vma, address, (fault_code & FAULT_CODE_WRITE)); + if (unlikely(fault & VM_FAULT_ERROR)) { + if (fault & VM_FAULT_OOM) + goto out_of_memory; + else if (fault & VM_FAULT_SIGBUS) + goto do_sigbus; BUG(); } + if (fault & VM_FAULT_MAJOR) + current->maj_flt++; + else + current->min_flt++; up_read(&mm->mmap_sem); diff --git a/arch/sparc64/solaris/socksys.c b/arch/sparc64/solaris/socksys.c index e94f6e5d9455..7736411f244f 100644 --- a/arch/sparc64/solaris/socksys.c +++ b/arch/sparc64/solaris/socksys.c @@ -199,6 +199,5 @@ int __init init_socksys(void) void __exit cleanup_socksys(void) { - if (unregister_chrdev(30, "socksys")) - printk ("Couldn't unregister socksys character device\n"); + unregister_chrdev(30, "socksys"); } diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index abab90c3803f..3850d53f79fd 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -76,23 +76,24 @@ good_area: goto out; do { + int fault; survive: - switch (handle_mm_fault(mm, vma, address, is_write)){ - case VM_FAULT_MINOR: - current->min_flt++; - break; - case VM_FAULT_MAJOR: - current->maj_flt++; - break; - case VM_FAULT_SIGBUS: - err = -EACCES; - goto out; - case VM_FAULT_OOM: - err = -ENOMEM; - goto out_of_memory; - default: + fault = handle_mm_fault(mm, vma, address, is_write); + if (unlikely(fault & VM_FAULT_ERROR)) { + if (fault & VM_FAULT_OOM) { + err = -ENOMEM; + goto out_of_memory; + } else if (fault & VM_FAULT_SIGBUS) { + err = -EACCES; + goto out; + } BUG(); } + if (fault & VM_FAULT_MAJOR) + current->maj_flt++; + else + current->min_flt++; + pgd = pgd_offset(mm, address); pud = pud_offset(pgd, address); pmd = pmd_offset(pud, address); diff --git a/arch/x86_64/ia32/ia32_aout.c b/arch/x86_64/ia32/ia32_aout.c index fe83edb93c10..08781370256d 100644 --- a/arch/x86_64/ia32/ia32_aout.c +++ b/arch/x86_64/ia32/ia32_aout.c @@ -404,7 +404,7 @@ beyond_if: set_brk(current->mm->start_brk, current->mm->brk); - retval = ia32_setup_arg_pages(bprm, IA32_STACK_TOP, EXSTACK_DEFAULT); + retval = setup_arg_pages(bprm, IA32_STACK_TOP, EXSTACK_DEFAULT); if (retval < 0) { /* Someone check-me: is this error path enough? */ send_sig(SIGKILL, current, 0); diff --git a/arch/x86_64/ia32/ia32_binfmt.c b/arch/x86_64/ia32/ia32_binfmt.c index 185399baaf6d..ed56a8806eab 100644 --- a/arch/x86_64/ia32/ia32_binfmt.c +++ b/arch/x86_64/ia32/ia32_binfmt.c @@ -232,9 +232,6 @@ do { \ #define load_elf_binary load_elf32_binary #define ELF_PLAT_INIT(r, load_addr) elf32_init(r) -#define setup_arg_pages(bprm, stack_top, exec_stack) \ - ia32_setup_arg_pages(bprm, stack_top, exec_stack) -int ia32_setup_arg_pages(struct linux_binprm *bprm, unsigned long stack_top, int executable_stack); #undef start_thread #define start_thread(regs,new_rip,new_rsp) do { \ @@ -286,61 +283,6 @@ static void elf32_init(struct pt_regs *regs) me->thread.es = __USER_DS; } -int ia32_setup_arg_pages(struct linux_binprm *bprm, unsigned long stack_top, - int executable_stack) -{ - unsigned long stack_base; - struct vm_area_struct *mpnt; - struct mm_struct *mm = current->mm; - int i, ret; - - stack_base = stack_top - MAX_ARG_PAGES * PAGE_SIZE; - mm->arg_start = bprm->p + stack_base; - - bprm->p += stack_base; - if (bprm->loader) - bprm->loader += stack_base; - bprm->exec += stack_base; - - mpnt = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); - if (!mpnt) - return -ENOMEM; - - down_write(&mm->mmap_sem); - { - mpnt->vm_mm = mm; - mpnt->vm_start = PAGE_MASK & (unsigned long) bprm->p; - mpnt->vm_end = stack_top; - if (executable_stack == EXSTACK_ENABLE_X) - mpnt->vm_flags = VM_STACK_FLAGS | VM_EXEC; - else if (executable_stack == EXSTACK_DISABLE_X) - mpnt->vm_flags = VM_STACK_FLAGS & ~VM_EXEC; - else - mpnt->vm_flags = VM_STACK_FLAGS; - mpnt->vm_page_prot = (mpnt->vm_flags & VM_EXEC) ? - PAGE_COPY_EXEC : PAGE_COPY; - if ((ret = insert_vm_struct(mm, mpnt))) { - up_write(&mm->mmap_sem); - kmem_cache_free(vm_area_cachep, mpnt); - return ret; - } - mm->stack_vm = mm->total_vm = vma_pages(mpnt); - } - - for (i = 0 ; i < MAX_ARG_PAGES ; i++) { - struct page *page = bprm->page[i]; - if (page) { - bprm->page[i] = NULL; - install_arg_page(mpnt, page, stack_base); - } - stack_base += PAGE_SIZE; - } - up_write(&mm->mmap_sem); - - return 0; -} -EXPORT_SYMBOL(ia32_setup_arg_pages); - #ifdef CONFIG_SYSCTL /* Register vsyscall32 into the ABI table */ #include <linux/sysctl.h> diff --git a/arch/x86_64/kernel/acpi/sleep.c b/arch/x86_64/kernel/acpi/sleep.c index 195b7034a148..4277f2b27e6d 100644 --- a/arch/x86_64/kernel/acpi/sleep.c +++ b/arch/x86_64/kernel/acpi/sleep.c @@ -55,7 +55,7 @@ /* address in low memory of the wakeup routine. */ unsigned long acpi_wakeup_address = 0; -unsigned long acpi_video_flags; +unsigned long acpi_realmode_flags; extern char wakeup_start, wakeup_end; extern unsigned long acpi_copy_wakeup_routine(unsigned long); @@ -103,9 +103,11 @@ static int __init acpi_sleep_setup(char *str) { while ((str != NULL) && (*str != '\0')) { if (strncmp(str, "s3_bios", 7) == 0) - acpi_video_flags = 1; + acpi_realmode_flags |= 1; if (strncmp(str, "s3_mode", 7) == 0) - acpi_video_flags |= 2; + acpi_realmode_flags |= 2; + if (strncmp(str, "s3_beep", 7) == 0) + acpi_realmode_flags |= 4; str = strchr(str, ','); if (str != NULL) str += strspn(str, ", \t"); diff --git a/arch/x86_64/kernel/acpi/wakeup.S b/arch/x86_64/kernel/acpi/wakeup.S index 8550a6ffa275..13f1480cbec9 100644 --- a/arch/x86_64/kernel/acpi/wakeup.S +++ b/arch/x86_64/kernel/acpi/wakeup.S @@ -16,6 +16,21 @@ # cs = 0x1234, eip = 0x05 # +#define BEEP \ + inb $97, %al; \ + outb %al, $0x80; \ + movb $3, %al; \ + outb %al, $97; \ + outb %al, $0x80; \ + movb $-74, %al; \ + outb %al, $67; \ + outb %al, $0x80; \ + movb $-119, %al; \ + outb %al, $66; \ + outb %al, $0x80; \ + movb $15, %al; \ + outb %al, $66; + ALIGN .align 16 @@ -33,6 +48,13 @@ wakeup_code: movw %cs, %ax movw %ax, %ds # Make ds:0 point to wakeup_start movw %ax, %ss + + # Data segment must be set up before we can see whether to beep. + testl $4, realmode_flags - wakeup_code + jz 1f + BEEP +1: + # Private stack is needed for ASUS board mov $(wakeup_stack - wakeup_code), %sp @@ -48,7 +70,7 @@ wakeup_code: testl %eax, %eax jnz no_longmode - testl $1, video_flags - wakeup_code + testl $1, realmode_flags - wakeup_code jz 1f lcall $0xc000,$3 movw %cs, %ax @@ -56,7 +78,7 @@ wakeup_code: movw %ax, %ss 1: - testl $2, video_flags - wakeup_code + testl $2, realmode_flags - wakeup_code jz 1f mov video_mode - wakeup_code, %ax call mode_seta @@ -230,7 +252,7 @@ gdt_48a: real_magic: .quad 0 video_mode: .quad 0 -video_flags: .quad 0 +realmode_flags: .quad 0 .code16 bogus_real_magic: @@ -346,8 +368,8 @@ ENTRY(acpi_copy_wakeup_routine) movl saved_video_mode, %edx movl %edx, video_mode - wakeup_start (,%rdi) - movl acpi_video_flags, %edx - movl %edx, video_flags - wakeup_start (,%rdi) + movl acpi_realmode_flags, %edx + movl %edx, realmode_flags - wakeup_start (,%rdi) movq $0x12345678, real_magic - wakeup_start (,%rdi) movq $0x123456789abcdef0, %rdx movq %rdx, saved_magic diff --git a/arch/x86_64/kernel/init_task.c b/arch/x86_64/kernel/init_task.c index 3dc5854ba21e..4ff33d4f8551 100644 --- a/arch/x86_64/kernel/init_task.c +++ b/arch/x86_64/kernel/init_task.c @@ -44,7 +44,7 @@ EXPORT_SYMBOL(init_task); * section. Since TSS's are completely CPU-local, we want them * on exact cacheline boundaries, to eliminate cacheline ping-pong. */ -DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_internodealigned_in_smp = INIT_TSS; +DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS; /* Copies of the original ist values from the tss are only accessed during * debugging, no special alignment required. diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index 74cbeb2e99a6..8713ad4a4db1 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -34,6 +34,10 @@ #include <linux/bug.h> #include <linux/kdebug.h> +#if defined(CONFIG_EDAC) +#include <linux/edac.h> +#endif + #include <asm/system.h> #include <asm/io.h> #include <asm/atomic.h> @@ -719,6 +723,13 @@ mem_parity_error(unsigned char reason, struct pt_regs * regs) reason); printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n"); +#if defined(CONFIG_EDAC) + if(edac_handler_set()) { + edac_atomic_assert_error(); + return; + } +#endif + if (panic_on_unrecovered_nmi) panic("NMI: Not continuing"); diff --git a/arch/x86_64/kernel/tsc.c b/arch/x86_64/kernel/tsc.c index 48f9a8e6aa91..e850aa01e1b3 100644 --- a/arch/x86_64/kernel/tsc.c +++ b/arch/x86_64/kernel/tsc.c @@ -44,7 +44,7 @@ unsigned long long sched_clock(void) static int tsc_unstable; -static inline int check_tsc_unstable(void) +inline int check_tsc_unstable(void) { return tsc_unstable; } diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S index dbccfda8364f..5c57ea4591c1 100644 --- a/arch/x86_64/kernel/vmlinux.lds.S +++ b/arch/x86_64/kernel/vmlinux.lds.S @@ -48,7 +48,9 @@ SECTIONS __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { *(__ex_table) } __stop___ex_table = .; - BUG_TABLE + NOTES :text :note + + BUG_TABLE :text RODATA @@ -194,10 +196,8 @@ SECTIONS __initramfs_end = .; #endif - . = ALIGN(4096); - __per_cpu_start = .; - .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { *(.data.percpu) } - __per_cpu_end = .; + PERCPU(4096) + . = ALIGN(4096); __init_end = .; diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c index 635e58d443d7..84f11728fc76 100644 --- a/arch/x86_64/mm/fault.c +++ b/arch/x86_64/mm/fault.c @@ -317,7 +317,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, struct vm_area_struct * vma; unsigned long address; const struct exception_table_entry *fixup; - int write; + int write, fault; unsigned long flags; siginfo_t info; @@ -450,19 +450,18 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. */ - switch (handle_mm_fault(mm, vma, address, write)) { - case VM_FAULT_MINOR: - tsk->min_flt++; - break; - case VM_FAULT_MAJOR: - tsk->maj_flt++; - break; - case VM_FAULT_SIGBUS: - goto do_sigbus; - default: - goto out_of_memory; + fault = handle_mm_fault(mm, vma, address, write); + if (unlikely(fault & VM_FAULT_ERROR)) { + if (fault & VM_FAULT_OOM) + goto out_of_memory; + else if (fault & VM_FAULT_SIGBUS) + goto do_sigbus; + BUG(); } - + if (fault & VM_FAULT_MAJOR) + tsk->maj_flt++; + else + tsk->min_flt++; up_read(&mm->mmap_sem); return; diff --git a/arch/xtensa/kernel/vmlinux.lds.S b/arch/xtensa/kernel/vmlinux.lds.S index b0582c3c5f8d..3e31512109f9 100644 --- a/arch/xtensa/kernel/vmlinux.lds.S +++ b/arch/xtensa/kernel/vmlinux.lds.S @@ -190,10 +190,7 @@ SECTIONS __initramfs_end = .; #endif - . = ALIGN(4096); - __per_cpu_start = .; - .data.percpu : { *(.data.percpu) } - __per_cpu_end = .; + PERCPU(4096) /* We need this dummy segment here */ diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c index 3dc6f2f07bbe..16004067add3 100644 --- a/arch/xtensa/mm/fault.c +++ b/arch/xtensa/mm/fault.c @@ -41,6 +41,7 @@ void do_page_fault(struct pt_regs *regs) siginfo_t info; int is_write, is_exec; + int fault; info.si_code = SEGV_MAPERR; @@ -102,20 +103,18 @@ good_area: * the fault. */ survive: - switch (handle_mm_fault(mm, vma, address, is_write)) { - case VM_FAULT_MINOR: - current->min_flt++; - break; - case VM_FAULT_MAJOR: - current->maj_flt++; - break; - case VM_FAULT_SIGBUS: - goto do_sigbus; - case VM_FAULT_OOM: - goto out_of_memory; - default: + fault = handle_mm_fault(mm, vma, address, is_write); + if (unlikely(fault & VM_FAULT_ERROR)) { + if (fault & VM_FAULT_OOM) + goto out_of_memory; + else if (fault & VM_FAULT_SIGBUS) + goto do_sigbus; BUG(); } + if (fault & VM_FAULT_MAJOR) + current->maj_flt++; + else + current->min_flt++; up_read(&mm->mmap_sem); return; diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index a26ba07955fe..71bdf88884b2 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -436,11 +436,10 @@ int sg_scsi_ioctl(struct file *file, struct request_queue *q, bytes = max(in_len, out_len); if (bytes) { - buffer = kmalloc(bytes, q->bounce_gfp | GFP_USER| __GFP_NOWARN); + buffer = kzalloc(bytes, q->bounce_gfp | GFP_USER| __GFP_NOWARN); if (!buffer) return -ENOMEM; - memset(buffer, 0, bytes); } rq = blk_get_request(q, in_len ? WRITE : READ, __GFP_WAIT); diff --git a/drivers/Kconfig b/drivers/Kconfig index 7916f4b86d23..707650ab77a7 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -84,4 +84,7 @@ source "drivers/auxdisplay/Kconfig" source "drivers/kvm/Kconfig" +source "drivers/uio/Kconfig" + +source "drivers/lguest/Kconfig" endmenu diff --git a/drivers/Makefile b/drivers/Makefile index 6d9d7fab77f5..0ea8e3237c0d 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -40,6 +40,7 @@ obj-$(CONFIG_ATA) += ata/ obj-$(CONFIG_FUSION) += message/ obj-$(CONFIG_FIREWIRE) += firewire/ obj-$(CONFIG_IEEE1394) += ieee1394/ +obj-$(CONFIG_UIO) += uio/ obj-y += cdrom/ obj-y += auxdisplay/ obj-$(CONFIG_MTD) += mtd/ @@ -72,6 +73,7 @@ obj-$(CONFIG_ISDN) += isdn/ obj-$(CONFIG_EDAC) += edac/ obj-$(CONFIG_MCA) += mca/ obj-$(CONFIG_EISA) += eisa/ +obj-$(CONFIG_LGUEST_GUEST) += lguest/ obj-$(CONFIG_CPU_FREQ) += cpufreq/ obj-$(CONFIG_MMC) += mmc/ obj-$(CONFIG_NEW_LEDS) += leds/ diff --git a/drivers/acpi/sleep/main.c b/drivers/acpi/sleep/main.c index bc7e16ec8393..42127c0d612c 100644 --- a/drivers/acpi/sleep/main.c +++ b/drivers/acpi/sleep/main.c @@ -217,10 +217,26 @@ static void acpi_hibernation_finish(void) } } +static int acpi_hibernation_pre_restore(void) +{ + acpi_status status; + + status = acpi_hw_disable_all_gpes(); + + return ACPI_SUCCESS(status) ? 0 : -EFAULT; +} + +static void acpi_hibernation_restore_cleanup(void) +{ + acpi_hw_enable_all_runtime_gpes(); +} + static struct hibernation_ops acpi_hibernation_ops = { .prepare = acpi_hibernation_prepare, .enter = acpi_hibernation_enter, .finish = acpi_hibernation_finish, + .pre_restore = acpi_hibernation_pre_restore, + .restore_cleanup = acpi_hibernation_restore_cleanup, }; #endif /* CONFIG_SOFTWARE_SUSPEND */ diff --git a/drivers/acpi/sleep/poweroff.c b/drivers/acpi/sleep/poweroff.c index d9801eff6489..39e40d56b034 100644 --- a/drivers/acpi/sleep/poweroff.c +++ b/drivers/acpi/sleep/poweroff.c @@ -39,7 +39,13 @@ int acpi_sleep_prepare(u32 acpi_state) #ifdef CONFIG_PM -void acpi_power_off(void) +static void acpi_power_off_prepare(void) +{ + /* Prepare to power off the system */ + acpi_sleep_prepare(ACPI_STATE_S5); +} + +static void acpi_power_off(void) { /* acpi_sleep_prepare(ACPI_STATE_S5) should have already been called */ printk("%s called\n", __FUNCTION__); @@ -48,30 +54,6 @@ void acpi_power_off(void) acpi_enter_sleep_state(ACPI_STATE_S5); } -static int acpi_shutdown(struct sys_device *x) -{ - switch (system_state) { - case SYSTEM_POWER_OFF: - /* Prepare to power off the system */ - return acpi_sleep_prepare(ACPI_STATE_S5); - case SYSTEM_SUSPEND_DISK: - /* Prepare to suspend the system to disk */ - return acpi_sleep_prepare(ACPI_STATE_S4); - default: - return 0; - } -} - -static struct sysdev_class acpi_sysclass = { - set_kset_name("acpi"), - .shutdown = acpi_shutdown -}; - -static struct sys_device device_acpi = { - .id = 0, - .cls = &acpi_sysclass, -}; - static int acpi_poweroff_init(void) { if (!acpi_disabled) { @@ -81,13 +63,8 @@ static int acpi_poweroff_init(void) status = acpi_get_sleep_type_data(ACPI_STATE_S5, &type_a, &type_b); if (ACPI_SUCCESS(status)) { - int error; - error = sysdev_class_register(&acpi_sysclass); - if (!error) - error = sysdev_register(&device_acpi); - if (!error) - pm_power_off = acpi_power_off; - return error; + pm_power_off_prepare = acpi_power_off_prepare; + pm_power_off = acpi_power_off; } } return 0; diff --git a/drivers/base/core.c b/drivers/base/core.c index 0455aa78fa13..3599ab2506d2 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -24,6 +24,8 @@ #include "base.h" #include "power/power.h" +extern const char *kobject_actions[]; + int (*platform_notify)(struct device * dev) = NULL; int (*platform_notify_remove)(struct device * dev) = NULL; @@ -303,10 +305,25 @@ out: static ssize_t store_uevent(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - if (memcmp(buf, "add", 3) != 0) - dev_err(dev, "uevent: unsupported action-string; this will " - "be ignored in a future kernel version"); + size_t len = count; + enum kobject_action action; + + if (len && buf[len-1] == '\n') + len--; + + for (action = 0; action < KOBJ_MAX; action++) { + if (strncmp(kobject_actions[action], buf, len) != 0) + continue; + if (kobject_actions[action][len] != '\0') + continue; + kobject_uevent(&dev->kobj, action); + goto out; + } + + dev_err(dev, "uevent: unsupported action-string; this will " + "be ignored in a future kernel version\n"); kobject_uevent(&dev->kobj, KOBJ_ADD); +out: return count; } @@ -643,6 +660,82 @@ static int setup_parent(struct device *dev, struct device *parent) return 0; } +static int device_add_class_symlinks(struct device *dev) +{ + int error; + + if (!dev->class) + return 0; + error = sysfs_create_link(&dev->kobj, &dev->class->subsys.kobj, + "subsystem"); + if (error) + goto out; + /* + * If this is not a "fake" compatible device, then create the + * symlink from the class to the device. + */ + if (dev->kobj.parent != &dev->class->subsys.kobj) { + error = sysfs_create_link(&dev->class->subsys.kobj, &dev->kobj, + dev->bus_id); + if (error) + goto out_subsys; + } + /* only bus-device parents get a "device"-link */ + if (dev->parent && dev->parent->bus) { + error = sysfs_create_link(&dev->kobj, &dev->parent->kobj, + "device"); + if (error) + goto out_busid; +#ifdef CONFIG_SYSFS_DEPRECATED + { + char * class_name = make_class_name(dev->class->name, + &dev->kobj); + if (class_name) + error = sysfs_create_link(&dev->parent->kobj, + &dev->kobj, class_name); + kfree(class_name); + if (error) + goto out_device; + } +#endif + } + return 0; + +#ifdef CONFIG_SYSFS_DEPRECATED +out_device: + if (dev->parent) + sysfs_remove_link(&dev->kobj, "device"); +#endif +out_busid: + if (dev->kobj.parent != &dev->class->subsys.kobj) + sysfs_remove_link(&dev->class->subsys.kobj, dev->bus_id); +out_subsys: + sysfs_remove_link(&dev->kobj, "subsystem"); +out: + return error; +} + +static void device_remove_class_symlinks(struct device *dev) +{ + if (!dev->class) + return; + if (dev->parent) { +#ifdef CONFIG_SYSFS_DEPRECATED + char *class_name; + + class_name = make_class_name(dev->class->name, &dev->kobj); + if (class_name) { + sysfs_remove_link(&dev->parent->kobj, class_name); + kfree(class_name); + } +#endif + sysfs_remove_link(&dev->kobj, "device"); + } + if (dev->kobj.parent != &dev->class->subsys.kobj) + sysfs_remove_link(&dev->class->subsys.kobj, dev->bus_id); + sysfs_remove_link(&dev->kobj, "subsystem"); +} + /** * device_add - add device to device hierarchy. * @dev: device. @@ -657,7 +750,6 @@ static int setup_parent(struct device *dev, struct device *parent) int device_add(struct device *dev) { struct device *parent = NULL; - char *class_name = NULL; struct class_interface *class_intf; int error = -EINVAL; @@ -697,27 +789,9 @@ int device_add(struct device *dev) goto ueventattrError; } - if (dev->class) { - sysfs_create_link(&dev->kobj, &dev->class->subsys.kobj, - "subsystem"); - /* If this is not a "fake" compatible device, then create the - * symlink from the class to the device. */ - if (dev->kobj.parent != &dev->class->subsys.kobj) - sysfs_create_link(&dev->class->subsys.kobj, - &dev->kobj, dev->bus_id); - if (parent) { - sysfs_create_link(&dev->kobj, &dev->parent->kobj, - "device"); -#ifdef CONFIG_SYSFS_DEPRECATED - class_name = make_class_name(dev->class->name, - &dev->kobj); - if (class_name) - sysfs_create_link(&dev->parent->kobj, - &dev->kobj, class_name); -#endif - } - } - + error = device_add_class_symlinks(dev); + if (error) + goto SymlinkError; error = device_add_attrs(dev); if (error) goto AttrsError; @@ -744,7 +818,6 @@ int device_add(struct device *dev) up(&dev->class->sem); } Done: - kfree(class_name); put_device(dev); return error; BusError: @@ -755,6 +828,8 @@ int device_add(struct device *dev) BUS_NOTIFY_DEL_DEVICE, dev); device_remove_attrs(dev); AttrsError: + device_remove_class_symlinks(dev); + SymlinkError: if (MAJOR(dev->devt)) device_remove_file(dev, &devt_attr); @@ -1139,7 +1214,7 @@ int device_rename(struct device *dev, char *new_name) { char *old_class_name = NULL; char *new_class_name = NULL; - char *old_symlink_name = NULL; + char *old_device_name = NULL; int error; dev = get_device(dev); @@ -1153,42 +1228,49 @@ int device_rename(struct device *dev, char *new_name) old_class_name = make_class_name(dev->class->name, &dev->kobj); #endif - if (dev->class) { - old_symlink_name = kmalloc(BUS_ID_SIZE, GFP_KERNEL); - if (!old_symlink_name) { - error = -ENOMEM; - goto out_free_old_class; - } - strlcpy(old_symlink_name, dev->bus_id, BUS_ID_SIZE); + old_device_name = kmalloc(BUS_ID_SIZE, GFP_KERNEL); + if (!old_device_name) { + error = -ENOMEM; + goto out; } - + strlcpy(old_device_name, dev->bus_id, BUS_ID_SIZE); strlcpy(dev->bus_id, new_name, BUS_ID_SIZE); error = kobject_rename(&dev->kobj, new_name); + if (error) { + strlcpy(dev->bus_id, old_device_name, BUS_ID_SIZE); + goto out; + } #ifdef CONFIG_SYSFS_DEPRECATED if (old_class_name) { new_class_name = make_class_name(dev->class->name, &dev->kobj); if (new_class_name) { - sysfs_create_link(&dev->parent->kobj, &dev->kobj, - new_class_name); + error = sysfs_create_link(&dev->parent->kobj, + &dev->kobj, new_class_name); + if (error) + goto out; sysfs_remove_link(&dev->parent->kobj, old_class_name); } } #endif if (dev->class) { - sysfs_remove_link(&dev->class->subsys.kobj, - old_symlink_name); - sysfs_create_link(&dev->class->subsys.kobj, &dev->kobj, - dev->bus_id); + sysfs_remove_link(&dev->class->subsys.kobj, old_device_name); + error = sysfs_create_link(&dev->class->subsys.kobj, &dev->kobj, + dev->bus_id); + if (error) { + /* Uh... how to unravel this if restoring can fail? */ + dev_err(dev, "%s: sysfs_create_symlink failed (%d)\n", + __FUNCTION__, error); + } } +out: put_device(dev); kfree(new_class_name); - kfree(old_symlink_name); - out_free_old_class: kfree(old_class_name); + kfree(old_device_name); return error; } diff --git a/drivers/base/power/Makefile b/drivers/base/power/Makefile index 91f230939c1e..966a5e287415 100644 --- a/drivers/base/power/Makefile +++ b/drivers/base/power/Makefile @@ -1,10 +1,10 @@ obj-y := shutdown.o -obj-$(CONFIG_PM) += main.o suspend.o resume.o runtime.o sysfs.o +obj-$(CONFIG_PM) += main.o suspend.o resume.o sysfs.o obj-$(CONFIG_PM_TRACE) += trace.o ifeq ($(CONFIG_DEBUG_DRIVER),y) EXTRA_CFLAGS += -DDEBUG endif -ifeq ($(CONFIG_PM_DEBUG),y) +ifeq ($(CONFIG_PM_VERBOSE),y) EXTRA_CFLAGS += -DDEBUG endif diff --git a/drivers/base/power/power.h b/drivers/base/power/power.h index 2760f25b3ac5..591a0dd5deee 100644 --- a/drivers/base/power/power.h +++ b/drivers/base/power/power.h @@ -62,11 +62,6 @@ extern int resume_device(struct device *); */ extern int suspend_device(struct device *, pm_message_t); - -/* - * runtime.c - */ - #else /* CONFIG_PM */ diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c deleted file mode 100644 index df6174d85866..000000000000 --- a/drivers/base/power/runtime.c +++ /dev/null @@ -1,85 +0,0 @@ -/* - * drivers/base/power/runtime.c - Handling dynamic device power management. - * - * Copyright (c) 2003 Patrick Mochel - * Copyright (c) 2003 Open Source Development Lab - * - */ - -#include <linux/device.h> -#include "power.h" - - -static void runtime_resume(struct device * dev) -{ - dev_dbg(dev, "resuming\n"); - if (!dev->power.power_state.event) - return; - if (!resume_device(dev)) - dev->power.power_state = PMSG_ON; -} - - -/** - * dpm_runtime_resume - Power one device back on. - * @dev: Device. - * - * Bring one device back to the on state by first powering it - * on, then restoring state. We only operate on devices that aren't - * already on. - * FIXME: We need to handle devices that are in an unknown state. - */ - -void dpm_runtime_resume(struct device * dev) -{ - mutex_lock(&dpm_mtx); - runtime_resume(dev); - mutex_unlock(&dpm_mtx); -} -EXPORT_SYMBOL(dpm_runtime_resume); - - -/** - * dpm_runtime_suspend - Put one device in low-power state. - * @dev: Device. - * @state: State to enter. - */ - -int dpm_runtime_suspend(struct device * dev, pm_message_t state) -{ - int error = 0; - - mutex_lock(&dpm_mtx); - if (dev->power.power_state.event == state.event) - goto Done; - - if (dev->power.power_state.event) - runtime_resume(dev); - - if (!(error = suspend_device(dev, state))) - dev->power.power_state = state; - Done: - mutex_unlock(&dpm_mtx); - return error; -} -EXPORT_SYMBOL(dpm_runtime_suspend); - - -#if 0 -/** - * dpm_set_power_state - Update power_state field. - * @dev: Device. - * @state: Power state device is in. - * - * This is an update mechanism for drivers to notify the core - * what power state a device is in. Device probing code may not - * always be able to tell, but we need accurate information to - * work reliably. - */ -void dpm_set_power_state(struct device * dev, pm_message_t state) -{ - mutex_lock(&dpm_mtx); - dev->power.power_state = state; - mutex_unlock(&dpm_mtx); -} -#endif /* 0 */ diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c index 2d47517dbe32..f2ed179cd695 100644 --- a/drivers/base/power/sysfs.c +++ b/drivers/base/power/sysfs.c @@ -7,69 +7,6 @@ #include "power.h" -#ifdef CONFIG_PM_SYSFS_DEPRECATED - -/** - * state - Control current power state of device - * - * show() returns the current power state of the device. '0' indicates - * the device is on. Other values (2) indicate the device is in some low - * power state. - * - * store() sets the current power state, which is an integer valued - * 0, 2, or 3. Devices with bus.suspend_late(), or bus.resume_early() - * methods fail this operation; those methods couldn't be called. - * Otherwise, - * - * - If the recorded dev->power.power_state.event matches the - * target value, nothing is done. - * - If the recorded event code is nonzero, the device is reactivated - * by calling bus.resume() and/or class.resume(). - * - If the target value is nonzero, the device is suspended by - * calling class.suspend() and/or bus.suspend() with event code - * PM_EVENT_SUSPEND. - * - * This mechanism is DEPRECATED and should only be used for testing. - */ - -static ssize_t state_show(struct device * dev, struct device_attribute *attr, char * buf) -{ - if (dev->power.power_state.event) - return sprintf(buf, "2\n"); - else - return sprintf(buf, "0\n"); -} - -static ssize_t state_store(struct device * dev, struct device_attribute *attr, const char * buf, size_t n) -{ - pm_message_t state; - int error = -EINVAL; - - /* disallow incomplete suspend sequences */ - if (dev->bus && (dev->bus->suspend_late || dev->bus->resume_early)) - return error; - - state.event = PM_EVENT_SUSPEND; - /* Older apps expected to write "3" here - confused with PCI D3 */ - if ((n == 1) && !strcmp(buf, "3")) - error = dpm_runtime_suspend(dev, state); - - if ((n == 1) && !strcmp(buf, "2")) - error = dpm_runtime_suspend(dev, state); - - if ((n == 1) && !strcmp(buf, "0")) { - dpm_runtime_resume(dev); - error = 0; - } - - return error ? error : n; -} - -static DEVICE_ATTR(state, 0644, state_show, state_store); - - -#endif /* CONFIG_PM_SYSFS_DEPRECATED */ - /* * wakeup - Report/change current wakeup option for device * @@ -143,9 +80,6 @@ static DEVICE_ATTR(wakeup, 0644, wake_show, wake_store); static struct attribute * power_attrs[] = { -#ifdef CONFIG_PM_SYSFS_DEPRECATED - &dev_attr_state.attr, -#endif &dev_attr_wakeup.attr, NULL, }; diff --git a/drivers/block/Makefile b/drivers/block/Makefile index 3e31532df0ed..819c829125fb 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -30,3 +30,4 @@ obj-$(CONFIG_BLK_DEV_SX8) += sx8.o obj-$(CONFIG_BLK_DEV_UB) += ub.o obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += xen-blkfront.o +obj-$(CONFIG_LGUEST_GUEST) += lguest_blk.o diff --git a/drivers/block/lguest_blk.c b/drivers/block/lguest_blk.c new file mode 100644 index 000000000000..1634c2dd25ec --- /dev/null +++ b/drivers/block/lguest_blk.c @@ -0,0 +1,275 @@ +/* A simple block driver for lguest. + * + * Copyright 2006 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +//#define DEBUG +#include <linux/init.h> +#include <linux/types.h> +#include <linux/blkdev.h> +#include <linux/interrupt.h> +#include <linux/lguest_bus.h> + +static char next_block_index = 'a'; + +struct blockdev +{ + spinlock_t lock; + + /* The disk structure for the kernel. */ + struct gendisk *disk; + + /* The major number for this disk. */ + int major; + int irq; + + unsigned long phys_addr; + /* The mapped block page. */ + struct lguest_block_page *lb_page; + + /* We only have a single request outstanding at a time. */ + struct lguest_dma dma; + struct request *req; +}; + +/* Jens gave me this nice helper to end all chunks of a request. */ +static void end_entire_request(struct request *req, int uptodate) +{ + if (end_that_request_first(req, uptodate, req->hard_nr_sectors)) + BUG(); + add_disk_randomness(req->rq_disk); + blkdev_dequeue_request(req); + end_that_request_last(req, uptodate); +} + +static irqreturn_t lgb_irq(int irq, void *_bd) +{ + struct blockdev *bd = _bd; + unsigned long flags; + + if (!bd->req) { + pr_debug("No work!\n"); + return IRQ_NONE; + } + + if (!bd->lb_page->result) { + pr_debug("No result!\n"); + return IRQ_NONE; + } + + spin_lock_irqsave(&bd->lock, flags); + end_entire_request(bd->req, bd->lb_page->result == 1); + bd->req = NULL; + bd->dma.used_len = 0; + blk_start_queue(bd->disk->queue); + spin_unlock_irqrestore(&bd->lock, flags); + return IRQ_HANDLED; +} + +static unsigned int req_to_dma(struct request *req, struct lguest_dma *dma) +{ + unsigned int i = 0, idx, len = 0; + struct bio *bio; + + rq_for_each_bio(bio, req) { + struct bio_vec *bvec; + bio_for_each_segment(bvec, bio, idx) { + BUG_ON(i == LGUEST_MAX_DMA_SECTIONS); + BUG_ON(!bvec->bv_len); + dma->addr[i] = page_to_phys(bvec->bv_page) + + bvec->bv_offset; + dma->len[i] = bvec->bv_len; + len += bvec->bv_len; + i++; + } + } + if (i < LGUEST_MAX_DMA_SECTIONS) + dma->len[i] = 0; + return len; +} + +static void empty_dma(struct lguest_dma *dma) +{ + dma->len[0] = 0; +} + +static void setup_req(struct blockdev *bd, + int type, struct request *req, struct lguest_dma *dma) +{ + bd->lb_page->type = type; + bd->lb_page->sector = req->sector; + bd->lb_page->result = 0; + bd->req = req; + bd->lb_page->bytes = req_to_dma(req, dma); +} + +static void do_write(struct blockdev *bd, struct request *req) +{ + struct lguest_dma send; + + pr_debug("lgb: WRITE sector %li\n", (long)req->sector); + setup_req(bd, 1, req, &send); + + lguest_send_dma(bd->phys_addr, &send); +} + +static void do_read(struct blockdev *bd, struct request *req) +{ + struct lguest_dma ping; + + pr_debug("lgb: READ sector %li\n", (long)req->sector); + setup_req(bd, 0, req, &bd->dma); + + empty_dma(&ping); + lguest_send_dma(bd->phys_addr, &ping); +} + +static void do_lgb_request(request_queue_t *q) +{ + struct blockdev *bd; + struct request *req; + +again: + req = elv_next_request(q); + if (!req) + return; + + bd = req->rq_disk->private_data; + /* Sometimes we get repeated requests after blk_stop_queue. */ + if (bd->req) + return; + + if (!blk_fs_request(req)) { + pr_debug("Got non-command 0x%08x\n", req->cmd_type); + req->errors++; + end_entire_request(req, 0); + goto again; + } + + if (rq_data_dir(req) == WRITE) + do_write(bd, req); + else + do_read(bd, req); + + /* Wait for interrupt to tell us it's done. */ + blk_stop_queue(q); +} + +static struct block_device_operations lguestblk_fops = { + .owner = THIS_MODULE, +}; + +static int lguestblk_probe(struct lguest_device *lgdev) +{ + struct blockdev *bd; + int err; + int irqflags = IRQF_SHARED; + + bd = kmalloc(sizeof(*bd), GFP_KERNEL); + if (!bd) + return -ENOMEM; + + spin_lock_init(&bd->lock); + bd->irq = lgdev_irq(lgdev); + bd->req = NULL; + bd->dma.used_len = 0; + bd->dma.len[0] = 0; + bd->phys_addr = (lguest_devices[lgdev->index].pfn << PAGE_SHIFT); + + bd->lb_page = lguest_map(bd->phys_addr, 1); + if (!bd->lb_page) { + err = -ENOMEM; + goto out_free_bd; + } + + bd->major = register_blkdev(0, "lguestblk"); + if (bd->major < 0) { + err = bd->major; + goto out_unmap; + } + + bd->disk = alloc_disk(1); + if (!bd->disk) { + err = -ENOMEM; + goto out_unregister_blkdev; + } + + bd->disk->queue = blk_init_queue(do_lgb_request, &bd->lock); + if (!bd->disk->queue) { + err = -ENOMEM; + goto out_put_disk; + } + + /* We can only handle a certain number of sg entries */ + blk_queue_max_hw_segments(bd->disk->queue, LGUEST_MAX_DMA_SECTIONS); + /* Buffers must not cross page boundaries */ + blk_queue_segment_boundary(bd->disk->queue, PAGE_SIZE-1); + + sprintf(bd->disk->disk_name, "lgb%c", next_block_index++); + if (lguest_devices[lgdev->index].features & LGUEST_DEVICE_F_RANDOMNESS) + irqflags |= IRQF_SAMPLE_RANDOM; + err = request_irq(bd->irq, lgb_irq, irqflags, bd->disk->disk_name, bd); + if (err) + goto out_cleanup_queue; + + err = lguest_bind_dma(bd->phys_addr, &bd->dma, 1, bd->irq); + if (err) + goto out_free_irq; + + bd->disk->major = bd->major; + bd->disk->first_minor = 0; + bd->disk->private_data = bd; + bd->disk->fops = &lguestblk_fops; + /* This is initialized to the disk size by the other end. */ + set_capacity(bd->disk, bd->lb_page->num_sectors); + add_disk(bd->disk); + + printk(KERN_INFO "%s: device %i at major %d\n", + bd->disk->disk_name, lgdev->index, bd->major); + + lgdev->private = bd; + return 0; + +out_free_irq: + free_irq(bd->irq, bd); +out_cleanup_queue: + blk_cleanup_queue(bd->disk->queue); +out_put_disk: + put_disk(bd->disk); +out_unregister_blkdev: + unregister_blkdev(bd->major, "lguestblk"); +out_unmap: + lguest_unmap(bd->lb_page); +out_free_bd: + kfree(bd); + return err; +} + +static struct lguest_driver lguestblk_drv = { + .name = "lguestblk", + .owner = THIS_MODULE, + .device_type = LGUEST_DEVICE_T_BLOCK, + .probe = lguestblk_probe, +}; + +static __init int lguestblk_init(void) +{ + return register_lguest_driver(&lguestblk_drv); +} +module_init(lguestblk_init); + +MODULE_DESCRIPTION("Lguest block driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c index 54509eb3391b..949ae93499e5 100644 --- a/drivers/block/sx8.c +++ b/drivers/block/sx8.c @@ -1608,7 +1608,7 @@ static int carm_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) } #endif - host = kmalloc(sizeof(*host), GFP_KERNEL); + host = kzalloc(sizeof(*host), GFP_KERNEL); if (!host) { printk(KERN_ERR DRV_NAME "(%s): memory alloc failure\n", pci_name(pdev)); @@ -1616,7 +1616,6 @@ static int carm_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) goto err_out_regions; } - memset(host, 0, sizeof(*host)); host->pdev = pdev; host->flags = pci_dac ? FL_DAC : 0; spin_lock_init(&host->lock); diff --git a/drivers/char/Makefile b/drivers/char/Makefile index 8852b8d643cf..4e6f387fd189 100644 --- a/drivers/char/Makefile +++ b/drivers/char/Makefile @@ -42,6 +42,7 @@ obj-$(CONFIG_SYNCLINK_GT) += synclink_gt.o obj-$(CONFIG_N_HDLC) += n_hdlc.o obj-$(CONFIG_AMIGA_BUILTIN_SERIAL) += amiserial.o obj-$(CONFIG_SX) += sx.o generic_serial.o +obj-$(CONFIG_LGUEST_GUEST) += hvc_lguest.o obj-$(CONFIG_RIO) += rio/ generic_serial.o obj-$(CONFIG_HVC_CONSOLE) += hvc_vio.o hvsi.o obj-$(CONFIG_HVC_ISERIES) += hvc_iseries.o diff --git a/drivers/char/amiserial.c b/drivers/char/amiserial.c index 7b02bf1289a2..3d468f502d2d 100644 --- a/drivers/char/amiserial.c +++ b/drivers/char/amiserial.c @@ -1721,12 +1721,11 @@ static int get_async_struct(int line, struct async_struct **ret_info) *ret_info = sstate->info; return 0; } - info = kmalloc(sizeof(struct async_struct), GFP_KERNEL); + info = kzalloc(sizeof(struct async_struct), GFP_KERNEL); if (!info) { sstate->count--; return -ENOMEM; } - memset(info, 0, sizeof(struct async_struct)); #ifdef DECLARE_WAITQUEUE init_waitqueue_head(&info->open_wait); init_waitqueue_head(&info->close_wait); diff --git a/drivers/char/drm/via_dmablit.c b/drivers/char/drm/via_dmablit.c index fdb8609dd76f..832de1d9ba7e 100644 --- a/drivers/char/drm/via_dmablit.c +++ b/drivers/char/drm/via_dmablit.c @@ -273,10 +273,9 @@ via_alloc_desc_pages(drm_via_sg_info_t *vsg) vsg->num_desc_pages = (vsg->num_desc + vsg->descriptors_per_page - 1) / vsg->descriptors_per_page; - if (NULL == (vsg->desc_pages = kmalloc(sizeof(void *) * vsg->num_desc_pages, GFP_KERNEL))) + if (NULL == (vsg->desc_pages = kcalloc(vsg->num_desc_pages, sizeof(void *), GFP_KERNEL))) return DRM_ERR(ENOMEM); - memset(vsg->desc_pages, 0, sizeof(void *) * vsg->num_desc_pages); vsg->state = dr_via_desc_pages_alloc; for (i=0; i<vsg->num_desc_pages; ++i) { if (NULL == (vsg->desc_pages[i] = diff --git a/drivers/char/esp.c b/drivers/char/esp.c index 74cd5118af57..2e7ae42a5503 100644 --- a/drivers/char/esp.c +++ b/drivers/char/esp.c @@ -2459,7 +2459,7 @@ static int __init espserial_init(void) return 1; } - info = kmalloc(sizeof(struct esp_struct), GFP_KERNEL); + info = kzalloc(sizeof(struct esp_struct), GFP_KERNEL); if (!info) { @@ -2469,7 +2469,6 @@ static int __init espserial_init(void) return 1; } - memset((void *)info, 0, sizeof(struct esp_struct)); spin_lock_init(&info->lock); /* rx_trigger, tx_trigger are needed by autoconfig */ info->config.rx_trigger = rx_trigger; @@ -2527,7 +2526,7 @@ static int __init espserial_init(void) if (!dma) info->stat_flags |= ESP_STAT_NEVER_DMA; - info = kmalloc(sizeof(struct esp_struct), GFP_KERNEL); + info = kzalloc(sizeof(struct esp_struct), GFP_KERNEL); if (!info) { printk(KERN_ERR "Couldn't allocate memory for esp serial device information\n"); @@ -2536,7 +2535,6 @@ static int __init espserial_init(void) return 0; } - memset((void *)info, 0, sizeof(struct esp_struct)); /* rx_trigger, tx_trigger are needed by autoconfig */ info->config.rx_trigger = rx_trigger; info->config.tx_trigger = tx_trigger; diff --git a/drivers/char/hvc_lguest.c b/drivers/char/hvc_lguest.c new file mode 100644 index 000000000000..e7b889e404a7 --- /dev/null +++ b/drivers/char/hvc_lguest.c @@ -0,0 +1,102 @@ +/* Simple console for lguest. + * + * Copyright (C) 2006 Rusty Russell, IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include <linux/err.h> +#include <linux/init.h> +#include <linux/lguest_bus.h> +#include "hvc_console.h" + +static char inbuf[256]; +static struct lguest_dma cons_input = { .used_len = 0, + .addr[0] = __pa(inbuf), + .len[0] = sizeof(inbuf), + .len[1] = 0 }; + +static int put_chars(u32 vtermno, const char *buf, int count) +{ + struct lguest_dma dma; + + /* FIXME: what if it's over a page boundary? */ + dma.len[0] = count; + dma.len[1] = 0; + dma.addr[0] = __pa(buf); + + lguest_send_dma(LGUEST_CONSOLE_DMA_KEY, &dma); + return count; +} + +static int get_chars(u32 vtermno, char *buf, int count) +{ + static int cons_offset; + + if (!cons_input.used_len) + return 0; + + if (cons_input.used_len - cons_offset < count) + count = cons_input.used_len - cons_offset; + + memcpy(buf, inbuf + cons_offset, count); + cons_offset += count; + if (cons_offset == cons_input.used_len) { + cons_offset = 0; + cons_input.used_len = 0; + } + return count; +} + +static struct hv_ops lguest_cons = { + .get_chars = get_chars, + .put_chars = put_chars, +}; + +static int __init cons_init(void) +{ + if (strcmp(paravirt_ops.name, "lguest") != 0) + return 0; + + return hvc_instantiate(0, 0, &lguest_cons); +} +console_initcall(cons_init); + +static int lguestcons_probe(struct lguest_device *lgdev) +{ + int err; + + lgdev->private = hvc_alloc(0, lgdev_irq(lgdev), &lguest_cons, 256); + if (IS_ERR(lgdev->private)) + return PTR_ERR(lgdev->private); + + err = lguest_bind_dma(LGUEST_CONSOLE_DMA_KEY, &cons_input, 1, + lgdev_irq(lgdev)); + if (err) + printk("lguest console: failed to bind buffer.\n"); + return err; +} + +static struct lguest_driver lguestcons_drv = { + .name = "lguestcons", + .owner = THIS_MODULE, + .device_type = LGUEST_DEVICE_T_CONSOLE, + .probe = lguestcons_probe, +}; + +static int __init hvc_lguest_init(void) +{ + return register_lguest_driver(&lguestcons_drv); +} +module_init(hvc_lguest_init); diff --git a/drivers/char/hvcs.c b/drivers/char/hvcs.c index 207f7343ba60..17f96e04266f 100644 --- a/drivers/char/hvcs.c +++ b/drivers/char/hvcs.c @@ -784,12 +784,10 @@ static int __devinit hvcs_probe( return -EFAULT; } - hvcsd = kmalloc(sizeof(*hvcsd), GFP_KERNEL); + hvcsd = kzalloc(sizeof(*hvcsd), GFP_KERNEL); if (!hvcsd) return -ENODEV; - /* hvcsd->tty is zeroed out with the memset */ - memset(hvcsd, 0x00, sizeof(*hvcsd)); spin_lock_init(&hvcsd->lock); /* Automatically incs the refcount the first time */ diff --git a/drivers/char/ip2/ip2main.c b/drivers/char/ip2/ip2main.c index 83c7258d3580..6005b5225772 100644 --- a/drivers/char/ip2/ip2main.c +++ b/drivers/char/ip2/ip2main.c @@ -425,9 +425,7 @@ cleanup_module(void) printk(KERN_ERR "IP2: failed to unregister tty driver (%d)\n", err); } put_tty_driver(ip2_tty_driver); - if ( ( err = unregister_chrdev ( IP2_IPL_MAJOR, pcIpl ) ) ) { - printk(KERN_ERR "IP2: failed to unregister IPL driver (%d)\n", err); - } + unregister_chrdev(IP2_IPL_MAJOR, pcIpl); remove_proc_entry("ip2mem", &proc_root); // free memory diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index b5df7e61aeb2..6a01dd9e43f8 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -2639,10 +2639,9 @@ int ipmi_register_smi(struct ipmi_smi_handlers *handlers, return -ENODEV; } - intf = kmalloc(sizeof(*intf), GFP_KERNEL); + intf = kzalloc(sizeof(*intf), GFP_KERNEL); if (!intf) return -ENOMEM; - memset(intf, 0, sizeof(*intf)); intf->ipmi_version_major = ipmi_version_major(device_id); intf->ipmi_version_minor = ipmi_version_minor(device_id); diff --git a/drivers/char/mbcs.c b/drivers/char/mbcs.c index 57f9115a456c..7ee5d9444926 100644 --- a/drivers/char/mbcs.c +++ b/drivers/char/mbcs.c @@ -39,14 +39,14 @@ #else #define DBG(fmt...) #endif -int mbcs_major; +static int mbcs_major; -LIST_HEAD(soft_list); +static LIST_HEAD(soft_list); /* * file operations */ -const struct file_operations mbcs_ops = { +static const struct file_operations mbcs_ops = { .open = mbcs_open, .llseek = mbcs_sram_llseek, .read = mbcs_sram_read, @@ -377,7 +377,7 @@ dmaread_exit: return rv; } -int mbcs_open(struct inode *ip, struct file *fp) +static int mbcs_open(struct inode *ip, struct file *fp) { struct mbcs_soft *soft; int minor; @@ -394,7 +394,7 @@ int mbcs_open(struct inode *ip, struct file *fp) return -ENODEV; } -ssize_t mbcs_sram_read(struct file * fp, char __user *buf, size_t len, loff_t * off) +static ssize_t mbcs_sram_read(struct file * fp, char __user *buf, size_t len, loff_t * off) { struct cx_dev *cx_dev = fp->private_data; struct mbcs_soft *soft = cx_dev->soft; @@ -418,7 +418,7 @@ ssize_t mbcs_sram_read(struct file * fp, char __user *buf, size_t len, loff_t * return rv; } -ssize_t +static ssize_t mbcs_sram_write(struct file * fp, const char __user *buf, size_t len, loff_t * off) { struct cx_dev *cx_dev = fp->private_data; @@ -443,7 +443,7 @@ mbcs_sram_write(struct file * fp, const char __user *buf, size_t len, loff_t * o return rv; } -loff_t mbcs_sram_llseek(struct file * filp, loff_t off, int whence) +static loff_t mbcs_sram_llseek(struct file * filp, loff_t off, int whence) { loff_t newpos; @@ -491,7 +491,7 @@ static void mbcs_gscr_pioaddr_set(struct mbcs_soft *soft) soft->gscr_addr = mbcs_pioaddr(soft, MBCS_GSCR_START); } -int mbcs_gscr_mmap(struct file *fp, struct vm_area_struct *vma) +static int mbcs_gscr_mmap(struct file *fp, struct vm_area_struct *vma) { struct cx_dev *cx_dev = fp->private_data; struct mbcs_soft *soft = cx_dev->soft; @@ -793,7 +793,7 @@ static int mbcs_remove(struct cx_dev *dev) return 0; } -const struct cx_device_id __devinitdata mbcs_id_table[] = { +static const struct cx_device_id __devinitdata mbcs_id_table[] = { { .part_num = MBCS_PART_NUM, .mfg_num = MBCS_MFG_NUM, @@ -807,7 +807,7 @@ const struct cx_device_id __devinitdata mbcs_id_table[] = { MODULE_DEVICE_TABLE(cx, mbcs_id_table); -struct cx_drv mbcs_driver = { +static struct cx_drv mbcs_driver = { .name = DEVICE_NAME, .id_table = mbcs_id_table, .probe = mbcs_probe, @@ -816,12 +816,7 @@ struct cx_drv mbcs_driver = { static void __exit mbcs_exit(void) { - int rv; - - rv = unregister_chrdev(mbcs_major, DEVICE_NAME); - if (rv < 0) - DBG(KERN_ALERT "Error in unregister_chrdev: %d\n", rv); - + unregister_chrdev(mbcs_major, DEVICE_NAME); cx_driver_unregister(&mbcs_driver); } diff --git a/drivers/char/mbcs.h b/drivers/char/mbcs.h index e7fd47e43257..c9905a3c3353 100644 --- a/drivers/char/mbcs.h +++ b/drivers/char/mbcs.h @@ -542,12 +542,12 @@ struct mbcs_soft { struct semaphore algolock; }; -extern int mbcs_open(struct inode *ip, struct file *fp); -extern ssize_t mbcs_sram_read(struct file *fp, char __user *buf, size_t len, +static int mbcs_open(struct inode *ip, struct file *fp); +static ssize_t mbcs_sram_read(struct file *fp, char __user *buf, size_t len, loff_t * off); -extern ssize_t mbcs_sram_write(struct file *fp, const char __user *buf, size_t len, +static ssize_t mbcs_sram_write(struct file *fp, const char __user *buf, size_t len, loff_t * off); -extern loff_t mbcs_sram_llseek(struct file *filp, loff_t off, int whence); -extern int mbcs_gscr_mmap(struct file *fp, struct vm_area_struct *vma); +static loff_t mbcs_sram_llseek(struct file *filp, loff_t off, int whence); +static int mbcs_gscr_mmap(struct file *fp, struct vm_area_struct *vma); #endif // __MBCS_H__ diff --git a/drivers/char/pcmcia/synclink_cs.c b/drivers/char/pcmcia/synclink_cs.c index 13808f6083a0..2b889317461e 100644 --- a/drivers/char/pcmcia/synclink_cs.c +++ b/drivers/char/pcmcia/synclink_cs.c @@ -540,13 +540,12 @@ static int mgslpc_probe(struct pcmcia_device *link) if (debug_level >= DEBUG_LEVEL_INFO) printk("mgslpc_attach\n"); - info = kmalloc(sizeof(MGSLPC_INFO), GFP_KERNEL); + info = kzalloc(sizeof(MGSLPC_INFO), GFP_KERNEL); if (!info) { printk("Error can't allocate device instance data\n"); return -ENOMEM; } - memset(info, 0, sizeof(MGSLPC_INFO)); info->magic = MGSLPC_MAGIC; INIT_WORK(&info->task, bh_handler); info->max_frame_size = 4096; diff --git a/drivers/char/rio/rio_linux.c b/drivers/char/rio/rio_linux.c index 294e9cb0c449..0ce96670f979 100644 --- a/drivers/char/rio/rio_linux.c +++ b/drivers/char/rio/rio_linux.c @@ -803,9 +803,7 @@ static void *ckmalloc(int size) { void *p; - p = kmalloc(size, GFP_KERNEL); - if (p) - memset(p, 0, size); + p = kzalloc(size, GFP_KERNEL); return p; } diff --git a/drivers/char/rio/riocmd.c b/drivers/char/rio/riocmd.c index 8cc60b693460..7321d002c34f 100644 --- a/drivers/char/rio/riocmd.c +++ b/drivers/char/rio/riocmd.c @@ -556,9 +556,7 @@ struct CmdBlk *RIOGetCmdBlk(void) { struct CmdBlk *CmdBlkP; - CmdBlkP = kmalloc(sizeof(struct CmdBlk), GFP_ATOMIC); - if (CmdBlkP) - memset(CmdBlkP, 0, sizeof(struct CmdBlk)); + CmdBlkP = kzalloc(sizeof(struct CmdBlk), GFP_ATOMIC); return CmdBlkP; } diff --git a/drivers/char/rio/riotable.c b/drivers/char/rio/riotable.c index 7e988357326e..991119c9f473 100644 --- a/drivers/char/rio/riotable.c +++ b/drivers/char/rio/riotable.c @@ -863,8 +863,7 @@ int RIOReMapPorts(struct rio_info *p, struct Host *HostP, struct Map *HostMapP) if (PortP->TxRingBuffer) memset(PortP->TxRingBuffer, 0, p->RIOBufferSize); else if (p->RIOBufferSize) { - PortP->TxRingBuffer = kmalloc(p->RIOBufferSize, GFP_KERNEL); - memset(PortP->TxRingBuffer, 0, p->RIOBufferSize); + PortP->TxRingBuffer = kzalloc(p->RIOBufferSize, GFP_KERNEL); } PortP->TxBufferOut = 0; PortP->TxBufferIn = 0; diff --git a/drivers/char/rocket.c b/drivers/char/rocket.c index 0270080ff0c0..56cbba7b6ec0 100644 --- a/drivers/char/rocket.c +++ b/drivers/char/rocket.c @@ -635,12 +635,11 @@ static void init_r_port(int board, int aiop, int chan, struct pci_dev *pci_dev) ctlp = sCtlNumToCtlPtr(board); /* Get a r_port struct for the port, fill it in and save it globally, indexed by line number */ - info = kmalloc(sizeof (struct r_port), GFP_KERNEL); + info = kzalloc(sizeof (struct r_port), GFP_KERNEL); if (!info) { printk(KERN_INFO "Couldn't allocate info struct for line #%d\n", line); return; } - memset(info, 0, sizeof (struct r_port)); info->magic = RPORT_MAGIC; info->line = line; diff --git a/drivers/char/stallion.c b/drivers/char/stallion.c index 93d0bb8b4c0f..4a80b2f864e0 100644 --- a/drivers/char/stallion.c +++ b/drivers/char/stallion.c @@ -4795,7 +4795,6 @@ static void __exit stallion_module_exit(void) { struct stlbrd *brdp; unsigned int i, j; - int retval; pr_debug("cleanup_module()\n"); @@ -4818,9 +4817,7 @@ static void __exit stallion_module_exit(void) for (i = 0; i < 4; i++) class_device_destroy(stallion_class, MKDEV(STL_SIOMEMMAJOR, i)); - if ((retval = unregister_chrdev(STL_SIOMEMMAJOR, "staliomem"))) - printk("STALLION: failed to un-register serial memory device, " - "errno=%d\n", -retval); + unregister_chrdev(STL_SIOMEMMAJOR, "staliomem"); class_destroy(stallion_class); pci_unregister_driver(&stl_pcidriver); diff --git a/drivers/char/synclink.c b/drivers/char/synclink.c index f53e51ddb9d7..fdc256b380b8 100644 --- a/drivers/char/synclink.c +++ b/drivers/char/synclink.c @@ -4324,13 +4324,12 @@ static struct mgsl_struct* mgsl_allocate_device(void) { struct mgsl_struct *info; - info = kmalloc(sizeof(struct mgsl_struct), + info = kzalloc(sizeof(struct mgsl_struct), GFP_KERNEL); if (!info) { printk("Error can't allocate device instance data\n"); } else { - memset(info, 0, sizeof(struct mgsl_struct)); info->magic = MGSL_MAGIC; INIT_WORK(&info->task, mgsl_bh_handler); info->max_frame_size = 4096; diff --git a/drivers/char/synclink_gt.c b/drivers/char/synclink_gt.c index 428b514201f4..372a37e25620 100644 --- a/drivers/char/synclink_gt.c +++ b/drivers/char/synclink_gt.c @@ -3414,13 +3414,12 @@ static struct slgt_info *alloc_dev(int adapter_num, int port_num, struct pci_dev { struct slgt_info *info; - info = kmalloc(sizeof(struct slgt_info), GFP_KERNEL); + info = kzalloc(sizeof(struct slgt_info), GFP_KERNEL); if (!info) { DBGERR(("%s device alloc failed adapter=%d port=%d\n", driver_name, adapter_num, port_num)); } else { - memset(info, 0, sizeof(struct slgt_info)); info->magic = MGSL_MAGIC; INIT_WORK(&info->task, bh_handler); info->max_frame_size = 4096; diff --git a/drivers/char/synclinkmp.c b/drivers/char/synclinkmp.c index a65407b32079..c63013b2fc36 100644 --- a/drivers/char/synclinkmp.c +++ b/drivers/char/synclinkmp.c @@ -3786,14 +3786,13 @@ static SLMP_INFO *alloc_dev(int adapter_num, int port_num, struct pci_dev *pdev) { SLMP_INFO *info; - info = kmalloc(sizeof(SLMP_INFO), + info = kzalloc(sizeof(SLMP_INFO), GFP_KERNEL); if (!info) { printk("%s(%d) Error can't allocate device instance data for adapter %d, port %d\n", __FILE__,__LINE__, adapter_num, port_num); } else { - memset(info, 0, sizeof(SLMP_INFO)); info->magic = MGSL_MAGIC; INIT_WORK(&info->task, bh_handler); info->max_frame_size = 4096; diff --git a/drivers/char/viotape.c b/drivers/char/viotape.c index db57277117bb..e12275df6ea2 100644 --- a/drivers/char/viotape.c +++ b/drivers/char/viotape.c @@ -1098,15 +1098,10 @@ static int chg_state(int index, unsigned char new_state, struct file *file) /* Cleanup */ static void __exit viotap_exit(void) { - int ret; - remove_proc_entry("iSeries/viotape", NULL); vio_unregister_driver(&viotape_driver); class_destroy(tape_class); - ret = unregister_chrdev(VIOTAPE_MAJOR, "viotape"); - if (ret < 0) - printk(VIOTAPE_KERN_WARN "Error unregistering device: %d\n", - ret); + unregister_chrdev(VIOTAPE_MAJOR, "viotape"); if (viotape_unitinfo) dma_free_coherent(iSeries_vio_dev, sizeof(viotape_unitinfo[0]) * VIOTAPE_MAX_TAPE, diff --git a/drivers/char/watchdog/mpcore_wdt.c b/drivers/char/watchdog/mpcore_wdt.c index e88947f8fe53..0d2b27735419 100644 --- a/drivers/char/watchdog/mpcore_wdt.c +++ b/drivers/char/watchdog/mpcore_wdt.c @@ -328,12 +328,11 @@ static int __devinit mpcore_wdt_probe(struct platform_device *dev) goto err_out; } - wdt = kmalloc(sizeof(struct mpcore_wdt), GFP_KERNEL); + wdt = kzalloc(sizeof(struct mpcore_wdt), GFP_KERNEL); if (!wdt) { ret = -ENOMEM; goto err_out; } - memset(wdt, 0, sizeof(struct mpcore_wdt)); wdt->dev = &dev->dev; wdt->irq = platform_get_irq(dev, 0); diff --git a/drivers/char/watchdog/pcwd_usb.c b/drivers/char/watchdog/pcwd_usb.c index 1e7a6719d5ba..0f3fd6c9c354 100644 --- a/drivers/char/watchdog/pcwd_usb.c +++ b/drivers/char/watchdog/pcwd_usb.c @@ -626,12 +626,11 @@ static int usb_pcwd_probe(struct usb_interface *interface, const struct usb_devi maxp = usb_maxpacket(udev, pipe, usb_pipeout(pipe)); /* allocate memory for our device and initialize it */ - usb_pcwd = kmalloc (sizeof(struct usb_pcwd_private), GFP_KERNEL); + usb_pcwd = kzalloc (sizeof(struct usb_pcwd_private), GFP_KERNEL); if (usb_pcwd == NULL) { printk(KERN_ERR PFX "Out of memory\n"); goto error; } - memset (usb_pcwd, 0x00, sizeof (*usb_pcwd)); usb_pcwd_device = usb_pcwd; diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index debf1d8e8b41..1724c41d2414 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -3,18 +3,18 @@ # Copyright (c) 2003 Linux Networx # Licensed and distributed under the GPL # -# $Id: Kconfig,v 1.4.2.7 2005/07/08 22:05:38 dsp_llnl Exp $ -# menuconfig EDAC - tristate "EDAC - error detection and reporting (EXPERIMENTAL)" + bool "EDAC - error detection and reporting (EXPERIMENTAL)" depends on HAS_IOMEM - depends on X86 && EXPERIMENTAL + depends on EXPERIMENTAL + depends on X86 || MIPS || PPC help EDAC is designed to report errors in the core system. These are low-level errors that are reported in the CPU or - supporting chipset: memory errors, cache errors, PCI errors, - thermal throttling, etc.. If unsure, select 'Y'. + supporting chipset or other subsystems: + memory errors, cache errors, PCI errors, thermal throttling, etc.. + If unsure, select 'Y'. If this code is reporting problems on your system, please see the EDAC project web pages for more information at: @@ -73,6 +73,14 @@ config EDAC_E752X Support for error detection and correction on the Intel E7520, E7525, E7320 server chipsets. +config EDAC_I82443BXGX + tristate "Intel 82443BX/GX (440BX/GX)" + depends on EDAC_MM_EDAC && PCI && X86_32 + depends on BROKEN + help + Support for error detection and correction on the Intel + 82443BX/GX memory controllers (440BX/GX chipsets). + config EDAC_I82875P tristate "Intel 82875p (D82875P, E7210)" depends on EDAC_MM_EDAC && PCI && X86_32 @@ -80,6 +88,20 @@ config EDAC_I82875P Support for error detection and correction on the Intel DP82785P and E7210 server chipsets. +config EDAC_I82975X + tristate "Intel 82975x (D82975x)" + depends on EDAC_MM_EDAC && PCI && X86 + help + Support for error detection and correction on the Intel + DP82975x server chipsets. + +config EDAC_I3000 + tristate "Intel 3000/3010" + depends on EDAC_MM_EDAC && PCI && X86_32 + help + Support for error detection and correction on the Intel + 3000 and 3010 server chipsets. + config EDAC_I82860 tristate "Intel 82860" depends on EDAC_MM_EDAC && PCI && X86_32 @@ -94,15 +116,20 @@ config EDAC_R82600 Support for error detection and correction on the Radisys 82600 embedded chipset. -choice - prompt "Error detecting method" - default EDAC_POLL +config EDAC_I5000 + tristate "Intel Greencreek/Blackford chipset" + depends on EDAC_MM_EDAC && X86 && PCI + help + Support for error detection and correction the Intel + Greekcreek/Blackford chipsets. -config EDAC_POLL - bool "Poll for errors" +config EDAC_PASEMI + tristate "PA Semi PWRficient" + depends on EDAC_MM_EDAC && PCI + depends on PPC help - Poll the chipset periodically to detect errors. + Support for error detection and correction on PA Semi + PWRficient. -endchoice endif # EDAC diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index 93137fdab4b3..02c09f0ff157 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile @@ -5,14 +5,27 @@ # This file may be distributed under the terms of the # GNU General Public License. # -# $Id: Makefile,v 1.4.2.3 2005/07/08 22:05:38 dsp_llnl Exp $ -obj-$(CONFIG_EDAC_MM_EDAC) += edac_mc.o +obj-$(CONFIG_EDAC) := edac_stub.o +obj-$(CONFIG_EDAC_MM_EDAC) += edac_core.o + +edac_core-objs := edac_mc.o edac_device.o edac_mc_sysfs.o edac_pci_sysfs.o +edac_core-objs += edac_module.o edac_device_sysfs.o + +ifdef CONFIG_PCI +edac_core-objs += edac_pci.o edac_pci_sysfs.o +endif + obj-$(CONFIG_EDAC_AMD76X) += amd76x_edac.o +obj-$(CONFIG_EDAC_I5000) += i5000_edac.o obj-$(CONFIG_EDAC_E7XXX) += e7xxx_edac.o obj-$(CONFIG_EDAC_E752X) += e752x_edac.o +obj-$(CONFIG_EDAC_I82443BXGX) += i82443bxgx_edac.o obj-$(CONFIG_EDAC_I82875P) += i82875p_edac.o +obj-$(CONFIG_EDAC_I82975X) += i82975x_edac.o +obj-$(CONFIG_EDAC_I3000) += i3000_edac.o obj-$(CONFIG_EDAC_I82860) += i82860_edac.o obj-$(CONFIG_EDAC_R82600) += r82600_edac.o +obj-$(CONFIG_EDAC_PASEMI) += pasemi_edac.o diff --git a/drivers/edac/amd76x_edac.c b/drivers/edac/amd76x_edac.c index f79f6b587bfa..f22075410591 100644 --- a/drivers/edac/amd76x_edac.c +++ b/drivers/edac/amd76x_edac.c @@ -17,9 +17,9 @@ #include <linux/pci.h> #include <linux/pci_ids.h> #include <linux/slab.h> -#include "edac_mc.h" +#include "edac_core.h" -#define AMD76X_REVISION " Ver: 2.0.1 " __DATE__ +#define AMD76X_REVISION " Ver: 2.0.2 " __DATE__ #define EDAC_MOD_STR "amd76x_edac" #define amd76x_printk(level, fmt, arg...) \ @@ -86,13 +86,13 @@ struct amd76x_dev_info { static const struct amd76x_dev_info amd76x_devs[] = { [AMD761] = { - .ctl_name = "AMD761" - }, + .ctl_name = "AMD761"}, [AMD762] = { - .ctl_name = "AMD762" - }, + .ctl_name = "AMD762"}, }; +static struct edac_pci_ctl_info *amd76x_pci; + /** * amd76x_get_error_info - fetch error information * @mci: Memory controller @@ -102,21 +102,21 @@ static const struct amd76x_dev_info amd76x_devs[] = { * on the chip so that further errors will be reported */ static void amd76x_get_error_info(struct mem_ctl_info *mci, - struct amd76x_error_info *info) + struct amd76x_error_info *info) { struct pci_dev *pdev; pdev = to_pci_dev(mci->dev); pci_read_config_dword(pdev, AMD76X_ECC_MODE_STATUS, - &info->ecc_mode_status); + &info->ecc_mode_status); if (info->ecc_mode_status & BIT(8)) pci_write_bits32(pdev, AMD76X_ECC_MODE_STATUS, - (u32) BIT(8), (u32) BIT(8)); + (u32) BIT(8), (u32) BIT(8)); if (info->ecc_mode_status & BIT(9)) pci_write_bits32(pdev, AMD76X_ECC_MODE_STATUS, - (u32) BIT(9), (u32) BIT(9)); + (u32) BIT(9), (u32) BIT(9)); } /** @@ -130,7 +130,8 @@ static void amd76x_get_error_info(struct mem_ctl_info *mci, * then attempt to handle and clean up after the error */ static int amd76x_process_error_info(struct mem_ctl_info *mci, - struct amd76x_error_info *info, int handle_errors) + struct amd76x_error_info *info, + int handle_errors) { int error_found; u32 row; @@ -138,7 +139,7 @@ static int amd76x_process_error_info(struct mem_ctl_info *mci, error_found = 0; /* - * Check for an uncorrectable error + * Check for an uncorrectable error */ if (info->ecc_mode_status & BIT(8)) { error_found = 1; @@ -146,12 +147,12 @@ static int amd76x_process_error_info(struct mem_ctl_info *mci, if (handle_errors) { row = (info->ecc_mode_status >> 4) & 0xf; edac_mc_handle_ue(mci, mci->csrows[row].first_page, 0, - row, mci->ctl_name); + row, mci->ctl_name); } } /* - * Check for a correctable error + * Check for a correctable error */ if (info->ecc_mode_status & BIT(9)) { error_found = 1; @@ -159,7 +160,7 @@ static int amd76x_process_error_info(struct mem_ctl_info *mci, if (handle_errors) { row = info->ecc_mode_status & 0xf; edac_mc_handle_ce(mci, mci->csrows[row].first_page, 0, - 0, row, 0, mci->ctl_name); + 0, row, 0, mci->ctl_name); } } @@ -182,7 +183,7 @@ static void amd76x_check(struct mem_ctl_info *mci) } static void amd76x_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev, - enum edac_type edac_mode) + enum edac_type edac_mode) { struct csrow_info *csrow; u32 mba, mba_base, mba_mask, dms; @@ -193,8 +194,7 @@ static void amd76x_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev, /* find the DRAM Chip Select Base address and mask */ pci_read_config_dword(pdev, - AMD76X_MEM_BASE_ADDR + (index * 4), - &mba); + AMD76X_MEM_BASE_ADDR + (index * 4), &mba); if (!(mba & BIT(0))) continue; @@ -238,7 +238,7 @@ static int amd76x_probe1(struct pci_dev *pdev, int dev_idx) debugf0("%s()\n", __func__); pci_read_config_dword(pdev, AMD76X_ECC_MODE_STATUS, &ems); ems_mode = (ems >> 10) & 0x3; - mci = edac_mc_alloc(0, AMD76X_NR_CSROWS, AMD76X_NR_CHANS); + mci = edac_mc_alloc(0, AMD76X_NR_CSROWS, AMD76X_NR_CHANS, 0); if (mci == NULL) { return -ENOMEM; @@ -249,24 +249,36 @@ static int amd76x_probe1(struct pci_dev *pdev, int dev_idx) mci->mtype_cap = MEM_FLAG_RDDR; mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_EC | EDAC_FLAG_SECDED; mci->edac_cap = ems_mode ? - (EDAC_FLAG_EC | EDAC_FLAG_SECDED) : EDAC_FLAG_NONE; + (EDAC_FLAG_EC | EDAC_FLAG_SECDED) : EDAC_FLAG_NONE; mci->mod_name = EDAC_MOD_STR; mci->mod_ver = AMD76X_REVISION; mci->ctl_name = amd76x_devs[dev_idx].ctl_name; + mci->dev_name = pci_name(pdev); mci->edac_check = amd76x_check; mci->ctl_page_to_phys = NULL; amd76x_init_csrows(mci, pdev, ems_modes[ems_mode]); - amd76x_get_error_info(mci, &discard); /* clear counters */ + amd76x_get_error_info(mci, &discard); /* clear counters */ /* Here we assume that we will never see multiple instances of this * type of memory controller. The ID is therefore hardcoded to 0. */ - if (edac_mc_add_mc(mci,0)) { + if (edac_mc_add_mc(mci)) { debugf3("%s(): failed edac_mc_add_mc()\n", __func__); goto fail; } + /* allocating generic PCI control info */ + amd76x_pci = edac_pci_create_generic_ctl(&pdev->dev, EDAC_MOD_STR); + if (!amd76x_pci) { + printk(KERN_WARNING + "%s(): Unable to create PCI control\n", + __func__); + printk(KERN_WARNING + "%s(): PCI error report via EDAC not setup\n", + __func__); + } + /* get this far and it's successful */ debugf3("%s(): success\n", __func__); return 0; @@ -278,7 +290,7 @@ fail: /* returns count (>= 0), or negative on error */ static int __devinit amd76x_init_one(struct pci_dev *pdev, - const struct pci_device_id *ent) + const struct pci_device_id *ent) { debugf0("%s()\n", __func__); @@ -300,6 +312,9 @@ static void __devexit amd76x_remove_one(struct pci_dev *pdev) debugf0("%s()\n", __func__); + if (amd76x_pci) + edac_pci_release_generic_ctl(amd76x_pci); + if ((mci = edac_mc_del_mc(&pdev->dev)) == NULL) return; @@ -308,16 +323,14 @@ static void __devexit amd76x_remove_one(struct pci_dev *pdev) static const struct pci_device_id amd76x_pci_tbl[] __devinitdata = { { - PCI_VEND_DEV(AMD, FE_GATE_700C), PCI_ANY_ID, PCI_ANY_ID, 0, 0, - AMD762 - }, + PCI_VEND_DEV(AMD, FE_GATE_700C), PCI_ANY_ID, PCI_ANY_ID, 0, 0, + AMD762}, { - PCI_VEND_DEV(AMD, FE_GATE_700E), PCI_ANY_ID, PCI_ANY_ID, 0, 0, - AMD761 - }, + PCI_VEND_DEV(AMD, FE_GATE_700E), PCI_ANY_ID, PCI_ANY_ID, 0, 0, + AMD761}, { - 0, - } /* 0 terminated list. */ + 0, + } /* 0 terminated list. */ }; MODULE_DEVICE_TABLE(pci, amd76x_pci_tbl); diff --git a/drivers/edac/e752x_edac.c b/drivers/edac/e752x_edac.c index 8bcc887692ab..3bba224cb55d 100644 --- a/drivers/edac/e752x_edac.c +++ b/drivers/edac/e752x_edac.c @@ -22,13 +22,16 @@ #include <linux/pci.h> #include <linux/pci_ids.h> #include <linux/slab.h> -#include "edac_mc.h" +#include <linux/edac.h> +#include "edac_core.h" -#define E752X_REVISION " Ver: 2.0.1 " __DATE__ +#define E752X_REVISION " Ver: 2.0.2 " __DATE__ #define EDAC_MOD_STR "e752x_edac" static int force_function_unhide; +static struct edac_pci_ctl_info *e752x_pci; + #define e752x_printk(level, fmt, arg...) \ edac_printk(level, "e752x", fmt, ##arg) @@ -203,25 +206,22 @@ static const struct e752x_dev_info e752x_devs[] = { [E7520] = { .err_dev = PCI_DEVICE_ID_INTEL_7520_1_ERR, .ctl_dev = PCI_DEVICE_ID_INTEL_7520_0, - .ctl_name = "E7520" - }, + .ctl_name = "E7520"}, [E7525] = { .err_dev = PCI_DEVICE_ID_INTEL_7525_1_ERR, .ctl_dev = PCI_DEVICE_ID_INTEL_7525_0, - .ctl_name = "E7525" - }, + .ctl_name = "E7525"}, [E7320] = { .err_dev = PCI_DEVICE_ID_INTEL_7320_1_ERR, .ctl_dev = PCI_DEVICE_ID_INTEL_7320_0, - .ctl_name = "E7320" - }, + .ctl_name = "E7320"}, }; static unsigned long ctl_page_to_phys(struct mem_ctl_info *mci, - unsigned long page) + unsigned long page) { u32 remap; - struct e752x_pvt *pvt = (struct e752x_pvt *) mci->pvt_info; + struct e752x_pvt *pvt = (struct e752x_pvt *)mci->pvt_info; debugf3("%s()\n", __func__); @@ -241,13 +241,13 @@ static unsigned long ctl_page_to_phys(struct mem_ctl_info *mci, } static void do_process_ce(struct mem_ctl_info *mci, u16 error_one, - u32 sec1_add, u16 sec1_syndrome) + u32 sec1_add, u16 sec1_syndrome) { u32 page; int row; int channel; int i; - struct e752x_pvt *pvt = (struct e752x_pvt *) mci->pvt_info; + struct e752x_pvt *pvt = (struct e752x_pvt *)mci->pvt_info; debugf3("%s()\n", __func__); @@ -261,7 +261,8 @@ static void do_process_ce(struct mem_ctl_info *mci, u16 error_one, e752x_printk(KERN_WARNING, "Test row %d Table %d %d %d %d %d %d %d %d\n", row, pvt->map[0], pvt->map[1], pvt->map[2], pvt->map[3], - pvt->map[4], pvt->map[5], pvt->map[6], pvt->map[7]); + pvt->map[4], pvt->map[5], pvt->map[6], + pvt->map[7]); /* test for channel remapping */ for (i = 0; i < 8; i++) { @@ -275,24 +276,22 @@ static void do_process_ce(struct mem_ctl_info *mci, u16 error_one, row = i; else e752x_mc_printk(mci, KERN_WARNING, - "row %d not found in remap table\n", row); + "row %d not found in remap table\n", + row); } else row = edac_mc_find_csrow_by_page(mci, page); /* 0 = channel A, 1 = channel B */ channel = !(error_one & 1); - if (!pvt->map_type) - row = 7 - row; - /* e752x mc reads 34:6 of the DRAM linear address */ edac_mc_handle_ce(mci, page, offset_in_page(sec1_add << 4), sec1_syndrome, row, channel, "e752x CE"); } static inline void process_ce(struct mem_ctl_info *mci, u16 error_one, - u32 sec1_add, u16 sec1_syndrome, int *error_found, - int handle_error) + u32 sec1_add, u16 sec1_syndrome, int *error_found, + int handle_error) { *error_found = 1; @@ -301,11 +300,11 @@ static inline void process_ce(struct mem_ctl_info *mci, u16 error_one, } static void do_process_ue(struct mem_ctl_info *mci, u16 error_one, - u32 ded_add, u32 scrb_add) + u32 ded_add, u32 scrb_add) { u32 error_2b, block_page; int row; - struct e752x_pvt *pvt = (struct e752x_pvt *) mci->pvt_info; + struct e752x_pvt *pvt = (struct e752x_pvt *)mci->pvt_info; debugf3("%s()\n", __func__); @@ -316,14 +315,14 @@ static void do_process_ue(struct mem_ctl_info *mci, u16 error_one, block_page = error_2b >> (PAGE_SHIFT - 4); row = pvt->mc_symmetric ? - /* chip select are bits 14 & 13 */ + /* chip select are bits 14 & 13 */ ((block_page >> 1) & 3) : edac_mc_find_csrow_by_page(mci, block_page); /* e752x mc reads 34:6 of the DRAM linear address */ edac_mc_handle_ue(mci, block_page, - offset_in_page(error_2b << 4), - row, "e752x UE from Read"); + offset_in_page(error_2b << 4), + row, "e752x UE from Read"); } if (error_one & 0x0404) { error_2b = scrb_add; @@ -332,19 +331,20 @@ static void do_process_ue(struct mem_ctl_info *mci, u16 error_one, block_page = error_2b >> (PAGE_SHIFT - 4); row = pvt->mc_symmetric ? - /* chip select are bits 14 & 13 */ + /* chip select are bits 14 & 13 */ ((block_page >> 1) & 3) : edac_mc_find_csrow_by_page(mci, block_page); /* e752x mc reads 34:6 of the DRAM linear address */ edac_mc_handle_ue(mci, block_page, - offset_in_page(error_2b << 4), - row, "e752x UE from Scruber"); + offset_in_page(error_2b << 4), + row, "e752x UE from Scruber"); } } static inline void process_ue(struct mem_ctl_info *mci, u16 error_one, - u32 ded_add, u32 scrb_add, int *error_found, int handle_error) + u32 ded_add, u32 scrb_add, int *error_found, + int handle_error) { *error_found = 1; @@ -353,7 +353,7 @@ static inline void process_ue(struct mem_ctl_info *mci, u16 error_one, } static inline void process_ue_no_info_wr(struct mem_ctl_info *mci, - int *error_found, int handle_error) + int *error_found, int handle_error) { *error_found = 1; @@ -365,24 +365,24 @@ static inline void process_ue_no_info_wr(struct mem_ctl_info *mci, } static void do_process_ded_retry(struct mem_ctl_info *mci, u16 error, - u32 retry_add) + u32 retry_add) { u32 error_1b, page; int row; - struct e752x_pvt *pvt = (struct e752x_pvt *) mci->pvt_info; + struct e752x_pvt *pvt = (struct e752x_pvt *)mci->pvt_info; error_1b = retry_add; - page = error_1b >> (PAGE_SHIFT - 4); /* convert the addr to 4k page */ - row = pvt->mc_symmetric ? - ((page >> 1) & 3) : /* chip select are bits 14 & 13 */ + page = error_1b >> (PAGE_SHIFT - 4); /* convert the addr to 4k page */ + row = pvt->mc_symmetric ? ((page >> 1) & 3) : /* chip select are bits 14 & 13 */ edac_mc_find_csrow_by_page(mci, page); e752x_mc_printk(mci, KERN_WARNING, - "CE page 0x%lx, row %d : Memory read retry\n", - (long unsigned int) page, row); + "CE page 0x%lx, row %d : Memory read retry\n", + (long unsigned int)page, row); } static inline void process_ded_retry(struct mem_ctl_info *mci, u16 error, - u32 retry_add, int *error_found, int handle_error) + u32 retry_add, int *error_found, + int handle_error) { *error_found = 1; @@ -391,7 +391,7 @@ static inline void process_ded_retry(struct mem_ctl_info *mci, u16 error, } static inline void process_threshold_ce(struct mem_ctl_info *mci, u16 error, - int *error_found, int handle_error) + int *error_found, int handle_error) { *error_found = 1; @@ -420,7 +420,7 @@ static void do_global_error(int fatal, u32 errors) } static inline void global_error(int fatal, u32 errors, int *error_found, - int handle_error) + int handle_error) { *error_found = 1; @@ -447,7 +447,7 @@ static void do_hub_error(int fatal, u8 errors) } static inline void hub_error(int fatal, u8 errors, int *error_found, - int handle_error) + int handle_error) { *error_found = 1; @@ -505,7 +505,7 @@ static void do_sysbus_error(int fatal, u32 errors) } static inline void sysbus_error(int fatal, u32 errors, int *error_found, - int handle_error) + int handle_error) { *error_found = 1; @@ -514,7 +514,7 @@ static inline void sysbus_error(int fatal, u32 errors, int *error_found, } static void e752x_check_hub_interface(struct e752x_error_info *info, - int *error_found, int handle_error) + int *error_found, int handle_error) { u8 stat8; @@ -522,33 +522,32 @@ static void e752x_check_hub_interface(struct e752x_error_info *info, stat8 = info->hi_ferr; - if(stat8 & 0x7f) { /* Error, so process */ + if (stat8 & 0x7f) { /* Error, so process */ stat8 &= 0x7f; - if(stat8 & 0x2b) + if (stat8 & 0x2b) hub_error(1, stat8 & 0x2b, error_found, handle_error); - if(stat8 & 0x54) + if (stat8 & 0x54) hub_error(0, stat8 & 0x54, error_found, handle_error); } - //pci_read_config_byte(dev,E752X_HI_NERR,&stat8); stat8 = info->hi_nerr; - if(stat8 & 0x7f) { /* Error, so process */ + if (stat8 & 0x7f) { /* Error, so process */ stat8 &= 0x7f; if (stat8 & 0x2b) hub_error(1, stat8 & 0x2b, error_found, handle_error); - if(stat8 & 0x54) + if (stat8 & 0x54) hub_error(0, stat8 & 0x54, error_found, handle_error); } } static void e752x_check_sysbus(struct e752x_error_info *info, - int *error_found, int handle_error) + int *error_found, int handle_error) { u32 stat32, error32; @@ -556,47 +555,47 @@ static void e752x_check_sysbus(struct e752x_error_info *info, stat32 = info->sysbus_ferr + (info->sysbus_nerr << 16); if (stat32 == 0) - return; /* no errors */ + return; /* no errors */ error32 = (stat32 >> 16) & 0x3ff; stat32 = stat32 & 0x3ff; - if(stat32 & 0x087) + if (stat32 & 0x087) sysbus_error(1, stat32 & 0x087, error_found, handle_error); - if(stat32 & 0x378) + if (stat32 & 0x378) sysbus_error(0, stat32 & 0x378, error_found, handle_error); - if(error32 & 0x087) + if (error32 & 0x087) sysbus_error(1, error32 & 0x087, error_found, handle_error); - if(error32 & 0x378) + if (error32 & 0x378) sysbus_error(0, error32 & 0x378, error_found, handle_error); } -static void e752x_check_membuf (struct e752x_error_info *info, - int *error_found, int handle_error) +static void e752x_check_membuf(struct e752x_error_info *info, + int *error_found, int handle_error) { u8 stat8; stat8 = info->buf_ferr; - if (stat8 & 0x0f) { /* Error, so process */ + if (stat8 & 0x0f) { /* Error, so process */ stat8 &= 0x0f; membuf_error(stat8, error_found, handle_error); } stat8 = info->buf_nerr; - if (stat8 & 0x0f) { /* Error, so process */ + if (stat8 & 0x0f) { /* Error, so process */ stat8 &= 0x0f; membuf_error(stat8, error_found, handle_error); } } -static void e752x_check_dram (struct mem_ctl_info *mci, - struct e752x_error_info *info, int *error_found, - int handle_error) +static void e752x_check_dram(struct mem_ctl_info *mci, + struct e752x_error_info *info, int *error_found, + int handle_error) { u16 error_one, error_next; @@ -604,55 +603,52 @@ static void e752x_check_dram (struct mem_ctl_info *mci, error_next = info->dram_nerr; /* decode and report errors */ - if(error_one & 0x0101) /* check first error correctable */ + if (error_one & 0x0101) /* check first error correctable */ process_ce(mci, error_one, info->dram_sec1_add, - info->dram_sec1_syndrome, error_found, - handle_error); + info->dram_sec1_syndrome, error_found, handle_error); - if(error_next & 0x0101) /* check next error correctable */ + if (error_next & 0x0101) /* check next error correctable */ process_ce(mci, error_next, info->dram_sec2_add, - info->dram_sec2_syndrome, error_found, - handle_error); + info->dram_sec2_syndrome, error_found, handle_error); - if(error_one & 0x4040) + if (error_one & 0x4040) process_ue_no_info_wr(mci, error_found, handle_error); - if(error_next & 0x4040) + if (error_next & 0x4040) process_ue_no_info_wr(mci, error_found, handle_error); - if(error_one & 0x2020) + if (error_one & 0x2020) process_ded_retry(mci, error_one, info->dram_retr_add, - error_found, handle_error); + error_found, handle_error); - if(error_next & 0x2020) + if (error_next & 0x2020) process_ded_retry(mci, error_next, info->dram_retr_add, - error_found, handle_error); + error_found, handle_error); - if(error_one & 0x0808) - process_threshold_ce(mci, error_one, error_found, - handle_error); + if (error_one & 0x0808) + process_threshold_ce(mci, error_one, error_found, handle_error); - if(error_next & 0x0808) + if (error_next & 0x0808) process_threshold_ce(mci, error_next, error_found, - handle_error); + handle_error); - if(error_one & 0x0606) + if (error_one & 0x0606) process_ue(mci, error_one, info->dram_ded_add, - info->dram_scrb_add, error_found, handle_error); + info->dram_scrb_add, error_found, handle_error); - if(error_next & 0x0606) + if (error_next & 0x0606) process_ue(mci, error_next, info->dram_ded_add, - info->dram_scrb_add, error_found, handle_error); + info->dram_scrb_add, error_found, handle_error); } -static void e752x_get_error_info (struct mem_ctl_info *mci, - struct e752x_error_info *info) +static void e752x_get_error_info(struct mem_ctl_info *mci, + struct e752x_error_info *info) { struct pci_dev *dev; struct e752x_pvt *pvt; memset(info, 0, sizeof(*info)); - pvt = (struct e752x_pvt *) mci->pvt_info; + pvt = (struct e752x_pvt *)mci->pvt_info; dev = pvt->dev_d0f1; pci_read_config_dword(dev, E752X_FERR_GLOBAL, &info->ferr_global); @@ -661,8 +657,7 @@ static void e752x_get_error_info (struct mem_ctl_info *mci, pci_read_config_word(dev, E752X_SYSBUS_FERR, &info->sysbus_ferr); pci_read_config_byte(dev, E752X_BUF_FERR, &info->buf_ferr); - pci_read_config_word(dev, E752X_DRAM_FERR, - &info->dram_ferr); + pci_read_config_word(dev, E752X_DRAM_FERR, &info->dram_ferr); pci_read_config_dword(dev, E752X_DRAM_SEC1_ADD, &info->dram_sec1_add); pci_read_config_word(dev, E752X_DRAM_SEC1_SYNDROME, @@ -688,7 +683,7 @@ static void e752x_get_error_info (struct mem_ctl_info *mci, if (info->dram_ferr) pci_write_bits16(pvt->bridge_ck, E752X_DRAM_FERR, - info->dram_ferr, info->dram_ferr); + info->dram_ferr, info->dram_ferr); pci_write_config_dword(dev, E752X_FERR_GLOBAL, info->ferr_global); @@ -701,8 +696,7 @@ static void e752x_get_error_info (struct mem_ctl_info *mci, pci_read_config_word(dev, E752X_SYSBUS_NERR, &info->sysbus_nerr); pci_read_config_byte(dev, E752X_BUF_NERR, &info->buf_nerr); - pci_read_config_word(dev, E752X_DRAM_NERR, - &info->dram_nerr); + pci_read_config_word(dev, E752X_DRAM_NERR, &info->dram_nerr); pci_read_config_dword(dev, E752X_DRAM_SEC2_ADD, &info->dram_sec2_add); pci_read_config_word(dev, E752X_DRAM_SEC2_SYNDROME, @@ -722,15 +716,16 @@ static void e752x_get_error_info (struct mem_ctl_info *mci, if (info->dram_nerr) pci_write_bits16(pvt->bridge_ck, E752X_DRAM_NERR, - info->dram_nerr, info->dram_nerr); + info->dram_nerr, info->dram_nerr); pci_write_config_dword(dev, E752X_NERR_GLOBAL, info->nerr_global); } } -static int e752x_process_error_info (struct mem_ctl_info *mci, - struct e752x_error_info *info, int handle_errors) +static int e752x_process_error_info(struct mem_ctl_info *mci, + struct e752x_error_info *info, + int handle_errors) { u32 error32, stat32; int error_found; @@ -776,26 +771,38 @@ static inline int dual_channel_active(u16 ddrcsr) return (((ddrcsr >> 12) & 3) == 3); } +/* Remap csrow index numbers if map_type is "reverse" + */ +static inline int remap_csrow_index(struct mem_ctl_info *mci, int index) +{ + struct e752x_pvt *pvt = mci->pvt_info; + + if (!pvt->map_type) + return (7 - index); + + return (index); +} + static void e752x_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev, - u16 ddrcsr) + u16 ddrcsr) { struct csrow_info *csrow; unsigned long last_cumul_size; int index, mem_dev, drc_chan; - int drc_drbg; /* DRB granularity 0=64mb, 1=128mb */ - int drc_ddim; /* DRAM Data Integrity Mode 0=none, 2=edac */ + int drc_drbg; /* DRB granularity 0=64mb, 1=128mb */ + int drc_ddim; /* DRAM Data Integrity Mode 0=none, 2=edac */ u8 value; u32 dra, drc, cumul_size; dra = 0; - for (index=0; index < 4; index++) { + for (index = 0; index < 4; index++) { u8 dra_reg; - pci_read_config_byte(pdev, E752X_DRA+index, &dra_reg); + pci_read_config_byte(pdev, E752X_DRA + index, &dra_reg); dra |= dra_reg << (index * 8); } pci_read_config_dword(pdev, E752X_DRC, &drc); drc_chan = dual_channel_active(ddrcsr); - drc_drbg = drc_chan + 1; /* 128 in dual mode, 64 in single */ + drc_drbg = drc_chan + 1; /* 128 in dual mode, 64 in single */ drc_ddim = (drc >> 20) & 0x3; /* The dram row boundary (DRB) reg values are boundary address for @@ -806,7 +813,7 @@ static void e752x_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev, for (last_cumul_size = index = 0; index < mci->nr_csrows; index++) { /* mem_dev 0=x8, 1=x4 */ mem_dev = (dra >> (index * 4 + 2)) & 0x3; - csrow = &mci->csrows[index]; + csrow = &mci->csrows[remap_csrow_index(mci, index)]; mem_dev = (mem_dev == 2); pci_read_config_byte(pdev, E752X_DRB + index, &value); @@ -843,10 +850,10 @@ static void e752x_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev, } static void e752x_init_mem_map_table(struct pci_dev *pdev, - struct e752x_pvt *pvt) + struct e752x_pvt *pvt) { int index; - u8 value, last, row, stat8; + u8 value, last, row; last = 0; row = 0; @@ -858,7 +865,7 @@ static void e752x_init_mem_map_table(struct pci_dev *pdev, /* no dimm in the slot, so flag it as empty */ pvt->map[index] = 0xff; pvt->map[index + 1] = 0xff; - } else { /* there is a dimm in the slot */ + } else { /* there is a dimm in the slot */ pvt->map[index] = row; row++; last = value; @@ -866,31 +873,25 @@ static void e752x_init_mem_map_table(struct pci_dev *pdev, * sided */ pci_read_config_byte(pdev, E752X_DRB + index + 1, - &value); - pvt->map[index + 1] = (value == last) ? - 0xff : /* the dimm is single sided, - so flag as empty */ - row; /* this is a double sided dimm - to save the next row # */ + &value); + + /* the dimm is single sided, so flag as empty */ + /* this is a double sided dimm to save the next row #*/ + pvt->map[index + 1] = (value == last) ? 0xff : row; row++; last = value; } } - - /* set the map type. 1 = normal, 0 = reversed */ - pci_read_config_byte(pdev, E752X_DRM, &stat8); - pvt->map_type = ((stat8 & 0x0f) > ((stat8 >> 4) & 0x0f)); } /* Return 0 on success or 1 on failure. */ static int e752x_get_devs(struct pci_dev *pdev, int dev_idx, - struct e752x_pvt *pvt) + struct e752x_pvt *pvt) { struct pci_dev *dev; pvt->bridge_ck = pci_get_device(PCI_VENDOR_ID_INTEL, - pvt->dev_info->err_dev, - pvt->bridge_ck); + pvt->dev_info->err_dev, pvt->bridge_ck); if (pvt->bridge_ck == NULL) pvt->bridge_ck = pci_scan_single_device(pdev->bus, @@ -898,13 +899,13 @@ static int e752x_get_devs(struct pci_dev *pdev, int dev_idx, if (pvt->bridge_ck == NULL) { e752x_printk(KERN_ERR, "error reporting device not found:" - "vendor %x device 0x%x (broken BIOS?)\n", - PCI_VENDOR_ID_INTEL, e752x_devs[dev_idx].err_dev); + "vendor %x device 0x%x (broken BIOS?)\n", + PCI_VENDOR_ID_INTEL, e752x_devs[dev_idx].err_dev); return 1; } dev = pci_get_device(PCI_VENDOR_ID_INTEL, e752x_devs[dev_idx].ctl_dev, - NULL); + NULL); if (dev == NULL) goto fail; @@ -942,12 +943,22 @@ static int e752x_probe1(struct pci_dev *pdev, int dev_idx) struct mem_ctl_info *mci; struct e752x_pvt *pvt; u16 ddrcsr; - int drc_chan; /* Number of channels 0=1chan,1=2chan */ + int drc_chan; /* Number of channels 0=1chan,1=2chan */ struct e752x_error_info discard; debugf0("%s(): mci\n", __func__); debugf0("Starting Probe1\n"); + /* make sure error reporting method is sane */ + switch (edac_op_state) { + case EDAC_OPSTATE_POLL: + case EDAC_OPSTATE_NMI: + break; + default: + edac_op_state = EDAC_OPSTATE_POLL; + break; + } + /* check to see if device 0 function 1 is enabled; if it isn't, we * assume the BIOS has reserved it for a reason and is expecting * exclusive access, we take care not to violate that assumption and @@ -966,7 +977,7 @@ static int e752x_probe1(struct pci_dev *pdev, int dev_idx) /* Dual channel = 1, Single channel = 0 */ drc_chan = dual_channel_active(ddrcsr); - mci = edac_mc_alloc(sizeof(*pvt), E752X_NR_CSROWS, drc_chan + 1); + mci = edac_mc_alloc(sizeof(*pvt), E752X_NR_CSROWS, drc_chan + 1, 0); if (mci == NULL) { return -ENOMEM; @@ -975,14 +986,14 @@ static int e752x_probe1(struct pci_dev *pdev, int dev_idx) debugf3("%s(): init mci\n", __func__); mci->mtype_cap = MEM_FLAG_RDDR; mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED | - EDAC_FLAG_S4ECD4ED; + EDAC_FLAG_S4ECD4ED; /* FIXME - what if different memory types are in different csrows? */ mci->mod_name = EDAC_MOD_STR; mci->mod_ver = E752X_REVISION; mci->dev = &pdev->dev; debugf3("%s(): init pvt\n", __func__); - pvt = (struct e752x_pvt *) mci->pvt_info; + pvt = (struct e752x_pvt *)mci->pvt_info; pvt->dev_info = &e752x_devs[dev_idx]; pvt->mc_symmetric = ((ddrcsr & 0x10) != 0); @@ -993,16 +1004,20 @@ static int e752x_probe1(struct pci_dev *pdev, int dev_idx) debugf3("%s(): more mci init\n", __func__); mci->ctl_name = pvt->dev_info->ctl_name; + mci->dev_name = pci_name(pdev); mci->edac_check = e752x_check; mci->ctl_page_to_phys = ctl_page_to_phys; - e752x_init_csrows(mci, pdev, ddrcsr); - e752x_init_mem_map_table(pdev, pvt); - - /* set the map type. 1 = normal, 0 = reversed */ + /* set the map type. 1 = normal, 0 = reversed + * Must be set before e752x_init_csrows in case csrow mapping + * is reversed. + */ pci_read_config_byte(pdev, E752X_DRM, &stat8); pvt->map_type = ((stat8 & 0x0f) > ((stat8 >> 4) & 0x0f)); + e752x_init_csrows(mci, pdev, ddrcsr); + e752x_init_mem_map_table(pdev, pvt); + mci->edac_cap |= EDAC_FLAG_NONE; debugf3("%s(): tolm, remapbase, remaplimit\n", __func__); @@ -1014,19 +1029,29 @@ static int e752x_probe1(struct pci_dev *pdev, int dev_idx) pci_read_config_word(pdev, E752X_REMAPLIMIT, &pci_data); pvt->remaplimit = ((u32) pci_data) << 14; e752x_printk(KERN_INFO, - "tolm = %x, remapbase = %x, remaplimit = %x\n", pvt->tolm, - pvt->remapbase, pvt->remaplimit); + "tolm = %x, remapbase = %x, remaplimit = %x\n", + pvt->tolm, pvt->remapbase, pvt->remaplimit); /* Here we assume that we will never see multiple instances of this * type of memory controller. The ID is therefore hardcoded to 0. */ - if (edac_mc_add_mc(mci,0)) { + if (edac_mc_add_mc(mci)) { debugf3("%s(): failed edac_mc_add_mc()\n", __func__); goto fail; } e752x_init_error_reporting_regs(pvt); - e752x_get_error_info(mci, &discard); /* clear other MCH errors */ + e752x_get_error_info(mci, &discard); /* clear other MCH errors */ + + /* allocating generic PCI control info */ + e752x_pci = edac_pci_create_generic_ctl(&pdev->dev, EDAC_MOD_STR); + if (!e752x_pci) { + printk(KERN_WARNING + "%s(): Unable to create PCI control\n", __func__); + printk(KERN_WARNING + "%s(): PCI error report via EDAC not setup\n", + __func__); + } /* get this far and it's successful */ debugf3("%s(): success\n", __func__); @@ -1043,12 +1068,12 @@ fail: /* returns count (>= 0), or negative on error */ static int __devinit e752x_init_one(struct pci_dev *pdev, - const struct pci_device_id *ent) + const struct pci_device_id *ent) { debugf0("%s()\n", __func__); /* wake up and enable device */ - if(pci_enable_device(pdev) < 0) + if (pci_enable_device(pdev) < 0) return -EIO; return e752x_probe1(pdev, ent->driver_data); @@ -1061,10 +1086,13 @@ static void __devexit e752x_remove_one(struct pci_dev *pdev) debugf0("%s()\n", __func__); + if (e752x_pci) + edac_pci_release_generic_ctl(e752x_pci); + if ((mci = edac_mc_del_mc(&pdev->dev)) == NULL) return; - pvt = (struct e752x_pvt *) mci->pvt_info; + pvt = (struct e752x_pvt *)mci->pvt_info; pci_dev_put(pvt->dev_d0f0); pci_dev_put(pvt->dev_d0f1); pci_dev_put(pvt->bridge_ck); @@ -1073,20 +1101,17 @@ static void __devexit e752x_remove_one(struct pci_dev *pdev) static const struct pci_device_id e752x_pci_tbl[] __devinitdata = { { - PCI_VEND_DEV(INTEL, 7520_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0, - E7520 - }, + PCI_VEND_DEV(INTEL, 7520_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0, + E7520}, { - PCI_VEND_DEV(INTEL, 7525_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0, - E7525 - }, + PCI_VEND_DEV(INTEL, 7525_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0, + E7525}, { - PCI_VEND_DEV(INTEL, 7320_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0, - E7320 - }, + PCI_VEND_DEV(INTEL, 7320_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0, + E7320}, { - 0, - } /* 0 terminated list. */ + 0, + } /* 0 terminated list. */ }; MODULE_DEVICE_TABLE(pci, e752x_pci_tbl); @@ -1122,5 +1147,6 @@ MODULE_DESCRIPTION("MC support for Intel e752x memory controllers"); module_param(force_function_unhide, int, 0444); MODULE_PARM_DESC(force_function_unhide, "if BIOS sets Dev0:Fun1 up as hidden:" -" 1=force unhide and hope BIOS doesn't fight driver for Dev0:Fun1 access"); - + " 1=force unhide and hope BIOS doesn't fight driver for Dev0:Fun1 access"); +module_param(edac_op_state, int, 0444); +MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI"); diff --git a/drivers/edac/e7xxx_edac.c b/drivers/edac/e7xxx_edac.c index 310d91b41c96..96ecc4926641 100644 --- a/drivers/edac/e7xxx_edac.c +++ b/drivers/edac/e7xxx_edac.c @@ -27,9 +27,10 @@ #include <linux/pci.h> #include <linux/pci_ids.h> #include <linux/slab.h> -#include "edac_mc.h" +#include <linux/edac.h> +#include "edac_core.h" -#define E7XXX_REVISION " Ver: 2.0.1 " __DATE__ +#define E7XXX_REVISION " Ver: 2.0.2 " __DATE__ #define EDAC_MOD_STR "e7xxx_edac" #define e7xxx_printk(level, fmt, arg...) \ @@ -143,23 +144,21 @@ struct e7xxx_error_info { u32 dram_uelog_add; }; +static struct edac_pci_ctl_info *e7xxx_pci; + static const struct e7xxx_dev_info e7xxx_devs[] = { [E7500] = { .err_dev = PCI_DEVICE_ID_INTEL_7500_1_ERR, - .ctl_name = "E7500" - }, + .ctl_name = "E7500"}, [E7501] = { .err_dev = PCI_DEVICE_ID_INTEL_7501_1_ERR, - .ctl_name = "E7501" - }, + .ctl_name = "E7501"}, [E7505] = { .err_dev = PCI_DEVICE_ID_INTEL_7505_1_ERR, - .ctl_name = "E7505" - }, + .ctl_name = "E7505"}, [E7205] = { .err_dev = PCI_DEVICE_ID_INTEL_7205_1_ERR, - .ctl_name = "E7205" - }, + .ctl_name = "E7205"}, }; /* FIXME - is this valid for both SECDED and S4ECD4ED? */ @@ -180,15 +179,15 @@ static inline int e7xxx_find_channel(u16 syndrome) } static unsigned long ctl_page_to_phys(struct mem_ctl_info *mci, - unsigned long page) + unsigned long page) { u32 remap; - struct e7xxx_pvt *pvt = (struct e7xxx_pvt *) mci->pvt_info; + struct e7xxx_pvt *pvt = (struct e7xxx_pvt *)mci->pvt_info; debugf3("%s()\n", __func__); if ((page < pvt->tolm) || - ((page >= 0x100000) && (page < pvt->remapbase))) + ((page >= 0x100000) && (page < pvt->remapbase))) return page; remap = (page - pvt->tolm) + pvt->remapbase; @@ -200,8 +199,7 @@ static unsigned long ctl_page_to_phys(struct mem_ctl_info *mci, return pvt->tolm - 1; } -static void process_ce(struct mem_ctl_info *mci, - struct e7xxx_error_info *info) +static void process_ce(struct mem_ctl_info *mci, struct e7xxx_error_info *info) { u32 error_1b, page; u16 syndrome; @@ -212,7 +210,7 @@ static void process_ce(struct mem_ctl_info *mci, /* read the error address */ error_1b = info->dram_celog_add; /* FIXME - should use PAGE_SHIFT */ - page = error_1b >> 6; /* convert the address to 4k page */ + page = error_1b >> 6; /* convert the address to 4k page */ /* read the syndrome */ syndrome = info->dram_celog_syndrome; /* FIXME - check for -1 */ @@ -228,8 +226,7 @@ static void process_ce_no_info(struct mem_ctl_info *mci) edac_mc_handle_ce_no_info(mci, "e7xxx CE log register overflow"); } -static void process_ue(struct mem_ctl_info *mci, - struct e7xxx_error_info *info) +static void process_ue(struct mem_ctl_info *mci, struct e7xxx_error_info *info) { u32 error_2b, block_page; int row; @@ -238,7 +235,7 @@ static void process_ue(struct mem_ctl_info *mci, /* read the error address */ error_2b = info->dram_uelog_add; /* FIXME - should use PAGE_SHIFT */ - block_page = error_2b >> 6; /* convert to 4k address */ + block_page = error_2b >> 6; /* convert to 4k address */ row = edac_mc_find_csrow_by_page(mci, block_page); edac_mc_handle_ue(mci, block_page, 0, row, "e7xxx UE"); } @@ -249,16 +246,14 @@ static void process_ue_no_info(struct mem_ctl_info *mci) edac_mc_handle_ue_no_info(mci, "e7xxx UE log register overflow"); } -static void e7xxx_get_error_info (struct mem_ctl_info *mci, - struct e7xxx_error_info *info) +static void e7xxx_get_error_info(struct mem_ctl_info *mci, + struct e7xxx_error_info *info) { struct e7xxx_pvt *pvt; - pvt = (struct e7xxx_pvt *) mci->pvt_info; - pci_read_config_byte(pvt->bridge_ck, E7XXX_DRAM_FERR, - &info->dram_ferr); - pci_read_config_byte(pvt->bridge_ck, E7XXX_DRAM_NERR, - &info->dram_nerr); + pvt = (struct e7xxx_pvt *)mci->pvt_info; + pci_read_config_byte(pvt->bridge_ck, E7XXX_DRAM_FERR, &info->dram_ferr); + pci_read_config_byte(pvt->bridge_ck, E7XXX_DRAM_NERR, &info->dram_nerr); if ((info->dram_ferr & 1) || (info->dram_nerr & 1)) { pci_read_config_dword(pvt->bridge_ck, E7XXX_DRAM_CELOG_ADD, @@ -279,8 +274,9 @@ static void e7xxx_get_error_info (struct mem_ctl_info *mci, pci_write_bits8(pvt->bridge_ck, E7XXX_DRAM_NERR, 0x03, 0x03); } -static int e7xxx_process_error_info (struct mem_ctl_info *mci, - struct e7xxx_error_info *info, int handle_errors) +static int e7xxx_process_error_info(struct mem_ctl_info *mci, + struct e7xxx_error_info *info, + int handle_errors) { int error_found; @@ -341,7 +337,6 @@ static inline int dual_channel_active(u32 drc, int dev_idx) return (dev_idx == E7501) ? ((drc >> 22) & 0x1) : 1; } - /* Return DRB granularity (0=32mb, 1=64mb). */ static inline int drb_granularity(u32 drc, int dev_idx) { @@ -349,9 +344,8 @@ static inline int drb_granularity(u32 drc, int dev_idx) return (dev_idx == E7501) ? ((drc >> 18) & 0x3) : 1; } - static void e7xxx_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev, - int dev_idx, u32 drc) + int dev_idx, u32 drc) { unsigned long last_cumul_size; int index; @@ -419,10 +413,21 @@ static int e7xxx_probe1(struct pci_dev *pdev, int dev_idx) struct e7xxx_error_info discard; debugf0("%s(): mci\n", __func__); + + /* make sure error reporting method is sane */ + switch (edac_op_state) { + case EDAC_OPSTATE_POLL: + case EDAC_OPSTATE_NMI: + break; + default: + edac_op_state = EDAC_OPSTATE_POLL; + break; + } + pci_read_config_dword(pdev, E7XXX_DRC, &drc); drc_chan = dual_channel_active(drc, dev_idx); - mci = edac_mc_alloc(sizeof(*pvt), E7XXX_NR_CSROWS, drc_chan + 1); + mci = edac_mc_alloc(sizeof(*pvt), E7XXX_NR_CSROWS, drc_chan + 1, 0); if (mci == NULL) return -ENOMEM; @@ -430,17 +435,16 @@ static int e7xxx_probe1(struct pci_dev *pdev, int dev_idx) debugf3("%s(): init mci\n", __func__); mci->mtype_cap = MEM_FLAG_RDDR; mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED | - EDAC_FLAG_S4ECD4ED; + EDAC_FLAG_S4ECD4ED; /* FIXME - what if different memory types are in different csrows? */ mci->mod_name = EDAC_MOD_STR; mci->mod_ver = E7XXX_REVISION; mci->dev = &pdev->dev; debugf3("%s(): init pvt\n", __func__); - pvt = (struct e7xxx_pvt *) mci->pvt_info; + pvt = (struct e7xxx_pvt *)mci->pvt_info; pvt->dev_info = &e7xxx_devs[dev_idx]; pvt->bridge_ck = pci_get_device(PCI_VENDOR_ID_INTEL, - pvt->dev_info->err_dev, - pvt->bridge_ck); + pvt->dev_info->err_dev, pvt->bridge_ck); if (!pvt->bridge_ck) { e7xxx_printk(KERN_ERR, "error reporting device not found:" @@ -451,6 +455,7 @@ static int e7xxx_probe1(struct pci_dev *pdev, int dev_idx) debugf3("%s(): more mci init\n", __func__); mci->ctl_name = pvt->dev_info->ctl_name; + mci->dev_name = pci_name(pdev); mci->edac_check = e7xxx_check; mci->ctl_page_to_phys = ctl_page_to_phys; e7xxx_init_csrows(mci, pdev, dev_idx, drc); @@ -473,11 +478,22 @@ static int e7xxx_probe1(struct pci_dev *pdev, int dev_idx) /* Here we assume that we will never see multiple instances of this * type of memory controller. The ID is therefore hardcoded to 0. */ - if (edac_mc_add_mc(mci,0)) { + if (edac_mc_add_mc(mci)) { debugf3("%s(): failed edac_mc_add_mc()\n", __func__); goto fail1; } + /* allocating generic PCI control info */ + e7xxx_pci = edac_pci_create_generic_ctl(&pdev->dev, EDAC_MOD_STR); + if (!e7xxx_pci) { + printk(KERN_WARNING + "%s(): Unable to create PCI control\n", + __func__); + printk(KERN_WARNING + "%s(): PCI error report via EDAC not setup\n", + __func__); + } + /* get this far and it's successful */ debugf3("%s(): success\n", __func__); return 0; @@ -493,7 +509,7 @@ fail0: /* returns count (>= 0), or negative on error */ static int __devinit e7xxx_init_one(struct pci_dev *pdev, - const struct pci_device_id *ent) + const struct pci_device_id *ent) { debugf0("%s()\n", __func__); @@ -509,34 +525,33 @@ static void __devexit e7xxx_remove_one(struct pci_dev *pdev) debugf0("%s()\n", __func__); + if (e7xxx_pci) + edac_pci_release_generic_ctl(e7xxx_pci); + if ((mci = edac_mc_del_mc(&pdev->dev)) == NULL) return; - pvt = (struct e7xxx_pvt *) mci->pvt_info; + pvt = (struct e7xxx_pvt *)mci->pvt_info; pci_dev_put(pvt->bridge_ck); edac_mc_free(mci); } static const struct pci_device_id e7xxx_pci_tbl[] __devinitdata = { { - PCI_VEND_DEV(INTEL, 7205_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0, - E7205 - }, + PCI_VEND_DEV(INTEL, 7205_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0, + E7205}, { - PCI_VEND_DEV(INTEL, 7500_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0, - E7500 - }, + PCI_VEND_DEV(INTEL, 7500_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0, + E7500}, { - PCI_VEND_DEV(INTEL, 7501_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0, - E7501 - }, + PCI_VEND_DEV(INTEL, 7501_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0, + E7501}, { - PCI_VEND_DEV(INTEL, 7505_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0, - E7505 - }, + PCI_VEND_DEV(INTEL, 7505_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0, + E7505}, { - 0, - } /* 0 terminated list. */ + 0, + } /* 0 terminated list. */ }; MODULE_DEVICE_TABLE(pci, e7xxx_pci_tbl); @@ -563,5 +578,7 @@ module_exit(e7xxx_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Linux Networx (http://lnxi.com) Thayne Harbaugh et al\n" - "Based on.work by Dan Hollis et al"); + "Based on.work by Dan Hollis et al"); MODULE_DESCRIPTION("MC support for Intel e7xxx memory controllers"); +module_param(edac_op_state, int, 0444); +MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI"); diff --git a/drivers/edac/edac_mc.h b/drivers/edac/edac_core.h index 713444cc4105..4e6bad15c4ba 100644 --- a/drivers/edac/edac_mc.h +++ b/drivers/edac/edac_core.h @@ -1,6 +1,7 @@ /* - * MC kernel module - * (C) 2003 Linux Networx (http://lnxi.com) + * Defines, structures, APIs for edac_core module + * + * (C) 2007 Linux Networx (http://lnxi.com) * This file may be distributed under the terms of the * GNU General Public License. * @@ -11,12 +12,13 @@ * NMI handling support added by * Dave Peterson <dsp@llnl.gov> <dave_peterson@pobox.com> * - * $Id: edac_mc.h,v 1.4.2.10 2005/10/05 00:43:44 dsp_llnl Exp $ + * Refactored for multi-source files: + * Doug Thompson <norsk5@xmission.com> * */ -#ifndef _EDAC_MC_H_ -#define _EDAC_MC_H_ +#ifndef _EDAC_CORE_H_ +#define _EDAC_CORE_H_ #include <linux/kernel.h> #include <linux/types.h> @@ -30,9 +32,14 @@ #include <linux/completion.h> #include <linux/kobject.h> #include <linux/platform_device.h> +#include <linux/sysdev.h> +#include <linux/workqueue.h> +#include <linux/version.h> #define EDAC_MC_LABEL_LEN 31 -#define MC_PROC_NAME_MAX_LEN 7 +#define EDAC_DEVICE_NAME_LEN 31 +#define EDAC_ATTRIB_VALUE_LEN 15 +#define MC_PROC_NAME_MAX_LEN 7 #if PAGE_SHIFT < 20 #define PAGES_TO_MiB( pages ) ( ( pages ) >> ( 20 - PAGE_SHIFT ) ) @@ -49,6 +56,14 @@ #define edac_mc_chipset_printk(mci, level, prefix, fmt, arg...) \ printk(level "EDAC " prefix " MC%d: " fmt, mci->mc_idx, ##arg) +/* edac_device printk */ +#define edac_device_printk(ctl, level, fmt, arg...) \ + printk(level "EDAC DEVICE%d: " fmt, ctl->dev_idx, ##arg) + +/* edac_pci printk */ +#define edac_pci_printk(ctl, level, fmt, arg...) \ + printk(level "EDAC PCI%d: " fmt, ctl->pci_idx, ##arg) + /* prefixes for edac_printk() and edac_mc_printk() */ #define EDAC_MC "MC" #define EDAC_PCI "PCI" @@ -60,7 +75,7 @@ extern int edac_debug_level; #define edac_debug_printk(level, fmt, arg...) \ do { \ if (level <= edac_debug_level) \ - edac_printk(KERN_DEBUG, EDAC_DEBUG, fmt, ##arg); \ + edac_printk(KERN_EMERG, EDAC_DEBUG, fmt, ##arg); \ } while(0) #define debugf0( ... ) edac_debug_printk(0, __VA_ARGS__ ) @@ -69,7 +84,7 @@ extern int edac_debug_level; #define debugf3( ... ) edac_debug_printk(3, __VA_ARGS__ ) #define debugf4( ... ) edac_debug_printk(4, __VA_ARGS__ ) -#else /* !CONFIG_EDAC_DEBUG */ +#else /* !CONFIG_EDAC_DEBUG */ #define debugf0( ... ) #define debugf1( ... ) @@ -77,18 +92,14 @@ extern int edac_debug_level; #define debugf3( ... ) #define debugf4( ... ) -#endif /* !CONFIG_EDAC_DEBUG */ +#endif /* !CONFIG_EDAC_DEBUG */ #define BIT(x) (1 << (x)) #define PCI_VEND_DEV(vend, dev) PCI_VENDOR_ID_ ## vend, \ PCI_DEVICE_ID_ ## vend ## _ ## dev -#if defined(CONFIG_X86) && defined(CONFIG_PCI) -#define dev_name(dev) pci_name(to_pci_dev(dev)) -#else -#define dev_name(dev) to_platform_device(dev)->name -#endif +#define dev_name(dev) (dev)->dev_name /* memory devices */ enum dev_type { @@ -124,8 +135,9 @@ enum mem_type { MEM_DDR, /* Double data rate SDRAM */ MEM_RDDR, /* Registered Double data rate SDRAM */ MEM_RMBS, /* Rambus DRAM */ - MEM_DDR2, /* DDR2 RAM */ - MEM_FB_DDR2, /* fully buffered DDR2 */ + MEM_DDR2, /* DDR2 RAM */ + MEM_FB_DDR2, /* fully buffered DDR2 */ + MEM_RDDR2, /* Registered DDR2 RAM */ }; #define MEM_FLAG_EMPTY BIT(MEM_EMPTY) @@ -141,6 +153,7 @@ enum mem_type { #define MEM_FLAG_RMBS BIT(MEM_RMBS) #define MEM_FLAG_DDR2 BIT(MEM_DDR2) #define MEM_FLAG_FB_DDR2 BIT(MEM_FB_DDR2) +#define MEM_FLAG_RDDR2 BIT(MEM_RDDR2) /* chipset Error Detection and Correction capabilities and mode */ enum edac_type { @@ -181,16 +194,23 @@ enum scrub_type { }; #define SCRUB_FLAG_SW_PROG BIT(SCRUB_SW_PROG) -#define SCRUB_FLAG_SW_SRC BIT(SCRUB_SW_SRC_CORR) -#define SCRUB_FLAG_SW_PROG_SRC BIT(SCRUB_SW_PROG_SRC_CORR) +#define SCRUB_FLAG_SW_SRC BIT(SCRUB_SW_SRC) +#define SCRUB_FLAG_SW_PROG_SRC BIT(SCRUB_SW_PROG_SRC) #define SCRUB_FLAG_SW_TUN BIT(SCRUB_SW_SCRUB_TUNABLE) #define SCRUB_FLAG_HW_PROG BIT(SCRUB_HW_PROG) -#define SCRUB_FLAG_HW_SRC BIT(SCRUB_HW_SRC_CORR) -#define SCRUB_FLAG_HW_PROG_SRC BIT(SCRUB_HW_PROG_SRC_CORR) +#define SCRUB_FLAG_HW_SRC BIT(SCRUB_HW_SRC) +#define SCRUB_FLAG_HW_PROG_SRC BIT(SCRUB_HW_PROG_SRC) #define SCRUB_FLAG_HW_TUN BIT(SCRUB_HW_TUNABLE) /* FIXME - should have notify capabilities: NMI, LOG, PROC, etc */ +/* EDAC internal operation states */ +#define OP_ALLOC 0x100 +#define OP_RUNNING_POLL 0x201 +#define OP_RUNNING_INTERRUPT 0x202 +#define OP_RUNNING_POLL_INTR 0x203 +#define OP_OFFLINE 0x300 + /* * There are several things to be aware of that aren't at all obvious: * @@ -276,7 +296,7 @@ enum scrub_type { struct channel_info { int chan_idx; /* channel index */ u32 ce_count; /* Correctable Errors for this CHANNEL */ - char label[EDAC_MC_LABEL_LEN + 1]; /* DIMM label on motherboard */ + char label[EDAC_MC_LABEL_LEN + 1]; /* DIMM label on motherboard */ struct csrow_info *csrow; /* the parent */ }; @@ -297,15 +317,29 @@ struct csrow_info { struct mem_ctl_info *mci; /* the parent */ struct kobject kobj; /* sysfs kobject for this csrow */ - struct completion kobj_complete; - /* FIXME the number of CHANNELs might need to become dynamic */ + /* channel information for this csrow */ u32 nr_channels; struct channel_info *channels; }; +/* mcidev_sysfs_attribute structure + * used for driver sysfs attributes and in mem_ctl_info + * sysfs top level entries + */ +struct mcidev_sysfs_attribute { + struct attribute attr; + ssize_t (*show)(struct mem_ctl_info *,char *); + ssize_t (*store)(struct mem_ctl_info *, const char *,size_t); +}; + +/* MEMORY controller information structure + */ struct mem_ctl_info { - struct list_head link; /* for global list of mem_ctl_info structs */ + struct list_head link; /* for global list of mem_ctl_info structs */ + + struct module *owner; /* Module owner of this control struct */ + unsigned long mtype_cap; /* memory types supported by mc */ unsigned long edac_ctl_cap; /* Mem controller EDAC capabilities */ unsigned long edac_cap; /* configuration capabilities - this is @@ -322,14 +356,15 @@ struct mem_ctl_info { /* Translates sdram memory scrub rate given in bytes/sec to the internal representation and configures whatever else needs to be configured. - */ - int (*set_sdram_scrub_rate) (struct mem_ctl_info *mci, u32 *bw); + */ + int (*set_sdram_scrub_rate) (struct mem_ctl_info * mci, u32 * bw); /* Get the current sdram memory scrub rate from the internal representation and converts it to the closest matching bandwith in bytes/sec. - */ - int (*get_sdram_scrub_rate) (struct mem_ctl_info *mci, u32 *bw); + */ + int (*get_sdram_scrub_rate) (struct mem_ctl_info * mci, u32 * bw); + /* pointer to edac checking routine */ void (*edac_check) (struct mem_ctl_info * mci); @@ -340,7 +375,7 @@ struct mem_ctl_info { */ /* FIXME - why not send the phys page to begin with? */ unsigned long (*ctl_page_to_phys) (struct mem_ctl_info * mci, - unsigned long page); + unsigned long page); int mc_idx; int nr_csrows; struct csrow_info *csrows; @@ -353,6 +388,7 @@ struct mem_ctl_info { const char *mod_name; const char *mod_ver; const char *ctl_name; + const char *dev_name; char proc_name[MC_PROC_NAME_MAX_LEN + 1]; void *pvt_info; u32 ue_noinfo_count; /* Uncorrectable Errors w/o info */ @@ -369,14 +405,327 @@ struct mem_ctl_info { /* edac sysfs device control */ struct kobject edac_mci_kobj; - struct completion kobj_complete; + + /* Additional top controller level attributes, but specified + * by the low level driver. + * + * Set by the low level driver to provide attributes at the + * controller level, same level as 'ue_count' and 'ce_count' above. + * An array of structures, NULL terminated + * + * If attributes are desired, then set to array of attributes + * If no attributes are desired, leave NULL + */ + struct mcidev_sysfs_attribute *mc_driver_sysfs_attributes; + + /* work struct for this MC */ + struct delayed_work work; + + /* the internal state of this controller instance */ + int op_state; +}; + +/* + * The following are the structures to provide for a generic + * or abstract 'edac_device'. This set of structures and the + * code that implements the APIs for the same, provide for + * registering EDAC type devices which are NOT standard memory. + * + * CPU caches (L1 and L2) + * DMA engines + * Core CPU swithces + * Fabric switch units + * PCIe interface controllers + * other EDAC/ECC type devices that can be monitored for + * errors, etc. + * + * It allows for a 2 level set of hiearchry. For example: + * + * cache could be composed of L1, L2 and L3 levels of cache. + * Each CPU core would have its own L1 cache, while sharing + * L2 and maybe L3 caches. + * + * View them arranged, via the sysfs presentation: + * /sys/devices/system/edac/.. + * + * mc/ <existing memory device directory> + * cpu/cpu0/.. <L1 and L2 block directory> + * /L1-cache/ce_count + * /ue_count + * /L2-cache/ce_count + * /ue_count + * cpu/cpu1/.. <L1 and L2 block directory> + * /L1-cache/ce_count + * /ue_count + * /L2-cache/ce_count + * /ue_count + * ... + * + * the L1 and L2 directories would be "edac_device_block's" + */ + +struct edac_device_counter { + u32 ue_count; + u32 ce_count; +}; + +/* forward reference */ +struct edac_device_ctl_info; +struct edac_device_block; + +/* edac_dev_sysfs_attribute structure + * used for driver sysfs attributes in mem_ctl_info + * for extra controls and attributes: + * like high level error Injection controls + */ +struct edac_dev_sysfs_attribute { + struct attribute attr; + ssize_t (*show)(struct edac_device_ctl_info *, char *); + ssize_t (*store)(struct edac_device_ctl_info *, const char *, size_t); +}; + +/* edac_dev_sysfs_block_attribute structure + * + * used in leaf 'block' nodes for adding controls/attributes + * + * each block in each instance of the containing control structure + * can have an array of the following. The show and store functions + * will be filled in with the show/store function in the + * low level driver. + * + * The 'value' field will be the actual value field used for + * counting + */ +struct edac_dev_sysfs_block_attribute { + struct attribute attr; + ssize_t (*show)(struct kobject *, struct attribute *, char *); + ssize_t (*store)(struct kobject *, struct attribute *, + const char *, size_t); + struct edac_device_block *block; + + unsigned int value; +}; + +/* device block control structure */ +struct edac_device_block { + struct edac_device_instance *instance; /* Up Pointer */ + char name[EDAC_DEVICE_NAME_LEN + 1]; + + struct edac_device_counter counters; /* basic UE and CE counters */ + + int nr_attribs; /* how many attributes */ + + /* this block's attributes, could be NULL */ + struct edac_dev_sysfs_block_attribute *block_attributes; + + /* edac sysfs device control */ + struct kobject kobj; +}; + +/* device instance control structure */ +struct edac_device_instance { + struct edac_device_ctl_info *ctl; /* Up pointer */ + char name[EDAC_DEVICE_NAME_LEN + 4]; + + struct edac_device_counter counters; /* instance counters */ + + u32 nr_blocks; /* how many blocks */ + struct edac_device_block *blocks; /* block array */ + + /* edac sysfs device control */ + struct kobject kobj; +}; + + +/* + * Abstract edac_device control info structure + * + */ +struct edac_device_ctl_info { + /* for global list of edac_device_ctl_info structs */ + struct list_head link; + + struct module *owner; /* Module owner of this control struct */ + + int dev_idx; + + /* Per instance controls for this edac_device */ + int log_ue; /* boolean for logging UEs */ + int log_ce; /* boolean for logging CEs */ + int panic_on_ue; /* boolean for panic'ing on an UE */ + unsigned poll_msec; /* number of milliseconds to poll interval */ + unsigned long delay; /* number of jiffies for poll_msec */ + + /* Additional top controller level attributes, but specified + * by the low level driver. + * + * Set by the low level driver to provide attributes at the + * controller level, same level as 'ue_count' and 'ce_count' above. + * An array of structures, NULL terminated + * + * If attributes are desired, then set to array of attributes + * If no attributes are desired, leave NULL + */ + struct edac_dev_sysfs_attribute *sysfs_attributes; + + /* pointer to main 'edac' class in sysfs */ + struct sysdev_class *edac_class; + + /* the internal state of this controller instance */ + int op_state; + /* work struct for this instance */ + struct delayed_work work; + + /* pointer to edac polling checking routine: + * If NOT NULL: points to polling check routine + * If NULL: Then assumes INTERRUPT operation, where + * MC driver will receive events + */ + void (*edac_check) (struct edac_device_ctl_info * edac_dev); + + struct device *dev; /* pointer to device structure */ + + const char *mod_name; /* module name */ + const char *ctl_name; /* edac controller name */ + const char *dev_name; /* pci/platform/etc... name */ + + void *pvt_info; /* pointer to 'private driver' info */ + + unsigned long start_time; /* edac_device load start time (jiffies) */ + + /* these are for safe removal of mc devices from global list while + * NMI handlers may be traversing list + */ + struct rcu_head rcu; + struct completion removal_complete; + + /* sysfs top name under 'edac' directory + * and instance name: + * cpu/cpu0/... + * cpu/cpu1/... + * cpu/cpu2/... + * ... + */ + char name[EDAC_DEVICE_NAME_LEN + 1]; + + /* Number of instances supported on this control structure + * and the array of those instances + */ + u32 nr_instances; + struct edac_device_instance *instances; + + /* Event counters for the this whole EDAC Device */ + struct edac_device_counter counters; + + /* edac sysfs device control for the 'name' + * device this structure controls + */ + struct kobject kobj; }; +/* To get from the instance's wq to the beginning of the ctl structure */ +#define to_edac_mem_ctl_work(w) \ + container_of(w, struct mem_ctl_info, work) + +#define to_edac_device_ctl_work(w) \ + container_of(w,struct edac_device_ctl_info,work) + +/* + * The alloc() and free() functions for the 'edac_device' control info + * structure. A MC driver will allocate one of these for each edac_device + * it is going to control/register with the EDAC CORE. + */ +extern struct edac_device_ctl_info *edac_device_alloc_ctl_info( + unsigned sizeof_private, + char *edac_device_name, unsigned nr_instances, + char *edac_block_name, unsigned nr_blocks, + unsigned offset_value, + struct edac_dev_sysfs_block_attribute *block_attributes, + unsigned nr_attribs, + int device_index); + +/* The offset value can be: + * -1 indicating no offset value + * 0 for zero-based block numbers + * 1 for 1-based block number + * other for other-based block number + */ +#define BLOCK_OFFSET_VALUE_OFF ((unsigned) -1) + +extern void edac_device_free_ctl_info(struct edac_device_ctl_info *ctl_info); + #ifdef CONFIG_PCI +struct edac_pci_counter { + atomic_t pe_count; + atomic_t npe_count; +}; + +/* + * Abstract edac_pci control info structure + * + */ +struct edac_pci_ctl_info { + /* for global list of edac_pci_ctl_info structs */ + struct list_head link; + + int pci_idx; + + struct sysdev_class *edac_class; /* pointer to class */ + + /* the internal state of this controller instance */ + int op_state; + /* work struct for this instance */ + struct delayed_work work; + + /* pointer to edac polling checking routine: + * If NOT NULL: points to polling check routine + * If NULL: Then assumes INTERRUPT operation, where + * MC driver will receive events + */ + void (*edac_check) (struct edac_pci_ctl_info * edac_dev); + + struct device *dev; /* pointer to device structure */ + + const char *mod_name; /* module name */ + const char *ctl_name; /* edac controller name */ + const char *dev_name; /* pci/platform/etc... name */ + + void *pvt_info; /* pointer to 'private driver' info */ + + unsigned long start_time; /* edac_pci load start time (jiffies) */ + + /* these are for safe removal of devices from global list while + * NMI handlers may be traversing list + */ + struct rcu_head rcu; + struct completion complete; + + /* sysfs top name under 'edac' directory + * and instance name: + * cpu/cpu0/... + * cpu/cpu1/... + * cpu/cpu2/... + * ... + */ + char name[EDAC_DEVICE_NAME_LEN + 1]; + + /* Event counters for the this whole EDAC Device */ + struct edac_pci_counter counters; + + /* edac sysfs device control for the 'name' + * device this structure controls + */ + struct kobject kobj; + struct completion kobj_complete; +}; + +#define to_edac_pci_ctl_work(w) \ + container_of(w, struct edac_pci_ctl_info,work) + /* write all or some bits in a byte-register*/ static inline void pci_write_bits8(struct pci_dev *pdev, int offset, u8 value, - u8 mask) + u8 mask) { if (mask != 0xff) { u8 buf; @@ -392,7 +741,7 @@ static inline void pci_write_bits8(struct pci_dev *pdev, int offset, u8 value, /* write all or some bits in a word-register*/ static inline void pci_write_bits16(struct pci_dev *pdev, int offset, - u16 value, u16 mask) + u16 value, u16 mask) { if (mask != 0xffff) { u16 buf; @@ -408,7 +757,7 @@ static inline void pci_write_bits16(struct pci_dev *pdev, int offset, /* write all or some bits in a dword-register*/ static inline void pci_write_bits32(struct pci_dev *pdev, int offset, - u32 value, u32 mask) + u32 value, u32 mask) { if (mask != 0xffff) { u32 buf; @@ -422,20 +771,16 @@ static inline void pci_write_bits32(struct pci_dev *pdev, int offset, pci_write_config_dword(pdev, offset, value); } -#endif /* CONFIG_PCI */ +#endif /* CONFIG_PCI */ -#ifdef CONFIG_EDAC_DEBUG -void edac_mc_dump_channel(struct channel_info *chan); -void edac_mc_dump_mci(struct mem_ctl_info *mci); -void edac_mc_dump_csrow(struct csrow_info *csrow); -#endif /* CONFIG_EDAC_DEBUG */ - -extern int edac_mc_add_mc(struct mem_ctl_info *mci,int mc_idx); -extern struct mem_ctl_info * edac_mc_del_mc(struct device *dev); +extern struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows, + unsigned nr_chans, int edac_index); +extern int edac_mc_add_mc(struct mem_ctl_info *mci); +extern void edac_mc_free(struct mem_ctl_info *mci); +extern struct mem_ctl_info *edac_mc_find(int idx); +extern struct mem_ctl_info *edac_mc_del_mc(struct device *dev); extern int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, - unsigned long page); -extern void edac_mc_scrub_block(unsigned long page, unsigned long offset, - u32 size); + unsigned long page); /* * The no info errors are used when error overflows are reported. @@ -448,34 +793,59 @@ extern void edac_mc_scrub_block(unsigned long page, unsigned long offset, * statement clutter and extra function arguments. */ extern void edac_mc_handle_ce(struct mem_ctl_info *mci, - unsigned long page_frame_number, unsigned long offset_in_page, - unsigned long syndrome, int row, int channel, - const char *msg); + unsigned long page_frame_number, + unsigned long offset_in_page, + unsigned long syndrome, int row, int channel, + const char *msg); extern void edac_mc_handle_ce_no_info(struct mem_ctl_info *mci, - const char *msg); + const char *msg); extern void edac_mc_handle_ue(struct mem_ctl_info *mci, - unsigned long page_frame_number, unsigned long offset_in_page, - int row, const char *msg); + unsigned long page_frame_number, + unsigned long offset_in_page, int row, + const char *msg); extern void edac_mc_handle_ue_no_info(struct mem_ctl_info *mci, - const char *msg); -extern void edac_mc_handle_fbd_ue(struct mem_ctl_info *mci, - unsigned int csrow, - unsigned int channel0, - unsigned int channel1, - char *msg); -extern void edac_mc_handle_fbd_ce(struct mem_ctl_info *mci, - unsigned int csrow, - unsigned int channel, - char *msg); + const char *msg); +extern void edac_mc_handle_fbd_ue(struct mem_ctl_info *mci, unsigned int csrow, + unsigned int channel0, unsigned int channel1, + char *msg); +extern void edac_mc_handle_fbd_ce(struct mem_ctl_info *mci, unsigned int csrow, + unsigned int channel, char *msg); /* - * This kmalloc's and initializes all the structures. - * Can't be used if all structures don't have the same lifetime. + * edac_device APIs */ -extern struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows, - unsigned nr_chans); +extern int edac_device_add_device(struct edac_device_ctl_info *edac_dev); +extern struct edac_device_ctl_info *edac_device_del_device(struct device *dev); +extern void edac_device_handle_ue(struct edac_device_ctl_info *edac_dev, + int inst_nr, int block_nr, const char *msg); +extern void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev, + int inst_nr, int block_nr, const char *msg); -/* Free an mc previously allocated by edac_mc_alloc() */ -extern void edac_mc_free(struct mem_ctl_info *mci); +/* + * edac_pci APIs + */ +extern struct edac_pci_ctl_info *edac_pci_alloc_ctl_info(unsigned int sz_pvt, + const char *edac_pci_name); + +extern void edac_pci_free_ctl_info(struct edac_pci_ctl_info *pci); + +extern void edac_pci_reset_delay_period(struct edac_pci_ctl_info *pci, + unsigned long value); + +extern int edac_pci_add_device(struct edac_pci_ctl_info *pci, int edac_idx); +extern struct edac_pci_ctl_info *edac_pci_del_device(struct device *dev); + +extern struct edac_pci_ctl_info *edac_pci_create_generic_ctl( + struct device *dev, + const char *mod_name); + +extern void edac_pci_release_generic_ctl(struct edac_pci_ctl_info *pci); +extern int edac_pci_create_sysfs(struct edac_pci_ctl_info *pci); +extern void edac_pci_remove_sysfs(struct edac_pci_ctl_info *pci); + +/* + * edac misc APIs + */ +extern char *edac_op_state_to_string(int op_state); -#endif /* _EDAC_MC_H_ */ +#endif /* _EDAC_CORE_H_ */ diff --git a/drivers/edac/edac_device.c b/drivers/edac/edac_device.c new file mode 100644 index 000000000000..f3690a697cf9 --- /dev/null +++ b/drivers/edac/edac_device.c @@ -0,0 +1,746 @@ + +/* + * edac_device.c + * (C) 2007 www.douglaskthompson.com + * + * This file may be distributed under the terms of the + * GNU General Public License. + * + * Written by Doug Thompson <norsk5@xmission.com> + * + * edac_device API implementation + * 19 Jan 2007 + */ + +#include <linux/module.h> +#include <linux/types.h> +#include <linux/smp.h> +#include <linux/init.h> +#include <linux/sysctl.h> +#include <linux/highmem.h> +#include <linux/timer.h> +#include <linux/slab.h> +#include <linux/jiffies.h> +#include <linux/spinlock.h> +#include <linux/list.h> +#include <linux/sysdev.h> +#include <linux/ctype.h> +#include <linux/workqueue.h> +#include <asm/uaccess.h> +#include <asm/page.h> + +#include "edac_core.h" +#include "edac_module.h" + +/* lock for the list: 'edac_device_list', manipulation of this list + * is protected by the 'device_ctls_mutex' lock + */ +static DEFINE_MUTEX(device_ctls_mutex); +static struct list_head edac_device_list = LIST_HEAD_INIT(edac_device_list); + +#ifdef CONFIG_EDAC_DEBUG +static void edac_device_dump_device(struct edac_device_ctl_info *edac_dev) +{ + debugf3("\tedac_dev = %p dev_idx=%d \n", edac_dev, edac_dev->dev_idx); + debugf4("\tedac_dev->edac_check = %p\n", edac_dev->edac_check); + debugf3("\tdev = %p\n", edac_dev->dev); + debugf3("\tmod_name:ctl_name = %s:%s\n", + edac_dev->mod_name, edac_dev->ctl_name); + debugf3("\tpvt_info = %p\n\n", edac_dev->pvt_info); +} +#endif /* CONFIG_EDAC_DEBUG */ + + +/* + * edac_device_alloc_ctl_info() + * Allocate a new edac device control info structure + * + * The control structure is allocated in complete chunk + * from the OS. It is in turn sub allocated to the + * various objects that compose the struture + * + * The structure has a 'nr_instance' array within itself. + * Each instance represents a major component + * Example: L1 cache and L2 cache are 2 instance components + * + * Within each instance is an array of 'nr_blocks' blockoffsets + */ +struct edac_device_ctl_info *edac_device_alloc_ctl_info( + unsigned sz_private, + char *edac_device_name, unsigned nr_instances, + char *edac_block_name, unsigned nr_blocks, + unsigned offset_value, /* zero, 1, or other based offset */ + struct edac_dev_sysfs_block_attribute *attrib_spec, unsigned nr_attrib, + int device_index) +{ + struct edac_device_ctl_info *dev_ctl; + struct edac_device_instance *dev_inst, *inst; + struct edac_device_block *dev_blk, *blk_p, *blk; + struct edac_dev_sysfs_block_attribute *dev_attrib, *attrib_p, *attrib; + unsigned total_size; + unsigned count; + unsigned instance, block, attr; + void *pvt; + int err; + + debugf4("%s() instances=%d blocks=%d\n", + __func__, nr_instances, nr_blocks); + + /* Calculate the size of memory we need to allocate AND + * determine the offsets of the various item arrays + * (instance,block,attrib) from the start of an allocated structure. + * We want the alignment of each item (instance,block,attrib) + * to be at least as stringent as what the compiler would + * provide if we could simply hardcode everything into a single struct. + */ + dev_ctl = (struct edac_device_ctl_info *)NULL; + + /* Calc the 'end' offset past end of ONE ctl_info structure + * which will become the start of the 'instance' array + */ + dev_inst = edac_align_ptr(&dev_ctl[1], sizeof(*dev_inst)); + + /* Calc the 'end' offset past the instance array within the ctl_info + * which will become the start of the block array + */ + dev_blk = edac_align_ptr(&dev_inst[nr_instances], sizeof(*dev_blk)); + + /* Calc the 'end' offset past the dev_blk array + * which will become the start of the attrib array, if any. + */ + count = nr_instances * nr_blocks; + dev_attrib = edac_align_ptr(&dev_blk[count], sizeof(*dev_attrib)); + + /* Check for case of when an attribute array is specified */ + if (nr_attrib > 0) { + /* calc how many nr_attrib we need */ + count *= nr_attrib; + + /* Calc the 'end' offset past the attributes array */ + pvt = edac_align_ptr(&dev_attrib[count], sz_private); + } else { + /* no attribute array specificed */ + pvt = edac_align_ptr(dev_attrib, sz_private); + } + + /* 'pvt' now points to where the private data area is. + * At this point 'pvt' (like dev_inst,dev_blk and dev_attrib) + * is baselined at ZERO + */ + total_size = ((unsigned long)pvt) + sz_private; + + /* Allocate the amount of memory for the set of control structures */ + dev_ctl = kzalloc(total_size, GFP_KERNEL); + if (dev_ctl == NULL) + return NULL; + + /* Adjust pointers so they point within the actual memory we + * just allocated rather than an imaginary chunk of memory + * located at address 0. + * 'dev_ctl' points to REAL memory, while the others are + * ZERO based and thus need to be adjusted to point within + * the allocated memory. + */ + dev_inst = (struct edac_device_instance *) + (((char *)dev_ctl) + ((unsigned long)dev_inst)); + dev_blk = (struct edac_device_block *) + (((char *)dev_ctl) + ((unsigned long)dev_blk)); + dev_attrib = (struct edac_dev_sysfs_block_attribute *) + (((char *)dev_ctl) + ((unsigned long)dev_attrib)); + pvt = sz_private ? (((char *)dev_ctl) + ((unsigned long)pvt)) : NULL; + + /* Begin storing the information into the control info structure */ + dev_ctl->dev_idx = device_index; + dev_ctl->nr_instances = nr_instances; + dev_ctl->instances = dev_inst; + dev_ctl->pvt_info = pvt; + + /* Name of this edac device */ + snprintf(dev_ctl->name,sizeof(dev_ctl->name),"%s",edac_device_name); + + debugf4("%s() edac_dev=%p next after end=%p\n", + __func__, dev_ctl, pvt + sz_private ); + + /* Initialize every Instance */ + for (instance = 0; instance < nr_instances; instance++) { + inst = &dev_inst[instance]; + inst->ctl = dev_ctl; + inst->nr_blocks = nr_blocks; + blk_p = &dev_blk[instance * nr_blocks]; + inst->blocks = blk_p; + + /* name of this instance */ + snprintf(inst->name, sizeof(inst->name), + "%s%u", edac_device_name, instance); + + /* Initialize every block in each instance */ + for (block = 0; block < nr_blocks; block++) { + blk = &blk_p[block]; + blk->instance = inst; + snprintf(blk->name, sizeof(blk->name), + "%s%d", edac_block_name, block+offset_value); + + debugf4("%s() instance=%d inst_p=%p block=#%d " + "block_p=%p name='%s'\n", + __func__, instance, inst, block, + blk, blk->name); + + /* if there are NO attributes OR no attribute pointer + * then continue on to next block iteration + */ + if ((nr_attrib == 0) || (attrib_spec == NULL)) + continue; + + /* setup the attribute array for this block */ + blk->nr_attribs = nr_attrib; + attrib_p = &dev_attrib[block*nr_instances*nr_attrib]; + blk->block_attributes = attrib_p; + + debugf4("%s() THIS BLOCK_ATTRIB=%p\n", + __func__, blk->block_attributes); + + /* Initialize every user specified attribute in this + * block with the data the caller passed in + * Each block gets its own copy of pointers, + * and its unique 'value' + */ + for (attr = 0; attr < nr_attrib; attr++) { + attrib = &attrib_p[attr]; + + /* populate the unique per attrib + * with the code pointers and info + */ + attrib->attr = attrib_spec[attr].attr; + attrib->show = attrib_spec[attr].show; + attrib->store = attrib_spec[attr].store; + + attrib->block = blk; /* up link */ + + debugf4("%s() alloc-attrib=%p attrib_name='%s' " + "attrib-spec=%p spec-name=%s\n", + __func__, attrib, attrib->attr.name, + &attrib_spec[attr], + attrib_spec[attr].attr.name + ); + } + } + } + + /* Mark this instance as merely ALLOCATED */ + dev_ctl->op_state = OP_ALLOC; + + /* + * Initialize the 'root' kobj for the edac_device controller + */ + err = edac_device_register_sysfs_main_kobj(dev_ctl); + if (err) { + kfree(dev_ctl); + return NULL; + } + + /* at this point, the root kobj is valid, and in order to + * 'free' the object, then the function: + * edac_device_unregister_sysfs_main_kobj() must be called + * which will perform kobj unregistration and the actual free + * will occur during the kobject callback operation + */ + + return dev_ctl; +} +EXPORT_SYMBOL_GPL(edac_device_alloc_ctl_info); + +/* + * edac_device_free_ctl_info() + * frees the memory allocated by the edac_device_alloc_ctl_info() + * function + */ +void edac_device_free_ctl_info(struct edac_device_ctl_info *ctl_info) +{ + edac_device_unregister_sysfs_main_kobj(ctl_info); +} +EXPORT_SYMBOL_GPL(edac_device_free_ctl_info); + +/* + * find_edac_device_by_dev + * scans the edac_device list for a specific 'struct device *' + * + * lock to be held prior to call: device_ctls_mutex + * + * Return: + * pointer to control structure managing 'dev' + * NULL if not found on list + */ +static struct edac_device_ctl_info *find_edac_device_by_dev(struct device *dev) +{ + struct edac_device_ctl_info *edac_dev; + struct list_head *item; + + debugf0("%s()\n", __func__); + + list_for_each(item, &edac_device_list) { + edac_dev = list_entry(item, struct edac_device_ctl_info, link); + + if (edac_dev->dev == dev) + return edac_dev; + } + + return NULL; +} + +/* + * add_edac_dev_to_global_list + * Before calling this function, caller must + * assign a unique value to edac_dev->dev_idx. + * + * lock to be held prior to call: device_ctls_mutex + * + * Return: + * 0 on success + * 1 on failure. + */ +static int add_edac_dev_to_global_list(struct edac_device_ctl_info *edac_dev) +{ + struct list_head *item, *insert_before; + struct edac_device_ctl_info *rover; + + insert_before = &edac_device_list; + + /* Determine if already on the list */ + rover = find_edac_device_by_dev(edac_dev->dev); + if (unlikely(rover != NULL)) + goto fail0; + + /* Insert in ascending order by 'dev_idx', so find position */ + list_for_each(item, &edac_device_list) { + rover = list_entry(item, struct edac_device_ctl_info, link); + + if (rover->dev_idx >= edac_dev->dev_idx) { + if (unlikely(rover->dev_idx == edac_dev->dev_idx)) + goto fail1; + + insert_before = item; + break; + } + } + + list_add_tail_rcu(&edac_dev->link, insert_before); + return 0; + +fail0: + edac_printk(KERN_WARNING, EDAC_MC, + "%s (%s) %s %s already assigned %d\n", + rover->dev->bus_id, dev_name(rover), + rover->mod_name, rover->ctl_name, rover->dev_idx); + return 1; + +fail1: + edac_printk(KERN_WARNING, EDAC_MC, + "bug in low-level driver: attempt to assign\n" + " duplicate dev_idx %d in %s()\n", rover->dev_idx, + __func__); + return 1; +} + +/* + * complete_edac_device_list_del + * + * callback function when reference count is zero + */ +static void complete_edac_device_list_del(struct rcu_head *head) +{ + struct edac_device_ctl_info *edac_dev; + + edac_dev = container_of(head, struct edac_device_ctl_info, rcu); + INIT_LIST_HEAD(&edac_dev->link); + complete(&edac_dev->removal_complete); +} + +/* + * del_edac_device_from_global_list + * + * remove the RCU, setup for a callback call, + * then wait for the callback to occur + */ +static void del_edac_device_from_global_list(struct edac_device_ctl_info + *edac_device) +{ + list_del_rcu(&edac_device->link); + + init_completion(&edac_device->removal_complete); + call_rcu(&edac_device->rcu, complete_edac_device_list_del); + wait_for_completion(&edac_device->removal_complete); +} + +/** + * edac_device_find + * Search for a edac_device_ctl_info structure whose index is 'idx'. + * + * If found, return a pointer to the structure. + * Else return NULL. + * + * Caller must hold device_ctls_mutex. + */ +struct edac_device_ctl_info *edac_device_find(int idx) +{ + struct list_head *item; + struct edac_device_ctl_info *edac_dev; + + /* Iterate over list, looking for exact match of ID */ + list_for_each(item, &edac_device_list) { + edac_dev = list_entry(item, struct edac_device_ctl_info, link); + + if (edac_dev->dev_idx >= idx) { + if (edac_dev->dev_idx == idx) + return edac_dev; + + /* not on list, so terminate early */ + break; + } + } + + return NULL; +} +EXPORT_SYMBOL_GPL(edac_device_find); + +/* + * edac_device_workq_function + * performs the operation scheduled by a workq request + * + * this workq is embedded within an edac_device_ctl_info + * structure, that needs to be polled for possible error events. + * + * This operation is to acquire the list mutex lock + * (thus preventing insertation or deletion) + * and then call the device's poll function IFF this device is + * running polled and there is a poll function defined. + */ +static void edac_device_workq_function(struct work_struct *work_req) +{ + struct delayed_work *d_work = (struct delayed_work *)work_req; + struct edac_device_ctl_info *edac_dev = to_edac_device_ctl_work(d_work); + + mutex_lock(&device_ctls_mutex); + + /* Only poll controllers that are running polled and have a check */ + if ((edac_dev->op_state == OP_RUNNING_POLL) && + (edac_dev->edac_check != NULL)) { + edac_dev->edac_check(edac_dev); + } + + mutex_unlock(&device_ctls_mutex); + + /* Reschedule the workq for the next time period to start again + * if the number of msec is for 1 sec, then adjust to the next + * whole one second to save timers fireing all over the period + * between integral seconds + */ + if (edac_dev->poll_msec == 1000) + queue_delayed_work(edac_workqueue, &edac_dev->work, + round_jiffies(edac_dev->delay)); + else + queue_delayed_work(edac_workqueue, &edac_dev->work, + edac_dev->delay); +} + +/* + * edac_device_workq_setup + * initialize a workq item for this edac_device instance + * passing in the new delay period in msec + */ +void edac_device_workq_setup(struct edac_device_ctl_info *edac_dev, + unsigned msec) +{ + debugf0("%s()\n", __func__); + + /* take the arg 'msec' and set it into the control structure + * to used in the time period calculation + * then calc the number of jiffies that represents + */ + edac_dev->poll_msec = msec; + edac_dev->delay = msecs_to_jiffies(msec); + + INIT_DELAYED_WORK(&edac_dev->work, edac_device_workq_function); + + /* optimize here for the 1 second case, which will be normal value, to + * fire ON the 1 second time event. This helps reduce all sorts of + * timers firing on sub-second basis, while they are happy + * to fire together on the 1 second exactly + */ + if (edac_dev->poll_msec == 1000) + queue_delayed_work(edac_workqueue, &edac_dev->work, + round_jiffies(edac_dev->delay)); + else + queue_delayed_work(edac_workqueue, &edac_dev->work, + edac_dev->delay); +} + +/* + * edac_device_workq_teardown + * stop the workq processing on this edac_dev + */ +void edac_device_workq_teardown(struct edac_device_ctl_info *edac_dev) +{ + int status; + + status = cancel_delayed_work(&edac_dev->work); + if (status == 0) { + /* workq instance might be running, wait for it */ + flush_workqueue(edac_workqueue); + } +} + +/* + * edac_device_reset_delay_period + * + * need to stop any outstanding workq queued up at this time + * because we will be resetting the sleep time. + * Then restart the workq on the new delay + */ +void edac_device_reset_delay_period(struct edac_device_ctl_info *edac_dev, + unsigned long value) +{ + /* cancel the current workq request, without the mutex lock */ + edac_device_workq_teardown(edac_dev); + + /* acquire the mutex before doing the workq setup */ + mutex_lock(&device_ctls_mutex); + + /* restart the workq request, with new delay value */ + edac_device_workq_setup(edac_dev, value); + + mutex_unlock(&device_ctls_mutex); +} + +/** + * edac_device_add_device: Insert the 'edac_dev' structure into the + * edac_device global list and create sysfs entries associated with + * edac_device structure. + * @edac_device: pointer to the edac_device structure to be added to the list + * 'edac_device' structure. + * + * Return: + * 0 Success + * !0 Failure + */ +int edac_device_add_device(struct edac_device_ctl_info *edac_dev) +{ + debugf0("%s()\n", __func__); + +#ifdef CONFIG_EDAC_DEBUG + if (edac_debug_level >= 3) + edac_device_dump_device(edac_dev); +#endif + mutex_lock(&device_ctls_mutex); + + if (add_edac_dev_to_global_list(edac_dev)) + goto fail0; + + /* set load time so that error rate can be tracked */ + edac_dev->start_time = jiffies; + + /* create this instance's sysfs entries */ + if (edac_device_create_sysfs(edac_dev)) { + edac_device_printk(edac_dev, KERN_WARNING, + "failed to create sysfs device\n"); + goto fail1; + } + + /* If there IS a check routine, then we are running POLLED */ + if (edac_dev->edac_check != NULL) { + /* This instance is NOW RUNNING */ + edac_dev->op_state = OP_RUNNING_POLL; + + /* + * enable workq processing on this instance, + * default = 1000 msec + */ + edac_device_workq_setup(edac_dev, 1000); + } else { + edac_dev->op_state = OP_RUNNING_INTERRUPT; + } + + /* Report action taken */ + edac_device_printk(edac_dev, KERN_INFO, + "Giving out device to module '%s' controller " + "'%s': DEV '%s' (%s)\n", + edac_dev->mod_name, + edac_dev->ctl_name, + dev_name(edac_dev), + edac_op_state_to_string(edac_dev->op_state)); + + mutex_unlock(&device_ctls_mutex); + return 0; + +fail1: + /* Some error, so remove the entry from the lsit */ + del_edac_device_from_global_list(edac_dev); + +fail0: + mutex_unlock(&device_ctls_mutex); + return 1; +} +EXPORT_SYMBOL_GPL(edac_device_add_device); + +/** + * edac_device_del_device: + * Remove sysfs entries for specified edac_device structure and + * then remove edac_device structure from global list + * + * @pdev: + * Pointer to 'struct device' representing edac_device + * structure to remove. + * + * Return: + * Pointer to removed edac_device structure, + * OR NULL if device not found. + */ +struct edac_device_ctl_info *edac_device_del_device(struct device *dev) +{ + struct edac_device_ctl_info *edac_dev; + + debugf0("%s()\n", __func__); + + mutex_lock(&device_ctls_mutex); + + /* Find the structure on the list, if not there, then leave */ + edac_dev = find_edac_device_by_dev(dev); + if (edac_dev == NULL) { + mutex_unlock(&device_ctls_mutex); + return NULL; + } + + /* mark this instance as OFFLINE */ + edac_dev->op_state = OP_OFFLINE; + + /* clear workq processing on this instance */ + edac_device_workq_teardown(edac_dev); + + /* deregister from global list */ + del_edac_device_from_global_list(edac_dev); + + mutex_unlock(&device_ctls_mutex); + + /* Tear down the sysfs entries for this instance */ + edac_device_remove_sysfs(edac_dev); + + edac_printk(KERN_INFO, EDAC_MC, + "Removed device %d for %s %s: DEV %s\n", + edac_dev->dev_idx, + edac_dev->mod_name, edac_dev->ctl_name, dev_name(edac_dev)); + + return edac_dev; +} +EXPORT_SYMBOL_GPL(edac_device_del_device); + +static inline int edac_device_get_log_ce(struct edac_device_ctl_info *edac_dev) +{ + return edac_dev->log_ce; +} + +static inline int edac_device_get_log_ue(struct edac_device_ctl_info *edac_dev) +{ + return edac_dev->log_ue; +} + +static inline int edac_device_get_panic_on_ue(struct edac_device_ctl_info + *edac_dev) +{ + return edac_dev->panic_on_ue; +} + +/* + * edac_device_handle_ce + * perform a common output and handling of an 'edac_dev' CE event + */ +void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev, + int inst_nr, int block_nr, const char *msg) +{ + struct edac_device_instance *instance; + struct edac_device_block *block = NULL; + + if ((inst_nr >= edac_dev->nr_instances) || (inst_nr < 0)) { + edac_device_printk(edac_dev, KERN_ERR, + "INTERNAL ERROR: 'instance' out of range " + "(%d >= %d)\n", inst_nr, + edac_dev->nr_instances); + return; + } + + instance = edac_dev->instances + inst_nr; + + if ((block_nr >= instance->nr_blocks) || (block_nr < 0)) { + edac_device_printk(edac_dev, KERN_ERR, + "INTERNAL ERROR: instance %d 'block' " + "out of range (%d >= %d)\n", + inst_nr, block_nr, + instance->nr_blocks); + return; + } + + if (instance->nr_blocks > 0) { + block = instance->blocks + block_nr; + block->counters.ce_count++; + } + + /* Propogate the count up the 'totals' tree */ + instance->counters.ce_count++; + edac_dev->counters.ce_count++; + + if (edac_device_get_log_ce(edac_dev)) + edac_device_printk(edac_dev, KERN_WARNING, + "CE: %s instance: %s block: %s '%s'\n", + edac_dev->ctl_name, instance->name, + block ? block->name : "N/A", msg); +} +EXPORT_SYMBOL_GPL(edac_device_handle_ce); + +/* + * edac_device_handle_ue + * perform a common output and handling of an 'edac_dev' UE event + */ +void edac_device_handle_ue(struct edac_device_ctl_info *edac_dev, + int inst_nr, int block_nr, const char *msg) +{ + struct edac_device_instance *instance; + struct edac_device_block *block = NULL; + + if ((inst_nr >= edac_dev->nr_instances) || (inst_nr < 0)) { + edac_device_printk(edac_dev, KERN_ERR, + "INTERNAL ERROR: 'instance' out of range " + "(%d >= %d)\n", inst_nr, + edac_dev->nr_instances); + return; + } + + instance = edac_dev->instances + inst_nr; + + if ((block_nr >= instance->nr_blocks) || (block_nr < 0)) { + edac_device_printk(edac_dev, KERN_ERR, + "INTERNAL ERROR: instance %d 'block' " + "out of range (%d >= %d)\n", + inst_nr, block_nr, + instance->nr_blocks); + return; + } + + if (instance->nr_blocks > 0) { + block = instance->blocks + block_nr; + block->counters.ue_count++; + } + + /* Propogate the count up the 'totals' tree */ + instance->counters.ue_count++; + edac_dev->counters.ue_count++; + + if (edac_device_get_log_ue(edac_dev)) + edac_device_printk(edac_dev, KERN_EMERG, + "UE: %s instance: %s block: %s '%s'\n", + edac_dev->ctl_name, instance->name, + block ? block->name : "N/A", msg); + + if (edac_device_get_panic_on_ue(edac_dev)) + panic("EDAC %s: UE instance: %s block %s '%s'\n", + edac_dev->ctl_name, instance->name, + block ? block->name : "N/A", msg); +} +EXPORT_SYMBOL_GPL(edac_device_handle_ue); diff --git a/drivers/edac/edac_device_sysfs.c b/drivers/edac/edac_device_sysfs.c new file mode 100644 index 000000000000..70b837f23c43 --- /dev/null +++ b/drivers/edac/edac_device_sysfs.c @@ -0,0 +1,896 @@ +/* + * file for managing the edac_device class of devices for EDAC + * + * (C) 2007 SoftwareBitMaker (http://www.softwarebitmaker.com) + * + * This file may be distributed under the terms of the + * GNU General Public License. + * + * Written Doug Thompson <norsk5@xmission.com> + * + */ + +#include <linux/ctype.h> +#include <linux/module.h> + +#include "edac_core.h" +#include "edac_module.h" + +#define EDAC_DEVICE_SYMLINK "device" + +#define to_edacdev(k) container_of(k, struct edac_device_ctl_info, kobj) +#define to_edacdev_attr(a) container_of(a, struct edacdev_attribute, attr) + + +/* + * Set of edac_device_ctl_info attribute store/show functions + */ + +/* 'log_ue' */ +static ssize_t edac_device_ctl_log_ue_show(struct edac_device_ctl_info + *ctl_info, char *data) +{ + return sprintf(data, "%u\n", ctl_info->log_ue); +} + +static ssize_t edac_device_ctl_log_ue_store(struct edac_device_ctl_info + *ctl_info, const char *data, + size_t count) +{ + /* if parameter is zero, turn off flag, if non-zero turn on flag */ + ctl_info->log_ue = (simple_strtoul(data, NULL, 0) != 0); + + return count; +} + +/* 'log_ce' */ +static ssize_t edac_device_ctl_log_ce_show(struct edac_device_ctl_info + *ctl_info, char *data) +{ + return sprintf(data, "%u\n", ctl_info->log_ce); +} + +static ssize_t edac_device_ctl_log_ce_store(struct edac_device_ctl_info + *ctl_info, const char *data, + size_t count) +{ + /* if parameter is zero, turn off flag, if non-zero turn on flag */ + ctl_info->log_ce = (simple_strtoul(data, NULL, 0) != 0); + + return count; +} + +/* 'panic_on_ue' */ +static ssize_t edac_device_ctl_panic_on_ue_show(struct edac_device_ctl_info + *ctl_info, char *data) +{ + return sprintf(data, "%u\n", ctl_info->panic_on_ue); +} + +static ssize_t edac_device_ctl_panic_on_ue_store(struct edac_device_ctl_info + *ctl_info, const char *data, + size_t count) +{ + /* if parameter is zero, turn off flag, if non-zero turn on flag */ + ctl_info->panic_on_ue = (simple_strtoul(data, NULL, 0) != 0); + + return count; +} + +/* 'poll_msec' show and store functions*/ +static ssize_t edac_device_ctl_poll_msec_show(struct edac_device_ctl_info + *ctl_info, char *data) +{ + return sprintf(data, "%u\n", ctl_info->poll_msec); +} + +static ssize_t edac_device_ctl_poll_msec_store(struct edac_device_ctl_info + *ctl_info, const char *data, + size_t count) +{ + unsigned long value; + + /* get the value and enforce that it is non-zero, must be at least + * one millisecond for the delay period, between scans + * Then cancel last outstanding delay for the work request + * and set a new one. + */ + value = simple_strtoul(data, NULL, 0); + edac_device_reset_delay_period(ctl_info, value); + + return count; +} + +/* edac_device_ctl_info specific attribute structure */ +struct ctl_info_attribute { + struct attribute attr; + ssize_t(*show) (struct edac_device_ctl_info *, char *); + ssize_t(*store) (struct edac_device_ctl_info *, const char *, size_t); +}; + +#define to_ctl_info(k) container_of(k, struct edac_device_ctl_info, kobj) +#define to_ctl_info_attr(a) container_of(a,struct ctl_info_attribute,attr) + +/* Function to 'show' fields from the edac_dev 'ctl_info' structure */ +static ssize_t edac_dev_ctl_info_show(struct kobject *kobj, + struct attribute *attr, char *buffer) +{ + struct edac_device_ctl_info *edac_dev = to_ctl_info(kobj); + struct ctl_info_attribute *ctl_info_attr = to_ctl_info_attr(attr); + + if (ctl_info_attr->show) + return ctl_info_attr->show(edac_dev, buffer); + return -EIO; +} + +/* Function to 'store' fields into the edac_dev 'ctl_info' structure */ +static ssize_t edac_dev_ctl_info_store(struct kobject *kobj, + struct attribute *attr, + const char *buffer, size_t count) +{ + struct edac_device_ctl_info *edac_dev = to_ctl_info(kobj); + struct ctl_info_attribute *ctl_info_attr = to_ctl_info_attr(attr); + + if (ctl_info_attr->store) + return ctl_info_attr->store(edac_dev, buffer, count); + return -EIO; +} + +/* edac_dev file operations for an 'ctl_info' */ +static struct sysfs_ops device_ctl_info_ops = { + .show = edac_dev_ctl_info_show, + .store = edac_dev_ctl_info_store +}; + +#define CTL_INFO_ATTR(_name,_mode,_show,_store) \ +static struct ctl_info_attribute attr_ctl_info_##_name = { \ + .attr = {.name = __stringify(_name), .mode = _mode }, \ + .show = _show, \ + .store = _store, \ +}; + +/* Declare the various ctl_info attributes here and their respective ops */ +CTL_INFO_ATTR(log_ue, S_IRUGO | S_IWUSR, + edac_device_ctl_log_ue_show, edac_device_ctl_log_ue_store); +CTL_INFO_ATTR(log_ce, S_IRUGO | S_IWUSR, + edac_device_ctl_log_ce_show, edac_device_ctl_log_ce_store); +CTL_INFO_ATTR(panic_on_ue, S_IRUGO | S_IWUSR, + edac_device_ctl_panic_on_ue_show, + edac_device_ctl_panic_on_ue_store); +CTL_INFO_ATTR(poll_msec, S_IRUGO | S_IWUSR, + edac_device_ctl_poll_msec_show, edac_device_ctl_poll_msec_store); + +/* Base Attributes of the EDAC_DEVICE ECC object */ +static struct ctl_info_attribute *device_ctrl_attr[] = { + &attr_ctl_info_panic_on_ue, + &attr_ctl_info_log_ue, + &attr_ctl_info_log_ce, + &attr_ctl_info_poll_msec, + NULL, +}; + +/* + * edac_device_ctrl_master_release + * + * called when the reference count for the 'main' kobj + * for a edac_device control struct reaches zero + * + * Reference count model: + * One 'main' kobject for each control structure allocated. + * That main kobj is initially set to one AND + * the reference count for the EDAC 'core' module is + * bumped by one, thus added 'keep in memory' dependency. + * + * Each new internal kobj (in instances and blocks) then + * bumps the 'main' kobject. + * + * When they are released their release functions decrement + * the 'main' kobj. + * + * When the main kobj reaches zero (0) then THIS function + * is called which then decrements the EDAC 'core' module. + * When the module reference count reaches zero then the + * module no longer has dependency on keeping the release + * function code in memory and module can be unloaded. + * + * This will support several control objects as well, each + * with its own 'main' kobj. + */ +static void edac_device_ctrl_master_release(struct kobject *kobj) +{ + struct edac_device_ctl_info *edac_dev = to_edacdev(kobj); + + debugf4("%s() control index=%d\n", __func__, edac_dev->dev_idx); + + /* decrement the EDAC CORE module ref count */ + module_put(edac_dev->owner); + + /* free the control struct containing the 'main' kobj + * passed in to this routine + */ + kfree(edac_dev); +} + +/* ktype for the main (master) kobject */ +static struct kobj_type ktype_device_ctrl = { + .release = edac_device_ctrl_master_release, + .sysfs_ops = &device_ctl_info_ops, + .default_attrs = (struct attribute **)device_ctrl_attr, +}; + +/* + * edac_device_register_sysfs_main_kobj + * + * perform the high level setup for the new edac_device instance + * + * Return: 0 SUCCESS + * !0 FAILURE + */ +int edac_device_register_sysfs_main_kobj(struct edac_device_ctl_info *edac_dev) +{ + struct sysdev_class *edac_class; + int err; + + debugf1("%s()\n", __func__); + + /* get the /sys/devices/system/edac reference */ + edac_class = edac_get_edac_class(); + if (edac_class == NULL) { + debugf1("%s() no edac_class error\n", __func__); + err = -ENODEV; + goto err_out; + } + + /* Point to the 'edac_class' this instance 'reports' to */ + edac_dev->edac_class = edac_class; + + /* Init the devices's kobject */ + memset(&edac_dev->kobj, 0, sizeof(struct kobject)); + edac_dev->kobj.ktype = &ktype_device_ctrl; + + /* set this new device under the edac_class kobject */ + edac_dev->kobj.parent = &edac_class->kset.kobj; + + /* generate sysfs "..../edac/<name>" */ + debugf4("%s() set name of kobject to: %s\n", __func__, edac_dev->name); + err = kobject_set_name(&edac_dev->kobj, "%s", edac_dev->name); + if (err) + goto err_out; + + /* Record which module 'owns' this control structure + * and bump the ref count of the module + */ + edac_dev->owner = THIS_MODULE; + + if (!try_module_get(edac_dev->owner)) { + err = -ENODEV; + goto err_out; + } + + /* register */ + err = kobject_register(&edac_dev->kobj); + if (err) { + debugf1("%s()Failed to register '.../edac/%s'\n", + __func__, edac_dev->name); + goto err_kobj_reg; + } + + /* At this point, to 'free' the control struct, + * edac_device_unregister_sysfs_main_kobj() must be used + */ + + debugf4("%s() Registered '.../edac/%s' kobject\n", + __func__, edac_dev->name); + + return 0; + + /* Error exit stack */ +err_kobj_reg: + module_put(edac_dev->owner); + +err_out: + return err; +} + +/* + * edac_device_unregister_sysfs_main_kobj: + * the '..../edac/<name>' kobject + */ +void edac_device_unregister_sysfs_main_kobj( + struct edac_device_ctl_info *edac_dev) +{ + debugf0("%s()\n", __func__); + debugf4("%s() name of kobject is: %s\n", + __func__, kobject_name(&edac_dev->kobj)); + + /* + * Unregister the edac device's kobject and + * allow for reference count to reach 0 at which point + * the callback will be called to: + * a) module_put() this module + * b) 'kfree' the memory + */ + kobject_unregister(&edac_dev->kobj); +} + +/* edac_dev -> instance information */ + +/* + * Set of low-level instance attribute show functions + */ +static ssize_t instance_ue_count_show(struct edac_device_instance *instance, + char *data) +{ + return sprintf(data, "%u\n", instance->counters.ue_count); +} + +static ssize_t instance_ce_count_show(struct edac_device_instance *instance, + char *data) +{ + return sprintf(data, "%u\n", instance->counters.ce_count); +} + +#define to_instance(k) container_of(k, struct edac_device_instance, kobj) +#define to_instance_attr(a) container_of(a,struct instance_attribute,attr) + +/* DEVICE instance kobject release() function */ +static void edac_device_ctrl_instance_release(struct kobject *kobj) +{ + struct edac_device_instance *instance; + + debugf1("%s()\n", __func__); + + /* map from this kobj to the main control struct + * and then dec the main kobj count + */ + instance = to_instance(kobj); + kobject_put(&instance->ctl->kobj); +} + +/* instance specific attribute structure */ +struct instance_attribute { + struct attribute attr; + ssize_t(*show) (struct edac_device_instance *, char *); + ssize_t(*store) (struct edac_device_instance *, const char *, size_t); +}; + +/* Function to 'show' fields from the edac_dev 'instance' structure */ +static ssize_t edac_dev_instance_show(struct kobject *kobj, + struct attribute *attr, char *buffer) +{ + struct edac_device_instance *instance = to_instance(kobj); + struct instance_attribute *instance_attr = to_instance_attr(attr); + + if (instance_attr->show) + return instance_attr->show(instance, buffer); + return -EIO; +} + +/* Function to 'store' fields into the edac_dev 'instance' structure */ +static ssize_t edac_dev_instance_store(struct kobject *kobj, + struct attribute *attr, + const char *buffer, size_t count) +{ + struct edac_device_instance *instance = to_instance(kobj); + struct instance_attribute *instance_attr = to_instance_attr(attr); + + if (instance_attr->store) + return instance_attr->store(instance, buffer, count); + return -EIO; +} + +/* edac_dev file operations for an 'instance' */ +static struct sysfs_ops device_instance_ops = { + .show = edac_dev_instance_show, + .store = edac_dev_instance_store +}; + +#define INSTANCE_ATTR(_name,_mode,_show,_store) \ +static struct instance_attribute attr_instance_##_name = { \ + .attr = {.name = __stringify(_name), .mode = _mode }, \ + .show = _show, \ + .store = _store, \ +}; + +/* + * Define attributes visible for the edac_device instance object + * Each contains a pointer to a show and an optional set + * function pointer that does the low level output/input + */ +INSTANCE_ATTR(ce_count, S_IRUGO, instance_ce_count_show, NULL); +INSTANCE_ATTR(ue_count, S_IRUGO, instance_ue_count_show, NULL); + +/* list of edac_dev 'instance' attributes */ +static struct instance_attribute *device_instance_attr[] = { + &attr_instance_ce_count, + &attr_instance_ue_count, + NULL, +}; + +/* The 'ktype' for each edac_dev 'instance' */ +static struct kobj_type ktype_instance_ctrl = { + .release = edac_device_ctrl_instance_release, + .sysfs_ops = &device_instance_ops, + .default_attrs = (struct attribute **)device_instance_attr, +}; + +/* edac_dev -> instance -> block information */ + +#define to_block(k) container_of(k, struct edac_device_block, kobj) +#define to_block_attr(a) \ + container_of(a, struct edac_dev_sysfs_block_attribute, attr) + +/* + * Set of low-level block attribute show functions + */ +static ssize_t block_ue_count_show(struct kobject *kobj, + struct attribute *attr, char *data) +{ + struct edac_device_block *block = to_block(kobj); + + return sprintf(data, "%u\n", block->counters.ue_count); +} + +static ssize_t block_ce_count_show(struct kobject *kobj, + struct attribute *attr, char *data) +{ + struct edac_device_block *block = to_block(kobj); + + return sprintf(data, "%u\n", block->counters.ce_count); +} + +/* DEVICE block kobject release() function */ +static void edac_device_ctrl_block_release(struct kobject *kobj) +{ + struct edac_device_block *block; + + debugf1("%s()\n", __func__); + + /* get the container of the kobj */ + block = to_block(kobj); + + /* map from 'block kobj' to 'block->instance->controller->main_kobj' + * now 'release' the block kobject + */ + kobject_put(&block->instance->ctl->kobj); +} + + +/* Function to 'show' fields from the edac_dev 'block' structure */ +static ssize_t edac_dev_block_show(struct kobject *kobj, + struct attribute *attr, char *buffer) +{ + struct edac_dev_sysfs_block_attribute *block_attr = + to_block_attr(attr); + + if (block_attr->show) + return block_attr->show(kobj, attr, buffer); + return -EIO; +} + +/* Function to 'store' fields into the edac_dev 'block' structure */ +static ssize_t edac_dev_block_store(struct kobject *kobj, + struct attribute *attr, + const char *buffer, size_t count) +{ + struct edac_dev_sysfs_block_attribute *block_attr; + + block_attr = to_block_attr(attr); + + if (block_attr->store) + return block_attr->store(kobj, attr, buffer, count); + return -EIO; +} + +/* edac_dev file operations for a 'block' */ +static struct sysfs_ops device_block_ops = { + .show = edac_dev_block_show, + .store = edac_dev_block_store +}; + +#define BLOCK_ATTR(_name,_mode,_show,_store) \ +static struct edac_dev_sysfs_block_attribute attr_block_##_name = { \ + .attr = {.name = __stringify(_name), .mode = _mode }, \ + .show = _show, \ + .store = _store, \ +}; + +BLOCK_ATTR(ce_count, S_IRUGO, block_ce_count_show, NULL); +BLOCK_ATTR(ue_count, S_IRUGO, block_ue_count_show, NULL); + +/* list of edac_dev 'block' attributes */ +static struct edac_dev_sysfs_block_attribute *device_block_attr[] = { + &attr_block_ce_count, + &attr_block_ue_count, + NULL, +}; + +/* The 'ktype' for each edac_dev 'block' */ +static struct kobj_type ktype_block_ctrl = { + .release = edac_device_ctrl_block_release, + .sysfs_ops = &device_block_ops, + .default_attrs = (struct attribute **)device_block_attr, +}; + +/* block ctor/dtor code */ + +/* + * edac_device_create_block + */ +static int edac_device_create_block(struct edac_device_ctl_info *edac_dev, + struct edac_device_instance *instance, + struct edac_device_block *block) +{ + int i; + int err; + struct edac_dev_sysfs_block_attribute *sysfs_attrib; + struct kobject *main_kobj; + + debugf4("%s() Instance '%s' inst_p=%p block '%s' block_p=%p\n", + __func__, instance->name, instance, block->name, block); + debugf4("%s() block kobj=%p block kobj->parent=%p\n", + __func__, &block->kobj, &block->kobj.parent); + + /* init this block's kobject */ + memset(&block->kobj, 0, sizeof(struct kobject)); + block->kobj.parent = &instance->kobj; + block->kobj.ktype = &ktype_block_ctrl; + + err = kobject_set_name(&block->kobj, "%s", block->name); + if (err) + return err; + + /* bump the main kobject's reference count for this controller + * and this instance is dependant on the main + */ + main_kobj = kobject_get(&edac_dev->kobj); + if (!main_kobj) { + err = -ENODEV; + goto err_out; + } + + /* Add this block's kobject */ + err = kobject_register(&block->kobj); + if (err) { + debugf1("%s() Failed to register instance '%s'\n", + __func__, block->name); + kobject_put(main_kobj); + err = -ENODEV; + goto err_out; + } + + /* If there are driver level block attributes, then added them + * to the block kobject + */ + sysfs_attrib = block->block_attributes; + if (sysfs_attrib && block->nr_attribs) { + for (i = 0; i < block->nr_attribs; i++, sysfs_attrib++) { + + debugf4("%s() creating block attrib='%s' " + "attrib->%p to kobj=%p\n", + __func__, + sysfs_attrib->attr.name, + sysfs_attrib, &block->kobj); + + /* Create each block_attribute file */ + err = sysfs_create_file(&block->kobj, + &sysfs_attrib->attr); + if (err) + goto err_on_attrib; + } + } + + return 0; + + /* Error unwind stack */ +err_on_attrib: + kobject_unregister(&block->kobj); + +err_out: + return err; +} + +/* + * edac_device_delete_block(edac_dev,block); + */ +static void edac_device_delete_block(struct edac_device_ctl_info *edac_dev, + struct edac_device_block *block) +{ + struct edac_dev_sysfs_block_attribute *sysfs_attrib; + int i; + + /* if this block has 'attributes' then we need to iterate over the list + * and 'remove' the attributes on this block + */ + sysfs_attrib = block->block_attributes; + if (sysfs_attrib && block->nr_attribs) { + for (i = 0; i < block->nr_attribs; i++, sysfs_attrib++) { + + /* remove each block_attrib file */ + sysfs_remove_file(&block->kobj, + (struct attribute *) sysfs_attrib); + } + } + + /* unregister this block's kobject, SEE: + * edac_device_ctrl_block_release() callback operation + */ + kobject_unregister(&block->kobj); +} + +/* instance ctor/dtor code */ + +/* + * edac_device_create_instance + * create just one instance of an edac_device 'instance' + */ +static int edac_device_create_instance(struct edac_device_ctl_info *edac_dev, + int idx) +{ + int i, j; + int err; + struct edac_device_instance *instance; + struct kobject *main_kobj; + + instance = &edac_dev->instances[idx]; + + /* Init the instance's kobject */ + memset(&instance->kobj, 0, sizeof(struct kobject)); + + /* set this new device under the edac_device main kobject */ + instance->kobj.parent = &edac_dev->kobj; + instance->kobj.ktype = &ktype_instance_ctrl; + instance->ctl = edac_dev; + + err = kobject_set_name(&instance->kobj, "%s", instance->name); + if (err) + goto err_out; + + /* bump the main kobject's reference count for this controller + * and this instance is dependant on the main + */ + main_kobj = kobject_get(&edac_dev->kobj); + if (!main_kobj) { + err = -ENODEV; + goto err_out; + } + + /* Formally register this instance's kobject */ + err = kobject_register(&instance->kobj); + if (err != 0) { + debugf2("%s() Failed to register instance '%s'\n", + __func__, instance->name); + kobject_put(main_kobj); + goto err_out; + } + + debugf4("%s() now register '%d' blocks for instance %d\n", + __func__, instance->nr_blocks, idx); + + /* register all blocks of this instance */ + for (i = 0; i < instance->nr_blocks; i++) { + err = edac_device_create_block(edac_dev, instance, + &instance->blocks[i]); + if (err) { + /* If any fail, remove all previous ones */ + for (j = 0; j < i; j++) + edac_device_delete_block(edac_dev, + &instance->blocks[j]); + goto err_release_instance_kobj; + } + } + + debugf4("%s() Registered instance %d '%s' kobject\n", + __func__, idx, instance->name); + + return 0; + + /* error unwind stack */ +err_release_instance_kobj: + kobject_unregister(&instance->kobj); + +err_out: + return err; +} + +/* + * edac_device_remove_instance + * remove an edac_device instance + */ +static void edac_device_delete_instance(struct edac_device_ctl_info *edac_dev, + int idx) +{ + struct edac_device_instance *instance; + int i; + + instance = &edac_dev->instances[idx]; + + /* unregister all blocks in this instance */ + for (i = 0; i < instance->nr_blocks; i++) + edac_device_delete_block(edac_dev, &instance->blocks[i]); + + /* unregister this instance's kobject, SEE: + * edac_device_ctrl_instance_release() for callback operation + */ + kobject_unregister(&instance->kobj); +} + +/* + * edac_device_create_instances + * create the first level of 'instances' for this device + * (ie 'cache' might have 'cache0', 'cache1', 'cache2', etc + */ +static int edac_device_create_instances(struct edac_device_ctl_info *edac_dev) +{ + int i, j; + int err; + + debugf0("%s()\n", __func__); + + /* iterate over creation of the instances */ + for (i = 0; i < edac_dev->nr_instances; i++) { + err = edac_device_create_instance(edac_dev, i); + if (err) { + /* unwind previous instances on error */ + for (j = 0; j < i; j++) + edac_device_delete_instance(edac_dev, j); + return err; + } + } + + return 0; +} + +/* + * edac_device_delete_instances(edac_dev); + * unregister all the kobjects of the instances + */ +static void edac_device_delete_instances(struct edac_device_ctl_info *edac_dev) +{ + int i; + + /* iterate over creation of the instances */ + for (i = 0; i < edac_dev->nr_instances; i++) + edac_device_delete_instance(edac_dev, i); +} + +/* edac_dev sysfs ctor/dtor code */ + +/* + * edac_device_add_main_sysfs_attributes + * add some attributes to this instance's main kobject + */ +static int edac_device_add_main_sysfs_attributes( + struct edac_device_ctl_info *edac_dev) +{ + struct edac_dev_sysfs_attribute *sysfs_attrib; + int err = 0; + + sysfs_attrib = edac_dev->sysfs_attributes; + if (sysfs_attrib) { + /* iterate over the array and create an attribute for each + * entry in the list + */ + while (sysfs_attrib->attr.name != NULL) { + err = sysfs_create_file(&edac_dev->kobj, + (struct attribute*) sysfs_attrib); + if (err) + goto err_out; + + sysfs_attrib++; + } + } + +err_out: + return err; +} + +/* + * edac_device_remove_main_sysfs_attributes + * remove any attributes to this instance's main kobject + */ +static void edac_device_remove_main_sysfs_attributes( + struct edac_device_ctl_info *edac_dev) +{ + struct edac_dev_sysfs_attribute *sysfs_attrib; + + /* if there are main attributes, defined, remove them. First, + * point to the start of the array and iterate over it + * removing each attribute listed from this device's instance's kobject + */ + sysfs_attrib = edac_dev->sysfs_attributes; + if (sysfs_attrib) { + while (sysfs_attrib->attr.name != NULL) { + sysfs_remove_file(&edac_dev->kobj, + (struct attribute *) sysfs_attrib); + sysfs_attrib++; + } + } +} + +/* + * edac_device_create_sysfs() Constructor + * + * accept a created edac_device control structure + * and 'export' it to sysfs. The 'main' kobj should already have been + * created. 'instance' and 'block' kobjects should be registered + * along with any 'block' attributes from the low driver. In addition, + * the main attributes (if any) are connected to the main kobject of + * the control structure. + * + * Return: + * 0 Success + * !0 Failure + */ +int edac_device_create_sysfs(struct edac_device_ctl_info *edac_dev) +{ + int err; + struct kobject *edac_kobj = &edac_dev->kobj; + + debugf0("%s() idx=%d\n", __func__, edac_dev->dev_idx); + + /* go create any main attributes callers wants */ + err = edac_device_add_main_sysfs_attributes(edac_dev); + if (err) { + debugf0("%s() failed to add sysfs attribs\n", __func__); + goto err_out; + } + + /* create a symlink from the edac device + * to the platform 'device' being used for this + */ + err = sysfs_create_link(edac_kobj, + &edac_dev->dev->kobj, EDAC_DEVICE_SYMLINK); + if (err) { + debugf0("%s() sysfs_create_link() returned err= %d\n", + __func__, err); + goto err_remove_main_attribs; + } + + /* Create the first level instance directories + * In turn, the nested blocks beneath the instances will + * be registered as well + */ + err = edac_device_create_instances(edac_dev); + if (err) { + debugf0("%s() edac_device_create_instances() " + "returned err= %d\n", __func__, err); + goto err_remove_link; + } + + + debugf4("%s() create-instances done, idx=%d\n", + __func__, edac_dev->dev_idx); + + return 0; + + /* Error unwind stack */ +err_remove_link: + /* remove the sym link */ + sysfs_remove_link(&edac_dev->kobj, EDAC_DEVICE_SYMLINK); + +err_remove_main_attribs: + edac_device_remove_main_sysfs_attributes(edac_dev); + +err_out: + return err; +} + +/* + * edac_device_remove_sysfs() destructor + * + * given an edac_device struct, tear down the kobject resources + */ +void edac_device_remove_sysfs(struct edac_device_ctl_info *edac_dev) +{ + debugf0("%s()\n", __func__); + + /* remove any main attributes for this device */ + edac_device_remove_main_sysfs_attributes(edac_dev); + + /* remove the device sym link */ + sysfs_remove_link(&edac_dev->kobj, EDAC_DEVICE_SYMLINK); + + /* walk the instance/block kobject tree, deconstructing it */ + edac_device_delete_instances(edac_dev); +} diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index 804875de5801..4471be362599 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -27,1200 +27,20 @@ #include <linux/list.h> #include <linux/sysdev.h> #include <linux/ctype.h> -#include <linux/kthread.h> -#include <linux/freezer.h> +#include <linux/edac.h> #include <asm/uaccess.h> #include <asm/page.h> #include <asm/edac.h> -#include "edac_mc.h" - -#define EDAC_MC_VERSION "Ver: 2.0.1 " __DATE__ - - -#ifdef CONFIG_EDAC_DEBUG -/* Values of 0 to 4 will generate output */ -int edac_debug_level = 1; -EXPORT_SYMBOL_GPL(edac_debug_level); -#endif - -/* EDAC Controls, setable by module parameter, and sysfs */ -static int log_ue = 1; -static int log_ce = 1; -static int panic_on_ue; -static int poll_msec = 1000; +#include "edac_core.h" +#include "edac_module.h" /* lock to memory controller's control array */ -static DECLARE_MUTEX(mem_ctls_mutex); +static DEFINE_MUTEX(mem_ctls_mutex); static struct list_head mc_devices = LIST_HEAD_INIT(mc_devices); -static struct task_struct *edac_thread; - -#ifdef CONFIG_PCI -static int check_pci_parity = 0; /* default YES check PCI parity */ -static int panic_on_pci_parity; /* default no panic on PCI Parity */ -static atomic_t pci_parity_count = ATOMIC_INIT(0); - -static struct kobject edac_pci_kobj; /* /sys/devices/system/edac/pci */ -static struct completion edac_pci_kobj_complete; -#endif /* CONFIG_PCI */ - -/* START sysfs data and methods */ - - -static const char *mem_types[] = { - [MEM_EMPTY] = "Empty", - [MEM_RESERVED] = "Reserved", - [MEM_UNKNOWN] = "Unknown", - [MEM_FPM] = "FPM", - [MEM_EDO] = "EDO", - [MEM_BEDO] = "BEDO", - [MEM_SDR] = "Unbuffered-SDR", - [MEM_RDR] = "Registered-SDR", - [MEM_DDR] = "Unbuffered-DDR", - [MEM_RDDR] = "Registered-DDR", - [MEM_RMBS] = "RMBS" -}; - -static const char *dev_types[] = { - [DEV_UNKNOWN] = "Unknown", - [DEV_X1] = "x1", - [DEV_X2] = "x2", - [DEV_X4] = "x4", - [DEV_X8] = "x8", - [DEV_X16] = "x16", - [DEV_X32] = "x32", - [DEV_X64] = "x64" -}; - -static const char *edac_caps[] = { - [EDAC_UNKNOWN] = "Unknown", - [EDAC_NONE] = "None", - [EDAC_RESERVED] = "Reserved", - [EDAC_PARITY] = "PARITY", - [EDAC_EC] = "EC", - [EDAC_SECDED] = "SECDED", - [EDAC_S2ECD2ED] = "S2ECD2ED", - [EDAC_S4ECD4ED] = "S4ECD4ED", - [EDAC_S8ECD8ED] = "S8ECD8ED", - [EDAC_S16ECD16ED] = "S16ECD16ED" -}; - -/* sysfs object: /sys/devices/system/edac */ -static struct sysdev_class edac_class = { - set_kset_name("edac"), -}; - -/* sysfs object: - * /sys/devices/system/edac/mc - */ -static struct kobject edac_memctrl_kobj; - -/* We use these to wait for the reference counts on edac_memctrl_kobj and - * edac_pci_kobj to reach 0. - */ -static struct completion edac_memctrl_kobj_complete; - -/* - * /sys/devices/system/edac/mc; - * data structures and methods - */ -static ssize_t memctrl_int_show(void *ptr, char *buffer) -{ - int *value = (int*) ptr; - return sprintf(buffer, "%u\n", *value); -} - -static ssize_t memctrl_int_store(void *ptr, const char *buffer, size_t count) -{ - int *value = (int*) ptr; - - if (isdigit(*buffer)) - *value = simple_strtoul(buffer, NULL, 0); - - return count; -} - -struct memctrl_dev_attribute { - struct attribute attr; - void *value; - ssize_t (*show)(void *,char *); - ssize_t (*store)(void *, const char *, size_t); -}; - -/* Set of show/store abstract level functions for memory control object */ -static ssize_t memctrl_dev_show(struct kobject *kobj, - struct attribute *attr, char *buffer) -{ - struct memctrl_dev_attribute *memctrl_dev; - memctrl_dev = (struct memctrl_dev_attribute*)attr; - - if (memctrl_dev->show) - return memctrl_dev->show(memctrl_dev->value, buffer); - - return -EIO; -} - -static ssize_t memctrl_dev_store(struct kobject *kobj, struct attribute *attr, - const char *buffer, size_t count) -{ - struct memctrl_dev_attribute *memctrl_dev; - memctrl_dev = (struct memctrl_dev_attribute*)attr; - - if (memctrl_dev->store) - return memctrl_dev->store(memctrl_dev->value, buffer, count); - - return -EIO; -} - -static struct sysfs_ops memctrlfs_ops = { - .show = memctrl_dev_show, - .store = memctrl_dev_store -}; - -#define MEMCTRL_ATTR(_name,_mode,_show,_store) \ -struct memctrl_dev_attribute attr_##_name = { \ - .attr = {.name = __stringify(_name), .mode = _mode }, \ - .value = &_name, \ - .show = _show, \ - .store = _store, \ -}; - -#define MEMCTRL_STRING_ATTR(_name,_data,_mode,_show,_store) \ -struct memctrl_dev_attribute attr_##_name = { \ - .attr = {.name = __stringify(_name), .mode = _mode }, \ - .value = _data, \ - .show = _show, \ - .store = _store, \ -}; - -/* csrow<id> control files */ -MEMCTRL_ATTR(panic_on_ue,S_IRUGO|S_IWUSR,memctrl_int_show,memctrl_int_store); -MEMCTRL_ATTR(log_ue,S_IRUGO|S_IWUSR,memctrl_int_show,memctrl_int_store); -MEMCTRL_ATTR(log_ce,S_IRUGO|S_IWUSR,memctrl_int_show,memctrl_int_store); -MEMCTRL_ATTR(poll_msec,S_IRUGO|S_IWUSR,memctrl_int_show,memctrl_int_store); - -/* Base Attributes of the memory ECC object */ -static struct memctrl_dev_attribute *memctrl_attr[] = { - &attr_panic_on_ue, - &attr_log_ue, - &attr_log_ce, - &attr_poll_msec, - NULL, -}; - -/* Main MC kobject release() function */ -static void edac_memctrl_master_release(struct kobject *kobj) -{ - debugf1("%s()\n", __func__); - complete(&edac_memctrl_kobj_complete); -} - -static struct kobj_type ktype_memctrl = { - .release = edac_memctrl_master_release, - .sysfs_ops = &memctrlfs_ops, - .default_attrs = (struct attribute **) memctrl_attr, -}; - -/* Initialize the main sysfs entries for edac: - * /sys/devices/system/edac - * - * and children - * - * Return: 0 SUCCESS - * !0 FAILURE - */ -static int edac_sysfs_memctrl_setup(void) -{ - int err = 0; - - debugf1("%s()\n", __func__); - - /* create the /sys/devices/system/edac directory */ - err = sysdev_class_register(&edac_class); - - if (err) { - debugf1("%s() error=%d\n", __func__, err); - return err; - } - - /* Init the MC's kobject */ - memset(&edac_memctrl_kobj, 0, sizeof (edac_memctrl_kobj)); - edac_memctrl_kobj.parent = &edac_class.kset.kobj; - edac_memctrl_kobj.ktype = &ktype_memctrl; - - /* generate sysfs "..../edac/mc" */ - err = kobject_set_name(&edac_memctrl_kobj,"mc"); - - if (err) - goto fail; - - /* FIXME: maybe new sysdev_create_subdir() */ - err = kobject_register(&edac_memctrl_kobj); - - if (err) { - debugf1("Failed to register '.../edac/mc'\n"); - goto fail; - } - - debugf1("Registered '.../edac/mc' kobject\n"); - - return 0; - -fail: - sysdev_class_unregister(&edac_class); - return err; -} - -/* - * MC teardown: - * the '..../edac/mc' kobject followed by '..../edac' itself - */ -static void edac_sysfs_memctrl_teardown(void) -{ - debugf0("MC: " __FILE__ ": %s()\n", __func__); - - /* Unregister the MC's kobject and wait for reference count to reach - * 0. - */ - init_completion(&edac_memctrl_kobj_complete); - kobject_unregister(&edac_memctrl_kobj); - wait_for_completion(&edac_memctrl_kobj_complete); - - /* Unregister the 'edac' object */ - sysdev_class_unregister(&edac_class); -} - -#ifdef CONFIG_PCI -static ssize_t edac_pci_int_show(void *ptr, char *buffer) -{ - int *value = ptr; - return sprintf(buffer,"%d\n",*value); -} - -static ssize_t edac_pci_int_store(void *ptr, const char *buffer, size_t count) -{ - int *value = ptr; - - if (isdigit(*buffer)) - *value = simple_strtoul(buffer,NULL,0); - - return count; -} - -struct edac_pci_dev_attribute { - struct attribute attr; - void *value; - ssize_t (*show)(void *,char *); - ssize_t (*store)(void *, const char *,size_t); -}; - -/* Set of show/store abstract level functions for PCI Parity object */ -static ssize_t edac_pci_dev_show(struct kobject *kobj, struct attribute *attr, - char *buffer) -{ - struct edac_pci_dev_attribute *edac_pci_dev; - edac_pci_dev= (struct edac_pci_dev_attribute*)attr; - - if (edac_pci_dev->show) - return edac_pci_dev->show(edac_pci_dev->value, buffer); - return -EIO; -} - -static ssize_t edac_pci_dev_store(struct kobject *kobj, - struct attribute *attr, const char *buffer, size_t count) -{ - struct edac_pci_dev_attribute *edac_pci_dev; - edac_pci_dev= (struct edac_pci_dev_attribute*)attr; - - if (edac_pci_dev->show) - return edac_pci_dev->store(edac_pci_dev->value, buffer, count); - return -EIO; -} - -static struct sysfs_ops edac_pci_sysfs_ops = { - .show = edac_pci_dev_show, - .store = edac_pci_dev_store -}; - -#define EDAC_PCI_ATTR(_name,_mode,_show,_store) \ -struct edac_pci_dev_attribute edac_pci_attr_##_name = { \ - .attr = {.name = __stringify(_name), .mode = _mode }, \ - .value = &_name, \ - .show = _show, \ - .store = _store, \ -}; - -#define EDAC_PCI_STRING_ATTR(_name,_data,_mode,_show,_store) \ -struct edac_pci_dev_attribute edac_pci_attr_##_name = { \ - .attr = {.name = __stringify(_name), .mode = _mode }, \ - .value = _data, \ - .show = _show, \ - .store = _store, \ -}; - -/* PCI Parity control files */ -EDAC_PCI_ATTR(check_pci_parity, S_IRUGO|S_IWUSR, edac_pci_int_show, - edac_pci_int_store); -EDAC_PCI_ATTR(panic_on_pci_parity, S_IRUGO|S_IWUSR, edac_pci_int_show, - edac_pci_int_store); -EDAC_PCI_ATTR(pci_parity_count, S_IRUGO, edac_pci_int_show, NULL); - -/* Base Attributes of the memory ECC object */ -static struct edac_pci_dev_attribute *edac_pci_attr[] = { - &edac_pci_attr_check_pci_parity, - &edac_pci_attr_panic_on_pci_parity, - &edac_pci_attr_pci_parity_count, - NULL, -}; - -/* No memory to release */ -static void edac_pci_release(struct kobject *kobj) -{ - debugf1("%s()\n", __func__); - complete(&edac_pci_kobj_complete); -} - -static struct kobj_type ktype_edac_pci = { - .release = edac_pci_release, - .sysfs_ops = &edac_pci_sysfs_ops, - .default_attrs = (struct attribute **) edac_pci_attr, -}; - -/** - * edac_sysfs_pci_setup() - * - */ -static int edac_sysfs_pci_setup(void) -{ - int err; - - debugf1("%s()\n", __func__); - - memset(&edac_pci_kobj, 0, sizeof(edac_pci_kobj)); - edac_pci_kobj.parent = &edac_class.kset.kobj; - edac_pci_kobj.ktype = &ktype_edac_pci; - err = kobject_set_name(&edac_pci_kobj, "pci"); - - if (!err) { - /* Instanstiate the csrow object */ - /* FIXME: maybe new sysdev_create_subdir() */ - err = kobject_register(&edac_pci_kobj); - - if (err) - debugf1("Failed to register '.../edac/pci'\n"); - else - debugf1("Registered '.../edac/pci' kobject\n"); - } - - return err; -} - -static void edac_sysfs_pci_teardown(void) -{ - debugf0("%s()\n", __func__); - init_completion(&edac_pci_kobj_complete); - kobject_unregister(&edac_pci_kobj); - wait_for_completion(&edac_pci_kobj_complete); -} - - -static u16 get_pci_parity_status(struct pci_dev *dev, int secondary) -{ - int where; - u16 status; - - where = secondary ? PCI_SEC_STATUS : PCI_STATUS; - pci_read_config_word(dev, where, &status); - - /* If we get back 0xFFFF then we must suspect that the card has been - * pulled but the Linux PCI layer has not yet finished cleaning up. - * We don't want to report on such devices - */ - - if (status == 0xFFFF) { - u32 sanity; - - pci_read_config_dword(dev, 0, &sanity); - - if (sanity == 0xFFFFFFFF) - return 0; - } - - status &= PCI_STATUS_DETECTED_PARITY | PCI_STATUS_SIG_SYSTEM_ERROR | - PCI_STATUS_PARITY; - - if (status) - /* reset only the bits we are interested in */ - pci_write_config_word(dev, where, status); - - return status; -} - -typedef void (*pci_parity_check_fn_t) (struct pci_dev *dev); - -/* Clear any PCI parity errors logged by this device. */ -static void edac_pci_dev_parity_clear(struct pci_dev *dev) -{ - u8 header_type; - - get_pci_parity_status(dev, 0); - - /* read the device TYPE, looking for bridges */ - pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type); - - if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE) - get_pci_parity_status(dev, 1); -} - -/* - * PCI Parity polling - * - */ -static void edac_pci_dev_parity_test(struct pci_dev *dev) -{ - u16 status; - u8 header_type; - - /* read the STATUS register on this device - */ - status = get_pci_parity_status(dev, 0); - - debugf2("PCI STATUS= 0x%04x %s\n", status, dev->dev.bus_id ); - - /* check the status reg for errors */ - if (status) { - if (status & (PCI_STATUS_SIG_SYSTEM_ERROR)) - edac_printk(KERN_CRIT, EDAC_PCI, - "Signaled System Error on %s\n", - pci_name(dev)); - - if (status & (PCI_STATUS_PARITY)) { - edac_printk(KERN_CRIT, EDAC_PCI, - "Master Data Parity Error on %s\n", - pci_name(dev)); - - atomic_inc(&pci_parity_count); - } - - if (status & (PCI_STATUS_DETECTED_PARITY)) { - edac_printk(KERN_CRIT, EDAC_PCI, - "Detected Parity Error on %s\n", - pci_name(dev)); - - atomic_inc(&pci_parity_count); - } - } - - /* read the device TYPE, looking for bridges */ - pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type); - - debugf2("PCI HEADER TYPE= 0x%02x %s\n", header_type, dev->dev.bus_id ); - - if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE) { - /* On bridges, need to examine secondary status register */ - status = get_pci_parity_status(dev, 1); - - debugf2("PCI SEC_STATUS= 0x%04x %s\n", - status, dev->dev.bus_id ); - - /* check the secondary status reg for errors */ - if (status) { - if (status & (PCI_STATUS_SIG_SYSTEM_ERROR)) - edac_printk(KERN_CRIT, EDAC_PCI, "Bridge " - "Signaled System Error on %s\n", - pci_name(dev)); - - if (status & (PCI_STATUS_PARITY)) { - edac_printk(KERN_CRIT, EDAC_PCI, "Bridge " - "Master Data Parity Error on " - "%s\n", pci_name(dev)); - - atomic_inc(&pci_parity_count); - } - - if (status & (PCI_STATUS_DETECTED_PARITY)) { - edac_printk(KERN_CRIT, EDAC_PCI, "Bridge " - "Detected Parity Error on %s\n", - pci_name(dev)); - - atomic_inc(&pci_parity_count); - } - } - } -} - -/* - * pci_dev parity list iterator - * Scan the PCI device list for one iteration, looking for SERRORs - * Master Parity ERRORS or Parity ERRORs on primary or secondary devices - */ -static inline void edac_pci_dev_parity_iterator(pci_parity_check_fn_t fn) -{ - struct pci_dev *dev = NULL; - - /* request for kernel access to the next PCI device, if any, - * and while we are looking at it have its reference count - * bumped until we are done with it - */ - while((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { - fn(dev); - } -} - -static void do_pci_parity_check(void) -{ - unsigned long flags; - int before_count; - - debugf3("%s()\n", __func__); - - if (!check_pci_parity) - return; - - before_count = atomic_read(&pci_parity_count); - - /* scan all PCI devices looking for a Parity Error on devices and - * bridges - */ - local_irq_save(flags); - edac_pci_dev_parity_iterator(edac_pci_dev_parity_test); - local_irq_restore(flags); - - /* Only if operator has selected panic on PCI Error */ - if (panic_on_pci_parity) { - /* If the count is different 'after' from 'before' */ - if (before_count != atomic_read(&pci_parity_count)) - panic("EDAC: PCI Parity Error"); - } -} - -static inline void clear_pci_parity_errors(void) -{ - /* Clear any PCI bus parity errors that devices initially have logged - * in their registers. - */ - edac_pci_dev_parity_iterator(edac_pci_dev_parity_clear); -} - -#else /* CONFIG_PCI */ - -/* pre-process these away */ -#define do_pci_parity_check() -#define clear_pci_parity_errors() -#define edac_sysfs_pci_teardown() -#define edac_sysfs_pci_setup() (0) - -#endif /* CONFIG_PCI */ - -/* EDAC sysfs CSROW data structures and methods - */ - -/* Set of more default csrow<id> attribute show/store functions */ -static ssize_t csrow_ue_count_show(struct csrow_info *csrow, char *data, int private) -{ - return sprintf(data,"%u\n", csrow->ue_count); -} - -static ssize_t csrow_ce_count_show(struct csrow_info *csrow, char *data, int private) -{ - return sprintf(data,"%u\n", csrow->ce_count); -} - -static ssize_t csrow_size_show(struct csrow_info *csrow, char *data, int private) -{ - return sprintf(data,"%u\n", PAGES_TO_MiB(csrow->nr_pages)); -} - -static ssize_t csrow_mem_type_show(struct csrow_info *csrow, char *data, int private) -{ - return sprintf(data,"%s\n", mem_types[csrow->mtype]); -} - -static ssize_t csrow_dev_type_show(struct csrow_info *csrow, char *data, int private) -{ - return sprintf(data,"%s\n", dev_types[csrow->dtype]); -} - -static ssize_t csrow_edac_mode_show(struct csrow_info *csrow, char *data, int private) -{ - return sprintf(data,"%s\n", edac_caps[csrow->edac_mode]); -} - -/* show/store functions for DIMM Label attributes */ -static ssize_t channel_dimm_label_show(struct csrow_info *csrow, - char *data, int channel) -{ - return snprintf(data, EDAC_MC_LABEL_LEN,"%s", - csrow->channels[channel].label); -} - -static ssize_t channel_dimm_label_store(struct csrow_info *csrow, - const char *data, - size_t count, - int channel) -{ - ssize_t max_size = 0; - - max_size = min((ssize_t)count,(ssize_t)EDAC_MC_LABEL_LEN-1); - strncpy(csrow->channels[channel].label, data, max_size); - csrow->channels[channel].label[max_size] = '\0'; - - return max_size; -} - -/* show function for dynamic chX_ce_count attribute */ -static ssize_t channel_ce_count_show(struct csrow_info *csrow, - char *data, - int channel) -{ - return sprintf(data, "%u\n", csrow->channels[channel].ce_count); -} - -/* csrow specific attribute structure */ -struct csrowdev_attribute { - struct attribute attr; - ssize_t (*show)(struct csrow_info *,char *,int); - ssize_t (*store)(struct csrow_info *, const char *,size_t,int); - int private; -}; - -#define to_csrow(k) container_of(k, struct csrow_info, kobj) -#define to_csrowdev_attr(a) container_of(a, struct csrowdev_attribute, attr) - -/* Set of show/store higher level functions for default csrow attributes */ -static ssize_t csrowdev_show(struct kobject *kobj, - struct attribute *attr, - char *buffer) -{ - struct csrow_info *csrow = to_csrow(kobj); - struct csrowdev_attribute *csrowdev_attr = to_csrowdev_attr(attr); - - if (csrowdev_attr->show) - return csrowdev_attr->show(csrow, - buffer, - csrowdev_attr->private); - return -EIO; -} - -static ssize_t csrowdev_store(struct kobject *kobj, struct attribute *attr, - const char *buffer, size_t count) -{ - struct csrow_info *csrow = to_csrow(kobj); - struct csrowdev_attribute * csrowdev_attr = to_csrowdev_attr(attr); - - if (csrowdev_attr->store) - return csrowdev_attr->store(csrow, - buffer, - count, - csrowdev_attr->private); - return -EIO; -} - -static struct sysfs_ops csrowfs_ops = { - .show = csrowdev_show, - .store = csrowdev_store -}; - -#define CSROWDEV_ATTR(_name,_mode,_show,_store,_private) \ -struct csrowdev_attribute attr_##_name = { \ - .attr = {.name = __stringify(_name), .mode = _mode }, \ - .show = _show, \ - .store = _store, \ - .private = _private, \ -}; - -/* default cwrow<id>/attribute files */ -CSROWDEV_ATTR(size_mb,S_IRUGO,csrow_size_show,NULL,0); -CSROWDEV_ATTR(dev_type,S_IRUGO,csrow_dev_type_show,NULL,0); -CSROWDEV_ATTR(mem_type,S_IRUGO,csrow_mem_type_show,NULL,0); -CSROWDEV_ATTR(edac_mode,S_IRUGO,csrow_edac_mode_show,NULL,0); -CSROWDEV_ATTR(ue_count,S_IRUGO,csrow_ue_count_show,NULL,0); -CSROWDEV_ATTR(ce_count,S_IRUGO,csrow_ce_count_show,NULL,0); - -/* default attributes of the CSROW<id> object */ -static struct csrowdev_attribute *default_csrow_attr[] = { - &attr_dev_type, - &attr_mem_type, - &attr_edac_mode, - &attr_size_mb, - &attr_ue_count, - &attr_ce_count, - NULL, -}; - - -/* possible dynamic channel DIMM Label attribute files */ -CSROWDEV_ATTR(ch0_dimm_label,S_IRUGO|S_IWUSR, - channel_dimm_label_show, - channel_dimm_label_store, - 0 ); -CSROWDEV_ATTR(ch1_dimm_label,S_IRUGO|S_IWUSR, - channel_dimm_label_show, - channel_dimm_label_store, - 1 ); -CSROWDEV_ATTR(ch2_dimm_label,S_IRUGO|S_IWUSR, - channel_dimm_label_show, - channel_dimm_label_store, - 2 ); -CSROWDEV_ATTR(ch3_dimm_label,S_IRUGO|S_IWUSR, - channel_dimm_label_show, - channel_dimm_label_store, - 3 ); -CSROWDEV_ATTR(ch4_dimm_label,S_IRUGO|S_IWUSR, - channel_dimm_label_show, - channel_dimm_label_store, - 4 ); -CSROWDEV_ATTR(ch5_dimm_label,S_IRUGO|S_IWUSR, - channel_dimm_label_show, - channel_dimm_label_store, - 5 ); - -/* Total possible dynamic DIMM Label attribute file table */ -static struct csrowdev_attribute *dynamic_csrow_dimm_attr[] = { - &attr_ch0_dimm_label, - &attr_ch1_dimm_label, - &attr_ch2_dimm_label, - &attr_ch3_dimm_label, - &attr_ch4_dimm_label, - &attr_ch5_dimm_label -}; - -/* possible dynamic channel ce_count attribute files */ -CSROWDEV_ATTR(ch0_ce_count,S_IRUGO|S_IWUSR, - channel_ce_count_show, - NULL, - 0 ); -CSROWDEV_ATTR(ch1_ce_count,S_IRUGO|S_IWUSR, - channel_ce_count_show, - NULL, - 1 ); -CSROWDEV_ATTR(ch2_ce_count,S_IRUGO|S_IWUSR, - channel_ce_count_show, - NULL, - 2 ); -CSROWDEV_ATTR(ch3_ce_count,S_IRUGO|S_IWUSR, - channel_ce_count_show, - NULL, - 3 ); -CSROWDEV_ATTR(ch4_ce_count,S_IRUGO|S_IWUSR, - channel_ce_count_show, - NULL, - 4 ); -CSROWDEV_ATTR(ch5_ce_count,S_IRUGO|S_IWUSR, - channel_ce_count_show, - NULL, - 5 ); - -/* Total possible dynamic ce_count attribute file table */ -static struct csrowdev_attribute *dynamic_csrow_ce_count_attr[] = { - &attr_ch0_ce_count, - &attr_ch1_ce_count, - &attr_ch2_ce_count, - &attr_ch3_ce_count, - &attr_ch4_ce_count, - &attr_ch5_ce_count -}; - - -#define EDAC_NR_CHANNELS 6 - -/* Create dynamic CHANNEL files, indexed by 'chan', under specifed CSROW */ -static int edac_create_channel_files(struct kobject *kobj, int chan) -{ - int err=-ENODEV; - - if (chan >= EDAC_NR_CHANNELS) - return err; - - /* create the DIMM label attribute file */ - err = sysfs_create_file(kobj, - (struct attribute *) dynamic_csrow_dimm_attr[chan]); - - if (!err) { - /* create the CE Count attribute file */ - err = sysfs_create_file(kobj, - (struct attribute *) dynamic_csrow_ce_count_attr[chan]); - } else { - debugf1("%s() dimm labels and ce_count files created", __func__); - } - - return err; -} - -/* No memory to release for this kobj */ -static void edac_csrow_instance_release(struct kobject *kobj) -{ - struct csrow_info *cs; - - cs = container_of(kobj, struct csrow_info, kobj); - complete(&cs->kobj_complete); -} - -/* the kobj_type instance for a CSROW */ -static struct kobj_type ktype_csrow = { - .release = edac_csrow_instance_release, - .sysfs_ops = &csrowfs_ops, - .default_attrs = (struct attribute **) default_csrow_attr, -}; - -/* Create a CSROW object under specifed edac_mc_device */ -static int edac_create_csrow_object( - struct kobject *edac_mci_kobj, - struct csrow_info *csrow, - int index) -{ - int err = 0; - int chan; - - memset(&csrow->kobj, 0, sizeof(csrow->kobj)); - - /* generate ..../edac/mc/mc<id>/csrow<index> */ - - csrow->kobj.parent = edac_mci_kobj; - csrow->kobj.ktype = &ktype_csrow; - - /* name this instance of csrow<id> */ - err = kobject_set_name(&csrow->kobj,"csrow%d",index); - if (err) - goto error_exit; - - /* Instanstiate the csrow object */ - err = kobject_register(&csrow->kobj); - if (!err) { - /* Create the dyanmic attribute files on this csrow, - * namely, the DIMM labels and the channel ce_count - */ - for (chan = 0; chan < csrow->nr_channels; chan++) { - err = edac_create_channel_files(&csrow->kobj,chan); - if (err) - break; - } - } - -error_exit: - return err; -} - -/* default sysfs methods and data structures for the main MCI kobject */ - -static ssize_t mci_reset_counters_store(struct mem_ctl_info *mci, - const char *data, size_t count) -{ - int row, chan; - - mci->ue_noinfo_count = 0; - mci->ce_noinfo_count = 0; - mci->ue_count = 0; - mci->ce_count = 0; - - for (row = 0; row < mci->nr_csrows; row++) { - struct csrow_info *ri = &mci->csrows[row]; - - ri->ue_count = 0; - ri->ce_count = 0; - - for (chan = 0; chan < ri->nr_channels; chan++) - ri->channels[chan].ce_count = 0; - } - - mci->start_time = jiffies; - return count; -} - -/* memory scrubbing */ -static ssize_t mci_sdram_scrub_rate_store(struct mem_ctl_info *mci, - const char *data, size_t count) -{ - u32 bandwidth = -1; - - if (mci->set_sdram_scrub_rate) { - - memctrl_int_store(&bandwidth, data, count); - - if (!(*mci->set_sdram_scrub_rate)(mci, &bandwidth)) { - edac_printk(KERN_DEBUG, EDAC_MC, - "Scrub rate set successfully, applied: %d\n", - bandwidth); - } else { - /* FIXME: error codes maybe? */ - edac_printk(KERN_DEBUG, EDAC_MC, - "Scrub rate set FAILED, could not apply: %d\n", - bandwidth); - } - } else { - /* FIXME: produce "not implemented" ERROR for user-side. */ - edac_printk(KERN_WARNING, EDAC_MC, - "Memory scrubbing 'set'control is not implemented!\n"); - } - return count; -} - -static ssize_t mci_sdram_scrub_rate_show(struct mem_ctl_info *mci, char *data) -{ - u32 bandwidth = -1; - - if (mci->get_sdram_scrub_rate) { - if (!(*mci->get_sdram_scrub_rate)(mci, &bandwidth)) { - edac_printk(KERN_DEBUG, EDAC_MC, - "Scrub rate successfully, fetched: %d\n", - bandwidth); - } else { - /* FIXME: error codes maybe? */ - edac_printk(KERN_DEBUG, EDAC_MC, - "Scrub rate fetch FAILED, got: %d\n", - bandwidth); - } - } else { - /* FIXME: produce "not implemented" ERROR for user-side. */ - edac_printk(KERN_WARNING, EDAC_MC, - "Memory scrubbing 'get' control is not implemented!\n"); - } - return sprintf(data, "%d\n", bandwidth); -} - -/* default attribute files for the MCI object */ -static ssize_t mci_ue_count_show(struct mem_ctl_info *mci, char *data) -{ - return sprintf(data,"%d\n", mci->ue_count); -} - -static ssize_t mci_ce_count_show(struct mem_ctl_info *mci, char *data) -{ - return sprintf(data,"%d\n", mci->ce_count); -} - -static ssize_t mci_ce_noinfo_show(struct mem_ctl_info *mci, char *data) -{ - return sprintf(data,"%d\n", mci->ce_noinfo_count); -} - -static ssize_t mci_ue_noinfo_show(struct mem_ctl_info *mci, char *data) -{ - return sprintf(data,"%d\n", mci->ue_noinfo_count); -} - -static ssize_t mci_seconds_show(struct mem_ctl_info *mci, char *data) -{ - return sprintf(data,"%ld\n", (jiffies - mci->start_time) / HZ); -} - -static ssize_t mci_ctl_name_show(struct mem_ctl_info *mci, char *data) -{ - return sprintf(data,"%s\n", mci->ctl_name); -} - -static ssize_t mci_size_mb_show(struct mem_ctl_info *mci, char *data) -{ - int total_pages, csrow_idx; - - for (total_pages = csrow_idx = 0; csrow_idx < mci->nr_csrows; - csrow_idx++) { - struct csrow_info *csrow = &mci->csrows[csrow_idx]; - - if (!csrow->nr_pages) - continue; - - total_pages += csrow->nr_pages; - } - - return sprintf(data,"%u\n", PAGES_TO_MiB(total_pages)); -} - -struct mcidev_attribute { - struct attribute attr; - ssize_t (*show)(struct mem_ctl_info *,char *); - ssize_t (*store)(struct mem_ctl_info *, const char *,size_t); -}; - -#define to_mci(k) container_of(k, struct mem_ctl_info, edac_mci_kobj) -#define to_mcidev_attr(a) container_of(a, struct mcidev_attribute, attr) - -/* MCI show/store functions for top most object */ -static ssize_t mcidev_show(struct kobject *kobj, struct attribute *attr, - char *buffer) -{ - struct mem_ctl_info *mem_ctl_info = to_mci(kobj); - struct mcidev_attribute * mcidev_attr = to_mcidev_attr(attr); - - if (mcidev_attr->show) - return mcidev_attr->show(mem_ctl_info, buffer); - - return -EIO; -} - -static ssize_t mcidev_store(struct kobject *kobj, struct attribute *attr, - const char *buffer, size_t count) -{ - struct mem_ctl_info *mem_ctl_info = to_mci(kobj); - struct mcidev_attribute * mcidev_attr = to_mcidev_attr(attr); - - if (mcidev_attr->store) - return mcidev_attr->store(mem_ctl_info, buffer, count); - - return -EIO; -} - -static struct sysfs_ops mci_ops = { - .show = mcidev_show, - .store = mcidev_store -}; - -#define MCIDEV_ATTR(_name,_mode,_show,_store) \ -struct mcidev_attribute mci_attr_##_name = { \ - .attr = {.name = __stringify(_name), .mode = _mode }, \ - .show = _show, \ - .store = _store, \ -}; - -/* default Control file */ -MCIDEV_ATTR(reset_counters,S_IWUSR,NULL,mci_reset_counters_store); - -/* default Attribute files */ -MCIDEV_ATTR(mc_name,S_IRUGO,mci_ctl_name_show,NULL); -MCIDEV_ATTR(size_mb,S_IRUGO,mci_size_mb_show,NULL); -MCIDEV_ATTR(seconds_since_reset,S_IRUGO,mci_seconds_show,NULL); -MCIDEV_ATTR(ue_noinfo_count,S_IRUGO,mci_ue_noinfo_show,NULL); -MCIDEV_ATTR(ce_noinfo_count,S_IRUGO,mci_ce_noinfo_show,NULL); -MCIDEV_ATTR(ue_count,S_IRUGO,mci_ue_count_show,NULL); -MCIDEV_ATTR(ce_count,S_IRUGO,mci_ce_count_show,NULL); - -/* memory scrubber attribute file */ -MCIDEV_ATTR(sdram_scrub_rate,S_IRUGO|S_IWUSR,mci_sdram_scrub_rate_show,mci_sdram_scrub_rate_store); - -static struct mcidev_attribute *mci_attr[] = { - &mci_attr_reset_counters, - &mci_attr_mc_name, - &mci_attr_size_mb, - &mci_attr_seconds_since_reset, - &mci_attr_ue_noinfo_count, - &mci_attr_ce_noinfo_count, - &mci_attr_ue_count, - &mci_attr_ce_count, - &mci_attr_sdram_scrub_rate, - NULL -}; - -/* - * Release of a MC controlling instance - */ -static void edac_mci_instance_release(struct kobject *kobj) -{ - struct mem_ctl_info *mci; - - mci = to_mci(kobj); - debugf0("%s() idx=%d\n", __func__, mci->mc_idx); - complete(&mci->kobj_complete); -} - -static struct kobj_type ktype_mci = { - .release = edac_mci_instance_release, - .sysfs_ops = &mci_ops, - .default_attrs = (struct attribute **) mci_attr, -}; - - -#define EDAC_DEVICE_SYMLINK "device" - -/* - * Create a new Memory Controller kobject instance, - * mc<id> under the 'mc' directory - * - * Return: - * 0 Success - * !0 Failure - */ -static int edac_create_sysfs_mci_device(struct mem_ctl_info *mci) -{ - int i; - int err; - struct csrow_info *csrow; - struct kobject *edac_mci_kobj=&mci->edac_mci_kobj; - - debugf0("%s() idx=%d\n", __func__, mci->mc_idx); - memset(edac_mci_kobj, 0, sizeof(*edac_mci_kobj)); - - /* set the name of the mc<id> object */ - err = kobject_set_name(edac_mci_kobj,"mc%d",mci->mc_idx); - if (err) - return err; - - /* link to our parent the '..../edac/mc' object */ - edac_mci_kobj->parent = &edac_memctrl_kobj; - edac_mci_kobj->ktype = &ktype_mci; - - /* register the mc<id> kobject */ - err = kobject_register(edac_mci_kobj); - if (err) - return err; - - /* create a symlink for the device */ - err = sysfs_create_link(edac_mci_kobj, &mci->dev->kobj, - EDAC_DEVICE_SYMLINK); - if (err) - goto fail0; - - /* Make directories for each CSROW object - * under the mc<id> kobject - */ - for (i = 0; i < mci->nr_csrows; i++) { - csrow = &mci->csrows[i]; - - /* Only expose populated CSROWs */ - if (csrow->nr_pages > 0) { - err = edac_create_csrow_object(edac_mci_kobj,csrow,i); - if (err) - goto fail1; - } - } - - return 0; - - /* CSROW error: backout what has already been registered, */ -fail1: - for ( i--; i >= 0; i--) { - if (csrow->nr_pages > 0) { - init_completion(&csrow->kobj_complete); - kobject_unregister(&mci->csrows[i].kobj); - wait_for_completion(&csrow->kobj_complete); - } - } - -fail0: - init_completion(&mci->kobj_complete); - kobject_unregister(edac_mci_kobj); - wait_for_completion(&mci->kobj_complete); - return err; -} - -/* - * remove a Memory Controller instance - */ -static void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci) -{ - int i; - - debugf0("%s()\n", __func__); - - /* remove all csrow kobjects */ - for (i = 0; i < mci->nr_csrows; i++) { - if (mci->csrows[i].nr_pages > 0) { - init_completion(&mci->csrows[i].kobj_complete); - kobject_unregister(&mci->csrows[i].kobj); - wait_for_completion(&mci->csrows[i].kobj_complete); - } - } - - sysfs_remove_link(&mci->edac_mci_kobj, EDAC_DEVICE_SYMLINK); - init_completion(&mci->kobj_complete); - kobject_unregister(&mci->edac_mci_kobj); - wait_for_completion(&mci->kobj_complete); -} - -/* END OF sysfs data and methods */ - #ifdef CONFIG_EDAC_DEBUG -void edac_mc_dump_channel(struct channel_info *chan) +static void edac_mc_dump_channel(struct channel_info *chan) { debugf4("\tchannel = %p\n", chan); debugf4("\tchannel->chan_idx = %d\n", chan->chan_idx); @@ -1228,25 +48,21 @@ void edac_mc_dump_channel(struct channel_info *chan) debugf4("\tchannel->label = '%s'\n", chan->label); debugf4("\tchannel->csrow = %p\n\n", chan->csrow); } -EXPORT_SYMBOL_GPL(edac_mc_dump_channel); -void edac_mc_dump_csrow(struct csrow_info *csrow) +static void edac_mc_dump_csrow(struct csrow_info *csrow) { debugf4("\tcsrow = %p\n", csrow); debugf4("\tcsrow->csrow_idx = %d\n", csrow->csrow_idx); - debugf4("\tcsrow->first_page = 0x%lx\n", - csrow->first_page); + debugf4("\tcsrow->first_page = 0x%lx\n", csrow->first_page); debugf4("\tcsrow->last_page = 0x%lx\n", csrow->last_page); debugf4("\tcsrow->page_mask = 0x%lx\n", csrow->page_mask); debugf4("\tcsrow->nr_pages = 0x%x\n", csrow->nr_pages); - debugf4("\tcsrow->nr_channels = %d\n", - csrow->nr_channels); + debugf4("\tcsrow->nr_channels = %d\n", csrow->nr_channels); debugf4("\tcsrow->channels = %p\n", csrow->channels); debugf4("\tcsrow->mci = %p\n\n", csrow->mci); } -EXPORT_SYMBOL_GPL(edac_mc_dump_csrow); -void edac_mc_dump_mci(struct mem_ctl_info *mci) +static void edac_mc_dump_mci(struct mem_ctl_info *mci) { debugf3("\tmci = %p\n", mci); debugf3("\tmci->mtype_cap = %lx\n", mci->mtype_cap); @@ -1256,13 +72,11 @@ void edac_mc_dump_mci(struct mem_ctl_info *mci) debugf3("\tmci->nr_csrows = %d, csrows = %p\n", mci->nr_csrows, mci->csrows); debugf3("\tdev = %p\n", mci->dev); - debugf3("\tmod_name:ctl_name = %s:%s\n", - mci->mod_name, mci->ctl_name); + debugf3("\tmod_name:ctl_name = %s:%s\n", mci->mod_name, mci->ctl_name); debugf3("\tpvt_info = %p\n\n", mci->pvt_info); } -EXPORT_SYMBOL_GPL(edac_mc_dump_mci); -#endif /* CONFIG_EDAC_DEBUG */ +#endif /* CONFIG_EDAC_DEBUG */ /* 'ptr' points to a possibly unaligned item X such that sizeof(X) is 'size'. * Adjust 'ptr' so that its alignment is at least as stringent as what the @@ -1271,7 +85,7 @@ EXPORT_SYMBOL_GPL(edac_mc_dump_mci); * If 'size' is a constant, the compiler will optimize this whole function * down to either a no-op or the addition of a constant to the value of 'ptr'. */ -static inline char * align_ptr(void *ptr, unsigned size) +void *edac_align_ptr(void *ptr, unsigned size) { unsigned align, r; @@ -1288,14 +102,14 @@ static inline char * align_ptr(void *ptr, unsigned size) else if (size > sizeof(char)) align = sizeof(short); else - return (char *) ptr; + return (char *)ptr; r = size % align; if (r == 0) - return (char *) ptr; + return (char *)ptr; - return (char *) (((unsigned long) ptr) + align - r); + return (void *)(((unsigned long)ptr) + align - r); } /** @@ -1315,7 +129,7 @@ static inline char * align_ptr(void *ptr, unsigned size) * struct mem_ctl_info pointer */ struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows, - unsigned nr_chans) + unsigned nr_chans, int edac_index) { struct mem_ctl_info *mci; struct csrow_info *csi, *csrow; @@ -1323,30 +137,32 @@ struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows, void *pvt; unsigned size; int row, chn; + int err; /* Figure out the offsets of the various items from the start of an mc * structure. We want the alignment of each item to be at least as * stringent as what the compiler would provide if we could simply * hardcode everything into a single struct. */ - mci = (struct mem_ctl_info *) 0; - csi = (struct csrow_info *)align_ptr(&mci[1], sizeof(*csi)); - chi = (struct channel_info *) - align_ptr(&csi[nr_csrows], sizeof(*chi)); - pvt = align_ptr(&chi[nr_chans * nr_csrows], sz_pvt); - size = ((unsigned long) pvt) + sz_pvt; - - if ((mci = kmalloc(size, GFP_KERNEL)) == NULL) + mci = (struct mem_ctl_info *)0; + csi = edac_align_ptr(&mci[1], sizeof(*csi)); + chi = edac_align_ptr(&csi[nr_csrows], sizeof(*chi)); + pvt = edac_align_ptr(&chi[nr_chans * nr_csrows], sz_pvt); + size = ((unsigned long)pvt) + sz_pvt; + + mci = kzalloc(size, GFP_KERNEL); + if (mci == NULL) return NULL; /* Adjust pointers so they point within the memory we just allocated * rather than an imaginary chunk of memory located at address 0. */ - csi = (struct csrow_info *) (((char *) mci) + ((unsigned long) csi)); - chi = (struct channel_info *) (((char *) mci) + ((unsigned long) chi)); - pvt = sz_pvt ? (((char *) mci) + ((unsigned long) pvt)) : NULL; + csi = (struct csrow_info *)(((char *)mci) + ((unsigned long)csi)); + chi = (struct channel_info *)(((char *)mci) + ((unsigned long)chi)); + pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL; - memset(mci, 0, size); /* clear all fields */ + /* setup index and various internal pointers */ + mci->mc_idx = edac_index; mci->csrows = csi; mci->pvt_info = pvt; mci->nr_csrows = nr_csrows; @@ -1366,17 +182,35 @@ struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows, } } + mci->op_state = OP_ALLOC; + + /* + * Initialize the 'root' kobj for the edac_mc controller + */ + err = edac_mc_register_sysfs_main_kobj(mci); + if (err) { + kfree(mci); + return NULL; + } + + /* at this point, the root kobj is valid, and in order to + * 'free' the object, then the function: + * edac_mc_unregister_sysfs_main_kobj() must be called + * which will perform kobj unregistration and the actual free + * will occur during the kobject callback operation + */ return mci; } EXPORT_SYMBOL_GPL(edac_mc_alloc); /** - * edac_mc_free: Free a previously allocated 'mci' structure + * edac_mc_free + * 'Free' a previously allocated 'mci' structure * @mci: pointer to a struct mem_ctl_info structure */ void edac_mc_free(struct mem_ctl_info *mci) { - kfree(mci); + edac_mc_unregister_sysfs_main_kobj(mci); } EXPORT_SYMBOL_GPL(edac_mc_free); @@ -1397,18 +231,136 @@ static struct mem_ctl_info *find_mci_by_dev(struct device *dev) return NULL; } +/* + * handler for EDAC to check if NMI type handler has asserted interrupt + */ +static int edac_mc_assert_error_check_and_clear(void) +{ + int old_state; + + if (edac_op_state == EDAC_OPSTATE_POLL) + return 1; + + old_state = edac_err_assert; + edac_err_assert = 0; + + return old_state; +} + +/* + * edac_mc_workq_function + * performs the operation scheduled by a workq request + */ +static void edac_mc_workq_function(struct work_struct *work_req) +{ + struct delayed_work *d_work = (struct delayed_work *)work_req; + struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work); + + mutex_lock(&mem_ctls_mutex); + + /* if this control struct has movd to offline state, we are done */ + if (mci->op_state == OP_OFFLINE) { + mutex_unlock(&mem_ctls_mutex); + return; + } + + /* Only poll controllers that are running polled and have a check */ + if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL)) + mci->edac_check(mci); + + /* + * FIXME: temp place holder for PCI checks, + * goes away when we break out PCI + */ + edac_pci_do_parity_check(); + + mutex_unlock(&mem_ctls_mutex); + + /* Reschedule */ + queue_delayed_work(edac_workqueue, &mci->work, + msecs_to_jiffies(edac_mc_get_poll_msec())); +} + +/* + * edac_mc_workq_setup + * initialize a workq item for this mci + * passing in the new delay period in msec + * + * locking model: + * + * called with the mem_ctls_mutex held + */ +static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec) +{ + debugf0("%s()\n", __func__); + + /* if this instance is not in the POLL state, then simply return */ + if (mci->op_state != OP_RUNNING_POLL) + return; + + INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function); + queue_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec)); +} + +/* + * edac_mc_workq_teardown + * stop the workq processing on this mci + * + * locking model: + * + * called WITHOUT lock held + */ +static void edac_mc_workq_teardown(struct mem_ctl_info *mci) +{ + int status; + + /* if not running POLL, leave now */ + if (mci->op_state == OP_RUNNING_POLL) { + status = cancel_delayed_work(&mci->work); + if (status == 0) { + debugf0("%s() not canceled, flush the queue\n", + __func__); + + /* workq instance might be running, wait for it */ + flush_workqueue(edac_workqueue); + } + } +} + +/* + * edac_reset_delay_period + */ +static void edac_reset_delay_period(struct mem_ctl_info *mci, unsigned long value) +{ + /* cancel the current workq request */ + edac_mc_workq_teardown(mci); + + /* lock the list of devices for the new setup */ + mutex_lock(&mem_ctls_mutex); + + /* restart the workq request, with new delay value */ + edac_mc_workq_setup(mci, value); + + mutex_unlock(&mem_ctls_mutex); +} + /* Return 0 on success, 1 on failure. * Before calling this function, caller must * assign a unique value to mci->mc_idx. + * + * locking model: + * + * called with the mem_ctls_mutex lock held */ -static int add_mc_to_global_list (struct mem_ctl_info *mci) +static int add_mc_to_global_list(struct mem_ctl_info *mci) { struct list_head *item, *insert_before; struct mem_ctl_info *p; insert_before = &mc_devices; - if (unlikely((p = find_mci_by_dev(mci->dev)) != NULL)) + p = find_mci_by_dev(mci->dev); + if (unlikely(p != NULL)) goto fail0; list_for_each(item, &mc_devices) { @@ -1424,18 +376,19 @@ static int add_mc_to_global_list (struct mem_ctl_info *mci) } list_add_tail_rcu(&mci->link, insert_before); + atomic_inc(&edac_handlers); return 0; fail0: edac_printk(KERN_WARNING, EDAC_MC, - "%s (%s) %s %s already assigned %d\n", p->dev->bus_id, - dev_name(p->dev), p->mod_name, p->ctl_name, p->mc_idx); + "%s (%s) %s %s already assigned %d\n", p->dev->bus_id, + dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx); return 1; fail1: edac_printk(KERN_WARNING, EDAC_MC, - "bug in low-level driver: attempt to assign\n" - " duplicate mc_idx %d in %s()\n", p->mc_idx, __func__); + "bug in low-level driver: attempt to assign\n" + " duplicate mc_idx %d in %s()\n", p->mc_idx, __func__); return 1; } @@ -1450,6 +403,7 @@ static void complete_mc_list_del(struct rcu_head *head) static void del_mc_from_global_list(struct mem_ctl_info *mci) { + atomic_dec(&edac_handlers); list_del_rcu(&mci->link); init_completion(&mci->complete); call_rcu(&mci->rcu, complete_mc_list_del); @@ -1457,6 +411,34 @@ static void del_mc_from_global_list(struct mem_ctl_info *mci) } /** + * edac_mc_find: Search for a mem_ctl_info structure whose index is 'idx'. + * + * If found, return a pointer to the structure. + * Else return NULL. + * + * Caller must hold mem_ctls_mutex. + */ +struct mem_ctl_info *edac_mc_find(int idx) +{ + struct list_head *item; + struct mem_ctl_info *mci; + + list_for_each(item, &mc_devices) { + mci = list_entry(item, struct mem_ctl_info, link); + + if (mci->mc_idx >= idx) { + if (mci->mc_idx == idx) + return mci; + + break; + } + } + + return NULL; +} +EXPORT_SYMBOL(edac_mc_find); + +/** * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and * create sysfs entries associated with mci structure * @mci: pointer to the mci structure to be added to the list @@ -1468,10 +450,10 @@ static void del_mc_from_global_list(struct mem_ctl_info *mci) */ /* FIXME - should a warning be printed if no error detection? correction? */ -int edac_mc_add_mc(struct mem_ctl_info *mci, int mc_idx) +int edac_mc_add_mc(struct mem_ctl_info *mci) { debugf0("%s()\n", __func__); - mci->mc_idx = mc_idx; + #ifdef CONFIG_EDAC_DEBUG if (edac_debug_level >= 3) edac_mc_dump_mci(mci); @@ -1484,12 +466,12 @@ int edac_mc_add_mc(struct mem_ctl_info *mci, int mc_idx) edac_mc_dump_csrow(&mci->csrows[i]); for (j = 0; j < mci->csrows[i].nr_channels; j++) - edac_mc_dump_channel( - &mci->csrows[i].channels[j]); + edac_mc_dump_channel(&mci->csrows[i]. + channels[j]); } } #endif - down(&mem_ctls_mutex); + mutex_lock(&mem_ctls_mutex); if (add_mc_to_global_list(mci)) goto fail0; @@ -1503,18 +485,28 @@ int edac_mc_add_mc(struct mem_ctl_info *mci, int mc_idx) goto fail1; } + /* If there IS a check routine, then we are running POLLED */ + if (mci->edac_check != NULL) { + /* This instance is NOW RUNNING */ + mci->op_state = OP_RUNNING_POLL; + + edac_mc_workq_setup(mci, edac_mc_get_poll_msec()); + } else { + mci->op_state = OP_RUNNING_INTERRUPT; + } + /* Report action taken */ - edac_mc_printk(mci, KERN_INFO, "Giving out device to %s %s: DEV %s\n", - mci->mod_name, mci->ctl_name, dev_name(mci->dev)); + edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':" + " DEV %s\n", mci->mod_name, mci->ctl_name, dev_name(mci)); - up(&mem_ctls_mutex); + mutex_unlock(&mem_ctls_mutex); return 0; fail1: del_mc_from_global_list(mci); fail0: - up(&mem_ctls_mutex); + mutex_unlock(&mem_ctls_mutex); return 1; } EXPORT_SYMBOL_GPL(edac_mc_add_mc); @@ -1526,29 +518,41 @@ EXPORT_SYMBOL_GPL(edac_mc_add_mc); * * Return pointer to removed mci structure, or NULL if device not found. */ -struct mem_ctl_info * edac_mc_del_mc(struct device *dev) +struct mem_ctl_info *edac_mc_del_mc(struct device *dev) { struct mem_ctl_info *mci; - debugf0("MC: %s()\n", __func__); - down(&mem_ctls_mutex); + debugf0("%s()\n", __func__); + + mutex_lock(&mem_ctls_mutex); - if ((mci = find_mci_by_dev(dev)) == NULL) { - up(&mem_ctls_mutex); + /* find the requested mci struct in the global list */ + mci = find_mci_by_dev(dev); + if (mci == NULL) { + mutex_unlock(&mem_ctls_mutex); return NULL; } - edac_remove_sysfs_mci_device(mci); + /* marking MCI offline */ + mci->op_state = OP_OFFLINE; + del_mc_from_global_list(mci); - up(&mem_ctls_mutex); + mutex_unlock(&mem_ctls_mutex); + + /* flush workq processes and remove sysfs */ + edac_mc_workq_teardown(mci); + edac_remove_sysfs_mci_device(mci); + edac_printk(KERN_INFO, EDAC_MC, "Removed device %d for %s %s: DEV %s\n", mci->mc_idx, - mci->mod_name, mci->ctl_name, dev_name(mci->dev)); + mci->mod_name, mci->ctl_name, dev_name(mci)); + return mci; } EXPORT_SYMBOL_GPL(edac_mc_del_mc); -void edac_mc_scrub_block(unsigned long page, unsigned long offset, u32 size) +static void edac_mc_scrub_block(unsigned long page, unsigned long offset, + u32 size) { struct page *pg; void *virt_addr; @@ -1557,7 +561,7 @@ void edac_mc_scrub_block(unsigned long page, unsigned long offset, u32 size) debugf3("%s()\n", __func__); /* ECC error page was not in our memory. Ignore it. */ - if(!pfn_valid(page)) + if (!pfn_valid(page)) return; /* Find the actual page structure then map it and fix */ @@ -1577,7 +581,6 @@ void edac_mc_scrub_block(unsigned long page, unsigned long offset, u32 size) if (PageHighMem(pg)) local_irq_restore(flags); } -EXPORT_SYMBOL_GPL(edac_mc_scrub_block); /* FIXME - should return -1 */ int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page) @@ -1611,7 +614,7 @@ int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page) if (row == -1) edac_mc_printk(mci, KERN_ERR, "could not look up page error address %lx\n", - (unsigned long) page); + (unsigned long)page); return row; } @@ -1620,8 +623,9 @@ EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page); /* FIXME - setable log (warning/emerg) levels */ /* FIXME - integrate with evlog: http://evlog.sourceforge.net/ */ void edac_mc_handle_ce(struct mem_ctl_info *mci, - unsigned long page_frame_number, unsigned long offset_in_page, - unsigned long syndrome, int row, int channel, const char *msg) + unsigned long page_frame_number, + unsigned long offset_in_page, unsigned long syndrome, + int row, int channel, const char *msg) { unsigned long remapped_page; @@ -1647,7 +651,7 @@ void edac_mc_handle_ce(struct mem_ctl_info *mci, return; } - if (log_ce) + if (edac_mc_get_log_ce()) /* FIXME - put in DIMM location */ edac_mc_printk(mci, KERN_WARNING, "CE page 0x%lx, offset 0x%lx, grain %d, syndrome " @@ -1671,18 +675,18 @@ void edac_mc_handle_ce(struct mem_ctl_info *mci, * page - which can then be scrubbed. */ remapped_page = mci->ctl_page_to_phys ? - mci->ctl_page_to_phys(mci, page_frame_number) : - page_frame_number; + mci->ctl_page_to_phys(mci, page_frame_number) : + page_frame_number; edac_mc_scrub_block(remapped_page, offset_in_page, - mci->csrows[row].grain); + mci->csrows[row].grain); } } EXPORT_SYMBOL_GPL(edac_mc_handle_ce); void edac_mc_handle_ce_no_info(struct mem_ctl_info *mci, const char *msg) { - if (log_ce) + if (edac_mc_get_log_ce()) edac_mc_printk(mci, KERN_WARNING, "CE - no information available: %s\n", msg); @@ -1692,8 +696,8 @@ void edac_mc_handle_ce_no_info(struct mem_ctl_info *mci, const char *msg) EXPORT_SYMBOL_GPL(edac_mc_handle_ce_no_info); void edac_mc_handle_ue(struct mem_ctl_info *mci, - unsigned long page_frame_number, unsigned long offset_in_page, - int row, const char *msg) + unsigned long page_frame_number, + unsigned long offset_in_page, int row, const char *msg) { int len = EDAC_MC_LABEL_LEN * 4; char labels[len + 1]; @@ -1714,26 +718,26 @@ void edac_mc_handle_ue(struct mem_ctl_info *mci, } chars = snprintf(pos, len + 1, "%s", - mci->csrows[row].channels[0].label); + mci->csrows[row].channels[0].label); len -= chars; pos += chars; for (chan = 1; (chan < mci->csrows[row].nr_channels) && (len > 0); - chan++) { + chan++) { chars = snprintf(pos, len + 1, ":%s", - mci->csrows[row].channels[chan].label); + mci->csrows[row].channels[chan].label); len -= chars; pos += chars; } - if (log_ue) + if (edac_mc_get_log_ue()) edac_mc_printk(mci, KERN_EMERG, "UE page 0x%lx, offset 0x%lx, grain %d, row %d, " "labels \"%s\": %s\n", page_frame_number, - offset_in_page, mci->csrows[row].grain, row, labels, - msg); + offset_in_page, mci->csrows[row].grain, row, + labels, msg); - if (panic_on_ue) + if (edac_mc_get_panic_on_ue()) panic("EDAC MC%d: UE page 0x%lx, offset 0x%lx, grain %d, " "row %d, labels \"%s\": %s\n", mci->mc_idx, page_frame_number, offset_in_page, @@ -1746,10 +750,10 @@ EXPORT_SYMBOL_GPL(edac_mc_handle_ue); void edac_mc_handle_ue_no_info(struct mem_ctl_info *mci, const char *msg) { - if (panic_on_ue) + if (edac_mc_get_panic_on_ue()) panic("EDAC MC%d: Uncorrected Error", mci->mc_idx); - if (log_ue) + if (edac_mc_get_log_ue()) edac_mc_printk(mci, KERN_WARNING, "UE - no information available: %s\n", msg); mci->ue_noinfo_count++; @@ -1757,16 +761,14 @@ void edac_mc_handle_ue_no_info(struct mem_ctl_info *mci, const char *msg) } EXPORT_SYMBOL_GPL(edac_mc_handle_ue_no_info); - /************************************************************* * On Fully Buffered DIMM modules, this help function is * called to process UE events */ void edac_mc_handle_fbd_ue(struct mem_ctl_info *mci, - unsigned int csrow, - unsigned int channela, - unsigned int channelb, - char *msg) + unsigned int csrow, + unsigned int channela, + unsigned int channelb, char *msg) { int len = EDAC_MC_LABEL_LEN * 4; char labels[len + 1]; @@ -1808,20 +810,21 @@ void edac_mc_handle_fbd_ue(struct mem_ctl_info *mci, /* Generate the DIMM labels from the specified channels */ chars = snprintf(pos, len + 1, "%s", mci->csrows[csrow].channels[channela].label); - len -= chars; pos += chars; + len -= chars; + pos += chars; chars = snprintf(pos, len + 1, "-%s", mci->csrows[csrow].channels[channelb].label); - if (log_ue) + if (edac_mc_get_log_ue()) edac_mc_printk(mci, KERN_EMERG, "UE row %d, channel-a= %d channel-b= %d " "labels \"%s\": %s\n", csrow, channela, channelb, labels, msg); - if (panic_on_ue) + if (edac_mc_get_panic_on_ue()) panic("UE row %d, channel-a= %d channel-b= %d " - "labels \"%s\": %s\n", csrow, channela, - channelb, labels, msg); + "labels \"%s\": %s\n", csrow, channela, + channelb, labels, msg); } EXPORT_SYMBOL(edac_mc_handle_fbd_ue); @@ -1830,9 +833,7 @@ EXPORT_SYMBOL(edac_mc_handle_fbd_ue); * called to process CE events */ void edac_mc_handle_fbd_ce(struct mem_ctl_info *mci, - unsigned int csrow, - unsigned int channel, - char *msg) + unsigned int csrow, unsigned int channel, char *msg) { /* Ensure boundary values */ @@ -1853,13 +854,12 @@ void edac_mc_handle_fbd_ce(struct mem_ctl_info *mci, return; } - if (log_ce) + if (edac_mc_get_log_ce()) /* FIXME - put in DIMM location */ edac_mc_printk(mci, KERN_WARNING, "CE row %d, channel %d, label \"%s\": %s\n", csrow, channel, - mci->csrows[csrow].channels[channel].label, - msg); + mci->csrows[csrow].channels[channel].label, msg); mci->ce_count++; mci->csrows[csrow].ce_count++; @@ -1867,17 +867,16 @@ void edac_mc_handle_fbd_ce(struct mem_ctl_info *mci, } EXPORT_SYMBOL(edac_mc_handle_fbd_ce); - /* * Iterate over all MC instances and check for ECC, et al, errors */ -static inline void check_mc_devices(void) +void edac_check_mc_devices(void) { struct list_head *item; struct mem_ctl_info *mci; debugf3("%s()\n", __func__); - down(&mem_ctls_mutex); + mutex_lock(&mem_ctls_mutex); list_for_each(item, &mc_devices) { mci = list_entry(item, struct mem_ctl_info, link); @@ -1886,120 +885,5 @@ static inline void check_mc_devices(void) mci->edac_check(mci); } - up(&mem_ctls_mutex); -} - -/* - * Check MC status every poll_msec. - * Check PCI status every poll_msec as well. - * - * This where the work gets done for edac. - * - * SMP safe, doesn't use NMI, and auto-rate-limits. - */ -static void do_edac_check(void) -{ - debugf3("%s()\n", __func__); - check_mc_devices(); - do_pci_parity_check(); -} - -static int edac_kernel_thread(void *arg) -{ - set_freezable(); - while (!kthread_should_stop()) { - do_edac_check(); - - /* goto sleep for the interval */ - schedule_timeout_interruptible((HZ * poll_msec) / 1000); - try_to_freeze(); - } - - return 0; + mutex_unlock(&mem_ctls_mutex); } - -/* - * edac_mc_init - * module initialization entry point - */ -static int __init edac_mc_init(void) -{ - edac_printk(KERN_INFO, EDAC_MC, EDAC_MC_VERSION "\n"); - - /* - * Harvest and clear any boot/initialization PCI parity errors - * - * FIXME: This only clears errors logged by devices present at time of - * module initialization. We should also do an initial clear - * of each newly hotplugged device. - */ - clear_pci_parity_errors(); - - /* Create the MC sysfs entries */ - if (edac_sysfs_memctrl_setup()) { - edac_printk(KERN_ERR, EDAC_MC, - "Error initializing sysfs code\n"); - return -ENODEV; - } - - /* Create the PCI parity sysfs entries */ - if (edac_sysfs_pci_setup()) { - edac_sysfs_memctrl_teardown(); - edac_printk(KERN_ERR, EDAC_MC, - "EDAC PCI: Error initializing sysfs code\n"); - return -ENODEV; - } - - /* create our kernel thread */ - edac_thread = kthread_run(edac_kernel_thread, NULL, "kedac"); - - if (IS_ERR(edac_thread)) { - /* remove the sysfs entries */ - edac_sysfs_memctrl_teardown(); - edac_sysfs_pci_teardown(); - return PTR_ERR(edac_thread); - } - - return 0; -} - -/* - * edac_mc_exit() - * module exit/termination functioni - */ -static void __exit edac_mc_exit(void) -{ - debugf0("%s()\n", __func__); - kthread_stop(edac_thread); - - /* tear down the sysfs device */ - edac_sysfs_memctrl_teardown(); - edac_sysfs_pci_teardown(); -} - -module_init(edac_mc_init); -module_exit(edac_mc_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Linux Networx (http://lnxi.com) Thayne Harbaugh et al\n" - "Based on work by Dan Hollis et al"); -MODULE_DESCRIPTION("Core library routines for MC reporting"); - -module_param(panic_on_ue, int, 0644); -MODULE_PARM_DESC(panic_on_ue, "Panic on uncorrected error: 0=off 1=on"); -#ifdef CONFIG_PCI -module_param(check_pci_parity, int, 0644); -MODULE_PARM_DESC(check_pci_parity, "Check for PCI bus parity errors: 0=off 1=on"); -module_param(panic_on_pci_parity, int, 0644); -MODULE_PARM_DESC(panic_on_pci_parity, "Panic on PCI Bus Parity error: 0=off 1=on"); -#endif -module_param(log_ue, int, 0644); -MODULE_PARM_DESC(log_ue, "Log uncorrectable error to console: 0=off 1=on"); -module_param(log_ce, int, 0644); -MODULE_PARM_DESC(log_ce, "Log correctable error to console: 0=off 1=on"); -module_param(poll_msec, int, 0644); -MODULE_PARM_DESC(poll_msec, "Polling period in milliseconds"); -#ifdef CONFIG_EDAC_DEBUG -module_param(edac_debug_level, int, 0644); -MODULE_PARM_DESC(edac_debug_level, "Debug level"); -#endif diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c new file mode 100644 index 000000000000..cd090b0677a7 --- /dev/null +++ b/drivers/edac/edac_mc_sysfs.c @@ -0,0 +1,1024 @@ +/* + * edac_mc kernel module + * (C) 2005-2007 Linux Networx (http://lnxi.com) + * + * This file may be distributed under the terms of the + * GNU General Public License. + * + * Written Doug Thompson <norsk5@xmission.com> www.softwarebitmaker.com + * + */ + +#include <linux/ctype.h> +#include <linux/bug.h> + +#include "edac_core.h" +#include "edac_module.h" + + +/* MC EDAC Controls, setable by module parameter, and sysfs */ +static int edac_mc_log_ue = 1; +static int edac_mc_log_ce = 1; +static int edac_mc_panic_on_ue; +static int edac_mc_poll_msec = 1000; + +/* Getter functions for above */ +int edac_mc_get_log_ue(void) +{ + return edac_mc_log_ue; +} + +int edac_mc_get_log_ce(void) +{ + return edac_mc_log_ce; +} + +int edac_mc_get_panic_on_ue(void) +{ + return edac_mc_panic_on_ue; +} + +/* this is temporary */ +int edac_mc_get_poll_msec(void) +{ + return edac_mc_poll_msec; +} + +/* Parameter declarations for above */ +module_param(edac_mc_panic_on_ue, int, 0644); +MODULE_PARM_DESC(edac_mc_panic_on_ue, "Panic on uncorrected error: 0=off 1=on"); +module_param(edac_mc_log_ue, int, 0644); +MODULE_PARM_DESC(edac_mc_log_ue, + "Log uncorrectable error to console: 0=off 1=on"); +module_param(edac_mc_log_ce, int, 0644); +MODULE_PARM_DESC(edac_mc_log_ce, + "Log correctable error to console: 0=off 1=on"); +module_param(edac_mc_poll_msec, int, 0644); +MODULE_PARM_DESC(edac_mc_poll_msec, "Polling period in milliseconds"); + +/* + * various constants for Memory Controllers + */ +static const char *mem_types[] = { + [MEM_EMPTY] = "Empty", + [MEM_RESERVED] = "Reserved", + [MEM_UNKNOWN] = "Unknown", + [MEM_FPM] = "FPM", + [MEM_EDO] = "EDO", + [MEM_BEDO] = "BEDO", + [MEM_SDR] = "Unbuffered-SDR", + [MEM_RDR] = "Registered-SDR", + [MEM_DDR] = "Unbuffered-DDR", + [MEM_RDDR] = "Registered-DDR", + [MEM_RMBS] = "RMBS", + [MEM_DDR2] = "Unbuffered-DDR2", + [MEM_FB_DDR2] = "FullyBuffered-DDR2", + [MEM_RDDR2] = "Registered-DDR2" +}; + +static const char *dev_types[] = { + [DEV_UNKNOWN] = "Unknown", + [DEV_X1] = "x1", + [DEV_X2] = "x2", + [DEV_X4] = "x4", + [DEV_X8] = "x8", + [DEV_X16] = "x16", + [DEV_X32] = "x32", + [DEV_X64] = "x64" +}; + +static const char *edac_caps[] = { + [EDAC_UNKNOWN] = "Unknown", + [EDAC_NONE] = "None", + [EDAC_RESERVED] = "Reserved", + [EDAC_PARITY] = "PARITY", + [EDAC_EC] = "EC", + [EDAC_SECDED] = "SECDED", + [EDAC_S2ECD2ED] = "S2ECD2ED", + [EDAC_S4ECD4ED] = "S4ECD4ED", + [EDAC_S8ECD8ED] = "S8ECD8ED", + [EDAC_S16ECD16ED] = "S16ECD16ED" +}; + + + +/* + * /sys/devices/system/edac/mc; + * data structures and methods + */ +static ssize_t memctrl_int_show(void *ptr, char *buffer) +{ + int *value = (int *)ptr; + return sprintf(buffer, "%u\n", *value); +} + +static ssize_t memctrl_int_store(void *ptr, const char *buffer, size_t count) +{ + int *value = (int *)ptr; + + if (isdigit(*buffer)) + *value = simple_strtoul(buffer, NULL, 0); + + return count; +} + + +/* EDAC sysfs CSROW data structures and methods + */ + +/* Set of more default csrow<id> attribute show/store functions */ +static ssize_t csrow_ue_count_show(struct csrow_info *csrow, char *data, + int private) +{ + return sprintf(data, "%u\n", csrow->ue_count); +} + +static ssize_t csrow_ce_count_show(struct csrow_info *csrow, char *data, + int private) +{ + return sprintf(data, "%u\n", csrow->ce_count); +} + +static ssize_t csrow_size_show(struct csrow_info *csrow, char *data, + int private) +{ + return sprintf(data, "%u\n", PAGES_TO_MiB(csrow->nr_pages)); +} + +static ssize_t csrow_mem_type_show(struct csrow_info *csrow, char *data, + int private) +{ + return sprintf(data, "%s\n", mem_types[csrow->mtype]); +} + +static ssize_t csrow_dev_type_show(struct csrow_info *csrow, char *data, + int private) +{ + return sprintf(data, "%s\n", dev_types[csrow->dtype]); +} + +static ssize_t csrow_edac_mode_show(struct csrow_info *csrow, char *data, + int private) +{ + return sprintf(data, "%s\n", edac_caps[csrow->edac_mode]); +} + +/* show/store functions for DIMM Label attributes */ +static ssize_t channel_dimm_label_show(struct csrow_info *csrow, + char *data, int channel) +{ + return snprintf(data, EDAC_MC_LABEL_LEN, "%s", + csrow->channels[channel].label); +} + +static ssize_t channel_dimm_label_store(struct csrow_info *csrow, + const char *data, + size_t count, int channel) +{ + ssize_t max_size = 0; + + max_size = min((ssize_t) count, (ssize_t) EDAC_MC_LABEL_LEN - 1); + strncpy(csrow->channels[channel].label, data, max_size); + csrow->channels[channel].label[max_size] = '\0'; + + return max_size; +} + +/* show function for dynamic chX_ce_count attribute */ +static ssize_t channel_ce_count_show(struct csrow_info *csrow, + char *data, int channel) +{ + return sprintf(data, "%u\n", csrow->channels[channel].ce_count); +} + +/* csrow specific attribute structure */ +struct csrowdev_attribute { + struct attribute attr; + ssize_t(*show) (struct csrow_info *, char *, int); + ssize_t(*store) (struct csrow_info *, const char *, size_t, int); + int private; +}; + +#define to_csrow(k) container_of(k, struct csrow_info, kobj) +#define to_csrowdev_attr(a) container_of(a, struct csrowdev_attribute, attr) + +/* Set of show/store higher level functions for default csrow attributes */ +static ssize_t csrowdev_show(struct kobject *kobj, + struct attribute *attr, char *buffer) +{ + struct csrow_info *csrow = to_csrow(kobj); + struct csrowdev_attribute *csrowdev_attr = to_csrowdev_attr(attr); + + if (csrowdev_attr->show) + return csrowdev_attr->show(csrow, + buffer, csrowdev_attr->private); + return -EIO; +} + +static ssize_t csrowdev_store(struct kobject *kobj, struct attribute *attr, + const char *buffer, size_t count) +{ + struct csrow_info *csrow = to_csrow(kobj); + struct csrowdev_attribute *csrowdev_attr = to_csrowdev_attr(attr); + + if (csrowdev_attr->store) + return csrowdev_attr->store(csrow, + buffer, + count, csrowdev_attr->private); + return -EIO; +} + +static struct sysfs_ops csrowfs_ops = { + .show = csrowdev_show, + .store = csrowdev_store +}; + +#define CSROWDEV_ATTR(_name,_mode,_show,_store,_private) \ +static struct csrowdev_attribute attr_##_name = { \ + .attr = {.name = __stringify(_name), .mode = _mode }, \ + .show = _show, \ + .store = _store, \ + .private = _private, \ +}; + +/* default cwrow<id>/attribute files */ +CSROWDEV_ATTR(size_mb, S_IRUGO, csrow_size_show, NULL, 0); +CSROWDEV_ATTR(dev_type, S_IRUGO, csrow_dev_type_show, NULL, 0); +CSROWDEV_ATTR(mem_type, S_IRUGO, csrow_mem_type_show, NULL, 0); +CSROWDEV_ATTR(edac_mode, S_IRUGO, csrow_edac_mode_show, NULL, 0); +CSROWDEV_ATTR(ue_count, S_IRUGO, csrow_ue_count_show, NULL, 0); +CSROWDEV_ATTR(ce_count, S_IRUGO, csrow_ce_count_show, NULL, 0); + +/* default attributes of the CSROW<id> object */ +static struct csrowdev_attribute *default_csrow_attr[] = { + &attr_dev_type, + &attr_mem_type, + &attr_edac_mode, + &attr_size_mb, + &attr_ue_count, + &attr_ce_count, + NULL, +}; + +/* possible dynamic channel DIMM Label attribute files */ +CSROWDEV_ATTR(ch0_dimm_label, S_IRUGO | S_IWUSR, + channel_dimm_label_show, channel_dimm_label_store, 0); +CSROWDEV_ATTR(ch1_dimm_label, S_IRUGO | S_IWUSR, + channel_dimm_label_show, channel_dimm_label_store, 1); +CSROWDEV_ATTR(ch2_dimm_label, S_IRUGO | S_IWUSR, + channel_dimm_label_show, channel_dimm_label_store, 2); +CSROWDEV_ATTR(ch3_dimm_label, S_IRUGO | S_IWUSR, + channel_dimm_label_show, channel_dimm_label_store, 3); +CSROWDEV_ATTR(ch4_dimm_label, S_IRUGO | S_IWUSR, + channel_dimm_label_show, channel_dimm_label_store, 4); +CSROWDEV_ATTR(ch5_dimm_label, S_IRUGO | S_IWUSR, + channel_dimm_label_show, channel_dimm_label_store, 5); + +/* Total possible dynamic DIMM Label attribute file table */ +static struct csrowdev_attribute *dynamic_csrow_dimm_attr[] = { + &attr_ch0_dimm_label, + &attr_ch1_dimm_label, + &attr_ch2_dimm_label, + &attr_ch3_dimm_label, + &attr_ch4_dimm_label, + &attr_ch5_dimm_label +}; + +/* possible dynamic channel ce_count attribute files */ +CSROWDEV_ATTR(ch0_ce_count, S_IRUGO | S_IWUSR, channel_ce_count_show, NULL, 0); +CSROWDEV_ATTR(ch1_ce_count, S_IRUGO | S_IWUSR, channel_ce_count_show, NULL, 1); +CSROWDEV_ATTR(ch2_ce_count, S_IRUGO | S_IWUSR, channel_ce_count_show, NULL, 2); +CSROWDEV_ATTR(ch3_ce_count, S_IRUGO | S_IWUSR, channel_ce_count_show, NULL, 3); +CSROWDEV_ATTR(ch4_ce_count, S_IRUGO | S_IWUSR, channel_ce_count_show, NULL, 4); +CSROWDEV_ATTR(ch5_ce_count, S_IRUGO | S_IWUSR, channel_ce_count_show, NULL, 5); + +/* Total possible dynamic ce_count attribute file table */ +static struct csrowdev_attribute *dynamic_csrow_ce_count_attr[] = { + &attr_ch0_ce_count, + &attr_ch1_ce_count, + &attr_ch2_ce_count, + &attr_ch3_ce_count, + &attr_ch4_ce_count, + &attr_ch5_ce_count +}; + +#define EDAC_NR_CHANNELS 6 + +/* Create dynamic CHANNEL files, indexed by 'chan', under specifed CSROW */ +static int edac_create_channel_files(struct kobject *kobj, int chan) +{ + int err = -ENODEV; + + if (chan >= EDAC_NR_CHANNELS) + return err; + + /* create the DIMM label attribute file */ + err = sysfs_create_file(kobj, + (struct attribute *) + dynamic_csrow_dimm_attr[chan]); + + if (!err) { + /* create the CE Count attribute file */ + err = sysfs_create_file(kobj, + (struct attribute *) + dynamic_csrow_ce_count_attr[chan]); + } else { + debugf1("%s() dimm labels and ce_count files created", + __func__); + } + + return err; +} + +/* No memory to release for this kobj */ +static void edac_csrow_instance_release(struct kobject *kobj) +{ + struct mem_ctl_info *mci; + struct csrow_info *cs; + + debugf1("%s()\n", __func__); + + cs = container_of(kobj, struct csrow_info, kobj); + mci = cs->mci; + + kobject_put(&mci->edac_mci_kobj); +} + +/* the kobj_type instance for a CSROW */ +static struct kobj_type ktype_csrow = { + .release = edac_csrow_instance_release, + .sysfs_ops = &csrowfs_ops, + .default_attrs = (struct attribute **)default_csrow_attr, +}; + +/* Create a CSROW object under specifed edac_mc_device */ +static int edac_create_csrow_object(struct mem_ctl_info *mci, + struct csrow_info *csrow, int index) +{ + struct kobject *kobj_mci = &mci->edac_mci_kobj; + struct kobject *kobj; + int chan; + int err; + + /* generate ..../edac/mc/mc<id>/csrow<index> */ + memset(&csrow->kobj, 0, sizeof(csrow->kobj)); + csrow->mci = mci; /* include container up link */ + csrow->kobj.parent = kobj_mci; + csrow->kobj.ktype = &ktype_csrow; + + /* name this instance of csrow<id> */ + err = kobject_set_name(&csrow->kobj, "csrow%d", index); + if (err) + goto err_out; + + /* bump the mci instance's kobject's ref count */ + kobj = kobject_get(&mci->edac_mci_kobj); + if (!kobj) { + err = -ENODEV; + goto err_out; + } + + /* Instanstiate the csrow object */ + err = kobject_register(&csrow->kobj); + if (err) + goto err_release_top_kobj; + + /* At this point, to release a csrow kobj, one must + * call the kobject_unregister and allow that tear down + * to work the releasing + */ + + /* Create the dyanmic attribute files on this csrow, + * namely, the DIMM labels and the channel ce_count + */ + for (chan = 0; chan < csrow->nr_channels; chan++) { + err = edac_create_channel_files(&csrow->kobj, chan); + if (err) { + /* special case the unregister here */ + kobject_unregister(&csrow->kobj); + goto err_out; + } + } + + return 0; + + /* error unwind stack */ +err_release_top_kobj: + kobject_put(&mci->edac_mci_kobj); + +err_out: + return err; +} + +/* default sysfs methods and data structures for the main MCI kobject */ + +static ssize_t mci_reset_counters_store(struct mem_ctl_info *mci, + const char *data, size_t count) +{ + int row, chan; + + mci->ue_noinfo_count = 0; + mci->ce_noinfo_count = 0; + mci->ue_count = 0; + mci->ce_count = 0; + + for (row = 0; row < mci->nr_csrows; row++) { + struct csrow_info *ri = &mci->csrows[row]; + + ri->ue_count = 0; + ri->ce_count = 0; + + for (chan = 0; chan < ri->nr_channels; chan++) + ri->channels[chan].ce_count = 0; + } + + mci->start_time = jiffies; + return count; +} + +/* memory scrubbing */ +static ssize_t mci_sdram_scrub_rate_store(struct mem_ctl_info *mci, + const char *data, size_t count) +{ + u32 bandwidth = -1; + + if (mci->set_sdram_scrub_rate) { + + memctrl_int_store(&bandwidth, data, count); + + if (!(*mci->set_sdram_scrub_rate) (mci, &bandwidth)) { + edac_printk(KERN_DEBUG, EDAC_MC, + "Scrub rate set successfully, applied: %d\n", + bandwidth); + } else { + /* FIXME: error codes maybe? */ + edac_printk(KERN_DEBUG, EDAC_MC, + "Scrub rate set FAILED, could not apply: %d\n", + bandwidth); + } + } else { + /* FIXME: produce "not implemented" ERROR for user-side. */ + edac_printk(KERN_WARNING, EDAC_MC, + "Memory scrubbing 'set'control is not implemented!\n"); + } + return count; +} + +static ssize_t mci_sdram_scrub_rate_show(struct mem_ctl_info *mci, char *data) +{ + u32 bandwidth = -1; + + if (mci->get_sdram_scrub_rate) { + if (!(*mci->get_sdram_scrub_rate) (mci, &bandwidth)) { + edac_printk(KERN_DEBUG, EDAC_MC, + "Scrub rate successfully, fetched: %d\n", + bandwidth); + } else { + /* FIXME: error codes maybe? */ + edac_printk(KERN_DEBUG, EDAC_MC, + "Scrub rate fetch FAILED, got: %d\n", + bandwidth); + } + } else { + /* FIXME: produce "not implemented" ERROR for user-side. */ + edac_printk(KERN_WARNING, EDAC_MC, + "Memory scrubbing 'get' control is not implemented\n"); + } + return sprintf(data, "%d\n", bandwidth); +} + +/* default attribute files for the MCI object */ +static ssize_t mci_ue_count_show(struct mem_ctl_info *mci, char *data) +{ + return sprintf(data, "%d\n", mci->ue_count); +} + +static ssize_t mci_ce_count_show(struct mem_ctl_info *mci, char *data) +{ + return sprintf(data, "%d\n", mci->ce_count); +} + +static ssize_t mci_ce_noinfo_show(struct mem_ctl_info *mci, char *data) +{ + return sprintf(data, "%d\n", mci->ce_noinfo_count); +} + +static ssize_t mci_ue_noinfo_show(struct mem_ctl_info *mci, char *data) +{ + return sprintf(data, "%d\n", mci->ue_noinfo_count); +} + +static ssize_t mci_seconds_show(struct mem_ctl_info *mci, char *data) +{ + return sprintf(data, "%ld\n", (jiffies - mci->start_time) / HZ); +} + +static ssize_t mci_ctl_name_show(struct mem_ctl_info *mci, char *data) +{ + return sprintf(data, "%s\n", mci->ctl_name); +} + +static ssize_t mci_size_mb_show(struct mem_ctl_info *mci, char *data) +{ + int total_pages, csrow_idx; + + for (total_pages = csrow_idx = 0; csrow_idx < mci->nr_csrows; + csrow_idx++) { + struct csrow_info *csrow = &mci->csrows[csrow_idx]; + + if (!csrow->nr_pages) + continue; + + total_pages += csrow->nr_pages; + } + + return sprintf(data, "%u\n", PAGES_TO_MiB(total_pages)); +} + +#define to_mci(k) container_of(k, struct mem_ctl_info, edac_mci_kobj) +#define to_mcidev_attr(a) container_of(a,struct mcidev_sysfs_attribute,attr) + +/* MCI show/store functions for top most object */ +static ssize_t mcidev_show(struct kobject *kobj, struct attribute *attr, + char *buffer) +{ + struct mem_ctl_info *mem_ctl_info = to_mci(kobj); + struct mcidev_sysfs_attribute *mcidev_attr = to_mcidev_attr(attr); + + if (mcidev_attr->show) + return mcidev_attr->show(mem_ctl_info, buffer); + + return -EIO; +} + +static ssize_t mcidev_store(struct kobject *kobj, struct attribute *attr, + const char *buffer, size_t count) +{ + struct mem_ctl_info *mem_ctl_info = to_mci(kobj); + struct mcidev_sysfs_attribute *mcidev_attr = to_mcidev_attr(attr); + + if (mcidev_attr->store) + return mcidev_attr->store(mem_ctl_info, buffer, count); + + return -EIO; +} + +/* Intermediate show/store table */ +static struct sysfs_ops mci_ops = { + .show = mcidev_show, + .store = mcidev_store +}; + +#define MCIDEV_ATTR(_name,_mode,_show,_store) \ +static struct mcidev_sysfs_attribute mci_attr_##_name = { \ + .attr = {.name = __stringify(_name), .mode = _mode }, \ + .show = _show, \ + .store = _store, \ +}; + +/* default Control file */ +MCIDEV_ATTR(reset_counters, S_IWUSR, NULL, mci_reset_counters_store); + +/* default Attribute files */ +MCIDEV_ATTR(mc_name, S_IRUGO, mci_ctl_name_show, NULL); +MCIDEV_ATTR(size_mb, S_IRUGO, mci_size_mb_show, NULL); +MCIDEV_ATTR(seconds_since_reset, S_IRUGO, mci_seconds_show, NULL); +MCIDEV_ATTR(ue_noinfo_count, S_IRUGO, mci_ue_noinfo_show, NULL); +MCIDEV_ATTR(ce_noinfo_count, S_IRUGO, mci_ce_noinfo_show, NULL); +MCIDEV_ATTR(ue_count, S_IRUGO, mci_ue_count_show, NULL); +MCIDEV_ATTR(ce_count, S_IRUGO, mci_ce_count_show, NULL); + +/* memory scrubber attribute file */ +MCIDEV_ATTR(sdram_scrub_rate, S_IRUGO | S_IWUSR, mci_sdram_scrub_rate_show, + mci_sdram_scrub_rate_store); + +static struct mcidev_sysfs_attribute *mci_attr[] = { + &mci_attr_reset_counters, + &mci_attr_mc_name, + &mci_attr_size_mb, + &mci_attr_seconds_since_reset, + &mci_attr_ue_noinfo_count, + &mci_attr_ce_noinfo_count, + &mci_attr_ue_count, + &mci_attr_ce_count, + &mci_attr_sdram_scrub_rate, + NULL +}; + + +/* + * Release of a MC controlling instance + * + * each MC control instance has the following resources upon entry: + * a) a ref count on the top memctl kobj + * b) a ref count on this module + * + * this function must decrement those ref counts and then + * issue a free on the instance's memory + */ +static void edac_mci_control_release(struct kobject *kobj) +{ + struct mem_ctl_info *mci; + + mci = to_mci(kobj); + + debugf0("%s() mci instance idx=%d releasing\n", __func__, mci->mc_idx); + + /* decrement the module ref count */ + module_put(mci->owner); + + /* free the mci instance memory here */ + kfree(mci); +} + +static struct kobj_type ktype_mci = { + .release = edac_mci_control_release, + .sysfs_ops = &mci_ops, + .default_attrs = (struct attribute **)mci_attr, +}; + +/* show/store, tables, etc for the MC kset */ + + +struct memctrl_dev_attribute { + struct attribute attr; + void *value; + ssize_t(*show) (void *, char *); + ssize_t(*store) (void *, const char *, size_t); +}; + +/* Set of show/store abstract level functions for memory control object */ +static ssize_t memctrl_dev_show(struct kobject *kobj, + struct attribute *attr, char *buffer) +{ + struct memctrl_dev_attribute *memctrl_dev; + memctrl_dev = (struct memctrl_dev_attribute *)attr; + + if (memctrl_dev->show) + return memctrl_dev->show(memctrl_dev->value, buffer); + + return -EIO; +} + +static ssize_t memctrl_dev_store(struct kobject *kobj, struct attribute *attr, + const char *buffer, size_t count) +{ + struct memctrl_dev_attribute *memctrl_dev; + memctrl_dev = (struct memctrl_dev_attribute *)attr; + + if (memctrl_dev->store) + return memctrl_dev->store(memctrl_dev->value, buffer, count); + + return -EIO; +} + +static struct sysfs_ops memctrlfs_ops = { + .show = memctrl_dev_show, + .store = memctrl_dev_store +}; + +#define MEMCTRL_ATTR(_name, _mode, _show, _store) \ +static struct memctrl_dev_attribute attr_##_name = { \ + .attr = {.name = __stringify(_name), .mode = _mode }, \ + .value = &_name, \ + .show = _show, \ + .store = _store, \ +}; + +#define MEMCTRL_STRING_ATTR(_name, _data, _mode, _show, _store) \ +static struct memctrl_dev_attribute attr_##_name = { \ + .attr = {.name = __stringify(_name), .mode = _mode }, \ + .value = _data, \ + .show = _show, \ + .store = _store, \ +}; + +/* csrow<id> control files */ +MEMCTRL_ATTR(edac_mc_panic_on_ue, + S_IRUGO | S_IWUSR, memctrl_int_show, memctrl_int_store); + +MEMCTRL_ATTR(edac_mc_log_ue, + S_IRUGO | S_IWUSR, memctrl_int_show, memctrl_int_store); + +MEMCTRL_ATTR(edac_mc_log_ce, + S_IRUGO | S_IWUSR, memctrl_int_show, memctrl_int_store); + +MEMCTRL_ATTR(edac_mc_poll_msec, + S_IRUGO | S_IWUSR, memctrl_int_show, memctrl_int_store); + +/* Base Attributes of the memory ECC object */ +static struct memctrl_dev_attribute *memctrl_attr[] = { + &attr_edac_mc_panic_on_ue, + &attr_edac_mc_log_ue, + &attr_edac_mc_log_ce, + &attr_edac_mc_poll_msec, + NULL, +}; + + +/* the ktype for the mc_kset internal kobj */ +static struct kobj_type ktype_mc_set_attribs = { + .sysfs_ops = &memctrlfs_ops, + .default_attrs = (struct attribute **)memctrl_attr, +}; + +/* EDAC memory controller sysfs kset: + * /sys/devices/system/edac/mc + */ +static struct kset mc_kset = { + .kobj = {.name = "mc", .ktype = &ktype_mc_set_attribs }, + .ktype = &ktype_mci, +}; + + +/* + * edac_mc_register_sysfs_main_kobj + * + * setups and registers the main kobject for each mci + */ +int edac_mc_register_sysfs_main_kobj(struct mem_ctl_info *mci) +{ + struct kobject *kobj_mci; + int err; + + debugf1("%s()\n", __func__); + + kobj_mci = &mci->edac_mci_kobj; + + /* Init the mci's kobject */ + memset(kobj_mci, 0, sizeof(*kobj_mci)); + + /* this instance become part of the mc_kset */ + kobj_mci->kset = &mc_kset; + + /* set the name of the mc<id> object */ + err = kobject_set_name(kobj_mci, "mc%d", mci->mc_idx); + if (err) + goto fail_out; + + /* Record which module 'owns' this control structure + * and bump the ref count of the module + */ + mci->owner = THIS_MODULE; + + /* bump ref count on this module */ + if (!try_module_get(mci->owner)) { + err = -ENODEV; + goto fail_out; + } + + /* register the mc<id> kobject to the mc_kset */ + err = kobject_register(kobj_mci); + if (err) { + debugf1("%s()Failed to register '.../edac/mc%d'\n", + __func__, mci->mc_idx); + goto kobj_reg_fail; + } + + /* At this point, to 'free' the control struct, + * edac_mc_unregister_sysfs_main_kobj() must be used + */ + + debugf1("%s() Registered '.../edac/mc%d' kobject\n", + __func__, mci->mc_idx); + + return 0; + + /* Error exit stack */ + +kobj_reg_fail: + module_put(mci->owner); + +fail_out: + return err; +} + +/* + * edac_mc_register_sysfs_main_kobj + * + * tears down and the main mci kobject from the mc_kset + */ +void edac_mc_unregister_sysfs_main_kobj(struct mem_ctl_info *mci) +{ + /* delete the kobj from the mc_kset */ + kobject_unregister(&mci->edac_mci_kobj); +} + +#define EDAC_DEVICE_SYMLINK "device" + +/* + * edac_create_mci_instance_attributes + * create MC driver specific attributes at the topmost level + * directory of this mci instance. + */ +static int edac_create_mci_instance_attributes(struct mem_ctl_info *mci) +{ + int err; + struct mcidev_sysfs_attribute *sysfs_attrib; + + /* point to the start of the array and iterate over it + * adding each attribute listed to this mci instance's kobject + */ + sysfs_attrib = mci->mc_driver_sysfs_attributes; + + while (sysfs_attrib && sysfs_attrib->attr.name) { + err = sysfs_create_file(&mci->edac_mci_kobj, + (struct attribute*) sysfs_attrib); + if (err) { + return err; + } + + sysfs_attrib++; + } + + return 0; +} + +/* + * edac_remove_mci_instance_attributes + * remove MC driver specific attributes at the topmost level + * directory of this mci instance. + */ +static void edac_remove_mci_instance_attributes(struct mem_ctl_info *mci) +{ + struct mcidev_sysfs_attribute *sysfs_attrib; + + /* point to the start of the array and iterate over it + * adding each attribute listed to this mci instance's kobject + */ + sysfs_attrib = mci->mc_driver_sysfs_attributes; + + /* loop if there are attributes and until we hit a NULL entry */ + while (sysfs_attrib && sysfs_attrib->attr.name) { + sysfs_remove_file(&mci->edac_mci_kobj, + (struct attribute *) sysfs_attrib); + sysfs_attrib++; + } +} + + +/* + * Create a new Memory Controller kobject instance, + * mc<id> under the 'mc' directory + * + * Return: + * 0 Success + * !0 Failure + */ +int edac_create_sysfs_mci_device(struct mem_ctl_info *mci) +{ + int i; + int err; + struct csrow_info *csrow; + struct kobject *kobj_mci = &mci->edac_mci_kobj; + + debugf0("%s() idx=%d\n", __func__, mci->mc_idx); + + /* create a symlink for the device */ + err = sysfs_create_link(kobj_mci, &mci->dev->kobj, + EDAC_DEVICE_SYMLINK); + if (err) { + debugf1("%s() failure to create symlink\n", __func__); + goto fail0; + } + + /* If the low level driver desires some attributes, + * then create them now for the driver. + */ + if (mci->mc_driver_sysfs_attributes) { + err = edac_create_mci_instance_attributes(mci); + if (err) { + debugf1("%s() failure to create mci attributes\n", + __func__); + goto fail0; + } + } + + /* Make directories for each CSROW object under the mc<id> kobject + */ + for (i = 0; i < mci->nr_csrows; i++) { + csrow = &mci->csrows[i]; + + /* Only expose populated CSROWs */ + if (csrow->nr_pages > 0) { + err = edac_create_csrow_object(mci, csrow, i); + if (err) { + debugf1("%s() failure: create csrow %d obj\n", + __func__, i); + goto fail1; + } + } + } + + return 0; + + /* CSROW error: backout what has already been registered, */ +fail1: + for (i--; i >= 0; i--) { + if (csrow->nr_pages > 0) { + kobject_unregister(&mci->csrows[i].kobj); + } + } + + /* remove the mci instance's attributes, if any */ + edac_remove_mci_instance_attributes(mci); + + /* remove the symlink */ + sysfs_remove_link(kobj_mci, EDAC_DEVICE_SYMLINK); + +fail0: + return err; +} + +/* + * remove a Memory Controller instance + */ +void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci) +{ + int i; + + debugf0("%s()\n", __func__); + + /* remove all csrow kobjects */ + for (i = 0; i < mci->nr_csrows; i++) { + if (mci->csrows[i].nr_pages > 0) { + debugf0("%s() unreg csrow-%d\n", __func__, i); + kobject_unregister(&mci->csrows[i].kobj); + } + } + + debugf0("%s() remove_link\n", __func__); + + /* remove the symlink */ + sysfs_remove_link(&mci->edac_mci_kobj, EDAC_DEVICE_SYMLINK); + + debugf0("%s() remove_mci_instance\n", __func__); + + /* remove this mci instance's attribtes */ + edac_remove_mci_instance_attributes(mci); + + debugf0("%s() unregister this mci kobj\n", __func__); + + /* unregister this instance's kobject */ + kobject_unregister(&mci->edac_mci_kobj); +} + + + + +/* + * edac_setup_sysfs_mc_kset(void) + * + * Initialize the mc_kset for the 'mc' entry + * This requires creating the top 'mc' directory with a kset + * and its controls/attributes. + * + * To this 'mc' kset, instance 'mci' will be grouped as children. + * + * Return: 0 SUCCESS + * !0 FAILURE error code + */ +int edac_sysfs_setup_mc_kset(void) +{ + int err = 0; + struct sysdev_class *edac_class; + + debugf1("%s()\n", __func__); + + /* get the /sys/devices/system/edac class reference */ + edac_class = edac_get_edac_class(); + if (edac_class == NULL) { + debugf1("%s() no edac_class error=%d\n", __func__, err); + goto fail_out; + } + + /* Init the MC's kobject */ + mc_kset.kobj.parent = &edac_class->kset.kobj; + + /* register the mc_kset */ + err = kset_register(&mc_kset); + if (err) { + debugf1("%s() Failed to register '.../edac/mc'\n", __func__); + goto fail_out; + } + + debugf1("%s() Registered '.../edac/mc' kobject\n", __func__); + + return 0; + + + /* error unwind stack */ +fail_out: + return err; +} + +/* + * edac_sysfs_teardown_mc_kset + * + * deconstruct the mc_ket for memory controllers + */ +void edac_sysfs_teardown_mc_kset(void) +{ + kset_unregister(&mc_kset); +} + diff --git a/drivers/edac/edac_module.c b/drivers/edac/edac_module.c new file mode 100644 index 000000000000..e0c4a4086055 --- /dev/null +++ b/drivers/edac/edac_module.c @@ -0,0 +1,222 @@ +/* + * edac_module.c + * + * (C) 2007 www.softwarebitmaker.com + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + * + * Author: Doug Thompson <dougthompson@xmission.com> + * + */ +#include <linux/edac.h> + +#include "edac_core.h" +#include "edac_module.h" + +#define EDAC_VERSION "Ver: 2.1.0 " __DATE__ + +#ifdef CONFIG_EDAC_DEBUG +/* Values of 0 to 4 will generate output */ +int edac_debug_level = 2; +EXPORT_SYMBOL_GPL(edac_debug_level); +#endif + +/* scope is to module level only */ +struct workqueue_struct *edac_workqueue; + +/* + * sysfs object: /sys/devices/system/edac + * need to export to other files in this modules + */ +static struct sysdev_class edac_class = { + set_kset_name("edac"), +}; +static int edac_class_valid; + +/* + * edac_op_state_to_string() + */ +char *edac_op_state_to_string(int opstate) +{ + if (opstate == OP_RUNNING_POLL) + return "POLLED"; + else if (opstate == OP_RUNNING_INTERRUPT) + return "INTERRUPT"; + else if (opstate == OP_RUNNING_POLL_INTR) + return "POLL-INTR"; + else if (opstate == OP_ALLOC) + return "ALLOC"; + else if (opstate == OP_OFFLINE) + return "OFFLINE"; + + return "UNKNOWN"; +} + +/* + * edac_get_edac_class() + * + * return pointer to the edac class of 'edac' + */ +struct sysdev_class *edac_get_edac_class(void) +{ + struct sysdev_class *classptr = NULL; + + if (edac_class_valid) + classptr = &edac_class; + + return classptr; +} + +/* + * edac_register_sysfs_edac_name() + * + * register the 'edac' into /sys/devices/system + * + * return: + * 0 success + * !0 error + */ +static int edac_register_sysfs_edac_name(void) +{ + int err; + + /* create the /sys/devices/system/edac directory */ + err = sysdev_class_register(&edac_class); + + if (err) { + debugf1("%s() error=%d\n", __func__, err); + return err; + } + + edac_class_valid = 1; + return 0; +} + +/* + * sysdev_class_unregister() + * + * unregister the 'edac' from /sys/devices/system + */ +static void edac_unregister_sysfs_edac_name(void) +{ + /* only if currently registered, then unregister it */ + if (edac_class_valid) + sysdev_class_unregister(&edac_class); + + edac_class_valid = 0; +} + +/* + * edac_workqueue_setup + * initialize the edac work queue for polling operations + */ +static int edac_workqueue_setup(void) +{ + edac_workqueue = create_singlethread_workqueue("edac-poller"); + if (edac_workqueue == NULL) + return -ENODEV; + else + return 0; +} + +/* + * edac_workqueue_teardown + * teardown the edac workqueue + */ +static void edac_workqueue_teardown(void) +{ + if (edac_workqueue) { + flush_workqueue(edac_workqueue); + destroy_workqueue(edac_workqueue); + edac_workqueue = NULL; + } +} + +/* + * edac_init + * module initialization entry point + */ +static int __init edac_init(void) +{ + int err = 0; + + edac_printk(KERN_INFO, EDAC_MC, EDAC_VERSION "\n"); + + /* + * Harvest and clear any boot/initialization PCI parity errors + * + * FIXME: This only clears errors logged by devices present at time of + * module initialization. We should also do an initial clear + * of each newly hotplugged device. + */ + edac_pci_clear_parity_errors(); + + /* + * perform the registration of the /sys/devices/system/edac class object + */ + if (edac_register_sysfs_edac_name()) { + edac_printk(KERN_ERR, EDAC_MC, + "Error initializing 'edac' kobject\n"); + err = -ENODEV; + goto error; + } + + /* + * now set up the mc_kset under the edac class object + */ + err = edac_sysfs_setup_mc_kset(); + if (err) + goto sysfs_setup_fail; + + /* Setup/Initialize the workq for this core */ + err = edac_workqueue_setup(); + if (err) { + edac_printk(KERN_ERR, EDAC_MC, "init WorkQueue failure\n"); + goto workq_fail; + } + + return 0; + + /* Error teardown stack */ +workq_fail: + edac_sysfs_teardown_mc_kset(); + +sysfs_setup_fail: + edac_unregister_sysfs_edac_name(); + +error: + return err; +} + +/* + * edac_exit() + * module exit/termination function + */ +static void __exit edac_exit(void) +{ + debugf0("%s()\n", __func__); + + /* tear down the various subsystems */ + edac_workqueue_teardown(); + edac_sysfs_teardown_mc_kset(); + edac_unregister_sysfs_edac_name(); +} + +/* + * Inform the kernel of our entry and exit points + */ +module_init(edac_init); +module_exit(edac_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Doug Thompson www.softwarebitmaker.com, et al"); +MODULE_DESCRIPTION("Core library routines for EDAC reporting"); + +/* refer to *_sysfs.c files for parameters that are exported via sysfs */ + +#ifdef CONFIG_EDAC_DEBUG +module_param(edac_debug_level, int, 0644); +MODULE_PARM_DESC(edac_debug_level, "Debug level"); +#endif diff --git a/drivers/edac/edac_module.h b/drivers/edac/edac_module.h new file mode 100644 index 000000000000..a2134dfc3cc6 --- /dev/null +++ b/drivers/edac/edac_module.h @@ -0,0 +1,77 @@ + +/* + * edac_module.h + * + * For defining functions/data for within the EDAC_CORE module only + * + * written by doug thompson <norsk5@xmission.h> + */ + +#ifndef __EDAC_MODULE_H__ +#define __EDAC_MODULE_H__ + +#include <linux/sysdev.h> + +#include "edac_core.h" + +/* + * INTERNAL EDAC MODULE: + * EDAC memory controller sysfs create/remove functions + * and setup/teardown functions + * + * edac_mc objects + */ +extern int edac_sysfs_setup_mc_kset(void); +extern void edac_sysfs_teardown_mc_kset(void); +extern int edac_mc_register_sysfs_main_kobj(struct mem_ctl_info *mci); +extern void edac_mc_unregister_sysfs_main_kobj(struct mem_ctl_info *mci); +extern int edac_create_sysfs_mci_device(struct mem_ctl_info *mci); +extern void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci); +extern void edac_check_mc_devices(void); +extern int edac_get_log_ue(void); +extern int edac_get_log_ce(void); +extern int edac_get_panic_on_ue(void); +extern int edac_mc_get_log_ue(void); +extern int edac_mc_get_log_ce(void); +extern int edac_mc_get_panic_on_ue(void); +extern int edac_get_poll_msec(void); +extern int edac_mc_get_poll_msec(void); + +extern int edac_device_register_sysfs_main_kobj( + struct edac_device_ctl_info *edac_dev); +extern void edac_device_unregister_sysfs_main_kobj( + struct edac_device_ctl_info *edac_dev); +extern int edac_device_create_sysfs(struct edac_device_ctl_info *edac_dev); +extern void edac_device_remove_sysfs(struct edac_device_ctl_info *edac_dev); +extern struct sysdev_class *edac_get_edac_class(void); + +/* edac core workqueue: single CPU mode */ +extern struct workqueue_struct *edac_workqueue; +extern void edac_device_workq_setup(struct edac_device_ctl_info *edac_dev, + unsigned msec); +extern void edac_device_workq_teardown(struct edac_device_ctl_info *edac_dev); +extern void edac_device_reset_delay_period(struct edac_device_ctl_info + *edac_dev, unsigned long value); +extern void *edac_align_ptr(void *ptr, unsigned size); + +/* + * EDAC PCI functions + */ +#ifdef CONFIG_PCI +extern void edac_pci_do_parity_check(void); +extern void edac_pci_clear_parity_errors(void); +extern int edac_sysfs_pci_setup(void); +extern void edac_sysfs_pci_teardown(void); +extern int edac_pci_get_check_errors(void); +extern int edac_pci_get_poll_msec(void); +#else /* CONFIG_PCI */ +/* pre-process these away */ +#define edac_pci_do_parity_check() +#define edac_pci_clear_parity_errors() +#define edac_sysfs_pci_setup() (0) +#define edac_sysfs_pci_teardown() +#define edac_pci_get_check_errors() +#define edac_pci_get_poll_msec() +#endif /* CONFIG_PCI */ + +#endif /* __EDAC_MODULE_H__ */ diff --git a/drivers/edac/edac_pci.c b/drivers/edac/edac_pci.c new file mode 100644 index 000000000000..d9cd5e048cee --- /dev/null +++ b/drivers/edac/edac_pci.c @@ -0,0 +1,433 @@ +/* + * EDAC PCI component + * + * Author: Dave Jiang <djiang@mvista.com> + * + * 2007 (c) MontaVista Software, Inc. This file is licensed under + * the terms of the GNU General Public License version 2. This program + * is licensed "as is" without any warranty of any kind, whether express + * or implied. + * + */ +#include <linux/module.h> +#include <linux/types.h> +#include <linux/smp.h> +#include <linux/init.h> +#include <linux/sysctl.h> +#include <linux/highmem.h> +#include <linux/timer.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/list.h> +#include <linux/sysdev.h> +#include <linux/ctype.h> +#include <linux/workqueue.h> +#include <asm/uaccess.h> +#include <asm/page.h> + +#include "edac_core.h" +#include "edac_module.h" + +static DEFINE_MUTEX(edac_pci_ctls_mutex); +static struct list_head edac_pci_list = LIST_HEAD_INIT(edac_pci_list); + +static inline void edac_lock_pci_list(void) +{ + mutex_lock(&edac_pci_ctls_mutex); +} + +static inline void edac_unlock_pci_list(void) +{ + mutex_unlock(&edac_pci_ctls_mutex); +} + +/* + * The alloc() and free() functions for the 'edac_pci' control info + * structure. The chip driver will allocate one of these for each + * edac_pci it is going to control/register with the EDAC CORE. + */ +struct edac_pci_ctl_info *edac_pci_alloc_ctl_info(unsigned int sz_pvt, + const char *edac_pci_name) +{ + struct edac_pci_ctl_info *pci; + void *pvt; + unsigned int size; + + pci = (struct edac_pci_ctl_info *)0; + pvt = edac_align_ptr(&pci[1], sz_pvt); + size = ((unsigned long)pvt) + sz_pvt; + + if ((pci = kzalloc(size, GFP_KERNEL)) == NULL) + return NULL; + + pvt = sz_pvt ? ((char *)pci) + ((unsigned long)pvt) : NULL; + + pci->pvt_info = pvt; + + pci->op_state = OP_ALLOC; + + snprintf(pci->name, strlen(edac_pci_name) + 1, "%s", edac_pci_name); + + return pci; +} + +EXPORT_SYMBOL_GPL(edac_pci_alloc_ctl_info); + +/* + * edac_pci_free_ctl_info() + * frees the memory allocated by edac_pci_alloc_ctl_info() function + */ +void edac_pci_free_ctl_info(struct edac_pci_ctl_info *pci) +{ + kfree(pci); +} + +EXPORT_SYMBOL_GPL(edac_pci_free_ctl_info); + +/* + * find_edac_pci_by_dev() + * scans the edac_pci list for a specific 'struct device *' + */ +static struct edac_pci_ctl_info *find_edac_pci_by_dev(struct device *dev) +{ + struct edac_pci_ctl_info *pci; + struct list_head *item; + + debugf3("%s()\n", __func__); + + list_for_each(item, &edac_pci_list) { + pci = list_entry(item, struct edac_pci_ctl_info, link); + + if (pci->dev == dev) + return pci; + } + + return NULL; +} + +/* + * add_edac_pci_to_global_list + * Before calling this function, caller must assign a unique value to + * edac_dev->pci_idx. + * Return: + * 0 on success + * 1 on failure + */ +static int add_edac_pci_to_global_list(struct edac_pci_ctl_info *pci) +{ + struct list_head *item, *insert_before; + struct edac_pci_ctl_info *rover; + + insert_before = &edac_pci_list; + + /* Determine if already on the list */ + if (unlikely((rover = find_edac_pci_by_dev(pci->dev)) != NULL)) + goto fail0; + + /* Insert in ascending order by 'pci_idx', so find position */ + list_for_each(item, &edac_pci_list) { + rover = list_entry(item, struct edac_pci_ctl_info, link); + + if (rover->pci_idx >= pci->pci_idx) { + if (unlikely(rover->pci_idx == pci->pci_idx)) + goto fail1; + + insert_before = item; + break; + } + } + + list_add_tail_rcu(&pci->link, insert_before); + return 0; + +fail0: + edac_printk(KERN_WARNING, EDAC_PCI, + "%s (%s) %s %s already assigned %d\n", + rover->dev->bus_id, dev_name(rover), + rover->mod_name, rover->ctl_name, rover->pci_idx); + return 1; + +fail1: + edac_printk(KERN_WARNING, EDAC_PCI, + "but in low-level driver: attempt to assign\n" + "\tduplicate pci_idx %d in %s()\n", rover->pci_idx, + __func__); + return 1; +} + +/* + * complete_edac_pci_list_del + */ +static void complete_edac_pci_list_del(struct rcu_head *head) +{ + struct edac_pci_ctl_info *pci; + + pci = container_of(head, struct edac_pci_ctl_info, rcu); + INIT_LIST_HEAD(&pci->link); + complete(&pci->complete); +} + +/* + * del_edac_pci_from_global_list + */ +static void del_edac_pci_from_global_list(struct edac_pci_ctl_info *pci) +{ + list_del_rcu(&pci->link); + init_completion(&pci->complete); + call_rcu(&pci->rcu, complete_edac_pci_list_del); + wait_for_completion(&pci->complete); +} + +/* + * edac_pci_find() + * Search for an edac_pci_ctl_info structure whose index is 'idx' + * + * If found, return a pointer to the structure + * Else return NULL. + * + * Caller must hold pci_ctls_mutex. + */ +struct edac_pci_ctl_info *edac_pci_find(int idx) +{ + struct list_head *item; + struct edac_pci_ctl_info *pci; + + /* Iterage over list, looking for exact match of ID */ + list_for_each(item, &edac_pci_list) { + pci = list_entry(item, struct edac_pci_ctl_info, link); + + if (pci->pci_idx >= idx) { + if (pci->pci_idx == idx) + return pci; + + /* not on list, so terminate early */ + break; + } + } + + return NULL; +} + +EXPORT_SYMBOL_GPL(edac_pci_find); + +/* + * edac_pci_workq_function() + * performs the operation scheduled by a workq request + */ +static void edac_pci_workq_function(struct work_struct *work_req) +{ + struct delayed_work *d_work = (struct delayed_work *)work_req; + struct edac_pci_ctl_info *pci = to_edac_pci_ctl_work(d_work); + + edac_lock_pci_list(); + + if ((pci->op_state == OP_RUNNING_POLL) && + (pci->edac_check != NULL) && (edac_pci_get_check_errors())) + pci->edac_check(pci); + + edac_unlock_pci_list(); + + /* Reschedule */ + queue_delayed_work(edac_workqueue, &pci->work, + msecs_to_jiffies(edac_pci_get_poll_msec())); +} + +/* + * edac_pci_workq_setup() + * initialize a workq item for this edac_pci instance + * passing in the new delay period in msec + */ +static void edac_pci_workq_setup(struct edac_pci_ctl_info *pci, + unsigned int msec) +{ + debugf0("%s()\n", __func__); + + INIT_DELAYED_WORK(&pci->work, edac_pci_workq_function); + queue_delayed_work(edac_workqueue, &pci->work, + msecs_to_jiffies(edac_pci_get_poll_msec())); +} + +/* + * edac_pci_workq_teardown() + * stop the workq processing on this edac_pci instance + */ +static void edac_pci_workq_teardown(struct edac_pci_ctl_info *pci) +{ + int status; + + status = cancel_delayed_work(&pci->work); + if (status == 0) + flush_workqueue(edac_workqueue); +} + +/* + * edac_pci_reset_delay_period + */ +void edac_pci_reset_delay_period(struct edac_pci_ctl_info *pci, + unsigned long value) +{ + edac_lock_pci_list(); + + edac_pci_workq_teardown(pci); + + edac_pci_workq_setup(pci, value); + + edac_unlock_pci_list(); +} + +EXPORT_SYMBOL_GPL(edac_pci_reset_delay_period); + +/* + * edac_pci_add_device: Insert the 'edac_dev' structure into the + * edac_pci global list and create sysfs entries associated with + * edac_pci structure. + * @pci: pointer to the edac_device structure to be added to the list + * @edac_idx: A unique numeric identifier to be assigned to the + * 'edac_pci' structure. + * + * Return: + * 0 Success + * !0 Failure + */ +int edac_pci_add_device(struct edac_pci_ctl_info *pci, int edac_idx) +{ + debugf0("%s()\n", __func__); + + pci->pci_idx = edac_idx; + + edac_lock_pci_list(); + + if (add_edac_pci_to_global_list(pci)) + goto fail0; + + pci->start_time = jiffies; + + if (edac_pci_create_sysfs(pci)) { + edac_pci_printk(pci, KERN_WARNING, + "failed to create sysfs pci\n"); + goto fail1; + } + + if (pci->edac_check != NULL) { + pci->op_state = OP_RUNNING_POLL; + + edac_pci_workq_setup(pci, 1000); + } else { + pci->op_state = OP_RUNNING_INTERRUPT; + } + + edac_pci_printk(pci, KERN_INFO, + "Giving out device to module '%s' controller '%s':" + " DEV '%s' (%s)\n", + pci->mod_name, + pci->ctl_name, + dev_name(pci), edac_op_state_to_string(pci->op_state)); + + edac_unlock_pci_list(); + return 0; + +fail1: + del_edac_pci_from_global_list(pci); +fail0: + edac_unlock_pci_list(); + return 1; +} + +EXPORT_SYMBOL_GPL(edac_pci_add_device); + +/* + * edac_pci_del_device() + * Remove sysfs entries for specified edac_pci structure and + * then remove edac_pci structure from global list + * + * @dev: + * Pointer to 'struct device' representing edac_pci structure + * to remove + * + * Return: + * Pointer to removed edac_pci structure, + * or NULL if device not found + */ +struct edac_pci_ctl_info *edac_pci_del_device(struct device *dev) +{ + struct edac_pci_ctl_info *pci; + + debugf0("%s()\n", __func__); + + edac_lock_pci_list(); + + if ((pci = find_edac_pci_by_dev(dev)) == NULL) { + edac_unlock_pci_list(); + return NULL; + } + + pci->op_state = OP_OFFLINE; + + edac_pci_workq_teardown(pci); + + edac_pci_remove_sysfs(pci); + + del_edac_pci_from_global_list(pci); + + edac_unlock_pci_list(); + + edac_printk(KERN_INFO, EDAC_PCI, + "Removed device %d for %s %s: DEV %s\n", + pci->pci_idx, pci->mod_name, pci->ctl_name, dev_name(pci)); + + return pci; +} + +EXPORT_SYMBOL_GPL(edac_pci_del_device); + +void edac_pci_generic_check(struct edac_pci_ctl_info *pci) +{ + edac_pci_do_parity_check(); +} + +static int edac_pci_idx; +#define EDAC_PCI_GENCTL_NAME "EDAC PCI controller" + +struct edac_pci_gen_data { + int edac_idx; +}; + +struct edac_pci_ctl_info *edac_pci_create_generic_ctl(struct device *dev, + const char *mod_name) +{ + struct edac_pci_ctl_info *pci; + struct edac_pci_gen_data *pdata; + + pci = edac_pci_alloc_ctl_info(sizeof(*pdata), EDAC_PCI_GENCTL_NAME); + if (!pci) + return NULL; + + pdata = pci->pvt_info; + pci->dev = dev; + dev_set_drvdata(pci->dev, pci); + pci->dev_name = pci_name(to_pci_dev(dev)); + + pci->mod_name = mod_name; + pci->ctl_name = EDAC_PCI_GENCTL_NAME; + pci->edac_check = edac_pci_generic_check; + + pdata->edac_idx = edac_pci_idx++; + + if (edac_pci_add_device(pci, pdata->edac_idx) > 0) { + debugf3("%s(): failed edac_pci_add_device()\n", __func__); + edac_pci_free_ctl_info(pci); + return NULL; + } + + return pci; +} + +EXPORT_SYMBOL_GPL(edac_pci_create_generic_ctl); + +void edac_pci_release_generic_ctl(struct edac_pci_ctl_info *pci) +{ + edac_pci_del_device(pci->dev); + edac_pci_free_ctl_info(pci); +} + +EXPORT_SYMBOL_GPL(edac_pci_release_generic_ctl); diff --git a/drivers/edac/edac_pci_sysfs.c b/drivers/edac/edac_pci_sysfs.c new file mode 100644 index 000000000000..fac94cae2c3d --- /dev/null +++ b/drivers/edac/edac_pci_sysfs.c @@ -0,0 +1,620 @@ +/* + * (C) 2005, 2006 Linux Networx (http://lnxi.com) + * This file may be distributed under the terms of the + * GNU General Public License. + * + * Written Doug Thompson <norsk5@xmission.com> + * + */ +#include <linux/module.h> +#include <linux/sysdev.h> +#include <linux/ctype.h> + +#include "edac_core.h" +#include "edac_module.h" + +#ifdef CONFIG_PCI + +#define EDAC_PCI_SYMLINK "device" + +static int check_pci_errors; /* default YES check PCI parity */ +static int edac_pci_panic_on_pe; /* default no panic on PCI Parity */ +static int edac_pci_log_pe = 1; /* log PCI parity errors */ +static int edac_pci_log_npe = 1; /* log PCI non-parity error errors */ +static atomic_t pci_parity_count = ATOMIC_INIT(0); +static atomic_t pci_nonparity_count = ATOMIC_INIT(0); +static int edac_pci_poll_msec = 1000; + +static struct kobject edac_pci_kobj; /* /sys/devices/system/edac/pci */ +static struct completion edac_pci_kobj_complete; +static atomic_t edac_pci_sysfs_refcount = ATOMIC_INIT(0); + +int edac_pci_get_check_errors(void) +{ + return check_pci_errors; +} + +int edac_pci_get_log_pe(void) +{ + return edac_pci_log_pe; +} + +int edac_pci_get_log_npe(void) +{ + return edac_pci_log_npe; +} + +int edac_pci_get_panic_on_pe(void) +{ + return edac_pci_panic_on_pe; +} + +int edac_pci_get_poll_msec(void) +{ + return edac_pci_poll_msec; +} + +/**************************** EDAC PCI sysfs instance *******************/ +static ssize_t instance_pe_count_show(struct edac_pci_ctl_info *pci, char *data) +{ + return sprintf(data, "%u\n", atomic_read(&pci->counters.pe_count)); +} + +static ssize_t instance_npe_count_show(struct edac_pci_ctl_info *pci, + char *data) +{ + return sprintf(data, "%u\n", atomic_read(&pci->counters.npe_count)); +} + +#define to_instance(k) container_of(k, struct edac_pci_ctl_info, kobj) +#define to_instance_attr(a) container_of(a, struct instance_attribute, attr) + +/* DEVICE instance kobject release() function */ +static void edac_pci_instance_release(struct kobject *kobj) +{ + struct edac_pci_ctl_info *pci; + + debugf1("%s()\n", __func__); + + pci = to_instance(kobj); + complete(&pci->kobj_complete); +} + +/* instance specific attribute structure */ +struct instance_attribute { + struct attribute attr; + ssize_t(*show) (struct edac_pci_ctl_info *, char *); + ssize_t(*store) (struct edac_pci_ctl_info *, const char *, size_t); +}; + +/* Function to 'show' fields from the edac_pci 'instance' structure */ +static ssize_t edac_pci_instance_show(struct kobject *kobj, + struct attribute *attr, char *buffer) +{ + struct edac_pci_ctl_info *pci = to_instance(kobj); + struct instance_attribute *instance_attr = to_instance_attr(attr); + + if (instance_attr->show) + return instance_attr->show(pci, buffer); + return -EIO; +} + +/* Function to 'store' fields into the edac_pci 'instance' structure */ +static ssize_t edac_pci_instance_store(struct kobject *kobj, + struct attribute *attr, + const char *buffer, size_t count) +{ + struct edac_pci_ctl_info *pci = to_instance(kobj); + struct instance_attribute *instance_attr = to_instance_attr(attr); + + if (instance_attr->store) + return instance_attr->store(pci, buffer, count); + return -EIO; +} + +static struct sysfs_ops pci_instance_ops = { + .show = edac_pci_instance_show, + .store = edac_pci_instance_store +}; + +#define INSTANCE_ATTR(_name, _mode, _show, _store) \ +static struct instance_attribute attr_instance_##_name = { \ + .attr = {.name = __stringify(_name), .mode = _mode }, \ + .show = _show, \ + .store = _store, \ +}; + +INSTANCE_ATTR(pe_count, S_IRUGO, instance_pe_count_show, NULL); +INSTANCE_ATTR(npe_count, S_IRUGO, instance_npe_count_show, NULL); + +/* pci instance attributes */ +static struct instance_attribute *pci_instance_attr[] = { + &attr_instance_pe_count, + &attr_instance_npe_count, + NULL +}; + +/* the ktype for pci instance */ +static struct kobj_type ktype_pci_instance = { + .release = edac_pci_instance_release, + .sysfs_ops = &pci_instance_ops, + .default_attrs = (struct attribute **)pci_instance_attr, +}; + +static int edac_pci_create_instance_kobj(struct edac_pci_ctl_info *pci, int idx) +{ + int err; + + pci->kobj.parent = &edac_pci_kobj; + pci->kobj.ktype = &ktype_pci_instance; + + err = kobject_set_name(&pci->kobj, "pci%d", idx); + if (err) + return err; + + err = kobject_register(&pci->kobj); + if (err != 0) { + debugf2("%s() failed to register instance pci%d\n", + __func__, idx); + return err; + } + + debugf1("%s() Register instance 'pci%d' kobject\n", __func__, idx); + + return 0; +} + +static void +edac_pci_delete_instance_kobj(struct edac_pci_ctl_info *pci, int idx) +{ + init_completion(&pci->kobj_complete); + kobject_unregister(&pci->kobj); + wait_for_completion(&pci->kobj_complete); +} + +/***************************** EDAC PCI sysfs root **********************/ +#define to_edacpci(k) container_of(k, struct edac_pci_ctl_info, kobj) +#define to_edacpci_attr(a) container_of(a, struct edac_pci_attr, attr) + +static ssize_t edac_pci_int_show(void *ptr, char *buffer) +{ + int *value = ptr; + return sprintf(buffer, "%d\n", *value); +} + +static ssize_t edac_pci_int_store(void *ptr, const char *buffer, size_t count) +{ + int *value = ptr; + + if (isdigit(*buffer)) + *value = simple_strtoul(buffer, NULL, 0); + + return count; +} + +struct edac_pci_dev_attribute { + struct attribute attr; + void *value; + ssize_t(*show) (void *, char *); + ssize_t(*store) (void *, const char *, size_t); +}; + +/* Set of show/store abstract level functions for PCI Parity object */ +static ssize_t edac_pci_dev_show(struct kobject *kobj, struct attribute *attr, + char *buffer) +{ + struct edac_pci_dev_attribute *edac_pci_dev; + edac_pci_dev = (struct edac_pci_dev_attribute *)attr; + + if (edac_pci_dev->show) + return edac_pci_dev->show(edac_pci_dev->value, buffer); + return -EIO; +} + +static ssize_t edac_pci_dev_store(struct kobject *kobj, + struct attribute *attr, const char *buffer, + size_t count) +{ + struct edac_pci_dev_attribute *edac_pci_dev; + edac_pci_dev = (struct edac_pci_dev_attribute *)attr; + + if (edac_pci_dev->show) + return edac_pci_dev->store(edac_pci_dev->value, buffer, count); + return -EIO; +} + +static struct sysfs_ops edac_pci_sysfs_ops = { + .show = edac_pci_dev_show, + .store = edac_pci_dev_store +}; + +#define EDAC_PCI_ATTR(_name,_mode,_show,_store) \ +static struct edac_pci_dev_attribute edac_pci_attr_##_name = { \ + .attr = {.name = __stringify(_name), .mode = _mode }, \ + .value = &_name, \ + .show = _show, \ + .store = _store, \ +}; + +#define EDAC_PCI_STRING_ATTR(_name,_data,_mode,_show,_store) \ +static struct edac_pci_dev_attribute edac_pci_attr_##_name = { \ + .attr = {.name = __stringify(_name), .mode = _mode }, \ + .value = _data, \ + .show = _show, \ + .store = _store, \ +}; + +/* PCI Parity control files */ +EDAC_PCI_ATTR(check_pci_errors, S_IRUGO | S_IWUSR, edac_pci_int_show, + edac_pci_int_store); +EDAC_PCI_ATTR(edac_pci_log_pe, S_IRUGO | S_IWUSR, edac_pci_int_show, + edac_pci_int_store); +EDAC_PCI_ATTR(edac_pci_log_npe, S_IRUGO | S_IWUSR, edac_pci_int_show, + edac_pci_int_store); +EDAC_PCI_ATTR(edac_pci_panic_on_pe, S_IRUGO | S_IWUSR, edac_pci_int_show, + edac_pci_int_store); +EDAC_PCI_ATTR(pci_parity_count, S_IRUGO, edac_pci_int_show, NULL); +EDAC_PCI_ATTR(pci_nonparity_count, S_IRUGO, edac_pci_int_show, NULL); + +/* Base Attributes of the memory ECC object */ +static struct edac_pci_dev_attribute *edac_pci_attr[] = { + &edac_pci_attr_check_pci_errors, + &edac_pci_attr_edac_pci_log_pe, + &edac_pci_attr_edac_pci_log_npe, + &edac_pci_attr_edac_pci_panic_on_pe, + &edac_pci_attr_pci_parity_count, + &edac_pci_attr_pci_nonparity_count, + NULL, +}; + +/* No memory to release */ +static void edac_pci_release(struct kobject *kobj) +{ + struct edac_pci_ctl_info *pci; + + pci = to_edacpci(kobj); + + debugf1("%s()\n", __func__); + complete(&pci->kobj_complete); +} + +static struct kobj_type ktype_edac_pci = { + .release = edac_pci_release, + .sysfs_ops = &edac_pci_sysfs_ops, + .default_attrs = (struct attribute **)edac_pci_attr, +}; + +/** + * edac_sysfs_pci_setup() + * + * setup the sysfs for EDAC PCI attributes + * assumes edac_class has already been initialized + */ +int edac_pci_register_main_kobj(void) +{ + int err; + struct sysdev_class *edac_class; + + debugf1("%s()\n", __func__); + + edac_class = edac_get_edac_class(); + if (edac_class == NULL) { + debugf1("%s() no edac_class\n", __func__); + return -ENODEV; + } + + edac_pci_kobj.ktype = &ktype_edac_pci; + + edac_pci_kobj.parent = &edac_class->kset.kobj; + + err = kobject_set_name(&edac_pci_kobj, "pci"); + if (err) + return err; + + /* Instanstiate the pci object */ + /* FIXME: maybe new sysdev_create_subdir() */ + err = kobject_register(&edac_pci_kobj); + + if (err) { + debugf1("Failed to register '.../edac/pci'\n"); + return err; + } + + debugf1("Registered '.../edac/pci' kobject\n"); + + return 0; +} + +/* + * edac_pci_unregister_main_kobj() + * + * perform the sysfs teardown for the PCI attributes + */ +void edac_pci_unregister_main_kobj(void) +{ + debugf0("%s()\n", __func__); + init_completion(&edac_pci_kobj_complete); + kobject_unregister(&edac_pci_kobj); + wait_for_completion(&edac_pci_kobj_complete); +} + +int edac_pci_create_sysfs(struct edac_pci_ctl_info *pci) +{ + int err; + struct kobject *edac_kobj = &pci->kobj; + + if (atomic_inc_return(&edac_pci_sysfs_refcount) == 1) { + err = edac_pci_register_main_kobj(); + if (err) { + atomic_dec(&edac_pci_sysfs_refcount); + return err; + } + } + + err = edac_pci_create_instance_kobj(pci, pci->pci_idx); + if (err) { + if (atomic_dec_return(&edac_pci_sysfs_refcount) == 0) + edac_pci_unregister_main_kobj(); + } + + debugf0("%s() idx=%d\n", __func__, pci->pci_idx); + + err = sysfs_create_link(edac_kobj, &pci->dev->kobj, EDAC_PCI_SYMLINK); + if (err) { + debugf0("%s() sysfs_create_link() returned err= %d\n", + __func__, err); + return err; + } + + return 0; +} + +void edac_pci_remove_sysfs(struct edac_pci_ctl_info *pci) +{ + debugf0("%s()\n", __func__); + + edac_pci_delete_instance_kobj(pci, pci->pci_idx); + + sysfs_remove_link(&pci->kobj, EDAC_PCI_SYMLINK); + + if (atomic_dec_return(&edac_pci_sysfs_refcount) == 0) + edac_pci_unregister_main_kobj(); +} + +/************************ PCI error handling *************************/ +static u16 get_pci_parity_status(struct pci_dev *dev, int secondary) +{ + int where; + u16 status; + + where = secondary ? PCI_SEC_STATUS : PCI_STATUS; + pci_read_config_word(dev, where, &status); + + /* If we get back 0xFFFF then we must suspect that the card has been + * pulled but the Linux PCI layer has not yet finished cleaning up. + * We don't want to report on such devices + */ + + if (status == 0xFFFF) { + u32 sanity; + + pci_read_config_dword(dev, 0, &sanity); + + if (sanity == 0xFFFFFFFF) + return 0; + } + + status &= PCI_STATUS_DETECTED_PARITY | PCI_STATUS_SIG_SYSTEM_ERROR | + PCI_STATUS_PARITY; + + if (status) + /* reset only the bits we are interested in */ + pci_write_config_word(dev, where, status); + + return status; +} + +typedef void (*pci_parity_check_fn_t) (struct pci_dev * dev); + +/* Clear any PCI parity errors logged by this device. */ +static void edac_pci_dev_parity_clear(struct pci_dev *dev) +{ + u8 header_type; + + get_pci_parity_status(dev, 0); + + /* read the device TYPE, looking for bridges */ + pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type); + + if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE) + get_pci_parity_status(dev, 1); +} + +/* + * PCI Parity polling + * + */ +static void edac_pci_dev_parity_test(struct pci_dev *dev) +{ + u16 status; + u8 header_type; + + /* read the STATUS register on this device + */ + status = get_pci_parity_status(dev, 0); + + debugf2("PCI STATUS= 0x%04x %s\n", status, dev->dev.bus_id); + + /* check the status reg for errors */ + if (status) { + if (status & (PCI_STATUS_SIG_SYSTEM_ERROR)) { + edac_printk(KERN_CRIT, EDAC_PCI, + "Signaled System Error on %s\n", + pci_name(dev)); + atomic_inc(&pci_nonparity_count); + } + + if (status & (PCI_STATUS_PARITY)) { + edac_printk(KERN_CRIT, EDAC_PCI, + "Master Data Parity Error on %s\n", + pci_name(dev)); + + atomic_inc(&pci_parity_count); + } + + if (status & (PCI_STATUS_DETECTED_PARITY)) { + edac_printk(KERN_CRIT, EDAC_PCI, + "Detected Parity Error on %s\n", + pci_name(dev)); + + atomic_inc(&pci_parity_count); + } + } + + /* read the device TYPE, looking for bridges */ + pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type); + + debugf2("PCI HEADER TYPE= 0x%02x %s\n", header_type, dev->dev.bus_id); + + if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE) { + /* On bridges, need to examine secondary status register */ + status = get_pci_parity_status(dev, 1); + + debugf2("PCI SEC_STATUS= 0x%04x %s\n", status, dev->dev.bus_id); + + /* check the secondary status reg for errors */ + if (status) { + if (status & (PCI_STATUS_SIG_SYSTEM_ERROR)) { + edac_printk(KERN_CRIT, EDAC_PCI, "Bridge " + "Signaled System Error on %s\n", + pci_name(dev)); + atomic_inc(&pci_nonparity_count); + } + + if (status & (PCI_STATUS_PARITY)) { + edac_printk(KERN_CRIT, EDAC_PCI, "Bridge " + "Master Data Parity Error on " + "%s\n", pci_name(dev)); + + atomic_inc(&pci_parity_count); + } + + if (status & (PCI_STATUS_DETECTED_PARITY)) { + edac_printk(KERN_CRIT, EDAC_PCI, "Bridge " + "Detected Parity Error on %s\n", + pci_name(dev)); + + atomic_inc(&pci_parity_count); + } + } + } +} + +/* + * pci_dev parity list iterator + * Scan the PCI device list for one iteration, looking for SERRORs + * Master Parity ERRORS or Parity ERRORs on primary or secondary devices + */ +static inline void edac_pci_dev_parity_iterator(pci_parity_check_fn_t fn) +{ + struct pci_dev *dev = NULL; + + /* request for kernel access to the next PCI device, if any, + * and while we are looking at it have its reference count + * bumped until we are done with it + */ + while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { + fn(dev); + } +} + +/* + * edac_pci_do_parity_check + * + * performs the actual PCI parity check operation + */ +void edac_pci_do_parity_check(void) +{ + unsigned long flags; + int before_count; + + debugf3("%s()\n", __func__); + + if (!check_pci_errors) + return; + + before_count = atomic_read(&pci_parity_count); + + /* scan all PCI devices looking for a Parity Error on devices and + * bridges + */ + local_irq_save(flags); + edac_pci_dev_parity_iterator(edac_pci_dev_parity_test); + local_irq_restore(flags); + + /* Only if operator has selected panic on PCI Error */ + if (edac_pci_get_panic_on_pe()) { + /* If the count is different 'after' from 'before' */ + if (before_count != atomic_read(&pci_parity_count)) + panic("EDAC: PCI Parity Error"); + } +} + +void edac_pci_clear_parity_errors(void) +{ + /* Clear any PCI bus parity errors that devices initially have logged + * in their registers. + */ + edac_pci_dev_parity_iterator(edac_pci_dev_parity_clear); +} +void edac_pci_handle_pe(struct edac_pci_ctl_info *pci, const char *msg) +{ + + /* global PE counter incremented by edac_pci_do_parity_check() */ + atomic_inc(&pci->counters.pe_count); + + if (edac_pci_get_log_pe()) + edac_pci_printk(pci, KERN_WARNING, + "Parity Error ctl: %s %d: %s\n", + pci->ctl_name, pci->pci_idx, msg); + + /* + * poke all PCI devices and see which one is the troublemaker + * panic() is called if set + */ + edac_pci_do_parity_check(); +} + +EXPORT_SYMBOL_GPL(edac_pci_handle_pe); + +void edac_pci_handle_npe(struct edac_pci_ctl_info *pci, const char *msg) +{ + + /* global NPE counter incremented by edac_pci_do_parity_check() */ + atomic_inc(&pci->counters.npe_count); + + if (edac_pci_get_log_npe()) + edac_pci_printk(pci, KERN_WARNING, + "Non-Parity Error ctl: %s %d: %s\n", + pci->ctl_name, pci->pci_idx, msg); + + /* + * poke all PCI devices and see which one is the troublemaker + * panic() is called if set + */ + edac_pci_do_parity_check(); +} + +EXPORT_SYMBOL_GPL(edac_pci_handle_npe); + +/* + * Define the PCI parameter to the module + */ +module_param(check_pci_errors, int, 0644); +MODULE_PARM_DESC(check_pci_errors, + "Check for PCI bus parity errors: 0=off 1=on"); +module_param(edac_pci_panic_on_pe, int, 0644); +MODULE_PARM_DESC(edac_pci_panic_on_pe, + "Panic on PCI Bus Parity error: 0=off 1=on"); + +#endif /* CONFIG_PCI */ diff --git a/drivers/edac/edac_stub.c b/drivers/edac/edac_stub.c new file mode 100644 index 000000000000..20b428aa155e --- /dev/null +++ b/drivers/edac/edac_stub.c @@ -0,0 +1,46 @@ +/* + * common EDAC components that must be in kernel + * + * Author: Dave Jiang <djiang@mvista.com> + * + * 2007 (c) MontaVista Software, Inc. This file is licensed under + * the terms of the GNU General Public License version 2. This program + * is licensed "as is" without any warranty of any kind, whether express + * or implied. + * + */ +#include <linux/module.h> +#include <linux/edac.h> +#include <asm/atomic.h> +#include <asm/edac.h> + +int edac_op_state = EDAC_OPSTATE_INVAL; +EXPORT_SYMBOL_GPL(edac_op_state); + +atomic_t edac_handlers = ATOMIC_INIT(0); +EXPORT_SYMBOL_GPL(edac_handlers); + +int edac_err_assert = 0; +EXPORT_SYMBOL_GPL(edac_err_assert); + +/* + * called to determine if there is an EDAC driver interested in + * knowing an event (such as NMI) occurred + */ +int edac_handler_set(void) +{ + if (edac_op_state == EDAC_OPSTATE_POLL) + return 0; + + return atomic_read(&edac_handlers); +} +EXPORT_SYMBOL_GPL(edac_handler_set); + +/* + * handler for NMI type of interrupts to assert error + */ +void edac_atomic_assert_error(void) +{ + edac_err_assert++; +} +EXPORT_SYMBOL_GPL(edac_atomic_assert_error); diff --git a/drivers/edac/i3000_edac.c b/drivers/edac/i3000_edac.c new file mode 100644 index 000000000000..0ecfdc432f87 --- /dev/null +++ b/drivers/edac/i3000_edac.c @@ -0,0 +1,506 @@ +/* + * Intel 3000/3010 Memory Controller kernel module + * Copyright (C) 2007 Akamai Technologies, Inc. + * Shamelessly copied from: + * Intel D82875P Memory Controller kernel module + * (C) 2003 Linux Networx (http://lnxi.com) + * + * This file may be distributed under the terms of the + * GNU General Public License. + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/pci.h> +#include <linux/pci_ids.h> +#include <linux/slab.h> +#include "edac_core.h" + +#define I3000_REVISION "1.1" + +#define EDAC_MOD_STR "i3000_edac" + +#define I3000_RANKS 8 +#define I3000_RANKS_PER_CHANNEL 4 +#define I3000_CHANNELS 2 + +/* Intel 3000 register addresses - device 0 function 0 - DRAM Controller */ + +#define I3000_MCHBAR 0x44 /* MCH Memory Mapped Register BAR */ +#define I3000_MCHBAR_MASK 0xffffc000 +#define I3000_MMR_WINDOW_SIZE 16384 + +#define I3000_EDEAP 0x70 /* Extended DRAM Error Address Pointer (8b) + * + * 7:1 reserved + * 0 bit 32 of address + */ +#define I3000_DEAP 0x58 /* DRAM Error Address Pointer (32b) + * + * 31:7 address + * 6:1 reserved + * 0 Error channel 0/1 + */ +#define I3000_DEAP_GRAIN (1 << 7) +#define I3000_DEAP_PFN(edeap, deap) ((((edeap) & 1) << (32 - PAGE_SHIFT)) | \ + ((deap) >> PAGE_SHIFT)) +#define I3000_DEAP_OFFSET(deap) ((deap) & ~(I3000_DEAP_GRAIN-1) & ~PAGE_MASK) +#define I3000_DEAP_CHANNEL(deap) ((deap) & 1) + +#define I3000_DERRSYN 0x5c /* DRAM Error Syndrome (8b) + * + * 7:0 DRAM ECC Syndrome + */ + +#define I3000_ERRSTS 0xc8 /* Error Status Register (16b) + * + * 15:12 reserved + * 11 MCH Thermal Sensor Event for SMI/SCI/SERR + * 10 reserved + * 9 LOCK to non-DRAM Memory Flag (LCKF) + * 8 Received Refresh Timeout Flag (RRTOF) + * 7:2 reserved + * 1 Multiple-bit DRAM ECC Error Flag (DMERR) + * 0 Single-bit DRAM ECC Error Flag (DSERR) + */ +#define I3000_ERRSTS_BITS 0x0b03 /* bits which indicate errors */ +#define I3000_ERRSTS_UE 0x0002 +#define I3000_ERRSTS_CE 0x0001 + +#define I3000_ERRCMD 0xca /* Error Command (16b) + * + * 15:12 reserved + * 11 SERR on MCH Thermal Sensor Event (TSESERR) + * 10 reserved + * 9 SERR on LOCK to non-DRAM Memory (LCKERR) + * 8 SERR on DRAM Refresh Timeout (DRTOERR) + * 7:2 reserved + * 1 SERR Multiple-Bit DRAM ECC Error (DMERR) + * 0 SERR on Single-Bit ECC Error (DSERR) + */ + +/* Intel MMIO register space - device 0 function 0 - MMR space */ + +#define I3000_DRB_SHIFT 25 /* 32MiB grain */ + +#define I3000_C0DRB 0x100 /* Channel 0 DRAM Rank Boundary (8b x 4) + * + * 7:0 Channel 0 DRAM Rank Boundary Address + */ +#define I3000_C1DRB 0x180 /* Channel 1 DRAM Rank Boundary (8b x 4) + * + * 7:0 Channel 1 DRAM Rank Boundary Address + */ + +#define I3000_C0DRA 0x108 /* Channel 0 DRAM Rank Attribute (8b x 2) + * + * 7 reserved + * 6:4 DRAM odd Rank Attribute + * 3 reserved + * 2:0 DRAM even Rank Attribute + * + * Each attribute defines the page + * size of the corresponding rank: + * 000: unpopulated + * 001: reserved + * 010: 4 KB + * 011: 8 KB + * 100: 16 KB + * Others: reserved + */ +#define I3000_C1DRA 0x188 /* Channel 1 DRAM Rank Attribute (8b x 2) */ +#define ODD_RANK_ATTRIB(dra) (((dra) & 0x70) >> 4) +#define EVEN_RANK_ATTRIB(dra) ((dra) & 0x07) + +#define I3000_C0DRC0 0x120 /* DRAM Controller Mode 0 (32b) + * + * 31:30 reserved + * 29 Initialization Complete (IC) + * 28:11 reserved + * 10:8 Refresh Mode Select (RMS) + * 7 reserved + * 6:4 Mode Select (SMS) + * 3:2 reserved + * 1:0 DRAM Type (DT) + */ + +#define I3000_C0DRC1 0x124 /* DRAM Controller Mode 1 (32b) + * + * 31 Enhanced Addressing Enable (ENHADE) + * 30:0 reserved + */ + +enum i3000p_chips { + I3000 = 0, +}; + +struct i3000_dev_info { + const char *ctl_name; +}; + +struct i3000_error_info { + u16 errsts; + u8 derrsyn; + u8 edeap; + u32 deap; + u16 errsts2; +}; + +static const struct i3000_dev_info i3000_devs[] = { + [I3000] = { + .ctl_name = "i3000"}, +}; + +static struct pci_dev *mci_pdev; +static int i3000_registered = 1; +static struct edac_pci_ctl_info *i3000_pci; + +static void i3000_get_error_info(struct mem_ctl_info *mci, + struct i3000_error_info *info) +{ + struct pci_dev *pdev; + + pdev = to_pci_dev(mci->dev); + + /* + * This is a mess because there is no atomic way to read all the + * registers at once and the registers can transition from CE being + * overwritten by UE. + */ + pci_read_config_word(pdev, I3000_ERRSTS, &info->errsts); + if (!(info->errsts & I3000_ERRSTS_BITS)) + return; + pci_read_config_byte(pdev, I3000_EDEAP, &info->edeap); + pci_read_config_dword(pdev, I3000_DEAP, &info->deap); + pci_read_config_byte(pdev, I3000_DERRSYN, &info->derrsyn); + pci_read_config_word(pdev, I3000_ERRSTS, &info->errsts2); + + /* + * If the error is the same for both reads then the first set + * of reads is valid. If there is a change then there is a CE + * with no info and the second set of reads is valid and + * should be UE info. + */ + if ((info->errsts ^ info->errsts2) & I3000_ERRSTS_BITS) { + pci_read_config_byte(pdev, I3000_EDEAP, &info->edeap); + pci_read_config_dword(pdev, I3000_DEAP, &info->deap); + pci_read_config_byte(pdev, I3000_DERRSYN, &info->derrsyn); + } + + /* Clear any error bits. + * (Yes, we really clear bits by writing 1 to them.) + */ + pci_write_bits16(pdev, I3000_ERRSTS, I3000_ERRSTS_BITS, + I3000_ERRSTS_BITS); +} + +static int i3000_process_error_info(struct mem_ctl_info *mci, + struct i3000_error_info *info, + int handle_errors) +{ + int row, multi_chan; + int pfn, offset, channel; + + multi_chan = mci->csrows[0].nr_channels - 1; + + if (!(info->errsts & I3000_ERRSTS_BITS)) + return 0; + + if (!handle_errors) + return 1; + + if ((info->errsts ^ info->errsts2) & I3000_ERRSTS_BITS) { + edac_mc_handle_ce_no_info(mci, "UE overwrote CE"); + info->errsts = info->errsts2; + } + + pfn = I3000_DEAP_PFN(info->edeap, info->deap); + offset = I3000_DEAP_OFFSET(info->deap); + channel = I3000_DEAP_CHANNEL(info->deap); + + row = edac_mc_find_csrow_by_page(mci, pfn); + + if (info->errsts & I3000_ERRSTS_UE) + edac_mc_handle_ue(mci, pfn, offset, row, "i3000 UE"); + else + edac_mc_handle_ce(mci, pfn, offset, info->derrsyn, row, + multi_chan ? channel : 0, "i3000 CE"); + + return 1; +} + +static void i3000_check(struct mem_ctl_info *mci) +{ + struct i3000_error_info info; + + debugf1("MC%d: %s()\n", mci->mc_idx, __func__); + i3000_get_error_info(mci, &info); + i3000_process_error_info(mci, &info, 1); +} + +static int i3000_is_interleaved(const unsigned char *c0dra, + const unsigned char *c1dra, + const unsigned char *c0drb, + const unsigned char *c1drb) +{ + int i; + + /* If the channels aren't populated identically then + * we're not interleaved. + */ + for (i = 0; i < I3000_RANKS_PER_CHANNEL / 2; i++) + if (ODD_RANK_ATTRIB(c0dra[i]) != ODD_RANK_ATTRIB(c1dra[i]) || + EVEN_RANK_ATTRIB(c0dra[i]) != + EVEN_RANK_ATTRIB(c1dra[i])) + return 0; + + /* If the rank boundaries for the two channels are different + * then we're not interleaved. + */ + for (i = 0; i < I3000_RANKS_PER_CHANNEL; i++) + if (c0drb[i] != c1drb[i]) + return 0; + + return 1; +} + +static int i3000_probe1(struct pci_dev *pdev, int dev_idx) +{ + int rc; + int i; + struct mem_ctl_info *mci = NULL; + unsigned long last_cumul_size; + int interleaved, nr_channels; + unsigned char dra[I3000_RANKS / 2], drb[I3000_RANKS]; + unsigned char *c0dra = dra, *c1dra = &dra[I3000_RANKS_PER_CHANNEL / 2]; + unsigned char *c0drb = drb, *c1drb = &drb[I3000_RANKS_PER_CHANNEL]; + unsigned long mchbar; + void *window; + + debugf0("MC: %s()\n", __func__); + + pci_read_config_dword(pdev, I3000_MCHBAR, (u32 *) & mchbar); + mchbar &= I3000_MCHBAR_MASK; + window = ioremap_nocache(mchbar, I3000_MMR_WINDOW_SIZE); + if (!window) { + printk(KERN_ERR "i3000: cannot map mmio space at 0x%lx\n", + mchbar); + return -ENODEV; + } + + c0dra[0] = readb(window + I3000_C0DRA + 0); /* ranks 0,1 */ + c0dra[1] = readb(window + I3000_C0DRA + 1); /* ranks 2,3 */ + c1dra[0] = readb(window + I3000_C1DRA + 0); /* ranks 0,1 */ + c1dra[1] = readb(window + I3000_C1DRA + 1); /* ranks 2,3 */ + + for (i = 0; i < I3000_RANKS_PER_CHANNEL; i++) { + c0drb[i] = readb(window + I3000_C0DRB + i); + c1drb[i] = readb(window + I3000_C1DRB + i); + } + + iounmap(window); + + /* Figure out how many channels we have. + * + * If we have what the datasheet calls "asymmetric channels" + * (essentially the same as what was called "virtual single + * channel mode" in the i82875) then it's a single channel as + * far as EDAC is concerned. + */ + interleaved = i3000_is_interleaved(c0dra, c1dra, c0drb, c1drb); + nr_channels = interleaved ? 2 : 1; + mci = edac_mc_alloc(0, I3000_RANKS / nr_channels, nr_channels, 0); + if (!mci) + return -ENOMEM; + + debugf3("MC: %s(): init mci\n", __func__); + + mci->dev = &pdev->dev; + mci->mtype_cap = MEM_FLAG_DDR2; + + mci->edac_ctl_cap = EDAC_FLAG_SECDED; + mci->edac_cap = EDAC_FLAG_SECDED; + + mci->mod_name = EDAC_MOD_STR; + mci->mod_ver = I3000_REVISION; + mci->ctl_name = i3000_devs[dev_idx].ctl_name; + mci->dev_name = pci_name(pdev); + mci->edac_check = i3000_check; + mci->ctl_page_to_phys = NULL; + + /* + * The dram rank boundary (DRB) reg values are boundary addresses + * for each DRAM rank with a granularity of 32MB. DRB regs are + * cumulative; the last one will contain the total memory + * contained in all ranks. + * + * If we're in interleaved mode then we're only walking through + * the ranks of controller 0, so we double all the values we see. + */ + for (last_cumul_size = i = 0; i < mci->nr_csrows; i++) { + u8 value; + u32 cumul_size; + struct csrow_info *csrow = &mci->csrows[i]; + + value = drb[i]; + cumul_size = value << (I3000_DRB_SHIFT - PAGE_SHIFT); + if (interleaved) + cumul_size <<= 1; + debugf3("MC: %s(): (%d) cumul_size 0x%x\n", + __func__, i, cumul_size); + if (cumul_size == last_cumul_size) { + csrow->mtype = MEM_EMPTY; + continue; + } + + csrow->first_page = last_cumul_size; + csrow->last_page = cumul_size - 1; + csrow->nr_pages = cumul_size - last_cumul_size; + last_cumul_size = cumul_size; + csrow->grain = I3000_DEAP_GRAIN; + csrow->mtype = MEM_DDR2; + csrow->dtype = DEV_UNKNOWN; + csrow->edac_mode = EDAC_UNKNOWN; + } + + /* Clear any error bits. + * (Yes, we really clear bits by writing 1 to them.) + */ + pci_write_bits16(pdev, I3000_ERRSTS, I3000_ERRSTS_BITS, + I3000_ERRSTS_BITS); + + rc = -ENODEV; + if (edac_mc_add_mc(mci)) { + debugf3("MC: %s(): failed edac_mc_add_mc()\n", __func__); + goto fail; + } + + /* allocating generic PCI control info */ + i3000_pci = edac_pci_create_generic_ctl(&pdev->dev, EDAC_MOD_STR); + if (!i3000_pci) { + printk(KERN_WARNING + "%s(): Unable to create PCI control\n", + __func__); + printk(KERN_WARNING + "%s(): PCI error report via EDAC not setup\n", + __func__); + } + + /* get this far and it's successful */ + debugf3("MC: %s(): success\n", __func__); + return 0; + + fail: + if (mci) + edac_mc_free(mci); + + return rc; +} + +/* returns count (>= 0), or negative on error */ +static int __devinit i3000_init_one(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + int rc; + + debugf0("MC: %s()\n", __func__); + + if (pci_enable_device(pdev) < 0) + return -EIO; + + rc = i3000_probe1(pdev, ent->driver_data); + if (mci_pdev == NULL) + mci_pdev = pci_dev_get(pdev); + + return rc; +} + +static void __devexit i3000_remove_one(struct pci_dev *pdev) +{ + struct mem_ctl_info *mci; + + debugf0("%s()\n", __func__); + + if (i3000_pci) + edac_pci_release_generic_ctl(i3000_pci); + + if ((mci = edac_mc_del_mc(&pdev->dev)) == NULL) + return; + + edac_mc_free(mci); +} + +static const struct pci_device_id i3000_pci_tbl[] __devinitdata = { + { + PCI_VEND_DEV(INTEL, 3000_HB), PCI_ANY_ID, PCI_ANY_ID, 0, 0, + I3000}, + { + 0, + } /* 0 terminated list. */ +}; + +MODULE_DEVICE_TABLE(pci, i3000_pci_tbl); + +static struct pci_driver i3000_driver = { + .name = EDAC_MOD_STR, + .probe = i3000_init_one, + .remove = __devexit_p(i3000_remove_one), + .id_table = i3000_pci_tbl, +}; + +static int __init i3000_init(void) +{ + int pci_rc; + + debugf3("MC: %s()\n", __func__); + pci_rc = pci_register_driver(&i3000_driver); + if (pci_rc < 0) + goto fail0; + + if (mci_pdev == NULL) { + i3000_registered = 0; + mci_pdev = pci_get_device(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_3000_HB, NULL); + if (!mci_pdev) { + debugf0("i3000 pci_get_device fail\n"); + pci_rc = -ENODEV; + goto fail1; + } + + pci_rc = i3000_init_one(mci_pdev, i3000_pci_tbl); + if (pci_rc < 0) { + debugf0("i3000 init fail\n"); + pci_rc = -ENODEV; + goto fail1; + } + } + + return 0; + +fail1: + pci_unregister_driver(&i3000_driver); + +fail0: + if (mci_pdev) + pci_dev_put(mci_pdev); + + return pci_rc; +} + +static void __exit i3000_exit(void) +{ + debugf3("MC: %s()\n", __func__); + + pci_unregister_driver(&i3000_driver); + if (!i3000_registered) { + i3000_remove_one(mci_pdev); + pci_dev_put(mci_pdev); + } +} + +module_init(i3000_init); +module_exit(i3000_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Akamai Technologies Arthur Ulfeldt/Jason Uhlenkott"); +MODULE_DESCRIPTION("MC support for Intel 3000 memory hub controllers"); diff --git a/drivers/edac/i5000_edac.c b/drivers/edac/i5000_edac.c new file mode 100644 index 000000000000..96f7e63e3996 --- /dev/null +++ b/drivers/edac/i5000_edac.c @@ -0,0 +1,1505 @@ +/* + * Intel 5000(P/V/X) class Memory Controllers kernel module + * + * This file may be distributed under the terms of the + * GNU General Public License. + * + * Written by Douglas Thompson Linux Networx (http://lnxi.com) + * norsk5@xmission.com + * + * This module is based on the following document: + * + * Intel 5000X Chipset Memory Controller Hub (MCH) - Datasheet + * http://developer.intel.com/design/chipsets/datashts/313070.htm + * + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/pci.h> +#include <linux/pci_ids.h> +#include <linux/slab.h> +#include <linux/edac.h> +#include <asm/mmzone.h> + +#include "edac_core.h" + +/* + * Alter this version for the I5000 module when modifications are made + */ +#define I5000_REVISION " Ver: 2.0.12 " __DATE__ +#define EDAC_MOD_STR "i5000_edac" + +#define i5000_printk(level, fmt, arg...) \ + edac_printk(level, "i5000", fmt, ##arg) + +#define i5000_mc_printk(mci, level, fmt, arg...) \ + edac_mc_chipset_printk(mci, level, "i5000", fmt, ##arg) + +#ifndef PCI_DEVICE_ID_INTEL_FBD_0 +#define PCI_DEVICE_ID_INTEL_FBD_0 0x25F5 +#endif +#ifndef PCI_DEVICE_ID_INTEL_FBD_1 +#define PCI_DEVICE_ID_INTEL_FBD_1 0x25F6 +#endif + +/* Device 16, + * Function 0: System Address + * Function 1: Memory Branch Map, Control, Errors Register + * Function 2: FSB Error Registers + * + * All 3 functions of Device 16 (0,1,2) share the SAME DID + */ +#define PCI_DEVICE_ID_INTEL_I5000_DEV16 0x25F0 + +/* OFFSETS for Function 0 */ + +/* OFFSETS for Function 1 */ +#define AMBASE 0x48 +#define MAXCH 0x56 +#define MAXDIMMPERCH 0x57 +#define TOLM 0x6C +#define REDMEMB 0x7C +#define RED_ECC_LOCATOR(x) ((x) & 0x3FFFF) +#define REC_ECC_LOCATOR_EVEN(x) ((x) & 0x001FF) +#define REC_ECC_LOCATOR_ODD(x) ((x) & 0x3FE00) +#define MIR0 0x80 +#define MIR1 0x84 +#define MIR2 0x88 +#define AMIR0 0x8C +#define AMIR1 0x90 +#define AMIR2 0x94 + +#define FERR_FAT_FBD 0x98 +#define NERR_FAT_FBD 0x9C +#define EXTRACT_FBDCHAN_INDX(x) (((x)>>28) & 0x3) +#define FERR_FAT_FBDCHAN 0x30000000 +#define FERR_FAT_M3ERR 0x00000004 +#define FERR_FAT_M2ERR 0x00000002 +#define FERR_FAT_M1ERR 0x00000001 +#define FERR_FAT_MASK (FERR_FAT_M1ERR | \ + FERR_FAT_M2ERR | \ + FERR_FAT_M3ERR) + +#define FERR_NF_FBD 0xA0 + +/* Thermal and SPD or BFD errors */ +#define FERR_NF_M28ERR 0x01000000 +#define FERR_NF_M27ERR 0x00800000 +#define FERR_NF_M26ERR 0x00400000 +#define FERR_NF_M25ERR 0x00200000 +#define FERR_NF_M24ERR 0x00100000 +#define FERR_NF_M23ERR 0x00080000 +#define FERR_NF_M22ERR 0x00040000 +#define FERR_NF_M21ERR 0x00020000 + +/* Correctable errors */ +#define FERR_NF_M20ERR 0x00010000 +#define FERR_NF_M19ERR 0x00008000 +#define FERR_NF_M18ERR 0x00004000 +#define FERR_NF_M17ERR 0x00002000 + +/* Non-Retry or redundant Retry errors */ +#define FERR_NF_M16ERR 0x00001000 +#define FERR_NF_M15ERR 0x00000800 +#define FERR_NF_M14ERR 0x00000400 +#define FERR_NF_M13ERR 0x00000200 + +/* Uncorrectable errors */ +#define FERR_NF_M12ERR 0x00000100 +#define FERR_NF_M11ERR 0x00000080 +#define FERR_NF_M10ERR 0x00000040 +#define FERR_NF_M9ERR 0x00000020 +#define FERR_NF_M8ERR 0x00000010 +#define FERR_NF_M7ERR 0x00000008 +#define FERR_NF_M6ERR 0x00000004 +#define FERR_NF_M5ERR 0x00000002 +#define FERR_NF_M4ERR 0x00000001 + +#define FERR_NF_UNCORRECTABLE (FERR_NF_M12ERR | \ + FERR_NF_M11ERR | \ + FERR_NF_M10ERR | \ + FERR_NF_M8ERR | \ + FERR_NF_M7ERR | \ + FERR_NF_M6ERR | \ + FERR_NF_M5ERR | \ + FERR_NF_M4ERR) +#define FERR_NF_CORRECTABLE (FERR_NF_M20ERR | \ + FERR_NF_M19ERR | \ + FERR_NF_M18ERR | \ + FERR_NF_M17ERR) +#define FERR_NF_DIMM_SPARE (FERR_NF_M27ERR | \ + FERR_NF_M28ERR) +#define FERR_NF_THERMAL (FERR_NF_M26ERR | \ + FERR_NF_M25ERR | \ + FERR_NF_M24ERR | \ + FERR_NF_M23ERR) +#define FERR_NF_SPD_PROTOCOL (FERR_NF_M22ERR) +#define FERR_NF_NORTH_CRC (FERR_NF_M21ERR) +#define FERR_NF_NON_RETRY (FERR_NF_M13ERR | \ + FERR_NF_M14ERR | \ + FERR_NF_M15ERR) + +#define NERR_NF_FBD 0xA4 +#define FERR_NF_MASK (FERR_NF_UNCORRECTABLE | \ + FERR_NF_CORRECTABLE | \ + FERR_NF_DIMM_SPARE | \ + FERR_NF_THERMAL | \ + FERR_NF_SPD_PROTOCOL | \ + FERR_NF_NORTH_CRC | \ + FERR_NF_NON_RETRY) + +#define EMASK_FBD 0xA8 +#define EMASK_FBD_M28ERR 0x08000000 +#define EMASK_FBD_M27ERR 0x04000000 +#define EMASK_FBD_M26ERR 0x02000000 +#define EMASK_FBD_M25ERR 0x01000000 +#define EMASK_FBD_M24ERR 0x00800000 +#define EMASK_FBD_M23ERR 0x00400000 +#define EMASK_FBD_M22ERR 0x00200000 +#define EMASK_FBD_M21ERR 0x00100000 +#define EMASK_FBD_M20ERR 0x00080000 +#define EMASK_FBD_M19ERR 0x00040000 +#define EMASK_FBD_M18ERR 0x00020000 +#define EMASK_FBD_M17ERR 0x00010000 + +#define EMASK_FBD_M15ERR 0x00004000 +#define EMASK_FBD_M14ERR 0x00002000 +#define EMASK_FBD_M13ERR 0x00001000 +#define EMASK_FBD_M12ERR 0x00000800 +#define EMASK_FBD_M11ERR 0x00000400 +#define EMASK_FBD_M10ERR 0x00000200 +#define EMASK_FBD_M9ERR 0x00000100 +#define EMASK_FBD_M8ERR 0x00000080 +#define EMASK_FBD_M7ERR 0x00000040 +#define EMASK_FBD_M6ERR 0x00000020 +#define EMASK_FBD_M5ERR 0x00000010 +#define EMASK_FBD_M4ERR 0x00000008 +#define EMASK_FBD_M3ERR 0x00000004 +#define EMASK_FBD_M2ERR 0x00000002 +#define EMASK_FBD_M1ERR 0x00000001 + +#define ENABLE_EMASK_FBD_FATAL_ERRORS (EMASK_FBD_M1ERR | \ + EMASK_FBD_M2ERR | \ + EMASK_FBD_M3ERR) + +#define ENABLE_EMASK_FBD_UNCORRECTABLE (EMASK_FBD_M4ERR | \ + EMASK_FBD_M5ERR | \ + EMASK_FBD_M6ERR | \ + EMASK_FBD_M7ERR | \ + EMASK_FBD_M8ERR | \ + EMASK_FBD_M9ERR | \ + EMASK_FBD_M10ERR | \ + EMASK_FBD_M11ERR | \ + EMASK_FBD_M12ERR) +#define ENABLE_EMASK_FBD_CORRECTABLE (EMASK_FBD_M17ERR | \ + EMASK_FBD_M18ERR | \ + EMASK_FBD_M19ERR | \ + EMASK_FBD_M20ERR) +#define ENABLE_EMASK_FBD_DIMM_SPARE (EMASK_FBD_M27ERR | \ + EMASK_FBD_M28ERR) +#define ENABLE_EMASK_FBD_THERMALS (EMASK_FBD_M26ERR | \ + EMASK_FBD_M25ERR | \ + EMASK_FBD_M24ERR | \ + EMASK_FBD_M23ERR) +#define ENABLE_EMASK_FBD_SPD_PROTOCOL (EMASK_FBD_M22ERR) +#define ENABLE_EMASK_FBD_NORTH_CRC (EMASK_FBD_M21ERR) +#define ENABLE_EMASK_FBD_NON_RETRY (EMASK_FBD_M15ERR | \ + EMASK_FBD_M14ERR | \ + EMASK_FBD_M13ERR) + +#define ENABLE_EMASK_ALL (ENABLE_EMASK_FBD_NON_RETRY | \ + ENABLE_EMASK_FBD_NORTH_CRC | \ + ENABLE_EMASK_FBD_SPD_PROTOCOL | \ + ENABLE_EMASK_FBD_THERMALS | \ + ENABLE_EMASK_FBD_DIMM_SPARE | \ + ENABLE_EMASK_FBD_FATAL_ERRORS | \ + ENABLE_EMASK_FBD_CORRECTABLE | \ + ENABLE_EMASK_FBD_UNCORRECTABLE) + +#define ERR0_FBD 0xAC +#define ERR1_FBD 0xB0 +#define ERR2_FBD 0xB4 +#define MCERR_FBD 0xB8 +#define NRECMEMA 0xBE +#define NREC_BANK(x) (((x)>>12) & 0x7) +#define NREC_RDWR(x) (((x)>>11) & 1) +#define NREC_RANK(x) (((x)>>8) & 0x7) +#define NRECMEMB 0xC0 +#define NREC_CAS(x) (((x)>>16) & 0xFFFFFF) +#define NREC_RAS(x) ((x) & 0x7FFF) +#define NRECFGLOG 0xC4 +#define NREEECFBDA 0xC8 +#define NREEECFBDB 0xCC +#define NREEECFBDC 0xD0 +#define NREEECFBDD 0xD4 +#define NREEECFBDE 0xD8 +#define REDMEMA 0xDC +#define RECMEMA 0xE2 +#define REC_BANK(x) (((x)>>12) & 0x7) +#define REC_RDWR(x) (((x)>>11) & 1) +#define REC_RANK(x) (((x)>>8) & 0x7) +#define RECMEMB 0xE4 +#define REC_CAS(x) (((x)>>16) & 0xFFFFFF) +#define REC_RAS(x) ((x) & 0x7FFF) +#define RECFGLOG 0xE8 +#define RECFBDA 0xEC +#define RECFBDB 0xF0 +#define RECFBDC 0xF4 +#define RECFBDD 0xF8 +#define RECFBDE 0xFC + +/* OFFSETS for Function 2 */ + +/* + * Device 21, + * Function 0: Memory Map Branch 0 + * + * Device 22, + * Function 0: Memory Map Branch 1 + */ +#define PCI_DEVICE_ID_I5000_BRANCH_0 0x25F5 +#define PCI_DEVICE_ID_I5000_BRANCH_1 0x25F6 + +#define AMB_PRESENT_0 0x64 +#define AMB_PRESENT_1 0x66 +#define MTR0 0x80 +#define MTR1 0x84 +#define MTR2 0x88 +#define MTR3 0x8C + +#define NUM_MTRS 4 +#define CHANNELS_PER_BRANCH (2) + +/* Defines to extract the vaious fields from the + * MTRx - Memory Technology Registers + */ +#define MTR_DIMMS_PRESENT(mtr) ((mtr) & (0x1 << 8)) +#define MTR_DRAM_WIDTH(mtr) ((((mtr) >> 6) & 0x1) ? 8 : 4) +#define MTR_DRAM_BANKS(mtr) ((((mtr) >> 5) & 0x1) ? 8 : 4) +#define MTR_DRAM_BANKS_ADDR_BITS(mtr) ((MTR_DRAM_BANKS(mtr) == 8) ? 3 : 2) +#define MTR_DIMM_RANK(mtr) (((mtr) >> 4) & 0x1) +#define MTR_DIMM_RANK_ADDR_BITS(mtr) (MTR_DIMM_RANK(mtr) ? 2 : 1) +#define MTR_DIMM_ROWS(mtr) (((mtr) >> 2) & 0x3) +#define MTR_DIMM_ROWS_ADDR_BITS(mtr) (MTR_DIMM_ROWS(mtr) + 13) +#define MTR_DIMM_COLS(mtr) ((mtr) & 0x3) +#define MTR_DIMM_COLS_ADDR_BITS(mtr) (MTR_DIMM_COLS(mtr) + 10) + +#ifdef CONFIG_EDAC_DEBUG +static char *numrow_toString[] = { + "8,192 - 13 rows", + "16,384 - 14 rows", + "32,768 - 15 rows", + "reserved" +}; + +static char *numcol_toString[] = { + "1,024 - 10 columns", + "2,048 - 11 columns", + "4,096 - 12 columns", + "reserved" +}; +#endif + +/* Enumeration of supported devices */ +enum i5000_chips { + I5000P = 0, + I5000V = 1, /* future */ + I5000X = 2 /* future */ +}; + +/* Device name and register DID (Device ID) */ +struct i5000_dev_info { + const char *ctl_name; /* name for this device */ + u16 fsb_mapping_errors; /* DID for the branchmap,control */ +}; + +/* Table of devices attributes supported by this driver */ +static const struct i5000_dev_info i5000_devs[] = { + [I5000P] = { + .ctl_name = "I5000", + .fsb_mapping_errors = PCI_DEVICE_ID_INTEL_I5000_DEV16, + }, +}; + +struct i5000_dimm_info { + int megabytes; /* size, 0 means not present */ + int dual_rank; +}; + +#define MAX_CHANNELS 6 /* max possible channels */ +#define MAX_CSROWS (8*2) /* max possible csrows per channel */ + +/* driver private data structure */ +struct i5000_pvt { + struct pci_dev *system_address; /* 16.0 */ + struct pci_dev *branchmap_werrors; /* 16.1 */ + struct pci_dev *fsb_error_regs; /* 16.2 */ + struct pci_dev *branch_0; /* 21.0 */ + struct pci_dev *branch_1; /* 22.0 */ + + u16 tolm; /* top of low memory */ + u64 ambase; /* AMB BAR */ + + u16 mir0, mir1, mir2; + + u16 b0_mtr[NUM_MTRS]; /* Memory Technlogy Reg */ + u16 b0_ambpresent0; /* Branch 0, Channel 0 */ + u16 b0_ambpresent1; /* Brnach 0, Channel 1 */ + + u16 b1_mtr[NUM_MTRS]; /* Memory Technlogy Reg */ + u16 b1_ambpresent0; /* Branch 1, Channel 8 */ + u16 b1_ambpresent1; /* Branch 1, Channel 1 */ + + /* DIMM infomation matrix, allocating architecture maximums */ + struct i5000_dimm_info dimm_info[MAX_CSROWS][MAX_CHANNELS]; + + /* Actual values for this controller */ + int maxch; /* Max channels */ + int maxdimmperch; /* Max DIMMs per channel */ +}; + +/* I5000 MCH error information retrieved from Hardware */ +struct i5000_error_info { + + /* These registers are always read from the MC */ + u32 ferr_fat_fbd; /* First Errors Fatal */ + u32 nerr_fat_fbd; /* Next Errors Fatal */ + u32 ferr_nf_fbd; /* First Errors Non-Fatal */ + u32 nerr_nf_fbd; /* Next Errors Non-Fatal */ + + /* These registers are input ONLY if there was a Recoverable Error */ + u32 redmemb; /* Recoverable Mem Data Error log B */ + u16 recmema; /* Recoverable Mem Error log A */ + u32 recmemb; /* Recoverable Mem Error log B */ + + /* These registers are input ONLY if there was a + * Non-Recoverable Error */ + u16 nrecmema; /* Non-Recoverable Mem log A */ + u16 nrecmemb; /* Non-Recoverable Mem log B */ + +}; + +static struct edac_pci_ctl_info *i5000_pci; + +/* + * i5000_get_error_info Retrieve the hardware error information from + * the hardware and cache it in the 'info' + * structure + */ +static void i5000_get_error_info(struct mem_ctl_info *mci, + struct i5000_error_info *info) +{ + struct i5000_pvt *pvt; + u32 value; + + pvt = mci->pvt_info; + + /* read in the 1st FATAL error register */ + pci_read_config_dword(pvt->branchmap_werrors, FERR_FAT_FBD, &value); + + /* Mask only the bits that the doc says are valid + */ + value &= (FERR_FAT_FBDCHAN | FERR_FAT_MASK); + + /* If there is an error, then read in the */ + /* NEXT FATAL error register and the Memory Error Log Register A */ + if (value & FERR_FAT_MASK) { + info->ferr_fat_fbd = value; + + /* harvest the various error data we need */ + pci_read_config_dword(pvt->branchmap_werrors, + NERR_FAT_FBD, &info->nerr_fat_fbd); + pci_read_config_word(pvt->branchmap_werrors, + NRECMEMA, &info->nrecmema); + pci_read_config_word(pvt->branchmap_werrors, + NRECMEMB, &info->nrecmemb); + + /* Clear the error bits, by writing them back */ + pci_write_config_dword(pvt->branchmap_werrors, + FERR_FAT_FBD, value); + } else { + info->ferr_fat_fbd = 0; + info->nerr_fat_fbd = 0; + info->nrecmema = 0; + info->nrecmemb = 0; + } + + /* read in the 1st NON-FATAL error register */ + pci_read_config_dword(pvt->branchmap_werrors, FERR_NF_FBD, &value); + + /* If there is an error, then read in the 1st NON-FATAL error + * register as well */ + if (value & FERR_NF_MASK) { + info->ferr_nf_fbd = value; + + /* harvest the various error data we need */ + pci_read_config_dword(pvt->branchmap_werrors, + NERR_NF_FBD, &info->nerr_nf_fbd); + pci_read_config_word(pvt->branchmap_werrors, + RECMEMA, &info->recmema); + pci_read_config_dword(pvt->branchmap_werrors, + RECMEMB, &info->recmemb); + pci_read_config_dword(pvt->branchmap_werrors, + REDMEMB, &info->redmemb); + + /* Clear the error bits, by writing them back */ + pci_write_config_dword(pvt->branchmap_werrors, + FERR_NF_FBD, value); + } else { + info->ferr_nf_fbd = 0; + info->nerr_nf_fbd = 0; + info->recmema = 0; + info->recmemb = 0; + info->redmemb = 0; + } +} + +/* + * i5000_process_fatal_error_info(struct mem_ctl_info *mci, + * struct i5000_error_info *info, + * int handle_errors); + * + * handle the Intel FATAL errors, if any + */ +static void i5000_process_fatal_error_info(struct mem_ctl_info *mci, + struct i5000_error_info *info, + int handle_errors) +{ + char msg[EDAC_MC_LABEL_LEN + 1 + 90]; + u32 allErrors; + int branch; + int channel; + int bank; + int rank; + int rdwr; + int ras, cas; + + /* mask off the Error bits that are possible */ + allErrors = (info->ferr_fat_fbd & FERR_FAT_MASK); + if (!allErrors) + return; /* if no error, return now */ + + /* ONLY ONE of the possible error bits will be set, as per the docs */ + i5000_mc_printk(mci, KERN_ERR, + "FATAL ERRORS Found!!! 1st FATAL Err Reg= 0x%x\n", + allErrors); + + branch = EXTRACT_FBDCHAN_INDX(info->ferr_fat_fbd); + channel = branch; + + /* Use the NON-Recoverable macros to extract data */ + bank = NREC_BANK(info->nrecmema); + rank = NREC_RANK(info->nrecmema); + rdwr = NREC_RDWR(info->nrecmema); + ras = NREC_RAS(info->nrecmemb); + cas = NREC_CAS(info->nrecmemb); + + debugf0("\t\tCSROW= %d Channels= %d,%d (Branch= %d " + "DRAM Bank= %d rdwr= %s ras= %d cas= %d)\n", + rank, channel, channel + 1, branch >> 1, bank, + rdwr ? "Write" : "Read", ras, cas); + + /* Only 1 bit will be on */ + if (allErrors & FERR_FAT_M1ERR) { + i5000_mc_printk(mci, KERN_ERR, + "Alert on non-redundant retry or fast " + "reset timeout\n"); + + } else if (allErrors & FERR_FAT_M2ERR) { + i5000_mc_printk(mci, KERN_ERR, + "Northbound CRC error on non-redundant " + "retry\n"); + + } else if (allErrors & FERR_FAT_M3ERR) { + i5000_mc_printk(mci, KERN_ERR, + ">Tmid Thermal event with intelligent " + "throttling disabled\n"); + } + + /* Form out message */ + snprintf(msg, sizeof(msg), + "(Branch=%d DRAM-Bank=%d RDWR=%s RAS=%d CAS=%d " + "FATAL Err=0x%x)", + branch >> 1, bank, rdwr ? "Write" : "Read", ras, cas, + allErrors); + + /* Call the helper to output message */ + edac_mc_handle_fbd_ue(mci, rank, channel, channel + 1, msg); +} + +/* + * i5000_process_fatal_error_info(struct mem_ctl_info *mci, + * struct i5000_error_info *info, + * int handle_errors); + * + * handle the Intel NON-FATAL errors, if any + */ +static void i5000_process_nonfatal_error_info(struct mem_ctl_info *mci, + struct i5000_error_info *info, + int handle_errors) +{ + char msg[EDAC_MC_LABEL_LEN + 1 + 90]; + u32 allErrors; + u32 ue_errors; + u32 ce_errors; + u32 misc_errors; + int branch; + int channel; + int bank; + int rank; + int rdwr; + int ras, cas; + + /* mask off the Error bits that are possible */ + allErrors = (info->ferr_nf_fbd & FERR_NF_MASK); + if (!allErrors) + return; /* if no error, return now */ + + /* ONLY ONE of the possible error bits will be set, as per the docs */ + i5000_mc_printk(mci, KERN_WARNING, + "NON-FATAL ERRORS Found!!! 1st NON-FATAL Err " + "Reg= 0x%x\n", allErrors); + + ue_errors = allErrors & FERR_NF_UNCORRECTABLE; + if (ue_errors) { + debugf0("\tUncorrected bits= 0x%x\n", ue_errors); + + branch = EXTRACT_FBDCHAN_INDX(info->ferr_nf_fbd); + channel = branch; + bank = NREC_BANK(info->nrecmema); + rank = NREC_RANK(info->nrecmema); + rdwr = NREC_RDWR(info->nrecmema); + ras = NREC_RAS(info->nrecmemb); + cas = NREC_CAS(info->nrecmemb); + + debugf0 + ("\t\tCSROW= %d Channels= %d,%d (Branch= %d " + "DRAM Bank= %d rdwr= %s ras= %d cas= %d)\n", + rank, channel, channel + 1, branch >> 1, bank, + rdwr ? "Write" : "Read", ras, cas); + + /* Form out message */ + snprintf(msg, sizeof(msg), + "(Branch=%d DRAM-Bank=%d RDWR=%s RAS=%d " + "CAS=%d, UE Err=0x%x)", + branch >> 1, bank, rdwr ? "Write" : "Read", ras, cas, + ue_errors); + + /* Call the helper to output message */ + edac_mc_handle_fbd_ue(mci, rank, channel, channel + 1, msg); + } + + /* Check correctable errors */ + ce_errors = allErrors & FERR_NF_CORRECTABLE; + if (ce_errors) { + debugf0("\tCorrected bits= 0x%x\n", ce_errors); + + branch = EXTRACT_FBDCHAN_INDX(info->ferr_nf_fbd); + + channel = 0; + if (REC_ECC_LOCATOR_ODD(info->redmemb)) + channel = 1; + + /* Convert channel to be based from zero, instead of + * from branch base of 0 */ + channel += branch; + + bank = REC_BANK(info->recmema); + rank = REC_RANK(info->recmema); + rdwr = REC_RDWR(info->recmema); + ras = REC_RAS(info->recmemb); + cas = REC_CAS(info->recmemb); + + debugf0("\t\tCSROW= %d Channel= %d (Branch %d " + "DRAM Bank= %d rdwr= %s ras= %d cas= %d)\n", + rank, channel, branch >> 1, bank, + rdwr ? "Write" : "Read", ras, cas); + + /* Form out message */ + snprintf(msg, sizeof(msg), + "(Branch=%d DRAM-Bank=%d RDWR=%s RAS=%d " + "CAS=%d, CE Err=0x%x)", branch >> 1, bank, + rdwr ? "Write" : "Read", ras, cas, ce_errors); + + /* Call the helper to output message */ + edac_mc_handle_fbd_ce(mci, rank, channel, msg); + } + + /* See if any of the thermal errors have fired */ + misc_errors = allErrors & FERR_NF_THERMAL; + if (misc_errors) { + i5000_printk(KERN_WARNING, "\tTHERMAL Error, bits= 0x%x\n", + misc_errors); + } + + /* See if any of the thermal errors have fired */ + misc_errors = allErrors & FERR_NF_NON_RETRY; + if (misc_errors) { + i5000_printk(KERN_WARNING, "\tNON-Retry Errors, bits= 0x%x\n", + misc_errors); + } + + /* See if any of the thermal errors have fired */ + misc_errors = allErrors & FERR_NF_NORTH_CRC; + if (misc_errors) { + i5000_printk(KERN_WARNING, + "\tNORTHBOUND CRC Error, bits= 0x%x\n", + misc_errors); + } + + /* See if any of the thermal errors have fired */ + misc_errors = allErrors & FERR_NF_SPD_PROTOCOL; + if (misc_errors) { + i5000_printk(KERN_WARNING, + "\tSPD Protocol Error, bits= 0x%x\n", + misc_errors); + } + + /* See if any of the thermal errors have fired */ + misc_errors = allErrors & FERR_NF_DIMM_SPARE; + if (misc_errors) { + i5000_printk(KERN_WARNING, "\tDIMM-Spare Error, bits= 0x%x\n", + misc_errors); + } +} + +/* + * i5000_process_error_info Process the error info that is + * in the 'info' structure, previously retrieved from hardware + */ +static void i5000_process_error_info(struct mem_ctl_info *mci, + struct i5000_error_info *info, + int handle_errors) +{ + /* First handle any fatal errors that occurred */ + i5000_process_fatal_error_info(mci, info, handle_errors); + + /* now handle any non-fatal errors that occurred */ + i5000_process_nonfatal_error_info(mci, info, handle_errors); +} + +/* + * i5000_clear_error Retrieve any error from the hardware + * but do NOT process that error. + * Used for 'clearing' out of previous errors + * Called by the Core module. + */ +static void i5000_clear_error(struct mem_ctl_info *mci) +{ + struct i5000_error_info info; + + i5000_get_error_info(mci, &info); +} + +/* + * i5000_check_error Retrieve and process errors reported by the + * hardware. Called by the Core module. + */ +static void i5000_check_error(struct mem_ctl_info *mci) +{ + struct i5000_error_info info; + debugf4("MC%d: " __FILE__ ": %s()\n", mci->mc_idx, __func__); + i5000_get_error_info(mci, &info); + i5000_process_error_info(mci, &info, 1); +} + +/* + * i5000_get_devices Find and perform 'get' operation on the MCH's + * device/functions we want to reference for this driver + * + * Need to 'get' device 16 func 1 and func 2 + */ +static int i5000_get_devices(struct mem_ctl_info *mci, int dev_idx) +{ + //const struct i5000_dev_info *i5000_dev = &i5000_devs[dev_idx]; + struct i5000_pvt *pvt; + struct pci_dev *pdev; + + pvt = mci->pvt_info; + + /* Attempt to 'get' the MCH register we want */ + pdev = NULL; + while (1) { + pdev = pci_get_device(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_I5000_DEV16, pdev); + + /* End of list, leave */ + if (pdev == NULL) { + i5000_printk(KERN_ERR, + "'system address,Process Bus' " + "device not found:" + "vendor 0x%x device 0x%x FUNC 1 " + "(broken BIOS?)\n", + PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_I5000_DEV16); + + return 1; + } + + /* Scan for device 16 func 1 */ + if (PCI_FUNC(pdev->devfn) == 1) + break; + } + + pvt->branchmap_werrors = pdev; + + /* Attempt to 'get' the MCH register we want */ + pdev = NULL; + while (1) { + pdev = pci_get_device(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_I5000_DEV16, pdev); + + if (pdev == NULL) { + i5000_printk(KERN_ERR, + "MC: 'branchmap,control,errors' " + "device not found:" + "vendor 0x%x device 0x%x Func 2 " + "(broken BIOS?)\n", + PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_I5000_DEV16); + + pci_dev_put(pvt->branchmap_werrors); + return 1; + } + + /* Scan for device 16 func 1 */ + if (PCI_FUNC(pdev->devfn) == 2) + break; + } + + pvt->fsb_error_regs = pdev; + + debugf1("System Address, processor bus- PCI Bus ID: %s %x:%x\n", + pci_name(pvt->system_address), + pvt->system_address->vendor, pvt->system_address->device); + debugf1("Branchmap, control and errors - PCI Bus ID: %s %x:%x\n", + pci_name(pvt->branchmap_werrors), + pvt->branchmap_werrors->vendor, pvt->branchmap_werrors->device); + debugf1("FSB Error Regs - PCI Bus ID: %s %x:%x\n", + pci_name(pvt->fsb_error_regs), + pvt->fsb_error_regs->vendor, pvt->fsb_error_regs->device); + + pdev = NULL; + pdev = pci_get_device(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_I5000_BRANCH_0, pdev); + + if (pdev == NULL) { + i5000_printk(KERN_ERR, + "MC: 'BRANCH 0' device not found:" + "vendor 0x%x device 0x%x Func 0 (broken BIOS?)\n", + PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_I5000_BRANCH_0); + + pci_dev_put(pvt->branchmap_werrors); + pci_dev_put(pvt->fsb_error_regs); + return 1; + } + + pvt->branch_0 = pdev; + + /* If this device claims to have more than 2 channels then + * fetch Branch 1's information + */ + if (pvt->maxch >= CHANNELS_PER_BRANCH) { + pdev = NULL; + pdev = pci_get_device(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_I5000_BRANCH_1, pdev); + + if (pdev == NULL) { + i5000_printk(KERN_ERR, + "MC: 'BRANCH 1' device not found:" + "vendor 0x%x device 0x%x Func 0 " + "(broken BIOS?)\n", + PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_I5000_BRANCH_1); + + pci_dev_put(pvt->branchmap_werrors); + pci_dev_put(pvt->fsb_error_regs); + pci_dev_put(pvt->branch_0); + return 1; + } + + pvt->branch_1 = pdev; + } + + return 0; +} + +/* + * i5000_put_devices 'put' all the devices that we have + * reserved via 'get' + */ +static void i5000_put_devices(struct mem_ctl_info *mci) +{ + struct i5000_pvt *pvt; + + pvt = mci->pvt_info; + + pci_dev_put(pvt->branchmap_werrors); /* FUNC 1 */ + pci_dev_put(pvt->fsb_error_regs); /* FUNC 2 */ + pci_dev_put(pvt->branch_0); /* DEV 21 */ + + /* Only if more than 2 channels do we release the second branch */ + if (pvt->maxch >= CHANNELS_PER_BRANCH) + pci_dev_put(pvt->branch_1); /* DEV 22 */ +} + +/* + * determine_amb_resent + * + * the information is contained in NUM_MTRS different registers + * determineing which of the NUM_MTRS requires knowing + * which channel is in question + * + * 2 branches, each with 2 channels + * b0_ambpresent0 for channel '0' + * b0_ambpresent1 for channel '1' + * b1_ambpresent0 for channel '2' + * b1_ambpresent1 for channel '3' + */ +static int determine_amb_present_reg(struct i5000_pvt *pvt, int channel) +{ + int amb_present; + + if (channel < CHANNELS_PER_BRANCH) { + if (channel & 0x1) + amb_present = pvt->b0_ambpresent1; + else + amb_present = pvt->b0_ambpresent0; + } else { + if (channel & 0x1) + amb_present = pvt->b1_ambpresent1; + else + amb_present = pvt->b1_ambpresent0; + } + + return amb_present; +} + +/* + * determine_mtr(pvt, csrow, channel) + * + * return the proper MTR register as determine by the csrow and channel desired + */ +static int determine_mtr(struct i5000_pvt *pvt, int csrow, int channel) +{ + int mtr; + + if (channel < CHANNELS_PER_BRANCH) + mtr = pvt->b0_mtr[csrow >> 1]; + else + mtr = pvt->b1_mtr[csrow >> 1]; + + return mtr; +} + +/* + */ +static void decode_mtr(int slot_row, u16 mtr) +{ + int ans; + + ans = MTR_DIMMS_PRESENT(mtr); + + debugf2("\tMTR%d=0x%x: DIMMs are %s\n", slot_row, mtr, + ans ? "Present" : "NOT Present"); + if (!ans) + return; + + debugf2("\t\tWIDTH: x%d\n", MTR_DRAM_WIDTH(mtr)); + debugf2("\t\tNUMBANK: %d bank(s)\n", MTR_DRAM_BANKS(mtr)); + debugf2("\t\tNUMRANK: %s\n", MTR_DIMM_RANK(mtr) ? "double" : "single"); + debugf2("\t\tNUMROW: %s\n", numrow_toString[MTR_DIMM_ROWS(mtr)]); + debugf2("\t\tNUMCOL: %s\n", numcol_toString[MTR_DIMM_COLS(mtr)]); +} + +static void handle_channel(struct i5000_pvt *pvt, int csrow, int channel, + struct i5000_dimm_info *dinfo) +{ + int mtr; + int amb_present_reg; + int addrBits; + + mtr = determine_mtr(pvt, csrow, channel); + if (MTR_DIMMS_PRESENT(mtr)) { + amb_present_reg = determine_amb_present_reg(pvt, channel); + + /* Determine if there is a DIMM present in this DIMM slot */ + if (amb_present_reg & (1 << (csrow >> 1))) { + dinfo->dual_rank = MTR_DIMM_RANK(mtr); + + if (!((dinfo->dual_rank == 0) && + ((csrow & 0x1) == 0x1))) { + /* Start with the number of bits for a Bank + * on the DRAM */ + addrBits = MTR_DRAM_BANKS_ADDR_BITS(mtr); + /* Add thenumber of ROW bits */ + addrBits += MTR_DIMM_ROWS_ADDR_BITS(mtr); + /* add the number of COLUMN bits */ + addrBits += MTR_DIMM_COLS_ADDR_BITS(mtr); + + addrBits += 6; /* add 64 bits per DIMM */ + addrBits -= 20; /* divide by 2^^20 */ + addrBits -= 3; /* 8 bits per bytes */ + + dinfo->megabytes = 1 << addrBits; + } + } + } +} + +/* + * calculate_dimm_size + * + * also will output a DIMM matrix map, if debug is enabled, for viewing + * how the DIMMs are populated + */ +static void calculate_dimm_size(struct i5000_pvt *pvt) +{ + struct i5000_dimm_info *dinfo; + int csrow, max_csrows; + char *p, *mem_buffer; + int space, n; + int channel; + + /* ================= Generate some debug output ================= */ + space = PAGE_SIZE; + mem_buffer = p = kmalloc(space, GFP_KERNEL); + if (p == NULL) { + i5000_printk(KERN_ERR, "MC: %s:%s() kmalloc() failed\n", + __FILE__, __func__); + return; + } + + n = snprintf(p, space, "\n"); + p += n; + space -= n; + + /* Scan all the actual CSROWS (which is # of DIMMS * 2) + * and calculate the information for each DIMM + * Start with the highest csrow first, to display it first + * and work toward the 0th csrow + */ + max_csrows = pvt->maxdimmperch * 2; + for (csrow = max_csrows - 1; csrow >= 0; csrow--) { + + /* on an odd csrow, first output a 'boundary' marker, + * then reset the message buffer */ + if (csrow & 0x1) { + n = snprintf(p, space, "---------------------------" + "--------------------------------"); + p += n; + space -= n; + debugf2("%s\n", mem_buffer); + p = mem_buffer; + space = PAGE_SIZE; + } + n = snprintf(p, space, "csrow %2d ", csrow); + p += n; + space -= n; + + for (channel = 0; channel < pvt->maxch; channel++) { + dinfo = &pvt->dimm_info[csrow][channel]; + handle_channel(pvt, csrow, channel, dinfo); + n = snprintf(p, space, "%4d MB | ", dinfo->megabytes); + p += n; + space -= n; + } + n = snprintf(p, space, "\n"); + p += n; + space -= n; + } + + /* Output the last bottom 'boundary' marker */ + n = snprintf(p, space, "---------------------------" + "--------------------------------\n"); + p += n; + space -= n; + + /* now output the 'channel' labels */ + n = snprintf(p, space, " "); + p += n; + space -= n; + for (channel = 0; channel < pvt->maxch; channel++) { + n = snprintf(p, space, "channel %d | ", channel); + p += n; + space -= n; + } + n = snprintf(p, space, "\n"); + p += n; + space -= n; + + /* output the last message and free buffer */ + debugf2("%s\n", mem_buffer); + kfree(mem_buffer); +} + +/* + * i5000_get_mc_regs read in the necessary registers and + * cache locally + * + * Fills in the private data members + */ +static void i5000_get_mc_regs(struct mem_ctl_info *mci) +{ + struct i5000_pvt *pvt; + u32 actual_tolm; + u16 limit; + int slot_row; + int maxch; + int maxdimmperch; + int way0, way1; + + pvt = mci->pvt_info; + + pci_read_config_dword(pvt->system_address, AMBASE, + (u32 *) & pvt->ambase); + pci_read_config_dword(pvt->system_address, AMBASE + sizeof(u32), + ((u32 *) & pvt->ambase) + sizeof(u32)); + + maxdimmperch = pvt->maxdimmperch; + maxch = pvt->maxch; + + debugf2("AMBASE= 0x%lx MAXCH= %d MAX-DIMM-Per-CH= %d\n", + (long unsigned int)pvt->ambase, pvt->maxch, pvt->maxdimmperch); + + /* Get the Branch Map regs */ + pci_read_config_word(pvt->branchmap_werrors, TOLM, &pvt->tolm); + pvt->tolm >>= 12; + debugf2("\nTOLM (number of 256M regions) =%u (0x%x)\n", pvt->tolm, + pvt->tolm); + + actual_tolm = pvt->tolm << 28; + debugf2("Actual TOLM byte addr=%u (0x%x)\n", actual_tolm, actual_tolm); + + pci_read_config_word(pvt->branchmap_werrors, MIR0, &pvt->mir0); + pci_read_config_word(pvt->branchmap_werrors, MIR1, &pvt->mir1); + pci_read_config_word(pvt->branchmap_werrors, MIR2, &pvt->mir2); + + /* Get the MIR[0-2] regs */ + limit = (pvt->mir0 >> 4) & 0x0FFF; + way0 = pvt->mir0 & 0x1; + way1 = pvt->mir0 & 0x2; + debugf2("MIR0: limit= 0x%x WAY1= %u WAY0= %x\n", limit, way1, way0); + limit = (pvt->mir1 >> 4) & 0x0FFF; + way0 = pvt->mir1 & 0x1; + way1 = pvt->mir1 & 0x2; + debugf2("MIR1: limit= 0x%x WAY1= %u WAY0= %x\n", limit, way1, way0); + limit = (pvt->mir2 >> 4) & 0x0FFF; + way0 = pvt->mir2 & 0x1; + way1 = pvt->mir2 & 0x2; + debugf2("MIR2: limit= 0x%x WAY1= %u WAY0= %x\n", limit, way1, way0); + + /* Get the MTR[0-3] regs */ + for (slot_row = 0; slot_row < NUM_MTRS; slot_row++) { + int where = MTR0 + (slot_row * sizeof(u32)); + + pci_read_config_word(pvt->branch_0, where, + &pvt->b0_mtr[slot_row]); + + debugf2("MTR%d where=0x%x B0 value=0x%x\n", slot_row, where, + pvt->b0_mtr[slot_row]); + + if (pvt->maxch >= CHANNELS_PER_BRANCH) { + pci_read_config_word(pvt->branch_1, where, + &pvt->b1_mtr[slot_row]); + debugf2("MTR%d where=0x%x B1 value=0x%x\n", slot_row, + where, pvt->b0_mtr[slot_row]); + } else { + pvt->b1_mtr[slot_row] = 0; + } + } + + /* Read and dump branch 0's MTRs */ + debugf2("\nMemory Technology Registers:\n"); + debugf2(" Branch 0:\n"); + for (slot_row = 0; slot_row < NUM_MTRS; slot_row++) { + decode_mtr(slot_row, pvt->b0_mtr[slot_row]); + } + pci_read_config_word(pvt->branch_0, AMB_PRESENT_0, + &pvt->b0_ambpresent0); + debugf2("\t\tAMB-Branch 0-present0 0x%x:\n", pvt->b0_ambpresent0); + pci_read_config_word(pvt->branch_0, AMB_PRESENT_1, + &pvt->b0_ambpresent1); + debugf2("\t\tAMB-Branch 0-present1 0x%x:\n", pvt->b0_ambpresent1); + + /* Only if we have 2 branchs (4 channels) */ + if (pvt->maxch < CHANNELS_PER_BRANCH) { + pvt->b1_ambpresent0 = 0; + pvt->b1_ambpresent1 = 0; + } else { + /* Read and dump branch 1's MTRs */ + debugf2(" Branch 1:\n"); + for (slot_row = 0; slot_row < NUM_MTRS; slot_row++) { + decode_mtr(slot_row, pvt->b1_mtr[slot_row]); + } + pci_read_config_word(pvt->branch_1, AMB_PRESENT_0, + &pvt->b1_ambpresent0); + debugf2("\t\tAMB-Branch 1-present0 0x%x:\n", + pvt->b1_ambpresent0); + pci_read_config_word(pvt->branch_1, AMB_PRESENT_1, + &pvt->b1_ambpresent1); + debugf2("\t\tAMB-Branch 1-present1 0x%x:\n", + pvt->b1_ambpresent1); + } + + /* Go and determine the size of each DIMM and place in an + * orderly matrix */ + calculate_dimm_size(pvt); +} + +/* + * i5000_init_csrows Initialize the 'csrows' table within + * the mci control structure with the + * addressing of memory. + * + * return: + * 0 success + * 1 no actual memory found on this MC + */ +static int i5000_init_csrows(struct mem_ctl_info *mci) +{ + struct i5000_pvt *pvt; + struct csrow_info *p_csrow; + int empty, channel_count; + int max_csrows; + int mtr; + int csrow_megs; + int channel; + int csrow; + + pvt = mci->pvt_info; + + channel_count = pvt->maxch; + max_csrows = pvt->maxdimmperch * 2; + + empty = 1; /* Assume NO memory */ + + for (csrow = 0; csrow < max_csrows; csrow++) { + p_csrow = &mci->csrows[csrow]; + + p_csrow->csrow_idx = csrow; + + /* use branch 0 for the basis */ + mtr = pvt->b0_mtr[csrow >> 1]; + + /* if no DIMMS on this row, continue */ + if (!MTR_DIMMS_PRESENT(mtr)) + continue; + + /* FAKE OUT VALUES, FIXME */ + p_csrow->first_page = 0 + csrow * 20; + p_csrow->last_page = 9 + csrow * 20; + p_csrow->page_mask = 0xFFF; + + p_csrow->grain = 8; + + csrow_megs = 0; + for (channel = 0; channel < pvt->maxch; channel++) { + csrow_megs += pvt->dimm_info[csrow][channel].megabytes; + } + + p_csrow->nr_pages = csrow_megs << 8; + + /* Assume DDR2 for now */ + p_csrow->mtype = MEM_FB_DDR2; + + /* ask what device type on this row */ + if (MTR_DRAM_WIDTH(mtr)) + p_csrow->dtype = DEV_X8; + else + p_csrow->dtype = DEV_X4; + + p_csrow->edac_mode = EDAC_S8ECD8ED; + + empty = 0; + } + + return empty; +} + +/* + * i5000_enable_error_reporting + * Turn on the memory reporting features of the hardware + */ +static void i5000_enable_error_reporting(struct mem_ctl_info *mci) +{ + struct i5000_pvt *pvt; + u32 fbd_error_mask; + + pvt = mci->pvt_info; + + /* Read the FBD Error Mask Register */ + pci_read_config_dword(pvt->branchmap_werrors, EMASK_FBD, + &fbd_error_mask); + + /* Enable with a '0' */ + fbd_error_mask &= ~(ENABLE_EMASK_ALL); + + pci_write_config_dword(pvt->branchmap_werrors, EMASK_FBD, + fbd_error_mask); +} + +/* + * i5000_get_dimm_and_channel_counts(pdev, &num_csrows, &num_channels) + * + * ask the device how many channels are present and how many CSROWS + * as well + */ +static void i5000_get_dimm_and_channel_counts(struct pci_dev *pdev, + int *num_dimms_per_channel, + int *num_channels) +{ + u8 value; + + /* Need to retrieve just how many channels and dimms per channel are + * supported on this memory controller + */ + pci_read_config_byte(pdev, MAXDIMMPERCH, &value); + *num_dimms_per_channel = (int)value *2; + + pci_read_config_byte(pdev, MAXCH, &value); + *num_channels = (int)value; +} + +/* + * i5000_probe1 Probe for ONE instance of device to see if it is + * present. + * return: + * 0 for FOUND a device + * < 0 for error code + */ +static int i5000_probe1(struct pci_dev *pdev, int dev_idx) +{ + struct mem_ctl_info *mci; + struct i5000_pvt *pvt; + int num_channels; + int num_dimms_per_channel; + int num_csrows; + + debugf0("MC: " __FILE__ ": %s(), pdev bus %u dev=0x%x fn=0x%x\n", + __func__, + pdev->bus->number, + PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); + + /* We only are looking for func 0 of the set */ + if (PCI_FUNC(pdev->devfn) != 0) + return -ENODEV; + + /* make sure error reporting method is sane */ + switch (edac_op_state) { + case EDAC_OPSTATE_POLL: + case EDAC_OPSTATE_NMI: + break; + default: + edac_op_state = EDAC_OPSTATE_POLL; + break; + } + + /* Ask the devices for the number of CSROWS and CHANNELS so + * that we can calculate the memory resources, etc + * + * The Chipset will report what it can handle which will be greater + * or equal to what the motherboard manufacturer will implement. + * + * As we don't have a motherboard identification routine to determine + * actual number of slots/dimms per channel, we thus utilize the + * resource as specified by the chipset. Thus, we might have + * have more DIMMs per channel than actually on the mobo, but this + * allows the driver to support upto the chipset max, without + * some fancy mobo determination. + */ + i5000_get_dimm_and_channel_counts(pdev, &num_dimms_per_channel, + &num_channels); + num_csrows = num_dimms_per_channel * 2; + + debugf0("MC: %s(): Number of - Channels= %d DIMMS= %d CSROWS= %d\n", + __func__, num_channels, num_dimms_per_channel, num_csrows); + + /* allocate a new MC control structure */ + mci = edac_mc_alloc(sizeof(*pvt), num_csrows, num_channels, 0); + + if (mci == NULL) + return -ENOMEM; + + debugf0("MC: " __FILE__ ": %s(): mci = %p\n", __func__, mci); + + mci->dev = &pdev->dev; /* record ptr to the generic device */ + + pvt = mci->pvt_info; + pvt->system_address = pdev; /* Record this device in our private */ + pvt->maxch = num_channels; + pvt->maxdimmperch = num_dimms_per_channel; + + /* 'get' the pci devices we want to reserve for our use */ + if (i5000_get_devices(mci, dev_idx)) + goto fail0; + + /* Time to get serious */ + i5000_get_mc_regs(mci); /* retrieve the hardware registers */ + + mci->mc_idx = 0; + mci->mtype_cap = MEM_FLAG_FB_DDR2; + mci->edac_ctl_cap = EDAC_FLAG_NONE; + mci->edac_cap = EDAC_FLAG_NONE; + mci->mod_name = "i5000_edac.c"; + mci->mod_ver = I5000_REVISION; + mci->ctl_name = i5000_devs[dev_idx].ctl_name; + mci->dev_name = pci_name(pdev); + mci->ctl_page_to_phys = NULL; + + /* Set the function pointer to an actual operation function */ + mci->edac_check = i5000_check_error; + + /* initialize the MC control structure 'csrows' table + * with the mapping and control information */ + if (i5000_init_csrows(mci)) { + debugf0("MC: Setting mci->edac_cap to EDAC_FLAG_NONE\n" + " because i5000_init_csrows() returned nonzero " + "value\n"); + mci->edac_cap = EDAC_FLAG_NONE; /* no csrows found */ + } else { + debugf1("MC: Enable error reporting now\n"); + i5000_enable_error_reporting(mci); + } + + /* add this new MC control structure to EDAC's list of MCs */ + if (edac_mc_add_mc(mci)) { + debugf0("MC: " __FILE__ + ": %s(): failed edac_mc_add_mc()\n", __func__); + /* FIXME: perhaps some code should go here that disables error + * reporting if we just enabled it + */ + goto fail1; + } + + i5000_clear_error(mci); + + /* allocating generic PCI control info */ + i5000_pci = edac_pci_create_generic_ctl(&pdev->dev, EDAC_MOD_STR); + if (!i5000_pci) { + printk(KERN_WARNING + "%s(): Unable to create PCI control\n", + __func__); + printk(KERN_WARNING + "%s(): PCI error report via EDAC not setup\n", + __func__); + } + + return 0; + + /* Error exit unwinding stack */ +fail1: + + i5000_put_devices(mci); + +fail0: + edac_mc_free(mci); + return -ENODEV; +} + +/* + * i5000_init_one constructor for one instance of device + * + * returns: + * negative on error + * count (>= 0) + */ +static int __devinit i5000_init_one(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + int rc; + + debugf0("MC: " __FILE__ ": %s()\n", __func__); + + /* wake up device */ + rc = pci_enable_device(pdev); + if (rc == -EIO) + return rc; + + /* now probe and enable the device */ + return i5000_probe1(pdev, id->driver_data); +} + +/* + * i5000_remove_one destructor for one instance of device + * + */ +static void __devexit i5000_remove_one(struct pci_dev *pdev) +{ + struct mem_ctl_info *mci; + + debugf0(__FILE__ ": %s()\n", __func__); + + if (i5000_pci) + edac_pci_release_generic_ctl(i5000_pci); + + if ((mci = edac_mc_del_mc(&pdev->dev)) == NULL) + return; + + /* retrieve references to resources, and free those resources */ + i5000_put_devices(mci); + + edac_mc_free(mci); +} + +/* + * pci_device_id table for which devices we are looking for + * + * The "E500P" device is the first device supported. + */ +static const struct pci_device_id i5000_pci_tbl[] __devinitdata = { + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_I5000_DEV16), + .driver_data = I5000P}, + + {0,} /* 0 terminated list. */ +}; + +MODULE_DEVICE_TABLE(pci, i5000_pci_tbl); + +/* + * i5000_driver pci_driver structure for this module + * + */ +static struct pci_driver i5000_driver = { + .name = __stringify(KBUILD_BASENAME), + .probe = i5000_init_one, + .remove = __devexit_p(i5000_remove_one), + .id_table = i5000_pci_tbl, +}; + +/* + * i5000_init Module entry function + * Try to initialize this module for its devices + */ +static int __init i5000_init(void) +{ + int pci_rc; + + debugf2("MC: " __FILE__ ": %s()\n", __func__); + + pci_rc = pci_register_driver(&i5000_driver); + + return (pci_rc < 0) ? pci_rc : 0; +} + +/* + * i5000_exit() Module exit function + * Unregister the driver + */ +static void __exit i5000_exit(void) +{ + debugf2("MC: " __FILE__ ": %s()\n", __func__); + pci_unregister_driver(&i5000_driver); +} + +module_init(i5000_init); +module_exit(i5000_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR + ("Linux Networx (http://lnxi.com) Doug Thompson <norsk5@xmission.com>"); +MODULE_DESCRIPTION("MC Driver for Intel I5000 memory controllers - " + I5000_REVISION); +module_param(edac_op_state, int, 0444); +MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI"); diff --git a/drivers/edac/i82443bxgx_edac.c b/drivers/edac/i82443bxgx_edac.c new file mode 100644 index 000000000000..83bfe37c4bbb --- /dev/null +++ b/drivers/edac/i82443bxgx_edac.c @@ -0,0 +1,402 @@ +/* + * Intel 82443BX/GX (440BX/GX chipset) Memory Controller EDAC kernel + * module (C) 2006 Tim Small + * + * This file may be distributed under the terms of the GNU General + * Public License. + * + * Written by Tim Small <tim@buttersideup.com>, based on work by Linux + * Networx, Thayne Harbaugh, Dan Hollis <goemon at anime dot net> and + * others. + * + * 440GX fix by Jason Uhlenkott <juhlenko@akamai.com>. + * + * Written with reference to 82443BX Host Bridge Datasheet: + * http://www.intel.com/design/chipsets/440/documentation.htm + * references to this document given in []. + * + * This module doesn't support the 440LX, but it may be possible to + * make it do so (the 440LX's register definitions are different, but + * not completely so - I haven't studied them in enough detail to know + * how easy this would be). + */ + +#include <linux/module.h> +#include <linux/init.h> + +#include <linux/pci.h> +#include <linux/pci_ids.h> + +#include <linux/slab.h> + +#include "edac_core.h" + +#define I82443_REVISION "0.1" + +#define EDAC_MOD_STR "i82443bxgx_edac" + +/* The 82443BX supports SDRAM, or EDO (EDO for mobile only), "Memory + * Size: 8 MB to 512 MB (1GB with Registered DIMMs) with eight memory + * rows" "The 82443BX supports multiple-bit error detection and + * single-bit error correction when ECC mode is enabled and + * single/multi-bit error detection when correction is disabled. + * During writes to the DRAM, the 82443BX generates ECC for the data + * on a QWord basis. Partial QWord writes require a read-modify-write + * cycle when ECC is enabled." +*/ + +/* "Additionally, the 82443BX ensures that the data is corrected in + * main memory so that accumulation of errors is prevented. Another + * error within the same QWord would result in a double-bit error + * which is unrecoverable. This is known as hardware scrubbing since + * it requires no software intervention to correct the data in memory." + */ + +/* [Also see page 100 (section 4.3), "DRAM Interface"] + * [Also see page 112 (section 4.6.1.4), ECC] + */ + +#define I82443BXGX_NR_CSROWS 8 +#define I82443BXGX_NR_CHANS 1 +#define I82443BXGX_NR_DIMMS 4 + +/* 82443 PCI Device 0 */ +#define I82443BXGX_NBXCFG 0x50 /* 32bit register starting at this PCI + * config space offset */ +#define I82443BXGX_NBXCFG_OFFSET_NON_ECCROW 24 /* Array of bits, zero if + * row is non-ECC */ +#define I82443BXGX_NBXCFG_OFFSET_DRAM_FREQ 12 /* 2 bits,00=100MHz,10=66 MHz */ + +#define I82443BXGX_NBXCFG_OFFSET_DRAM_INTEGRITY 7 /* 2 bits: */ +#define I82443BXGX_NBXCFG_INTEGRITY_NONE 0x0 /* 00 = Non-ECC */ +#define I82443BXGX_NBXCFG_INTEGRITY_EC 0x1 /* 01 = EC (only) */ +#define I82443BXGX_NBXCFG_INTEGRITY_ECC 0x2 /* 10 = ECC */ +#define I82443BXGX_NBXCFG_INTEGRITY_SCRUB 0x3 /* 11 = ECC + HW Scrub */ + +#define I82443BXGX_NBXCFG_OFFSET_ECC_DIAG_ENABLE 6 + +/* 82443 PCI Device 0 */ +#define I82443BXGX_EAP 0x80 /* 32bit register starting at this PCI + * config space offset, Error Address + * Pointer Register */ +#define I82443BXGX_EAP_OFFSET_EAP 12 /* High 20 bits of error address */ +#define I82443BXGX_EAP_OFFSET_MBE BIT(1) /* Err at EAP was multi-bit (W1TC) */ +#define I82443BXGX_EAP_OFFSET_SBE BIT(0) /* Err at EAP was single-bit (W1TC) */ + +#define I82443BXGX_ERRCMD 0x90 /* 8bit register starting at this PCI + * config space offset. */ +#define I82443BXGX_ERRCMD_OFFSET_SERR_ON_MBE BIT(1) /* 1 = enable */ +#define I82443BXGX_ERRCMD_OFFSET_SERR_ON_SBE BIT(0) /* 1 = enable */ + +#define I82443BXGX_ERRSTS 0x91 /* 16bit register starting at this PCI + * config space offset. */ +#define I82443BXGX_ERRSTS_OFFSET_MBFRE 5 /* 3 bits - first err row multibit */ +#define I82443BXGX_ERRSTS_OFFSET_MEF BIT(4) /* 1 = MBE occurred */ +#define I82443BXGX_ERRSTS_OFFSET_SBFRE 1 /* 3 bits - first err row singlebit */ +#define I82443BXGX_ERRSTS_OFFSET_SEF BIT(0) /* 1 = SBE occurred */ + +#define I82443BXGX_DRAMC 0x57 /* 8bit register starting at this PCI + * config space offset. */ +#define I82443BXGX_DRAMC_OFFSET_DT 3 /* 2 bits, DRAM Type */ +#define I82443BXGX_DRAMC_DRAM_IS_EDO 0 /* 00 = EDO */ +#define I82443BXGX_DRAMC_DRAM_IS_SDRAM 1 /* 01 = SDRAM */ +#define I82443BXGX_DRAMC_DRAM_IS_RSDRAM 2 /* 10 = Registered SDRAM */ + +#define I82443BXGX_DRB 0x60 /* 8x 8bit registers starting at this PCI + * config space offset. */ + +/* FIXME - don't poll when ECC disabled? */ + +struct i82443bxgx_edacmc_error_info { + u32 eap; +}; + +static struct edac_pci_ctl_info *i82443bxgx_pci; + +static void i82443bxgx_edacmc_get_error_info(struct mem_ctl_info *mci, + struct i82443bxgx_edacmc_error_info + *info) +{ + struct pci_dev *pdev; + pdev = to_pci_dev(mci->dev); + pci_read_config_dword(pdev, I82443BXGX_EAP, &info->eap); + if (info->eap & I82443BXGX_EAP_OFFSET_SBE) + /* Clear error to allow next error to be reported [p.61] */ + pci_write_bits32(pdev, I82443BXGX_EAP, + I82443BXGX_EAP_OFFSET_SBE, + I82443BXGX_EAP_OFFSET_SBE); + + if (info->eap & I82443BXGX_EAP_OFFSET_MBE) + /* Clear error to allow next error to be reported [p.61] */ + pci_write_bits32(pdev, I82443BXGX_EAP, + I82443BXGX_EAP_OFFSET_MBE, + I82443BXGX_EAP_OFFSET_MBE); +} + +static int i82443bxgx_edacmc_process_error_info(struct mem_ctl_info *mci, + struct + i82443bxgx_edacmc_error_info + *info, int handle_errors) +{ + int error_found = 0; + u32 eapaddr, page, pageoffset; + + /* bits 30:12 hold the 4kb block in which the error occurred + * [p.61] */ + eapaddr = (info->eap & 0xfffff000); + page = eapaddr >> PAGE_SHIFT; + pageoffset = eapaddr - (page << PAGE_SHIFT); + + if (info->eap & I82443BXGX_EAP_OFFSET_SBE) { + error_found = 1; + if (handle_errors) + edac_mc_handle_ce(mci, page, pageoffset, + /* 440BX/GX don't make syndrome information + * available */ + 0, edac_mc_find_csrow_by_page(mci, page), 0, + mci->ctl_name); + } + + if (info->eap & I82443BXGX_EAP_OFFSET_MBE) { + error_found = 1; + if (handle_errors) + edac_mc_handle_ue(mci, page, pageoffset, + edac_mc_find_csrow_by_page(mci, page), + mci->ctl_name); + } + + return error_found; +} + +static void i82443bxgx_edacmc_check(struct mem_ctl_info *mci) +{ + struct i82443bxgx_edacmc_error_info info; + + debugf1("MC%d: " __FILE__ ": %s()\n", mci->mc_idx, __func__); + i82443bxgx_edacmc_get_error_info(mci, &info); + i82443bxgx_edacmc_process_error_info(mci, &info, 1); +} + +static void i82443bxgx_init_csrows(struct mem_ctl_info *mci, + struct pci_dev *pdev, + enum edac_type edac_mode, + enum mem_type mtype) +{ + struct csrow_info *csrow; + int index; + u8 drbar, dramc; + u32 row_base, row_high_limit, row_high_limit_last; + + pci_read_config_byte(pdev, I82443BXGX_DRAMC, &dramc); + row_high_limit_last = 0; + for (index = 0; index < mci->nr_csrows; index++) { + csrow = &mci->csrows[index]; + pci_read_config_byte(pdev, I82443BXGX_DRB + index, &drbar); + debugf1("MC%d: " __FILE__ ": %s() Row=%d DRB = %#0x\n", + mci->mc_idx, __func__, index, drbar); + row_high_limit = ((u32) drbar << 23); + /* find the DRAM Chip Select Base address and mask */ + debugf1("MC%d: " __FILE__ ": %s() Row=%d, " + "Boundry Address=%#0x, Last = %#0x \n", + mci->mc_idx, __func__, index, row_high_limit, + row_high_limit_last); + + /* 440GX goes to 2GB, represented with a DRB of 0. */ + if (row_high_limit_last && !row_high_limit) + row_high_limit = 1UL << 31; + + /* This row is empty [p.49] */ + if (row_high_limit == row_high_limit_last) + continue; + row_base = row_high_limit_last; + csrow->first_page = row_base >> PAGE_SHIFT; + csrow->last_page = (row_high_limit >> PAGE_SHIFT) - 1; + csrow->nr_pages = csrow->last_page - csrow->first_page + 1; + /* EAP reports in 4kilobyte granularity [61] */ + csrow->grain = 1 << 12; + csrow->mtype = mtype; + /* I don't think 440BX can tell you device type? FIXME? */ + csrow->dtype = DEV_UNKNOWN; + /* Mode is global to all rows on 440BX */ + csrow->edac_mode = edac_mode; + row_high_limit_last = row_high_limit; + } +} + +static int i82443bxgx_edacmc_probe1(struct pci_dev *pdev, int dev_idx) +{ + struct mem_ctl_info *mci; + u8 dramc; + u32 nbxcfg, ecc_mode; + enum mem_type mtype; + enum edac_type edac_mode; + + debugf0("MC: " __FILE__ ": %s()\n", __func__); + + /* Something is really hosed if PCI config space reads from + * the MC aren't working. + */ + if (pci_read_config_dword(pdev, I82443BXGX_NBXCFG, &nbxcfg)) + return -EIO; + + mci = edac_mc_alloc(0, I82443BXGX_NR_CSROWS, I82443BXGX_NR_CHANS, 0); + + if (mci == NULL) + return -ENOMEM; + + debugf0("MC: " __FILE__ ": %s(): mci = %p\n", __func__, mci); + mci->dev = &pdev->dev; + mci->mtype_cap = MEM_FLAG_EDO | MEM_FLAG_SDR | MEM_FLAG_RDR; + mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_EC | EDAC_FLAG_SECDED; + pci_read_config_byte(pdev, I82443BXGX_DRAMC, &dramc); + switch ((dramc >> I82443BXGX_DRAMC_OFFSET_DT) & (BIT(0) | BIT(1))) { + case I82443BXGX_DRAMC_DRAM_IS_EDO: + mtype = MEM_EDO; + break; + case I82443BXGX_DRAMC_DRAM_IS_SDRAM: + mtype = MEM_SDR; + break; + case I82443BXGX_DRAMC_DRAM_IS_RSDRAM: + mtype = MEM_RDR; + break; + default: + debugf0("Unknown/reserved DRAM type value " + "in DRAMC register!\n"); + mtype = -MEM_UNKNOWN; + } + + if ((mtype == MEM_SDR) || (mtype == MEM_RDR)) + mci->edac_cap = mci->edac_ctl_cap; + else + mci->edac_cap = EDAC_FLAG_NONE; + + mci->scrub_cap = SCRUB_FLAG_HW_SRC; + pci_read_config_dword(pdev, I82443BXGX_NBXCFG, &nbxcfg); + ecc_mode = ((nbxcfg >> I82443BXGX_NBXCFG_OFFSET_DRAM_INTEGRITY) & + (BIT(0) | BIT(1))); + + mci->scrub_mode = (ecc_mode == I82443BXGX_NBXCFG_INTEGRITY_SCRUB) + ? SCRUB_HW_SRC : SCRUB_NONE; + + switch (ecc_mode) { + case I82443BXGX_NBXCFG_INTEGRITY_NONE: + edac_mode = EDAC_NONE; + break; + case I82443BXGX_NBXCFG_INTEGRITY_EC: + edac_mode = EDAC_EC; + break; + case I82443BXGX_NBXCFG_INTEGRITY_ECC: + case I82443BXGX_NBXCFG_INTEGRITY_SCRUB: + edac_mode = EDAC_SECDED; + break; + default: + debugf0("%s(): Unknown/reserved ECC state " + "in NBXCFG register!\n", __func__); + edac_mode = EDAC_UNKNOWN; + break; + } + + i82443bxgx_init_csrows(mci, pdev, edac_mode, mtype); + + /* Many BIOSes don't clear error flags on boot, so do this + * here, or we get "phantom" errors occuring at module-load + * time. */ + pci_write_bits32(pdev, I82443BXGX_EAP, + (I82443BXGX_EAP_OFFSET_SBE | + I82443BXGX_EAP_OFFSET_MBE), + (I82443BXGX_EAP_OFFSET_SBE | + I82443BXGX_EAP_OFFSET_MBE)); + + mci->mod_name = EDAC_MOD_STR; + mci->mod_ver = I82443_REVISION; + mci->ctl_name = "I82443BXGX"; + mci->dev_name = pci_name(pdev); + mci->edac_check = i82443bxgx_edacmc_check; + mci->ctl_page_to_phys = NULL; + + if (edac_mc_add_mc(mci)) { + debugf3("%s(): failed edac_mc_add_mc()\n", __func__); + goto fail; + } + + /* allocating generic PCI control info */ + i82443bxgx_pci = edac_pci_create_generic_ctl(&pdev->dev, EDAC_MOD_STR); + if (!i82443bxgx_pci) { + printk(KERN_WARNING + "%s(): Unable to create PCI control\n", + __func__); + printk(KERN_WARNING + "%s(): PCI error report via EDAC not setup\n", + __func__); + } + + debugf3("MC: " __FILE__ ": %s(): success\n", __func__); + return 0; + +fail: + edac_mc_free(mci); + return -ENODEV; +} + +EXPORT_SYMBOL_GPL(i82443bxgx_edacmc_probe1); + +/* returns count (>= 0), or negative on error */ +static int __devinit i82443bxgx_edacmc_init_one(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + debugf0("MC: " __FILE__ ": %s()\n", __func__); + + /* don't need to call pci_device_enable() */ + return i82443bxgx_edacmc_probe1(pdev, ent->driver_data); +} + +static void __devexit i82443bxgx_edacmc_remove_one(struct pci_dev *pdev) +{ + struct mem_ctl_info *mci; + + debugf0(__FILE__ ": %s()\n", __func__); + + if (i82443bxgx_pci) + edac_pci_release_generic_ctl(i82443bxgx_pci); + + if ((mci = edac_mc_del_mc(&pdev->dev)) == NULL) + return; + + edac_mc_free(mci); +} + +EXPORT_SYMBOL_GPL(i82443bxgx_edacmc_remove_one); + +static const struct pci_device_id i82443bxgx_pci_tbl[] __devinitdata = { + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443BX_0)}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443BX_2)}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443GX_0)}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443GX_2)}, + {0,} /* 0 terminated list. */ +}; + +MODULE_DEVICE_TABLE(pci, i82443bxgx_pci_tbl); + +static struct pci_driver i82443bxgx_edacmc_driver = { + .name = EDAC_MOD_STR, + .probe = i82443bxgx_edacmc_init_one, + .remove = __devexit_p(i82443bxgx_edacmc_remove_one), + .id_table = i82443bxgx_pci_tbl, +}; + +static int __init i82443bxgx_edacmc_init(void) +{ + return pci_register_driver(&i82443bxgx_edacmc_driver); +} + +static void __exit i82443bxgx_edacmc_exit(void) +{ + pci_unregister_driver(&i82443bxgx_edacmc_driver); +} + +module_init(i82443bxgx_edacmc_init); +module_exit(i82443bxgx_edacmc_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Tim Small <tim@buttersideup.com> - WPAD"); +MODULE_DESCRIPTION("EDAC MC support for Intel 82443BX/GX memory controllers"); diff --git a/drivers/edac/i82860_edac.c b/drivers/edac/i82860_edac.c index e4bb298e613f..f5ecd2c4d813 100644 --- a/drivers/edac/i82860_edac.c +++ b/drivers/edac/i82860_edac.c @@ -14,9 +14,9 @@ #include <linux/pci.h> #include <linux/pci_ids.h> #include <linux/slab.h> -#include "edac_mc.h" +#include "edac_core.h" -#define I82860_REVISION " Ver: 2.0.1 " __DATE__ +#define I82860_REVISION " Ver: 2.0.2 " __DATE__ #define EDAC_MOD_STR "i82860_edac" #define i82860_printk(level, fmt, arg...) \ @@ -54,16 +54,16 @@ struct i82860_error_info { static const struct i82860_dev_info i82860_devs[] = { [I82860] = { - .ctl_name = "i82860" - }, + .ctl_name = "i82860"}, }; -static struct pci_dev *mci_pdev = NULL; /* init dev: in case that AGP code +static struct pci_dev *mci_pdev; /* init dev: in case that AGP code * has already registered driver */ +static struct edac_pci_ctl_info *i82860_pci; static void i82860_get_error_info(struct mem_ctl_info *mci, - struct i82860_error_info *info) + struct i82860_error_info *info) { struct pci_dev *pdev; @@ -91,13 +91,13 @@ static void i82860_get_error_info(struct mem_ctl_info *mci, if ((info->errsts ^ info->errsts2) & 0x0003) { pci_read_config_dword(pdev, I82860_EAP, &info->eap); - pci_read_config_word(pdev, I82860_DERRCTL_STS, - &info->derrsyn); + pci_read_config_word(pdev, I82860_DERRCTL_STS, &info->derrsyn); } } static int i82860_process_error_info(struct mem_ctl_info *mci, - struct i82860_error_info *info, int handle_errors) + struct i82860_error_info *info, + int handle_errors) { int row; @@ -136,7 +136,7 @@ static void i82860_check(struct mem_ctl_info *mci) static void i82860_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev) { unsigned long last_cumul_size; - u16 mchcfg_ddim; /* DRAM Data Integrity Mode 0=none, 2=edac */ + u16 mchcfg_ddim; /* DRAM Data Integrity Mode 0=none, 2=edac */ u16 value; u32 cumul_size; struct csrow_info *csrow; @@ -155,7 +155,7 @@ static void i82860_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev) csrow = &mci->csrows[index]; pci_read_config_word(pdev, I82860_GBA + index * 2, &value); cumul_size = (value & I82860_GBA_MASK) << - (I82860_GBA_SHIFT - PAGE_SHIFT); + (I82860_GBA_SHIFT - PAGE_SHIFT); debugf3("%s(): (%d) cumul_size 0x%x\n", __func__, index, cumul_size); @@ -186,7 +186,7 @@ static int i82860_probe1(struct pci_dev *pdev, int dev_idx) the channel and the GRA registers map to physical devices so we are going to make 1 channel for group. */ - mci = edac_mc_alloc(0, 16, 1); + mci = edac_mc_alloc(0, 16, 1, 0); if (!mci) return -ENOMEM; @@ -200,19 +200,31 @@ static int i82860_probe1(struct pci_dev *pdev, int dev_idx) mci->mod_name = EDAC_MOD_STR; mci->mod_ver = I82860_REVISION; mci->ctl_name = i82860_devs[dev_idx].ctl_name; + mci->dev_name = pci_name(pdev); mci->edac_check = i82860_check; mci->ctl_page_to_phys = NULL; i82860_init_csrows(mci, pdev); - i82860_get_error_info(mci, &discard); /* clear counters */ + i82860_get_error_info(mci, &discard); /* clear counters */ /* Here we assume that we will never see multiple instances of this * type of memory controller. The ID is therefore hardcoded to 0. */ - if (edac_mc_add_mc(mci,0)) { + if (edac_mc_add_mc(mci)) { debugf3("%s(): failed edac_mc_add_mc()\n", __func__); goto fail; } + /* allocating generic PCI control info */ + i82860_pci = edac_pci_create_generic_ctl(&pdev->dev, EDAC_MOD_STR); + if (!i82860_pci) { + printk(KERN_WARNING + "%s(): Unable to create PCI control\n", + __func__); + printk(KERN_WARNING + "%s(): PCI error report via EDAC not setup\n", + __func__); + } + /* get this far and it's successful */ debugf3("%s(): success\n", __func__); @@ -225,7 +237,7 @@ fail: /* returns count (>= 0), or negative on error */ static int __devinit i82860_init_one(struct pci_dev *pdev, - const struct pci_device_id *ent) + const struct pci_device_id *ent) { int rc; @@ -249,6 +261,9 @@ static void __devexit i82860_remove_one(struct pci_dev *pdev) debugf0("%s()\n", __func__); + if (i82860_pci) + edac_pci_release_generic_ctl(i82860_pci); + if ((mci = edac_mc_del_mc(&pdev->dev)) == NULL) return; @@ -257,12 +272,11 @@ static void __devexit i82860_remove_one(struct pci_dev *pdev) static const struct pci_device_id i82860_pci_tbl[] __devinitdata = { { - PCI_VEND_DEV(INTEL, 82860_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0, - I82860 - }, + PCI_VEND_DEV(INTEL, 82860_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0, + I82860}, { - 0, - } /* 0 terminated list. */ + 0, + } /* 0 terminated list. */ }; MODULE_DEVICE_TABLE(pci, i82860_pci_tbl); @@ -329,5 +343,5 @@ module_exit(i82860_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com) " - "Ben Woodard <woodard@redhat.com>"); + "Ben Woodard <woodard@redhat.com>"); MODULE_DESCRIPTION("ECC support for Intel 82860 memory hub controllers"); diff --git a/drivers/edac/i82875p_edac.c b/drivers/edac/i82875p_edac.c index 2800b3e614a9..031abadc439a 100644 --- a/drivers/edac/i82875p_edac.c +++ b/drivers/edac/i82875p_edac.c @@ -18,9 +18,9 @@ #include <linux/pci.h> #include <linux/pci_ids.h> #include <linux/slab.h> -#include "edac_mc.h" +#include "edac_core.h" -#define I82875P_REVISION " Ver: 2.0.1 " __DATE__ +#define I82875P_REVISION " Ver: 2.0.2 " __DATE__ #define EDAC_MOD_STR "i82875p_edac" #define i82875p_printk(level, fmt, arg...) \ @@ -174,18 +174,19 @@ struct i82875p_error_info { static const struct i82875p_dev_info i82875p_devs[] = { [I82875P] = { - .ctl_name = "i82875p" - }, + .ctl_name = "i82875p"}, }; -static struct pci_dev *mci_pdev = NULL; /* init dev: in case that AGP code has +static struct pci_dev *mci_pdev; /* init dev: in case that AGP code has * already registered driver */ static int i82875p_registered = 1; +static struct edac_pci_ctl_info *i82875p_pci; + static void i82875p_get_error_info(struct mem_ctl_info *mci, - struct i82875p_error_info *info) + struct i82875p_error_info *info) { struct pci_dev *pdev; @@ -197,38 +198,39 @@ static void i82875p_get_error_info(struct mem_ctl_info *mci, * overwritten by UE. */ pci_read_config_word(pdev, I82875P_ERRSTS, &info->errsts); + + if (!(info->errsts & 0x0081)) + return; + pci_read_config_dword(pdev, I82875P_EAP, &info->eap); pci_read_config_byte(pdev, I82875P_DES, &info->des); pci_read_config_byte(pdev, I82875P_DERRSYN, &info->derrsyn); pci_read_config_word(pdev, I82875P_ERRSTS, &info->errsts2); - pci_write_bits16(pdev, I82875P_ERRSTS, 0x0081, 0x0081); - /* * If the error is the same then we can for both reads then * the first set of reads is valid. If there is a change then * there is a CE no info and the second set of reads is valid * and should be UE info. */ - if (!(info->errsts2 & 0x0081)) - return; - if ((info->errsts ^ info->errsts2) & 0x0081) { pci_read_config_dword(pdev, I82875P_EAP, &info->eap); pci_read_config_byte(pdev, I82875P_DES, &info->des); - pci_read_config_byte(pdev, I82875P_DERRSYN, - &info->derrsyn); + pci_read_config_byte(pdev, I82875P_DERRSYN, &info->derrsyn); } + + pci_write_bits16(pdev, I82875P_ERRSTS, 0x0081, 0x0081); } static int i82875p_process_error_info(struct mem_ctl_info *mci, - struct i82875p_error_info *info, int handle_errors) + struct i82875p_error_info *info, + int handle_errors) { int row, multi_chan; multi_chan = mci->csrows[0].nr_channels - 1; - if (!(info->errsts2 & 0x0081)) + if (!(info->errsts & 0x0081)) return 0; if (!handle_errors) @@ -263,10 +265,12 @@ static void i82875p_check(struct mem_ctl_info *mci) /* Return 0 on success or 1 on failure. */ static int i82875p_setup_overfl_dev(struct pci_dev *pdev, - struct pci_dev **ovrfl_pdev, void __iomem **ovrfl_window) + struct pci_dev **ovrfl_pdev, + void __iomem **ovrfl_window) { struct pci_dev *dev; void __iomem *window; + int err; *ovrfl_pdev = NULL; *ovrfl_window = NULL; @@ -284,14 +288,19 @@ static int i82875p_setup_overfl_dev(struct pci_dev *pdev, if (dev == NULL) return 1; - pci_bus_add_device(dev); + err = pci_bus_add_device(dev); + if (err) { + i82875p_printk(KERN_ERR, + "%s(): pci_bus_add_device() Failed\n", + __func__); + } } *ovrfl_pdev = dev; if (pci_enable_device(dev)) { i82875p_printk(KERN_ERR, "%s(): Failed to enable overflow " - "device\n", __func__); + "device\n", __func__); return 1; } @@ -307,7 +316,7 @@ static int i82875p_setup_overfl_dev(struct pci_dev *pdev, if (window == NULL) { i82875p_printk(KERN_ERR, "%s(): Failed to ioremap bar6\n", - __func__); + __func__); goto fail1; } @@ -325,21 +334,20 @@ fail0: return 1; } - /* Return 1 if dual channel mode is active. Else return 0. */ static inline int dual_channel_active(u32 drc) { return (drc >> 21) & 0x1; } - static void i82875p_init_csrows(struct mem_ctl_info *mci, - struct pci_dev *pdev, void __iomem *ovrfl_window, u32 drc) + struct pci_dev *pdev, + void __iomem * ovrfl_window, u32 drc) { struct csrow_info *csrow; unsigned long last_cumul_size; u8 value; - u32 drc_ddim; /* DRAM Data Integrity Mode 0=none,2=edac */ + u32 drc_ddim; /* DRAM Data Integrity Mode 0=none,2=edac */ u32 cumul_size; int index; @@ -392,7 +400,7 @@ static int i82875p_probe1(struct pci_dev *pdev, int dev_idx) drc = readl(ovrfl_window + I82875P_DRC); nr_chans = dual_channel_active(drc) + 1; mci = edac_mc_alloc(sizeof(*pvt), I82875P_NR_CSROWS(nr_chans), - nr_chans); + nr_chans, 0); if (!mci) { rc = -ENOMEM; @@ -407,23 +415,35 @@ static int i82875p_probe1(struct pci_dev *pdev, int dev_idx) mci->mod_name = EDAC_MOD_STR; mci->mod_ver = I82875P_REVISION; mci->ctl_name = i82875p_devs[dev_idx].ctl_name; + mci->dev_name = pci_name(pdev); mci->edac_check = i82875p_check; mci->ctl_page_to_phys = NULL; debugf3("%s(): init pvt\n", __func__); - pvt = (struct i82875p_pvt *) mci->pvt_info; + pvt = (struct i82875p_pvt *)mci->pvt_info; pvt->ovrfl_pdev = ovrfl_pdev; pvt->ovrfl_window = ovrfl_window; i82875p_init_csrows(mci, pdev, ovrfl_window, drc); - i82875p_get_error_info(mci, &discard); /* clear counters */ + i82875p_get_error_info(mci, &discard); /* clear counters */ /* Here we assume that we will never see multiple instances of this * type of memory controller. The ID is therefore hardcoded to 0. */ - if (edac_mc_add_mc(mci,0)) { + if (edac_mc_add_mc(mci)) { debugf3("%s(): failed edac_mc_add_mc()\n", __func__); goto fail1; } + /* allocating generic PCI control info */ + i82875p_pci = edac_pci_create_generic_ctl(&pdev->dev, EDAC_MOD_STR); + if (!i82875p_pci) { + printk(KERN_WARNING + "%s(): Unable to create PCI control\n", + __func__); + printk(KERN_WARNING + "%s(): PCI error report via EDAC not setup\n", + __func__); + } + /* get this far and it's successful */ debugf3("%s(): success\n", __func__); return 0; @@ -442,7 +462,7 @@ fail0: /* returns count (>= 0), or negative on error */ static int __devinit i82875p_init_one(struct pci_dev *pdev, - const struct pci_device_id *ent) + const struct pci_device_id *ent) { int rc; @@ -467,10 +487,13 @@ static void __devexit i82875p_remove_one(struct pci_dev *pdev) debugf0("%s()\n", __func__); + if (i82875p_pci) + edac_pci_release_generic_ctl(i82875p_pci); + if ((mci = edac_mc_del_mc(&pdev->dev)) == NULL) return; - pvt = (struct i82875p_pvt *) mci->pvt_info; + pvt = (struct i82875p_pvt *)mci->pvt_info; if (pvt->ovrfl_window) iounmap(pvt->ovrfl_window); @@ -488,12 +511,11 @@ static void __devexit i82875p_remove_one(struct pci_dev *pdev) static const struct pci_device_id i82875p_pci_tbl[] __devinitdata = { { - PCI_VEND_DEV(INTEL, 82875_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0, - I82875P - }, + PCI_VEND_DEV(INTEL, 82875_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0, + I82875P}, { - 0, - } /* 0 terminated list. */ + 0, + } /* 0 terminated list. */ }; MODULE_DEVICE_TABLE(pci, i82875p_pci_tbl); @@ -517,7 +539,7 @@ static int __init i82875p_init(void) if (mci_pdev == NULL) { mci_pdev = pci_get_device(PCI_VENDOR_ID_INTEL, - PCI_DEVICE_ID_INTEL_82875_0, NULL); + PCI_DEVICE_ID_INTEL_82875_0, NULL); if (!mci_pdev) { debugf0("875p pci_get_device fail\n"); diff --git a/drivers/edac/i82975x_edac.c b/drivers/edac/i82975x_edac.c new file mode 100644 index 000000000000..0ee888456932 --- /dev/null +++ b/drivers/edac/i82975x_edac.c @@ -0,0 +1,666 @@ +/* + * Intel 82975X Memory Controller kernel module + * (C) 2007 aCarLab (India) Pvt. Ltd. (http://acarlab.com) + * (C) 2007 jetzbroadband (http://jetzbroadband.com) + * This file may be distributed under the terms of the + * GNU General Public License. + * + * Written by Arvind R. + * Copied from i82875p_edac.c source: + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/pci.h> +#include <linux/pci_ids.h> +#include <linux/slab.h> + +#include "edac_core.h" + +#define I82975X_REVISION " Ver: 1.0.0 " __DATE__ +#define EDAC_MOD_STR "i82975x_edac" + +#define i82975x_printk(level, fmt, arg...) \ + edac_printk(level, "i82975x", fmt, ##arg) + +#define i82975x_mc_printk(mci, level, fmt, arg...) \ + edac_mc_chipset_printk(mci, level, "i82975x", fmt, ##arg) + +#ifndef PCI_DEVICE_ID_INTEL_82975_0 +#define PCI_DEVICE_ID_INTEL_82975_0 0x277c +#endif /* PCI_DEVICE_ID_INTEL_82975_0 */ + +#define I82975X_NR_CSROWS(nr_chans) (8/(nr_chans)) + +/* Intel 82975X register addresses - device 0 function 0 - DRAM Controller */ +#define I82975X_EAP 0x58 /* Dram Error Address Pointer (32b) + * + * 31:7 128 byte cache-line address + * 6:1 reserved + * 0 0: CH0; 1: CH1 + */ + +#define I82975X_DERRSYN 0x5c /* Dram Error SYNdrome (8b) + * + * 7:0 DRAM ECC Syndrome + */ + +#define I82975X_DES 0x5d /* Dram ERRor DeSTination (8b) + * 0h: Processor Memory Reads + * 1h:7h reserved + * More - See Page 65 of Intel DocSheet. + */ + +#define I82975X_ERRSTS 0xc8 /* Error Status Register (16b) + * + * 15:12 reserved + * 11 Thermal Sensor Event + * 10 reserved + * 9 non-DRAM lock error (ndlock) + * 8 Refresh Timeout + * 7:2 reserved + * 1 ECC UE (multibit DRAM error) + * 0 ECC CE (singlebit DRAM error) + */ + +/* Error Reporting is supported by 3 mechanisms: + 1. DMI SERR generation ( ERRCMD ) + 2. SMI DMI generation ( SMICMD ) + 3. SCI DMI generation ( SCICMD ) +NOTE: Only ONE of the three must be enabled +*/ +#define I82975X_ERRCMD 0xca /* Error Command (16b) + * + * 15:12 reserved + * 11 Thermal Sensor Event + * 10 reserved + * 9 non-DRAM lock error (ndlock) + * 8 Refresh Timeout + * 7:2 reserved + * 1 ECC UE (multibit DRAM error) + * 0 ECC CE (singlebit DRAM error) + */ + +#define I82975X_SMICMD 0xcc /* Error Command (16b) + * + * 15:2 reserved + * 1 ECC UE (multibit DRAM error) + * 0 ECC CE (singlebit DRAM error) + */ + +#define I82975X_SCICMD 0xce /* Error Command (16b) + * + * 15:2 reserved + * 1 ECC UE (multibit DRAM error) + * 0 ECC CE (singlebit DRAM error) + */ + +#define I82975X_XEAP 0xfc /* Extended Dram Error Address Pointer (8b) + * + * 7:1 reserved + * 0 Bit32 of the Dram Error Address + */ + +#define I82975X_MCHBAR 0x44 /* + * + * 31:14 Base Addr of 16K memory-mapped + * configuration space + * 13:1 reserverd + * 0 mem-mapped config space enable + */ + +/* NOTE: Following addresses have to indexed using MCHBAR offset (44h, 32b) */ +/* Intel 82975x memory mapped register space */ + +#define I82975X_DRB_SHIFT 25 /* fixed 32MiB grain */ + +#define I82975X_DRB 0x100 /* DRAM Row Boundary (8b x 8) + * + * 7 set to 1 in highest DRB of + * channel if 4GB in ch. + * 6:2 upper boundary of rank in + * 32MB grains + * 1:0 set to 0 + */ +#define I82975X_DRB_CH0R0 0x100 +#define I82975X_DRB_CH0R1 0x101 +#define I82975X_DRB_CH0R2 0x102 +#define I82975X_DRB_CH0R3 0x103 +#define I82975X_DRB_CH1R0 0x180 +#define I82975X_DRB_CH1R1 0x181 +#define I82975X_DRB_CH1R2 0x182 +#define I82975X_DRB_CH1R3 0x183 + + +#define I82975X_DRA 0x108 /* DRAM Row Attribute (4b x 8) + * defines the PAGE SIZE to be used + * for the rank + * 7 reserved + * 6:4 row attr of odd rank, i.e. 1 + * 3 reserved + * 2:0 row attr of even rank, i.e. 0 + * + * 000 = unpopulated + * 001 = reserved + * 010 = 4KiB + * 011 = 8KiB + * 100 = 16KiB + * others = reserved + */ +#define I82975X_DRA_CH0R01 0x108 +#define I82975X_DRA_CH0R23 0x109 +#define I82975X_DRA_CH1R01 0x188 +#define I82975X_DRA_CH1R23 0x189 + + +#define I82975X_BNKARC 0x10e /* Type of device in each rank - Bank Arch (16b) + * + * 15:8 reserved + * 7:6 Rank 3 architecture + * 5:4 Rank 2 architecture + * 3:2 Rank 1 architecture + * 1:0 Rank 0 architecture + * + * 00 => x16 devices; i.e 4 banks + * 01 => x8 devices; i.e 8 banks + */ +#define I82975X_C0BNKARC 0x10e +#define I82975X_C1BNKARC 0x18e + + + +#define I82975X_DRC 0x120 /* DRAM Controller Mode0 (32b) + * + * 31:30 reserved + * 29 init complete + * 28:11 reserved, according to Intel + * 22:21 number of channels + * 00=1 01=2 in 82875 + * seems to be ECC mode + * bits in 82975 in Asus + * P5W + * 19:18 Data Integ Mode + * 00=none 01=ECC in 82875 + * 10:8 refresh mode + * 7 reserved + * 6:4 mode select + * 3:2 reserved + * 1:0 DRAM type 10=Second Revision + * DDR2 SDRAM + * 00, 01, 11 reserved + */ +#define I82975X_DRC_CH0M0 0x120 +#define I82975X_DRC_CH1M0 0x1A0 + + +#define I82975X_DRC_M1 0x124 /* DRAM Controller Mode1 (32b) + * 31 0=Standard Address Map + * 1=Enhanced Address Map + * 30:0 reserved + */ + +#define I82975X_DRC_CH0M1 0x124 +#define I82975X_DRC_CH1M1 0x1A4 + +enum i82975x_chips { + I82975X = 0, +}; + +struct i82975x_pvt { + void __iomem *mch_window; +}; + +struct i82975x_dev_info { + const char *ctl_name; +}; + +struct i82975x_error_info { + u16 errsts; + u32 eap; + u8 des; + u8 derrsyn; + u16 errsts2; + u8 chan; /* the channel is bit 0 of EAP */ + u8 xeap; /* extended eap bit */ +}; + +static const struct i82975x_dev_info i82975x_devs[] = { + [I82975X] = { + .ctl_name = "i82975x" + }, +}; + +static struct pci_dev *mci_pdev; /* init dev: in case that AGP code has + * already registered driver + */ + +static int i82975x_registered = 1; + +static void i82975x_get_error_info(struct mem_ctl_info *mci, + struct i82975x_error_info *info) +{ + struct pci_dev *pdev; + + pdev = to_pci_dev(mci->dev); + + /* + * This is a mess because there is no atomic way to read all the + * registers at once and the registers can transition from CE being + * overwritten by UE. + */ + pci_read_config_word(pdev, I82975X_ERRSTS, &info->errsts); + pci_read_config_dword(pdev, I82975X_EAP, &info->eap); + pci_read_config_byte(pdev, I82975X_XEAP, &info->xeap); + pci_read_config_byte(pdev, I82975X_DES, &info->des); + pci_read_config_byte(pdev, I82975X_DERRSYN, &info->derrsyn); + pci_read_config_word(pdev, I82975X_ERRSTS, &info->errsts2); + + pci_write_bits16(pdev, I82975X_ERRSTS, 0x0003, 0x0003); + + /* + * If the error is the same then we can for both reads then + * the first set of reads is valid. If there is a change then + * there is a CE no info and the second set of reads is valid + * and should be UE info. + */ + if (!(info->errsts2 & 0x0003)) + return; + + if ((info->errsts ^ info->errsts2) & 0x0003) { + pci_read_config_dword(pdev, I82975X_EAP, &info->eap); + pci_read_config_byte(pdev, I82975X_XEAP, &info->xeap); + pci_read_config_byte(pdev, I82975X_DES, &info->des); + pci_read_config_byte(pdev, I82975X_DERRSYN, + &info->derrsyn); + } +} + +static int i82975x_process_error_info(struct mem_ctl_info *mci, + struct i82975x_error_info *info, int handle_errors) +{ + int row, multi_chan, chan; + + multi_chan = mci->csrows[0].nr_channels - 1; + + if (!(info->errsts2 & 0x0003)) + return 0; + + if (!handle_errors) + return 1; + + if ((info->errsts ^ info->errsts2) & 0x0003) { + edac_mc_handle_ce_no_info(mci, "UE overwrote CE"); + info->errsts = info->errsts2; + } + + chan = info->eap & 1; + info->eap >>= 1; + if (info->xeap ) + info->eap |= 0x80000000; + info->eap >>= PAGE_SHIFT; + row = edac_mc_find_csrow_by_page(mci, info->eap); + + if (info->errsts & 0x0002) + edac_mc_handle_ue(mci, info->eap, 0, row, "i82975x UE"); + else + edac_mc_handle_ce(mci, info->eap, 0, info->derrsyn, row, + multi_chan ? chan : 0, + "i82975x CE"); + + return 1; +} + +static void i82975x_check(struct mem_ctl_info *mci) +{ + struct i82975x_error_info info; + + debugf1("MC%d: %s()\n", mci->mc_idx, __func__); + i82975x_get_error_info(mci, &info); + i82975x_process_error_info(mci, &info, 1); +} + +/* Return 1 if dual channel mode is active. Else return 0. */ +static int dual_channel_active(void __iomem *mch_window) +{ + /* + * We treat interleaved-symmetric configuration as dual-channel - EAP's + * bit-0 giving the channel of the error location. + * + * All other configurations are treated as single channel - the EAP's + * bit-0 will resolve ok in symmetric area of mixed + * (symmetric/asymmetric) configurations + */ + u8 drb[4][2]; + int row; + int dualch; + + for (dualch = 1, row = 0; dualch && (row < 4); row++) { + drb[row][0] = readb(mch_window + I82975X_DRB + row); + drb[row][1] = readb(mch_window + I82975X_DRB + row + 0x80); + dualch = dualch && (drb[row][0] == drb[row][1]); + } + return dualch; +} + +static enum dev_type i82975x_dram_type(void __iomem *mch_window, int rank) +{ + /* + * ASUS P5W DH either does not program this register or programs + * it wrong! + * ECC is possible on i92975x ONLY with DEV_X8 which should mean 'val' + * for each rank should be 01b - the LSB of the word should be 0x55; + * but it reads 0! + */ + return DEV_X8; +} + +static void i82975x_init_csrows(struct mem_ctl_info *mci, + struct pci_dev *pdev, void __iomem *mch_window) +{ + struct csrow_info *csrow; + unsigned long last_cumul_size; + u8 value; + u32 cumul_size; + int index; + + last_cumul_size = 0; + + /* + * 82875 comment: + * The dram row boundary (DRB) reg values are boundary address + * for each DRAM row with a granularity of 32 or 64MB (single/dual + * channel operation). DRB regs are cumulative; therefore DRB7 will + * contain the total memory contained in all eight rows. + * + * FIXME: + * EDAC currently works for Dual-channel Interleaved configuration. + * Other configurations, which the chip supports, need fixing/testing. + * + */ + + for (index = 0; index < mci->nr_csrows; index++) { + csrow = &mci->csrows[index]; + + value = readb(mch_window + I82975X_DRB + index + + ((index >= 4) ? 0x80 : 0)); + cumul_size = value; + cumul_size <<= (I82975X_DRB_SHIFT - PAGE_SHIFT); + debugf3("%s(): (%d) cumul_size 0x%x\n", __func__, index, + cumul_size); + if (cumul_size == last_cumul_size) + continue; /* not populated */ + + csrow->first_page = last_cumul_size; + csrow->last_page = cumul_size - 1; + csrow->nr_pages = cumul_size - last_cumul_size; + last_cumul_size = cumul_size; + csrow->grain = 1 << 7; /* I82975X_EAP has 128B resolution */ + csrow->mtype = MEM_DDR; /* i82975x supports only DDR2 */ + csrow->dtype = i82975x_dram_type(mch_window, index); + csrow->edac_mode = EDAC_SECDED; /* only supported */ + } +} + +/* #define i82975x_DEBUG_IOMEM */ + +#ifdef i82975x_DEBUG_IOMEM +static void i82975x_print_dram_timings(void __iomem *mch_window) +{ + /* + * The register meanings are from Intel specs; + * (shows 13-5-5-5 for 800-DDR2) + * Asus P5W Bios reports 15-5-4-4 + * What's your religion? + */ + static const int caslats[4] = { 5, 4, 3, 6 }; + u32 dtreg[2]; + + dtreg[0] = readl(mch_window + 0x114); + dtreg[1] = readl(mch_window + 0x194); + i82975x_printk(KERN_INFO, "DRAM Timings : Ch0 Ch1\n" + " RAS Active Min = %d %d\n" + " CAS latency = %d %d\n" + " RAS to CAS = %d %d\n" + " RAS precharge = %d %d\n", + (dtreg[0] >> 19 ) & 0x0f, + (dtreg[1] >> 19) & 0x0f, + caslats[(dtreg[0] >> 8) & 0x03], + caslats[(dtreg[1] >> 8) & 0x03], + ((dtreg[0] >> 4) & 0x07) + 2, + ((dtreg[1] >> 4) & 0x07) + 2, + (dtreg[0] & 0x07) + 2, + (dtreg[1] & 0x07) + 2 + ); + +} +#endif + +static int i82975x_probe1(struct pci_dev *pdev, int dev_idx) +{ + int rc = -ENODEV; + struct mem_ctl_info *mci; + struct i82975x_pvt *pvt; + void __iomem *mch_window; + u32 mchbar; + u32 drc[2]; + struct i82975x_error_info discard; + int chans; +#ifdef i82975x_DEBUG_IOMEM + u8 c0drb[4]; + u8 c1drb[4]; +#endif + + debugf0("%s()\n", __func__); + + pci_read_config_dword(pdev, I82975X_MCHBAR, &mchbar); + if (!(mchbar & 1)) { + debugf3("%s(): failed, MCHBAR disabled!\n", __func__); + goto fail0; + } + mchbar &= 0xffffc000; /* bits 31:14 used for 16K window */ + mch_window = ioremap_nocache(mchbar, 0x1000); + +#ifdef i82975x_DEBUG_IOMEM + i82975x_printk(KERN_INFO, "MCHBAR real = %0x, remapped = %p\n", + mchbar, mch_window); + + c0drb[0] = readb(mch_window + I82975X_DRB_CH0R0); + c0drb[1] = readb(mch_window + I82975X_DRB_CH0R1); + c0drb[2] = readb(mch_window + I82975X_DRB_CH0R2); + c0drb[3] = readb(mch_window + I82975X_DRB_CH0R3); + c1drb[0] = readb(mch_window + I82975X_DRB_CH1R0); + c1drb[1] = readb(mch_window + I82975X_DRB_CH1R1); + c1drb[2] = readb(mch_window + I82975X_DRB_CH1R2); + c1drb[3] = readb(mch_window + I82975X_DRB_CH1R3); + i82975x_printk(KERN_INFO, "DRBCH0R0 = 0x%02x\n", c0drb[0]); + i82975x_printk(KERN_INFO, "DRBCH0R1 = 0x%02x\n", c0drb[1]); + i82975x_printk(KERN_INFO, "DRBCH0R2 = 0x%02x\n", c0drb[2]); + i82975x_printk(KERN_INFO, "DRBCH0R3 = 0x%02x\n", c0drb[3]); + i82975x_printk(KERN_INFO, "DRBCH1R0 = 0x%02x\n", c1drb[0]); + i82975x_printk(KERN_INFO, "DRBCH1R1 = 0x%02x\n", c1drb[1]); + i82975x_printk(KERN_INFO, "DRBCH1R2 = 0x%02x\n", c1drb[2]); + i82975x_printk(KERN_INFO, "DRBCH1R3 = 0x%02x\n", c1drb[3]); +#endif + + drc[0] = readl(mch_window + I82975X_DRC_CH0M0); + drc[1] = readl(mch_window + I82975X_DRC_CH1M0); +#ifdef i82975x_DEBUG_IOMEM + i82975x_printk(KERN_INFO, "DRC_CH0 = %0x, %s\n", drc[0], + ((drc[0] >> 21) & 3) == 1 ? + "ECC enabled" : "ECC disabled"); + i82975x_printk(KERN_INFO, "DRC_CH1 = %0x, %s\n", drc[1], + ((drc[1] >> 21) & 3) == 1 ? + "ECC enabled" : "ECC disabled"); + + i82975x_printk(KERN_INFO, "C0 BNKARC = %0x\n", + readw(mch_window + I82975X_C0BNKARC)); + i82975x_printk(KERN_INFO, "C1 BNKARC = %0x\n", + readw(mch_window + I82975X_C1BNKARC)); + i82975x_print_dram_timings(mch_window); + goto fail1; +#endif + if (!(((drc[0] >> 21) & 3) == 1 || ((drc[1] >> 21) & 3) == 1)) { + i82975x_printk(KERN_INFO, "ECC disabled on both channels.\n"); + goto fail1; + } + + chans = dual_channel_active(mch_window) + 1; + + /* assuming only one controller, index thus is 0 */ + mci = edac_mc_alloc(sizeof(*pvt), I82975X_NR_CSROWS(chans), + chans, 0); + if (!mci) { + rc = -ENOMEM; + goto fail1; + } + + debugf3("%s(): init mci\n", __func__); + mci->dev = &pdev->dev; + mci->mtype_cap = MEM_FLAG_DDR; + mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED; + mci->edac_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED; + mci->mod_name = EDAC_MOD_STR; + mci->mod_ver = I82975X_REVISION; + mci->ctl_name = i82975x_devs[dev_idx].ctl_name; + mci->edac_check = i82975x_check; + mci->ctl_page_to_phys = NULL; + debugf3("%s(): init pvt\n", __func__); + pvt = (struct i82975x_pvt *) mci->pvt_info; + pvt->mch_window = mch_window; + i82975x_init_csrows(mci, pdev, mch_window); + i82975x_get_error_info(mci, &discard); /* clear counters */ + + /* finalize this instance of memory controller with edac core */ + if (edac_mc_add_mc(mci)) { + debugf3("%s(): failed edac_mc_add_mc()\n", __func__); + goto fail2; + } + + /* get this far and it's successful */ + debugf3("%s(): success\n", __func__); + return 0; + +fail2: + edac_mc_free(mci); + +fail1: + iounmap(mch_window); +fail0: + return rc; +} + +/* returns count (>= 0), or negative on error */ +static int __devinit i82975x_init_one(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + int rc; + + debugf0("%s()\n", __func__); + + if (pci_enable_device(pdev) < 0) + return -EIO; + + rc = i82975x_probe1(pdev, ent->driver_data); + + if (mci_pdev == NULL) + mci_pdev = pci_dev_get(pdev); + + return rc; +} + +static void __devexit i82975x_remove_one(struct pci_dev *pdev) +{ + struct mem_ctl_info *mci; + struct i82975x_pvt *pvt; + + debugf0("%s()\n", __func__); + + mci = edac_mc_del_mc(&pdev->dev); + if (mci == NULL) + return; + + pvt = mci->pvt_info; + if (pvt->mch_window) + iounmap( pvt->mch_window ); + + edac_mc_free(mci); +} + +static const struct pci_device_id i82975x_pci_tbl[] __devinitdata = { + { + PCI_VEND_DEV(INTEL, 82975_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0, + I82975X + }, + { + 0, + } /* 0 terminated list. */ +}; + +MODULE_DEVICE_TABLE(pci, i82975x_pci_tbl); + +static struct pci_driver i82975x_driver = { + .name = EDAC_MOD_STR, + .probe = i82975x_init_one, + .remove = __devexit_p(i82975x_remove_one), + .id_table = i82975x_pci_tbl, +}; + +static int __init i82975x_init(void) +{ + int pci_rc; + + debugf3("%s()\n", __func__); + + pci_rc = pci_register_driver(&i82975x_driver); + if (pci_rc < 0) + goto fail0; + + if (mci_pdev == NULL) { + mci_pdev = pci_get_device(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_82975_0, NULL); + + if (!mci_pdev) { + debugf0("i82975x pci_get_device fail\n"); + pci_rc = -ENODEV; + goto fail1; + } + + pci_rc = i82975x_init_one(mci_pdev, i82975x_pci_tbl); + + if (pci_rc < 0) { + debugf0("i82975x init fail\n"); + pci_rc = -ENODEV; + goto fail1; + } + } + + return 0; + +fail1: + pci_unregister_driver(&i82975x_driver); + +fail0: + if (mci_pdev != NULL) + pci_dev_put(mci_pdev); + + return pci_rc; +} + +static void __exit i82975x_exit(void) +{ + debugf3("%s()\n", __func__); + + pci_unregister_driver(&i82975x_driver); + + if (!i82975x_registered) { + i82975x_remove_one(mci_pdev); + pci_dev_put(mci_pdev); + } +} + +module_init(i82975x_init); +module_exit(i82975x_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Arvind R. <arvind@acarlab.com>"); +MODULE_DESCRIPTION("MC support for Intel 82975 memory hub controllers"); diff --git a/drivers/edac/pasemi_edac.c b/drivers/edac/pasemi_edac.c new file mode 100644 index 000000000000..e66cdd42a392 --- /dev/null +++ b/drivers/edac/pasemi_edac.c @@ -0,0 +1,299 @@ +/* + * Copyright (C) 2006-2007 PA Semi, Inc + * + * Author: Egor Martovetsky <egor@pasemi.com> + * Maintained by: Olof Johansson <olof@lixom.net> + * + * Driver for the PWRficient onchip memory controllers + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/pci.h> +#include <linux/pci_ids.h> +#include <linux/slab.h> +#include "edac_core.h" + +#define MODULE_NAME "pasemi_edac" + +#define MCCFG_MCEN 0x300 +#define MCCFG_MCEN_MMC_EN 0x00000001 +#define MCCFG_ERRCOR 0x388 +#define MCCFG_ERRCOR_RNK_FAIL_DET_EN 0x00000100 +#define MCCFG_ERRCOR_ECC_GEN_EN 0x00000010 +#define MCCFG_ERRCOR_ECC_CRR_EN 0x00000001 +#define MCCFG_SCRUB 0x384 +#define MCCFG_SCRUB_RGLR_SCRB_EN 0x00000001 +#define MCDEBUG_ERRCTL1 0x728 +#define MCDEBUG_ERRCTL1_RFL_LOG_EN 0x00080000 +#define MCDEBUG_ERRCTL1_MBE_LOG_EN 0x00040000 +#define MCDEBUG_ERRCTL1_SBE_LOG_EN 0x00020000 +#define MCDEBUG_ERRSTA 0x730 +#define MCDEBUG_ERRSTA_RFL_STATUS 0x00000004 +#define MCDEBUG_ERRSTA_MBE_STATUS 0x00000002 +#define MCDEBUG_ERRSTA_SBE_STATUS 0x00000001 +#define MCDEBUG_ERRCNT1 0x734 +#define MCDEBUG_ERRCNT1_SBE_CNT_OVRFLO 0x00000080 +#define MCDEBUG_ERRLOG1A 0x738 +#define MCDEBUG_ERRLOG1A_MERR_TYPE_M 0x30000000 +#define MCDEBUG_ERRLOG1A_MERR_TYPE_NONE 0x00000000 +#define MCDEBUG_ERRLOG1A_MERR_TYPE_SBE 0x10000000 +#define MCDEBUG_ERRLOG1A_MERR_TYPE_MBE 0x20000000 +#define MCDEBUG_ERRLOG1A_MERR_TYPE_RFL 0x30000000 +#define MCDEBUG_ERRLOG1A_MERR_BA_M 0x00700000 +#define MCDEBUG_ERRLOG1A_MERR_BA_S 20 +#define MCDEBUG_ERRLOG1A_MERR_CS_M 0x00070000 +#define MCDEBUG_ERRLOG1A_MERR_CS_S 16 +#define MCDEBUG_ERRLOG1A_SYNDROME_M 0x0000ffff +#define MCDRAM_RANKCFG 0x114 +#define MCDRAM_RANKCFG_EN 0x00000001 +#define MCDRAM_RANKCFG_TYPE_SIZE_M 0x000001c0 +#define MCDRAM_RANKCFG_TYPE_SIZE_S 6 + +#define PASEMI_EDAC_NR_CSROWS 8 +#define PASEMI_EDAC_NR_CHANS 1 +#define PASEMI_EDAC_ERROR_GRAIN 64 + +static int last_page_in_mmc; +static int system_mmc_id; + + +static u32 pasemi_edac_get_error_info(struct mem_ctl_info *mci) +{ + struct pci_dev *pdev = to_pci_dev(mci->dev); + u32 tmp; + + pci_read_config_dword(pdev, MCDEBUG_ERRSTA, + &tmp); + + tmp &= (MCDEBUG_ERRSTA_RFL_STATUS | MCDEBUG_ERRSTA_MBE_STATUS + | MCDEBUG_ERRSTA_SBE_STATUS); + + if (tmp) { + if (tmp & MCDEBUG_ERRSTA_SBE_STATUS) + pci_write_config_dword(pdev, MCDEBUG_ERRCNT1, + MCDEBUG_ERRCNT1_SBE_CNT_OVRFLO); + pci_write_config_dword(pdev, MCDEBUG_ERRSTA, tmp); + } + + return tmp; +} + +static void pasemi_edac_process_error_info(struct mem_ctl_info *mci, u32 errsta) +{ + struct pci_dev *pdev = to_pci_dev(mci->dev); + u32 errlog1a; + u32 cs; + + if (!errsta) + return; + + pci_read_config_dword(pdev, MCDEBUG_ERRLOG1A, &errlog1a); + + cs = (errlog1a & MCDEBUG_ERRLOG1A_MERR_CS_M) >> + MCDEBUG_ERRLOG1A_MERR_CS_S; + + /* uncorrectable/multi-bit errors */ + if (errsta & (MCDEBUG_ERRSTA_MBE_STATUS | + MCDEBUG_ERRSTA_RFL_STATUS)) { + edac_mc_handle_ue(mci, mci->csrows[cs].first_page, 0, + cs, mci->ctl_name); + } + + /* correctable/single-bit errors */ + if (errsta & MCDEBUG_ERRSTA_SBE_STATUS) { + edac_mc_handle_ce(mci, mci->csrows[cs].first_page, 0, + 0, cs, 0, mci->ctl_name); + } +} + +static void pasemi_edac_check(struct mem_ctl_info *mci) +{ + u32 errsta; + + errsta = pasemi_edac_get_error_info(mci); + if (errsta) + pasemi_edac_process_error_info(mci, errsta); +} + +static int pasemi_edac_init_csrows(struct mem_ctl_info *mci, + struct pci_dev *pdev, + enum edac_type edac_mode) +{ + struct csrow_info *csrow; + u32 rankcfg; + int index; + + for (index = 0; index < mci->nr_csrows; index++) { + csrow = &mci->csrows[index]; + + pci_read_config_dword(pdev, + MCDRAM_RANKCFG + (index * 12), + &rankcfg); + + if (!(rankcfg & MCDRAM_RANKCFG_EN)) + continue; + + switch ((rankcfg & MCDRAM_RANKCFG_TYPE_SIZE_M) >> + MCDRAM_RANKCFG_TYPE_SIZE_S) { + case 0: + csrow->nr_pages = 128 << (20 - PAGE_SHIFT); + break; + case 1: + csrow->nr_pages = 256 << (20 - PAGE_SHIFT); + break; + case 2: + case 3: + csrow->nr_pages = 512 << (20 - PAGE_SHIFT); + break; + case 4: + csrow->nr_pages = 1024 << (20 - PAGE_SHIFT); + break; + case 5: + csrow->nr_pages = 2048 << (20 - PAGE_SHIFT); + break; + default: + edac_mc_printk(mci, KERN_ERR, + "Unrecognized Rank Config. rankcfg=%u\n", + rankcfg); + return -EINVAL; + } + + csrow->first_page = last_page_in_mmc; + csrow->last_page = csrow->first_page + csrow->nr_pages - 1; + last_page_in_mmc += csrow->nr_pages; + csrow->page_mask = 0; + csrow->grain = PASEMI_EDAC_ERROR_GRAIN; + csrow->mtype = MEM_DDR; + csrow->dtype = DEV_UNKNOWN; + csrow->edac_mode = edac_mode; + } + return 0; +} + +static int __devinit pasemi_edac_probe(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + struct mem_ctl_info *mci = NULL; + u32 errctl1, errcor, scrub, mcen; + + pci_read_config_dword(pdev, MCCFG_MCEN, &mcen); + if (!(mcen & MCCFG_MCEN_MMC_EN)) + return -ENODEV; + + /* + * We should think about enabling other error detection later on + */ + + pci_read_config_dword(pdev, MCDEBUG_ERRCTL1, &errctl1); + errctl1 |= MCDEBUG_ERRCTL1_SBE_LOG_EN | + MCDEBUG_ERRCTL1_MBE_LOG_EN | + MCDEBUG_ERRCTL1_RFL_LOG_EN; + pci_write_config_dword(pdev, MCDEBUG_ERRCTL1, errctl1); + + mci = edac_mc_alloc(0, PASEMI_EDAC_NR_CSROWS, PASEMI_EDAC_NR_CHANS, + system_mmc_id++); + + if (mci == NULL) + return -ENOMEM; + + pci_read_config_dword(pdev, MCCFG_ERRCOR, &errcor); + errcor |= MCCFG_ERRCOR_RNK_FAIL_DET_EN | + MCCFG_ERRCOR_ECC_GEN_EN | + MCCFG_ERRCOR_ECC_CRR_EN; + + mci->dev = &pdev->dev; + mci->mtype_cap = MEM_FLAG_DDR | MEM_FLAG_RDDR; + mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_EC | EDAC_FLAG_SECDED; + mci->edac_cap = (errcor & MCCFG_ERRCOR_ECC_GEN_EN) ? + ((errcor & MCCFG_ERRCOR_ECC_CRR_EN) ? + (EDAC_FLAG_EC | EDAC_FLAG_SECDED) : EDAC_FLAG_EC) : + EDAC_FLAG_NONE; + mci->mod_name = MODULE_NAME; + mci->dev_name = pci_name(pdev); + mci->ctl_name = "pasemi,1682m-mc"; + mci->edac_check = pasemi_edac_check; + mci->ctl_page_to_phys = NULL; + pci_read_config_dword(pdev, MCCFG_SCRUB, &scrub); + mci->scrub_cap = SCRUB_FLAG_HW_PROG | SCRUB_FLAG_HW_SRC; + mci->scrub_mode = + ((errcor & MCCFG_ERRCOR_ECC_CRR_EN) ? SCRUB_FLAG_HW_SRC : 0) | + ((scrub & MCCFG_SCRUB_RGLR_SCRB_EN) ? SCRUB_FLAG_HW_PROG : 0); + + if (pasemi_edac_init_csrows(mci, pdev, + (mci->edac_cap & EDAC_FLAG_SECDED) ? + EDAC_SECDED : + ((mci->edac_cap & EDAC_FLAG_EC) ? + EDAC_EC : EDAC_NONE))) + goto fail; + + /* + * Clear status + */ + pasemi_edac_get_error_info(mci); + + if (edac_mc_add_mc(mci)) + goto fail; + + /* get this far and it's successful */ + return 0; + +fail: + edac_mc_free(mci); + return -ENODEV; +} + +static void __devexit pasemi_edac_remove(struct pci_dev *pdev) +{ + struct mem_ctl_info *mci = edac_mc_del_mc(&pdev->dev); + + if (!mci) + return; + + edac_mc_free(mci); +} + + +static const struct pci_device_id pasemi_edac_pci_tbl[] = { + { PCI_DEVICE(PCI_VENDOR_ID_PASEMI, 0xa00a) }, +}; + +MODULE_DEVICE_TABLE(pci, pasemi_edac_pci_tbl); + +static struct pci_driver pasemi_edac_driver = { + .name = MODULE_NAME, + .probe = pasemi_edac_probe, + .remove = __devexit_p(pasemi_edac_remove), + .id_table = pasemi_edac_pci_tbl, +}; + +static int __init pasemi_edac_init(void) +{ + return pci_register_driver(&pasemi_edac_driver); +} + +static void __exit pasemi_edac_exit(void) +{ + pci_unregister_driver(&pasemi_edac_driver); +} + +module_init(pasemi_edac_init); +module_exit(pasemi_edac_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Egor Martovetsky <egor@pasemi.com>"); +MODULE_DESCRIPTION("MC support for PA Semi PA6T-1682M memory controller"); diff --git a/drivers/edac/r82600_edac.c b/drivers/edac/r82600_edac.c index a49cf0a39398..e25f712f2dc3 100644 --- a/drivers/edac/r82600_edac.c +++ b/drivers/edac/r82600_edac.c @@ -11,7 +11,7 @@ * * Written with reference to 82600 High Integration Dual PCI System * Controller Data Book: - * http://www.radisys.com/files/support_downloads/007-01277-0002.82600DataBook.pdf + * www.radisys.com/files/support_downloads/007-01277-0002.82600DataBook.pdf * references to this document given in [] */ @@ -20,9 +20,9 @@ #include <linux/pci.h> #include <linux/pci_ids.h> #include <linux/slab.h> -#include "edac_mc.h" +#include "edac_core.h" -#define R82600_REVISION " Ver: 2.0.1 " __DATE__ +#define R82600_REVISION " Ver: 2.0.2 " __DATE__ #define EDAC_MOD_STR "r82600_edac" #define r82600_printk(level, fmt, arg...) \ @@ -131,10 +131,12 @@ struct r82600_error_info { u32 eapr; }; -static unsigned int disable_hardware_scrub = 0; +static unsigned int disable_hardware_scrub; -static void r82600_get_error_info (struct mem_ctl_info *mci, - struct r82600_error_info *info) +static struct edac_pci_ctl_info *r82600_pci; + +static void r82600_get_error_info(struct mem_ctl_info *mci, + struct r82600_error_info *info) { struct pci_dev *pdev; @@ -144,18 +146,19 @@ static void r82600_get_error_info (struct mem_ctl_info *mci, if (info->eapr & BIT(0)) /* Clear error to allow next error to be reported [p.62] */ pci_write_bits32(pdev, R82600_EAP, - ((u32) BIT(0) & (u32) BIT(1)), - ((u32) BIT(0) & (u32) BIT(1))); + ((u32) BIT(0) & (u32) BIT(1)), + ((u32) BIT(0) & (u32) BIT(1))); if (info->eapr & BIT(1)) /* Clear error to allow next error to be reported [p.62] */ pci_write_bits32(pdev, R82600_EAP, - ((u32) BIT(0) & (u32) BIT(1)), - ((u32) BIT(0) & (u32) BIT(1))); + ((u32) BIT(0) & (u32) BIT(1)), + ((u32) BIT(0) & (u32) BIT(1))); } -static int r82600_process_error_info (struct mem_ctl_info *mci, - struct r82600_error_info *info, int handle_errors) +static int r82600_process_error_info(struct mem_ctl_info *mci, + struct r82600_error_info *info, + int handle_errors) { int error_found; u32 eapaddr, page; @@ -172,25 +175,24 @@ static int r82600_process_error_info (struct mem_ctl_info *mci, * granularity (upper 19 bits only) */ page = eapaddr >> PAGE_SHIFT; - if (info->eapr & BIT(0)) { /* CE? */ + if (info->eapr & BIT(0)) { /* CE? */ error_found = 1; if (handle_errors) - edac_mc_handle_ce(mci, page, 0, /* not avail */ + edac_mc_handle_ce(mci, page, 0, /* not avail */ syndrome, edac_mc_find_csrow_by_page(mci, page), - 0, /* channel */ - mci->ctl_name); + 0, mci->ctl_name); } - if (info->eapr & BIT(1)) { /* UE? */ + if (info->eapr & BIT(1)) { /* UE? */ error_found = 1; if (handle_errors) /* 82600 doesn't give enough info */ edac_mc_handle_ue(mci, page, 0, - edac_mc_find_csrow_by_page(mci, page), - mci->ctl_name); + edac_mc_find_csrow_by_page(mci, page), + mci->ctl_name); } return error_found; @@ -211,11 +213,11 @@ static inline int ecc_enabled(u8 dramcr) } static void r82600_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev, - u8 dramcr) + u8 dramcr) { struct csrow_info *csrow; int index; - u8 drbar; /* SDRAM Row Boundry Address Register */ + u8 drbar; /* SDRAM Row Boundry Address Register */ u32 row_high_limit, row_high_limit_last; u32 reg_sdram, ecc_on, row_base; @@ -276,7 +278,7 @@ static int r82600_probe1(struct pci_dev *pdev, int dev_idx) debugf2("%s(): sdram refresh rate = %#0x\n", __func__, sdram_refresh_rate); debugf2("%s(): DRAMC register = %#0x\n", __func__, dramcr); - mci = edac_mc_alloc(0, R82600_NR_CSROWS, R82600_NR_CHANS); + mci = edac_mc_alloc(0, R82600_NR_CSROWS, R82600_NR_CHANS, 0); if (mci == NULL) return -ENOMEM; @@ -305,15 +307,16 @@ static int r82600_probe1(struct pci_dev *pdev, int dev_idx) mci->mod_name = EDAC_MOD_STR; mci->mod_ver = R82600_REVISION; mci->ctl_name = "R82600"; + mci->dev_name = pci_name(pdev); mci->edac_check = r82600_check; mci->ctl_page_to_phys = NULL; r82600_init_csrows(mci, pdev, dramcr); - r82600_get_error_info(mci, &discard); /* clear counters */ + r82600_get_error_info(mci, &discard); /* clear counters */ /* Here we assume that we will never see multiple instances of this * type of memory controller. The ID is therefore hardcoded to 0. */ - if (edac_mc_add_mc(mci,0)) { + if (edac_mc_add_mc(mci)) { debugf3("%s(): failed edac_mc_add_mc()\n", __func__); goto fail; } @@ -326,6 +329,17 @@ static int r82600_probe1(struct pci_dev *pdev, int dev_idx) pci_write_bits32(pdev, R82600_EAP, BIT(31), BIT(31)); } + /* allocating generic PCI control info */ + r82600_pci = edac_pci_create_generic_ctl(&pdev->dev, EDAC_MOD_STR); + if (!r82600_pci) { + printk(KERN_WARNING + "%s(): Unable to create PCI control\n", + __func__); + printk(KERN_WARNING + "%s(): PCI error report via EDAC not setup\n", + __func__); + } + debugf3("%s(): success\n", __func__); return 0; @@ -336,7 +350,7 @@ fail: /* returns count (>= 0), or negative on error */ static int __devinit r82600_init_one(struct pci_dev *pdev, - const struct pci_device_id *ent) + const struct pci_device_id *ent) { debugf0("%s()\n", __func__); @@ -350,6 +364,9 @@ static void __devexit r82600_remove_one(struct pci_dev *pdev) debugf0("%s()\n", __func__); + if (r82600_pci) + edac_pci_release_generic_ctl(r82600_pci); + if ((mci = edac_mc_del_mc(&pdev->dev)) == NULL) return; @@ -358,11 +375,11 @@ static void __devexit r82600_remove_one(struct pci_dev *pdev) static const struct pci_device_id r82600_pci_tbl[] __devinitdata = { { - PCI_DEVICE(PCI_VENDOR_ID_RADISYS, R82600_BRIDGE_ID) - }, + PCI_DEVICE(PCI_VENDOR_ID_RADISYS, R82600_BRIDGE_ID) + }, { - 0, - } /* 0 terminated list. */ + 0, + } /* 0 terminated list. */ }; MODULE_DEVICE_TABLE(pci, r82600_pci_tbl); @@ -389,7 +406,7 @@ module_exit(r82600_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Tim Small <tim@buttersideup.com> - WPAD Ltd. " - "on behalf of EADS Astrium"); + "on behalf of EADS Astrium"); MODULE_DESCRIPTION("MC support for Radisys 82600 memory controllers"); module_param(disable_hardware_scrub, bool, 0644); diff --git a/drivers/firewire/fw-ohci.c b/drivers/firewire/fw-ohci.c index 41476abc0693..db703758db98 100644 --- a/drivers/firewire/fw-ohci.c +++ b/drivers/firewire/fw-ohci.c @@ -224,6 +224,7 @@ ohci_update_phy_reg(struct fw_card *card, int addr, u32 val, old; reg_write(ohci, OHCI1394_PhyControl, OHCI1394_PhyControl_Read(addr)); + flush_writes(ohci); msleep(2); val = reg_read(ohci, OHCI1394_PhyControl); if ((val & OHCI1394_PhyControl_ReadDone) == 0) { @@ -586,7 +587,7 @@ static void context_stop(struct context *ctx) break; fw_notify("context_stop: still active (0x%08x)\n", reg); - msleep(1); + mdelay(1); } } diff --git a/drivers/firewire/fw-sbp2.c b/drivers/firewire/fw-sbp2.c index 7c53be0387fb..fc984474162c 100644 --- a/drivers/firewire/fw-sbp2.c +++ b/drivers/firewire/fw-sbp2.c @@ -840,7 +840,6 @@ complete_command_orb(struct sbp2_orb *base_orb, struct sbp2_status *status) container_of(base_orb, struct sbp2_command_orb, base); struct fw_unit *unit = orb->unit; struct fw_device *device = fw_device(unit->device.parent); - struct scatterlist *sg; int result; if (status != NULL) { @@ -876,11 +875,10 @@ complete_command_orb(struct sbp2_orb *base_orb, struct sbp2_status *status) dma_unmap_single(device->card->device, orb->base.request_bus, sizeof(orb->request), DMA_TO_DEVICE); - if (orb->cmd->use_sg > 0) { - sg = (struct scatterlist *)orb->cmd->request_buffer; - dma_unmap_sg(device->card->device, sg, orb->cmd->use_sg, + if (scsi_sg_count(orb->cmd) > 0) + dma_unmap_sg(device->card->device, scsi_sglist(orb->cmd), + scsi_sg_count(orb->cmd), orb->cmd->sc_data_direction); - } if (orb->page_table_bus != 0) dma_unmap_single(device->card->device, orb->page_table_bus, @@ -901,8 +899,8 @@ static int sbp2_command_orb_map_scatterlist(struct sbp2_command_orb *orb) int sg_len, l, i, j, count; dma_addr_t sg_addr; - sg = (struct scatterlist *)orb->cmd->request_buffer; - count = dma_map_sg(device->card->device, sg, orb->cmd->use_sg, + sg = scsi_sglist(orb->cmd); + count = dma_map_sg(device->card->device, sg, scsi_sg_count(orb->cmd), orb->cmd->sc_data_direction); if (count == 0) goto fail; @@ -971,7 +969,7 @@ static int sbp2_command_orb_map_scatterlist(struct sbp2_command_orb *orb) return 0; fail_page_table: - dma_unmap_sg(device->card->device, sg, orb->cmd->use_sg, + dma_unmap_sg(device->card->device, sg, scsi_sg_count(orb->cmd), orb->cmd->sc_data_direction); fail: return -ENOMEM; @@ -1031,7 +1029,7 @@ static int sbp2_scsi_queuecommand(struct scsi_cmnd *cmd, scsi_done_fn_t done) orb->request.misc |= COMMAND_ORB_DIRECTION(SBP2_DIRECTION_TO_MEDIA); - if (cmd->use_sg && sbp2_command_orb_map_scatterlist(orb) < 0) + if (scsi_sg_count(cmd) && sbp2_command_orb_map_scatterlist(orb) < 0) goto fail_mapping; fw_memcpy_to_be32(&orb->request, &orb->request, sizeof(orb->request)); diff --git a/drivers/firewire/fw-transaction.c b/drivers/firewire/fw-transaction.c index 80d0121463d0..3ce8e2fbe15f 100644 --- a/drivers/firewire/fw-transaction.c +++ b/drivers/firewire/fw-transaction.c @@ -605,8 +605,10 @@ fw_send_response(struct fw_card *card, struct fw_request *request, int rcode) * check is sufficient to ensure we don't send response to * broadcast packets or posted writes. */ - if (request->ack != ACK_PENDING) + if (request->ack != ACK_PENDING) { + kfree(request); return; + } if (rcode == RCODE_COMPLETE) fw_fill_response(&request->response, request->request_header, @@ -628,11 +630,6 @@ fw_core_handle_request(struct fw_card *card, struct fw_packet *p) unsigned long flags; int tcode, destination, source; - if (p->payload_length > 2048) { - /* FIXME: send error response. */ - return; - } - if (p->ack != ACK_PENDING && p->ack != ACK_COMPLETE) return; diff --git a/drivers/firewire/fw-transaction.h b/drivers/firewire/fw-transaction.h index 5abed193f4a6..5ceaccd10564 100644 --- a/drivers/firewire/fw-transaction.h +++ b/drivers/firewire/fw-transaction.h @@ -123,6 +123,10 @@ typedef void (*fw_transaction_callback_t)(struct fw_card *card, int rcode, size_t length, void *callback_data); +/* + * Important note: The callback must guarantee that either fw_send_response() + * or kfree() is called on the @request. + */ typedef void (*fw_address_callback_t)(struct fw_card *card, struct fw_request *request, int tcode, int destination, int source, diff --git a/drivers/ide/mips/swarm.c b/drivers/ide/mips/swarm.c index 6e935d7c63fd..c2e29571b007 100644 --- a/drivers/ide/mips/swarm.c +++ b/drivers/ide/mips/swarm.c @@ -165,12 +165,11 @@ static int __devinit swarm_ide_init_module(void) goto out; } - if (!(pldev = kmalloc(sizeof (*pldev), GFP_KERNEL))) { + if (!(pldev = kzalloc(sizeof (*pldev), GFP_KERNEL))) { err = -ENOMEM; goto out_unregister_driver; } - memset (pldev, 0, sizeof (*pldev)); pldev->name = swarm_ide_string; pldev->id = 0; pldev->dev.release = swarm_ide_platform_release; diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index a91001c59b69..c5c33d35f87d 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -295,10 +295,9 @@ int rdma_resolve_ip(struct rdma_addr_client *client, struct addr_req *req; int ret = 0; - req = kmalloc(sizeof *req, GFP_KERNEL); + req = kzalloc(sizeof *req, GFP_KERNEL); if (!req) return -ENOMEM; - memset(req, 0, sizeof *req); if (src_addr) memcpy(&req->src_addr, src_addr, ip_addr_size(src_addr)); diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 9820c67ba47d..4df269f5d9ac 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -3374,7 +3374,7 @@ int ib_cm_init_qp_attr(struct ib_cm_id *cm_id, } EXPORT_SYMBOL(ib_cm_init_qp_attr); -void cm_get_ack_delay(struct cm_device *cm_dev) +static void cm_get_ack_delay(struct cm_device *cm_dev) { struct ib_device_attr attr; diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 23af7a032a03..9ffb9987450a 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -573,7 +573,7 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr, break; case RDMA_TRANSPORT_IWARP: if (!id_priv->cm_id.iw) { - qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE; + qp_attr->qp_access_flags = 0; *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS; } else ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr, diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c index 3b41dc0c39dd..9574088f0d4e 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c @@ -229,9 +229,8 @@ static void *alloc_ep(int size, gfp_t gfp) { struct iwch_ep_common *epc; - epc = kmalloc(size, gfp); + epc = kzalloc(size, gfp); if (epc) { - memset(epc, 0, size); kref_init(&epc->kref); spin_lock_init(&epc->lock); init_waitqueue_head(&epc->waitq); @@ -1914,6 +1913,7 @@ int iwch_create_listen(struct iw_cm_id *cm_id, int backlog) fail3: cxgb3_free_stid(ep->com.tdev, ep->stid); fail2: + cm_id->rem_ref(cm_id); put_ep(&ep->com); fail1: out: diff --git a/drivers/infiniband/hw/ehca/ehca_av.c b/drivers/infiniband/hw/ehca/ehca_av.c index 3cd6bf3402d1..e53a97af1260 100644 --- a/drivers/infiniband/hw/ehca/ehca_av.c +++ b/drivers/infiniband/hw/ehca/ehca_av.c @@ -79,7 +79,7 @@ struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) av->av.ipd = (ah_mult > 0) ? ((ehca_mult - 1) / ah_mult) : 0; } else - av->av.ipd = ehca_static_rate; + av->av.ipd = ehca_static_rate; av->av.lnh = ah_attr->ah_flags; av->av.grh.word_0 = EHCA_BMASK_SET(GRH_IPVERSION_MASK, 6); diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h index daf823ea1ace..043e4fb23fb0 100644 --- a/drivers/infiniband/hw/ehca/ehca_classes.h +++ b/drivers/infiniband/hw/ehca/ehca_classes.h @@ -204,11 +204,11 @@ struct ehca_mr { spinlock_t mrlock; enum ehca_mr_flag flags; - u32 num_pages; /* number of MR pages */ - u32 num_4k; /* number of 4k "page" portions to form MR */ + u32 num_kpages; /* number of kernel pages */ + u32 num_hwpages; /* number of hw pages to form MR */ int acl; /* ACL (stored here for usage in reregister) */ u64 *start; /* virtual start address (stored here for */ - /* usage in reregister) */ + /* usage in reregister) */ u64 size; /* size (stored here for usage in reregister) */ u32 fmr_page_size; /* page size for FMR */ u32 fmr_max_pages; /* max pages for FMR */ @@ -217,9 +217,6 @@ struct ehca_mr { /* fw specific data */ struct ipz_mrmw_handle ipz_mr_handle; /* MR handle for h-calls */ struct h_galpas galpas; - /* data for userspace bridge */ - u32 nr_of_pages; - void *pagearray; }; struct ehca_mw { @@ -241,26 +238,29 @@ enum ehca_mr_pgi_type { struct ehca_mr_pginfo { enum ehca_mr_pgi_type type; - u64 num_pages; - u64 page_cnt; - u64 num_4k; /* number of 4k "page" portions */ - u64 page_4k_cnt; /* counter for 4k "page" portions */ - u64 next_4k; /* next 4k "page" portion in buffer/chunk/listelem */ - - /* type EHCA_MR_PGI_PHYS section */ - int num_phys_buf; - struct ib_phys_buf *phys_buf_array; - u64 next_buf; - - /* type EHCA_MR_PGI_USER section */ - struct ib_umem *region; - struct ib_umem_chunk *next_chunk; - u64 next_nmap; - - /* type EHCA_MR_PGI_FMR section */ - u64 *page_list; - u64 next_listelem; - /* next_4k also used within EHCA_MR_PGI_FMR */ + u64 num_kpages; + u64 kpage_cnt; + u64 num_hwpages; /* number of hw pages */ + u64 hwpage_cnt; /* counter for hw pages */ + u64 next_hwpage; /* next hw page in buffer/chunk/listelem */ + + union { + struct { /* type EHCA_MR_PGI_PHYS section */ + int num_phys_buf; + struct ib_phys_buf *phys_buf_array; + u64 next_buf; + } phy; + struct { /* type EHCA_MR_PGI_USER section */ + struct ib_umem *region; + struct ib_umem_chunk *next_chunk; + u64 next_nmap; + } usr; + struct { /* type EHCA_MR_PGI_FMR section */ + u64 fmr_pgsize; + u64 *page_list; + u64 next_listelem; + } fmr; + } u; }; /* output parameters for MR/FMR hipz calls */ @@ -391,6 +391,6 @@ struct ehca_alloc_qp_parms { int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp); int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int qp_num); -struct ehca_qp* ehca_cq_get_qp(struct ehca_cq *cq, int qp_num); +struct ehca_qp *ehca_cq_get_qp(struct ehca_cq *cq, int qp_num); #endif diff --git a/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h b/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h index fb3df5c271e7..1798e6466bd0 100644 --- a/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h +++ b/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h @@ -154,83 +154,83 @@ struct hcp_modify_qp_control_block { u32 reserved_70_127[58]; /* 70 */ }; -#define MQPCB_MASK_QKEY EHCA_BMASK_IBM(0,0) -#define MQPCB_MASK_SEND_PSN EHCA_BMASK_IBM(2,2) -#define MQPCB_MASK_RECEIVE_PSN EHCA_BMASK_IBM(3,3) -#define MQPCB_MASK_PRIM_PHYS_PORT EHCA_BMASK_IBM(4,4) -#define MQPCB_PRIM_PHYS_PORT EHCA_BMASK_IBM(24,31) -#define MQPCB_MASK_ALT_PHYS_PORT EHCA_BMASK_IBM(5,5) -#define MQPCB_MASK_PRIM_P_KEY_IDX EHCA_BMASK_IBM(6,6) -#define MQPCB_PRIM_P_KEY_IDX EHCA_BMASK_IBM(24,31) -#define MQPCB_MASK_ALT_P_KEY_IDX EHCA_BMASK_IBM(7,7) -#define MQPCB_MASK_RDMA_ATOMIC_CTRL EHCA_BMASK_IBM(8,8) -#define MQPCB_MASK_QP_STATE EHCA_BMASK_IBM(9,9) -#define MQPCB_QP_STATE EHCA_BMASK_IBM(24,31) -#define MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES EHCA_BMASK_IBM(11,11) -#define MQPCB_MASK_PATH_MIGRATION_STATE EHCA_BMASK_IBM(12,12) -#define MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP EHCA_BMASK_IBM(13,13) -#define MQPCB_MASK_DEST_QP_NR EHCA_BMASK_IBM(14,14) -#define MQPCB_MASK_MIN_RNR_NAK_TIMER_FIELD EHCA_BMASK_IBM(15,15) -#define MQPCB_MASK_SERVICE_LEVEL EHCA_BMASK_IBM(16,16) -#define MQPCB_MASK_SEND_GRH_FLAG EHCA_BMASK_IBM(17,17) -#define MQPCB_MASK_RETRY_COUNT EHCA_BMASK_IBM(18,18) -#define MQPCB_MASK_TIMEOUT EHCA_BMASK_IBM(19,19) -#define MQPCB_MASK_PATH_MTU EHCA_BMASK_IBM(20,20) -#define MQPCB_PATH_MTU EHCA_BMASK_IBM(24,31) -#define MQPCB_MASK_MAX_STATIC_RATE EHCA_BMASK_IBM(21,21) -#define MQPCB_MAX_STATIC_RATE EHCA_BMASK_IBM(24,31) -#define MQPCB_MASK_DLID EHCA_BMASK_IBM(22,22) -#define MQPCB_DLID EHCA_BMASK_IBM(16,31) -#define MQPCB_MASK_RNR_RETRY_COUNT EHCA_BMASK_IBM(23,23) -#define MQPCB_RNR_RETRY_COUNT EHCA_BMASK_IBM(29,31) -#define MQPCB_MASK_SOURCE_PATH_BITS EHCA_BMASK_IBM(24,24) -#define MQPCB_SOURCE_PATH_BITS EHCA_BMASK_IBM(25,31) -#define MQPCB_MASK_TRAFFIC_CLASS EHCA_BMASK_IBM(25,25) -#define MQPCB_TRAFFIC_CLASS EHCA_BMASK_IBM(24,31) -#define MQPCB_MASK_HOP_LIMIT EHCA_BMASK_IBM(26,26) -#define MQPCB_HOP_LIMIT EHCA_BMASK_IBM(24,31) -#define MQPCB_MASK_SOURCE_GID_IDX EHCA_BMASK_IBM(27,27) -#define MQPCB_SOURCE_GID_IDX EHCA_BMASK_IBM(24,31) -#define MQPCB_MASK_FLOW_LABEL EHCA_BMASK_IBM(28,28) -#define MQPCB_FLOW_LABEL EHCA_BMASK_IBM(12,31) -#define MQPCB_MASK_DEST_GID EHCA_BMASK_IBM(30,30) -#define MQPCB_MASK_SERVICE_LEVEL_AL EHCA_BMASK_IBM(31,31) -#define MQPCB_SERVICE_LEVEL_AL EHCA_BMASK_IBM(28,31) -#define MQPCB_MASK_SEND_GRH_FLAG_AL EHCA_BMASK_IBM(32,32) -#define MQPCB_SEND_GRH_FLAG_AL EHCA_BMASK_IBM(31,31) -#define MQPCB_MASK_RETRY_COUNT_AL EHCA_BMASK_IBM(33,33) -#define MQPCB_RETRY_COUNT_AL EHCA_BMASK_IBM(29,31) -#define MQPCB_MASK_TIMEOUT_AL EHCA_BMASK_IBM(34,34) -#define MQPCB_TIMEOUT_AL EHCA_BMASK_IBM(27,31) -#define MQPCB_MASK_MAX_STATIC_RATE_AL EHCA_BMASK_IBM(35,35) -#define MQPCB_MAX_STATIC_RATE_AL EHCA_BMASK_IBM(24,31) -#define MQPCB_MASK_DLID_AL EHCA_BMASK_IBM(36,36) -#define MQPCB_DLID_AL EHCA_BMASK_IBM(16,31) -#define MQPCB_MASK_RNR_RETRY_COUNT_AL EHCA_BMASK_IBM(37,37) -#define MQPCB_RNR_RETRY_COUNT_AL EHCA_BMASK_IBM(29,31) -#define MQPCB_MASK_SOURCE_PATH_BITS_AL EHCA_BMASK_IBM(38,38) -#define MQPCB_SOURCE_PATH_BITS_AL EHCA_BMASK_IBM(25,31) -#define MQPCB_MASK_TRAFFIC_CLASS_AL EHCA_BMASK_IBM(39,39) -#define MQPCB_TRAFFIC_CLASS_AL EHCA_BMASK_IBM(24,31) -#define MQPCB_MASK_HOP_LIMIT_AL EHCA_BMASK_IBM(40,40) -#define MQPCB_HOP_LIMIT_AL EHCA_BMASK_IBM(24,31) -#define MQPCB_MASK_SOURCE_GID_IDX_AL EHCA_BMASK_IBM(41,41) -#define MQPCB_SOURCE_GID_IDX_AL EHCA_BMASK_IBM(24,31) -#define MQPCB_MASK_FLOW_LABEL_AL EHCA_BMASK_IBM(42,42) -#define MQPCB_FLOW_LABEL_AL EHCA_BMASK_IBM(12,31) -#define MQPCB_MASK_DEST_GID_AL EHCA_BMASK_IBM(44,44) -#define MQPCB_MASK_MAX_NR_OUTST_SEND_WR EHCA_BMASK_IBM(45,45) -#define MQPCB_MAX_NR_OUTST_SEND_WR EHCA_BMASK_IBM(16,31) -#define MQPCB_MASK_MAX_NR_OUTST_RECV_WR EHCA_BMASK_IBM(46,46) -#define MQPCB_MAX_NR_OUTST_RECV_WR EHCA_BMASK_IBM(16,31) -#define MQPCB_MASK_DISABLE_ETE_CREDIT_CHECK EHCA_BMASK_IBM(47,47) -#define MQPCB_DISABLE_ETE_CREDIT_CHECK EHCA_BMASK_IBM(31,31) -#define MQPCB_QP_NUMBER EHCA_BMASK_IBM(8,31) -#define MQPCB_MASK_QP_ENABLE EHCA_BMASK_IBM(48,48) -#define MQPCB_QP_ENABLE EHCA_BMASK_IBM(31,31) -#define MQPCB_MASK_CURR_SRQ_LIMIT EHCA_BMASK_IBM(49,49) -#define MQPCB_CURR_SRQ_LIMIT EHCA_BMASK_IBM(16,31) -#define MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG EHCA_BMASK_IBM(50,50) -#define MQPCB_MASK_SHARED_RQ_HNDL EHCA_BMASK_IBM(51,51) +#define MQPCB_MASK_QKEY EHCA_BMASK_IBM( 0, 0) +#define MQPCB_MASK_SEND_PSN EHCA_BMASK_IBM( 2, 2) +#define MQPCB_MASK_RECEIVE_PSN EHCA_BMASK_IBM( 3, 3) +#define MQPCB_MASK_PRIM_PHYS_PORT EHCA_BMASK_IBM( 4, 4) +#define MQPCB_PRIM_PHYS_PORT EHCA_BMASK_IBM(24, 31) +#define MQPCB_MASK_ALT_PHYS_PORT EHCA_BMASK_IBM( 5, 5) +#define MQPCB_MASK_PRIM_P_KEY_IDX EHCA_BMASK_IBM( 6, 6) +#define MQPCB_PRIM_P_KEY_IDX EHCA_BMASK_IBM(24, 31) +#define MQPCB_MASK_ALT_P_KEY_IDX EHCA_BMASK_IBM( 7, 7) +#define MQPCB_MASK_RDMA_ATOMIC_CTRL EHCA_BMASK_IBM( 8, 8) +#define MQPCB_MASK_QP_STATE EHCA_BMASK_IBM( 9, 9) +#define MQPCB_QP_STATE EHCA_BMASK_IBM(24, 31) +#define MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES EHCA_BMASK_IBM(11, 11) +#define MQPCB_MASK_PATH_MIGRATION_STATE EHCA_BMASK_IBM(12, 12) +#define MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP EHCA_BMASK_IBM(13, 13) +#define MQPCB_MASK_DEST_QP_NR EHCA_BMASK_IBM(14, 14) +#define MQPCB_MASK_MIN_RNR_NAK_TIMER_FIELD EHCA_BMASK_IBM(15, 15) +#define MQPCB_MASK_SERVICE_LEVEL EHCA_BMASK_IBM(16, 16) +#define MQPCB_MASK_SEND_GRH_FLAG EHCA_BMASK_IBM(17, 17) +#define MQPCB_MASK_RETRY_COUNT EHCA_BMASK_IBM(18, 18) +#define MQPCB_MASK_TIMEOUT EHCA_BMASK_IBM(19, 19) +#define MQPCB_MASK_PATH_MTU EHCA_BMASK_IBM(20, 20) +#define MQPCB_PATH_MTU EHCA_BMASK_IBM(24, 31) +#define MQPCB_MASK_MAX_STATIC_RATE EHCA_BMASK_IBM(21, 21) +#define MQPCB_MAX_STATIC_RATE EHCA_BMASK_IBM(24, 31) +#define MQPCB_MASK_DLID EHCA_BMASK_IBM(22, 22) +#define MQPCB_DLID EHCA_BMASK_IBM(16, 31) +#define MQPCB_MASK_RNR_RETRY_COUNT EHCA_BMASK_IBM(23, 23) +#define MQPCB_RNR_RETRY_COUNT EHCA_BMASK_IBM(29, 31) +#define MQPCB_MASK_SOURCE_PATH_BITS EHCA_BMASK_IBM(24, 24) +#define MQPCB_SOURCE_PATH_BITS EHCA_BMASK_IBM(25, 31) +#define MQPCB_MASK_TRAFFIC_CLASS EHCA_BMASK_IBM(25, 25) +#define MQPCB_TRAFFIC_CLASS EHCA_BMASK_IBM(24, 31) +#define MQPCB_MASK_HOP_LIMIT EHCA_BMASK_IBM(26, 26) +#define MQPCB_HOP_LIMIT EHCA_BMASK_IBM(24, 31) +#define MQPCB_MASK_SOURCE_GID_IDX EHCA_BMASK_IBM(27, 27) +#define MQPCB_SOURCE_GID_IDX EHCA_BMASK_IBM(24, 31) +#define MQPCB_MASK_FLOW_LABEL EHCA_BMASK_IBM(28, 28) +#define MQPCB_FLOW_LABEL EHCA_BMASK_IBM(12, 31) +#define MQPCB_MASK_DEST_GID EHCA_BMASK_IBM(30, 30) +#define MQPCB_MASK_SERVICE_LEVEL_AL EHCA_BMASK_IBM(31, 31) +#define MQPCB_SERVICE_LEVEL_AL EHCA_BMASK_IBM(28, 31) +#define MQPCB_MASK_SEND_GRH_FLAG_AL EHCA_BMASK_IBM(32, 32) +#define MQPCB_SEND_GRH_FLAG_AL EHCA_BMASK_IBM(31, 31) +#define MQPCB_MASK_RETRY_COUNT_AL EHCA_BMASK_IBM(33, 33) +#define MQPCB_RETRY_COUNT_AL EHCA_BMASK_IBM(29, 31) +#define MQPCB_MASK_TIMEOUT_AL EHCA_BMASK_IBM(34, 34) +#define MQPCB_TIMEOUT_AL EHCA_BMASK_IBM(27, 31) +#define MQPCB_MASK_MAX_STATIC_RATE_AL EHCA_BMASK_IBM(35, 35) +#define MQPCB_MAX_STATIC_RATE_AL EHCA_BMASK_IBM(24, 31) +#define MQPCB_MASK_DLID_AL EHCA_BMASK_IBM(36, 36) +#define MQPCB_DLID_AL EHCA_BMASK_IBM(16, 31) +#define MQPCB_MASK_RNR_RETRY_COUNT_AL EHCA_BMASK_IBM(37, 37) +#define MQPCB_RNR_RETRY_COUNT_AL EHCA_BMASK_IBM(29, 31) +#define MQPCB_MASK_SOURCE_PATH_BITS_AL EHCA_BMASK_IBM(38, 38) +#define MQPCB_SOURCE_PATH_BITS_AL EHCA_BMASK_IBM(25, 31) +#define MQPCB_MASK_TRAFFIC_CLASS_AL EHCA_BMASK_IBM(39, 39) +#define MQPCB_TRAFFIC_CLASS_AL EHCA_BMASK_IBM(24, 31) +#define MQPCB_MASK_HOP_LIMIT_AL EHCA_BMASK_IBM(40, 40) +#define MQPCB_HOP_LIMIT_AL EHCA_BMASK_IBM(24, 31) +#define MQPCB_MASK_SOURCE_GID_IDX_AL EHCA_BMASK_IBM(41, 41) +#define MQPCB_SOURCE_GID_IDX_AL EHCA_BMASK_IBM(24, 31) +#define MQPCB_MASK_FLOW_LABEL_AL EHCA_BMASK_IBM(42, 42) +#define MQPCB_FLOW_LABEL_AL EHCA_BMASK_IBM(12, 31) +#define MQPCB_MASK_DEST_GID_AL EHCA_BMASK_IBM(44, 44) +#define MQPCB_MASK_MAX_NR_OUTST_SEND_WR EHCA_BMASK_IBM(45, 45) +#define MQPCB_MAX_NR_OUTST_SEND_WR EHCA_BMASK_IBM(16, 31) +#define MQPCB_MASK_MAX_NR_OUTST_RECV_WR EHCA_BMASK_IBM(46, 46) +#define MQPCB_MAX_NR_OUTST_RECV_WR EHCA_BMASK_IBM(16, 31) +#define MQPCB_MASK_DISABLE_ETE_CREDIT_CHECK EHCA_BMASK_IBM(47, 47) +#define MQPCB_DISABLE_ETE_CREDIT_CHECK EHCA_BMASK_IBM(31, 31) +#define MQPCB_QP_NUMBER EHCA_BMASK_IBM( 8, 31) +#define MQPCB_MASK_QP_ENABLE EHCA_BMASK_IBM(48, 48) +#define MQPCB_QP_ENABLE EHCA_BMASK_IBM(31, 31) +#define MQPCB_MASK_CURR_SRQ_LIMIT EHCA_BMASK_IBM(49, 49) +#define MQPCB_CURR_SRQ_LIMIT EHCA_BMASK_IBM(16, 31) +#define MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG EHCA_BMASK_IBM(50, 50) +#define MQPCB_MASK_SHARED_RQ_HNDL EHCA_BMASK_IBM(51, 51) #endif /* __EHCA_CLASSES_PSERIES_H__ */ diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c index 01d4a148bd71..9e87883b561a 100644 --- a/drivers/infiniband/hw/ehca/ehca_cq.c +++ b/drivers/infiniband/hw/ehca/ehca_cq.c @@ -97,7 +97,7 @@ int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int real_qp_num) return ret; } -struct ehca_qp* ehca_cq_get_qp(struct ehca_cq *cq, int real_qp_num) +struct ehca_qp *ehca_cq_get_qp(struct ehca_cq *cq, int real_qp_num) { struct ehca_qp *ret = NULL; unsigned int key = real_qp_num & (QP_HASHTAB_LEN-1); diff --git a/drivers/infiniband/hw/ehca/ehca_eq.c b/drivers/infiniband/hw/ehca/ehca_eq.c index 4961eb88827c..4825975f88cf 100644 --- a/drivers/infiniband/hw/ehca/ehca_eq.c +++ b/drivers/infiniband/hw/ehca/ehca_eq.c @@ -96,7 +96,8 @@ int ehca_create_eq(struct ehca_shca *shca, for (i = 0; i < nr_pages; i++) { u64 rpage; - if (!(vpage = ipz_qpageit_get_inc(&eq->ipz_queue))) { + vpage = ipz_qpageit_get_inc(&eq->ipz_queue); + if (!vpage) { ret = H_RESOURCE; goto create_eq_exit2; } diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/infiniband/hw/ehca/ehca_hca.c index bbd3c6a5822f..fc19ef9fd963 100644 --- a/drivers/infiniband/hw/ehca/ehca_hca.c +++ b/drivers/infiniband/hw/ehca/ehca_hca.c @@ -127,6 +127,7 @@ int ehca_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props) { int ret = 0; + u64 h_ret; struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, ib_device); struct hipz_query_port *rblock; @@ -137,7 +138,8 @@ int ehca_query_port(struct ib_device *ibdev, return -ENOMEM; } - if (hipz_h_query_port(shca->ipz_hca_handle, port, rblock) != H_SUCCESS) { + h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock); + if (h_ret != H_SUCCESS) { ehca_err(&shca->ib_device, "Can't query port properties"); ret = -EINVAL; goto query_port1; @@ -197,6 +199,7 @@ int ehca_query_sma_attr(struct ehca_shca *shca, u8 port, struct ehca_sma_attr *attr) { int ret = 0; + u64 h_ret; struct hipz_query_port *rblock; rblock = ehca_alloc_fw_ctrlblock(GFP_ATOMIC); @@ -205,7 +208,8 @@ int ehca_query_sma_attr(struct ehca_shca *shca, return -ENOMEM; } - if (hipz_h_query_port(shca->ipz_hca_handle, port, rblock) != H_SUCCESS) { + h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock); + if (h_ret != H_SUCCESS) { ehca_err(&shca->ib_device, "Can't query port properties"); ret = -EINVAL; goto query_sma_attr1; @@ -230,9 +234,11 @@ query_sma_attr1: int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) { int ret = 0; - struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, ib_device); + u64 h_ret; + struct ehca_shca *shca; struct hipz_query_port *rblock; + shca = container_of(ibdev, struct ehca_shca, ib_device); if (index > 16) { ehca_err(&shca->ib_device, "Invalid index: %x.", index); return -EINVAL; @@ -244,7 +250,8 @@ int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) return -ENOMEM; } - if (hipz_h_query_port(shca->ipz_hca_handle, port, rblock) != H_SUCCESS) { + h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock); + if (h_ret != H_SUCCESS) { ehca_err(&shca->ib_device, "Can't query port properties"); ret = -EINVAL; goto query_pkey1; @@ -262,6 +269,7 @@ int ehca_query_gid(struct ib_device *ibdev, u8 port, int index, union ib_gid *gid) { int ret = 0; + u64 h_ret; struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, ib_device); struct hipz_query_port *rblock; @@ -277,7 +285,8 @@ int ehca_query_gid(struct ib_device *ibdev, u8 port, return -ENOMEM; } - if (hipz_h_query_port(shca->ipz_hca_handle, port, rblock) != H_SUCCESS) { + h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock); + if (h_ret != H_SUCCESS) { ehca_err(&shca->ib_device, "Can't query port properties"); ret = -EINVAL; goto query_gid1; @@ -302,11 +311,12 @@ int ehca_modify_port(struct ib_device *ibdev, struct ib_port_modify *props) { int ret = 0; - struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, ib_device); + struct ehca_shca *shca; struct hipz_query_port *rblock; u32 cap; u64 hret; + shca = container_of(ibdev, struct ehca_shca, ib_device); if ((props->set_port_cap_mask | props->clr_port_cap_mask) & ~allowed_port_caps) { ehca_err(&shca->ib_device, "Non-changeable bits set in masks " @@ -325,7 +335,8 @@ int ehca_modify_port(struct ib_device *ibdev, goto modify_port1; } - if (hipz_h_query_port(shca->ipz_hca_handle, port, rblock) != H_SUCCESS) { + hret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock); + if (hret != H_SUCCESS) { ehca_err(&shca->ib_device, "Can't query port properties"); ret = -EINVAL; goto modify_port2; @@ -337,7 +348,8 @@ int ehca_modify_port(struct ib_device *ibdev, hret = hipz_h_modify_port(shca->ipz_hca_handle, port, cap, props->init_type, port_modify_mask); if (hret != H_SUCCESS) { - ehca_err(&shca->ib_device, "Modify port failed hret=%lx", hret); + ehca_err(&shca->ib_device, "Modify port failed hret=%lx", + hret); ret = -EINVAL; } diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c index 96eba3830754..4fb01fcb63ae 100644 --- a/drivers/infiniband/hw/ehca/ehca_irq.c +++ b/drivers/infiniband/hw/ehca/ehca_irq.c @@ -49,26 +49,26 @@ #include "hipz_fns.h" #include "ipz_pt_fn.h" -#define EQE_COMPLETION_EVENT EHCA_BMASK_IBM(1,1) -#define EQE_CQ_QP_NUMBER EHCA_BMASK_IBM(8,31) -#define EQE_EE_IDENTIFIER EHCA_BMASK_IBM(2,7) -#define EQE_CQ_NUMBER EHCA_BMASK_IBM(8,31) -#define EQE_QP_NUMBER EHCA_BMASK_IBM(8,31) -#define EQE_QP_TOKEN EHCA_BMASK_IBM(32,63) -#define EQE_CQ_TOKEN EHCA_BMASK_IBM(32,63) - -#define NEQE_COMPLETION_EVENT EHCA_BMASK_IBM(1,1) -#define NEQE_EVENT_CODE EHCA_BMASK_IBM(2,7) -#define NEQE_PORT_NUMBER EHCA_BMASK_IBM(8,15) -#define NEQE_PORT_AVAILABILITY EHCA_BMASK_IBM(16,16) -#define NEQE_DISRUPTIVE EHCA_BMASK_IBM(16,16) - -#define ERROR_DATA_LENGTH EHCA_BMASK_IBM(52,63) -#define ERROR_DATA_TYPE EHCA_BMASK_IBM(0,7) +#define EQE_COMPLETION_EVENT EHCA_BMASK_IBM( 1, 1) +#define EQE_CQ_QP_NUMBER EHCA_BMASK_IBM( 8, 31) +#define EQE_EE_IDENTIFIER EHCA_BMASK_IBM( 2, 7) +#define EQE_CQ_NUMBER EHCA_BMASK_IBM( 8, 31) +#define EQE_QP_NUMBER EHCA_BMASK_IBM( 8, 31) +#define EQE_QP_TOKEN EHCA_BMASK_IBM(32, 63) +#define EQE_CQ_TOKEN EHCA_BMASK_IBM(32, 63) + +#define NEQE_COMPLETION_EVENT EHCA_BMASK_IBM( 1, 1) +#define NEQE_EVENT_CODE EHCA_BMASK_IBM( 2, 7) +#define NEQE_PORT_NUMBER EHCA_BMASK_IBM( 8, 15) +#define NEQE_PORT_AVAILABILITY EHCA_BMASK_IBM(16, 16) +#define NEQE_DISRUPTIVE EHCA_BMASK_IBM(16, 16) + +#define ERROR_DATA_LENGTH EHCA_BMASK_IBM(52, 63) +#define ERROR_DATA_TYPE EHCA_BMASK_IBM( 0, 7) static void queue_comp_task(struct ehca_cq *__cq); -static struct ehca_comp_pool* pool; +static struct ehca_comp_pool *pool; #ifdef CONFIG_HOTPLUG_CPU static struct notifier_block comp_pool_callback_nb; #endif @@ -85,8 +85,8 @@ static inline void comp_event_callback(struct ehca_cq *cq) return; } -static void print_error_data(struct ehca_shca * shca, void* data, - u64* rblock, int length) +static void print_error_data(struct ehca_shca *shca, void *data, + u64 *rblock, int length) { u64 type = EHCA_BMASK_GET(ERROR_DATA_TYPE, rblock[2]); u64 resource = rblock[1]; @@ -94,7 +94,7 @@ static void print_error_data(struct ehca_shca * shca, void* data, switch (type) { case 0x1: /* Queue Pair */ { - struct ehca_qp *qp = (struct ehca_qp*)data; + struct ehca_qp *qp = (struct ehca_qp *)data; /* only print error data if AER is set */ if (rblock[6] == 0) @@ -107,7 +107,7 @@ static void print_error_data(struct ehca_shca * shca, void* data, } case 0x4: /* Completion Queue */ { - struct ehca_cq *cq = (struct ehca_cq*)data; + struct ehca_cq *cq = (struct ehca_cq *)data; ehca_err(&shca->ib_device, "CQ 0x%x (resource=%lx) has errors.", @@ -572,7 +572,7 @@ void ehca_tasklet_eq(unsigned long data) ehca_process_eq((struct ehca_shca*)data, 1); } -static inline int find_next_online_cpu(struct ehca_comp_pool* pool) +static inline int find_next_online_cpu(struct ehca_comp_pool *pool) { int cpu; unsigned long flags; @@ -636,7 +636,7 @@ static void queue_comp_task(struct ehca_cq *__cq) __queue_comp_task(__cq, cct); } -static void run_comp_task(struct ehca_cpu_comp_task* cct) +static void run_comp_task(struct ehca_cpu_comp_task *cct) { struct ehca_cq *cq; unsigned long flags; @@ -666,12 +666,12 @@ static void run_comp_task(struct ehca_cpu_comp_task* cct) static int comp_task(void *__cct) { - struct ehca_cpu_comp_task* cct = __cct; + struct ehca_cpu_comp_task *cct = __cct; int cql_empty; DECLARE_WAITQUEUE(wait, current); set_current_state(TASK_INTERRUPTIBLE); - while(!kthread_should_stop()) { + while (!kthread_should_stop()) { add_wait_queue(&cct->wait_queue, &wait); spin_lock_irq(&cct->task_lock); @@ -745,7 +745,7 @@ static void take_over_work(struct ehca_comp_pool *pool, list_splice_init(&cct->cq_list, &list); - while(!list_empty(&list)) { + while (!list_empty(&list)) { cq = list_entry(cct->cq_list.next, struct ehca_cq, entry); list_del(&cq->entry); @@ -768,7 +768,7 @@ static int comp_pool_callback(struct notifier_block *nfb, case CPU_UP_PREPARE: case CPU_UP_PREPARE_FROZEN: ehca_gen_dbg("CPU: %x (CPU_PREPARE)", cpu); - if(!create_comp_task(pool, cpu)) { + if (!create_comp_task(pool, cpu)) { ehca_gen_err("Can't create comp_task for cpu: %x", cpu); return NOTIFY_BAD; } @@ -838,7 +838,7 @@ int ehca_create_comp_pool(void) #ifdef CONFIG_HOTPLUG_CPU comp_pool_callback_nb.notifier_call = comp_pool_callback; - comp_pool_callback_nb.priority =0; + comp_pool_callback_nb.priority = 0; register_cpu_notifier(&comp_pool_callback_nb); #endif diff --git a/drivers/infiniband/hw/ehca/ehca_iverbs.h b/drivers/infiniband/hw/ehca/ehca_iverbs.h index 77aeca6a2c2f..dce503bb7d6b 100644 --- a/drivers/infiniband/hw/ehca/ehca_iverbs.h +++ b/drivers/infiniband/hw/ehca/ehca_iverbs.h @@ -81,8 +81,9 @@ struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd, int num_phys_buf, int mr_access_flags, u64 *iova_start); -struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt, - int mr_access_flags, struct ib_udata *udata); +struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, + u64 virt, int mr_access_flags, + struct ib_udata *udata); int ehca_rereg_phys_mr(struct ib_mr *mr, int mr_rereg_mask, @@ -192,7 +193,7 @@ void ehca_poll_eqs(unsigned long data); void *ehca_alloc_fw_ctrlblock(gfp_t flags); void ehca_free_fw_ctrlblock(void *ptr); #else -#define ehca_alloc_fw_ctrlblock(flags) ((void *) get_zeroed_page(flags)) +#define ehca_alloc_fw_ctrlblock(flags) ((void *)get_zeroed_page(flags)) #define ehca_free_fw_ctrlblock(ptr) free_page((unsigned long)(ptr)) #endif diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c index 28ba2dd24216..36377c6db3d4 100644 --- a/drivers/infiniband/hw/ehca/ehca_main.c +++ b/drivers/infiniband/hw/ehca/ehca_main.c @@ -107,7 +107,7 @@ static DEFINE_SPINLOCK(shca_list_lock); static struct timer_list poll_eqs_timer; #ifdef CONFIG_PPC_64K_PAGES -static struct kmem_cache *ctblk_cache = NULL; +static struct kmem_cache *ctblk_cache; void *ehca_alloc_fw_ctrlblock(gfp_t flags) { @@ -200,8 +200,8 @@ static void ehca_destroy_slab_caches(void) #endif } -#define EHCA_HCAAVER EHCA_BMASK_IBM(32,39) -#define EHCA_REVID EHCA_BMASK_IBM(40,63) +#define EHCA_HCAAVER EHCA_BMASK_IBM(32, 39) +#define EHCA_REVID EHCA_BMASK_IBM(40, 63) static struct cap_descr { u64 mask; @@ -263,22 +263,27 @@ int ehca_sense_attributes(struct ehca_shca *shca) ehca_gen_dbg(" ... hardware version=%x:%x", hcaaver, revid); - if ((hcaaver == 1) && (revid == 0)) - shca->hw_level = 0x11; - else if ((hcaaver == 1) && (revid == 1)) - shca->hw_level = 0x12; - else if ((hcaaver == 1) && (revid == 2)) - shca->hw_level = 0x13; - else if ((hcaaver == 2) && (revid == 0)) - shca->hw_level = 0x21; - else if ((hcaaver == 2) && (revid == 0x10)) - shca->hw_level = 0x22; - else { + if (hcaaver == 1) { + if (revid <= 3) + shca->hw_level = 0x10 | (revid + 1); + else + shca->hw_level = 0x14; + } else if (hcaaver == 2) { + if (revid == 0) + shca->hw_level = 0x21; + else if (revid == 0x10) + shca->hw_level = 0x22; + else if (revid == 0x20 || revid == 0x21) + shca->hw_level = 0x23; + } + + if (!shca->hw_level) { ehca_gen_warn("unknown hardware version" " - assuming default level"); shca->hw_level = 0x22; } - } + } else + shca->hw_level = ehca_hw_level; ehca_gen_dbg(" ... hardware level=%x", shca->hw_level); shca->sport[0].rate = IB_RATE_30_GBPS; @@ -290,7 +295,7 @@ int ehca_sense_attributes(struct ehca_shca *shca) if (EHCA_BMASK_GET(hca_cap_descr[i].mask, shca->hca_cap)) ehca_gen_dbg(" %s", hca_cap_descr[i].descr); - port = (struct hipz_query_port *) rblock; + port = (struct hipz_query_port *)rblock; h_ret = hipz_h_query_port(shca->ipz_hca_handle, 1, port); if (h_ret != H_SUCCESS) { ehca_gen_err("Cannot query port properties. h_ret=%lx", @@ -439,7 +444,7 @@ static int ehca_create_aqp1(struct ehca_shca *shca, u32 port) return -EPERM; } - ibcq = ib_create_cq(&shca->ib_device, NULL, NULL, (void*)(-1), 10, 0); + ibcq = ib_create_cq(&shca->ib_device, NULL, NULL, (void *)(-1), 10, 0); if (IS_ERR(ibcq)) { ehca_err(&shca->ib_device, "Cannot create AQP1 CQ."); return PTR_ERR(ibcq); @@ -666,7 +671,7 @@ static int __devinit ehca_probe(struct ibmebus_dev *dev, } /* create internal protection domain */ - ibpd = ehca_alloc_pd(&shca->ib_device, (void*)(-1), NULL); + ibpd = ehca_alloc_pd(&shca->ib_device, (void *)(-1), NULL); if (IS_ERR(ibpd)) { ehca_err(&shca->ib_device, "Cannot create internal PD."); ret = PTR_ERR(ibpd); @@ -863,18 +868,21 @@ int __init ehca_module_init(void) printk(KERN_INFO "eHCA Infiniband Device Driver " "(Rel.: SVNEHCA_0023)\n"); - if ((ret = ehca_create_comp_pool())) { + ret = ehca_create_comp_pool(); + if (ret) { ehca_gen_err("Cannot create comp pool."); return ret; } - if ((ret = ehca_create_slab_caches())) { + ret = ehca_create_slab_caches(); + if (ret) { ehca_gen_err("Cannot create SLAB caches"); ret = -ENOMEM; goto module_init1; } - if ((ret = ibmebus_register_driver(&ehca_driver))) { + ret = ibmebus_register_driver(&ehca_driver); + if (ret) { ehca_gen_err("Cannot register eHCA device driver"); ret = -EINVAL; goto module_init2; diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c index add79bd44e39..6262c5462d50 100644 --- a/drivers/infiniband/hw/ehca/ehca_mrmw.c +++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c @@ -48,6 +48,11 @@ #include "hcp_if.h" #include "hipz_hw.h" +#define NUM_CHUNKS(length, chunk_size) \ + (((length) + (chunk_size - 1)) / (chunk_size)) +/* max number of rpages (per hcall register_rpages) */ +#define MAX_RPAGES 512 + static struct kmem_cache *mr_cache; static struct kmem_cache *mw_cache; @@ -56,9 +61,9 @@ static struct ehca_mr *ehca_mr_new(void) struct ehca_mr *me; me = kmem_cache_zalloc(mr_cache, GFP_KERNEL); - if (me) { + if (me) spin_lock_init(&me->mrlock); - } else + else ehca_gen_err("alloc failed"); return me; @@ -74,9 +79,9 @@ static struct ehca_mw *ehca_mw_new(void) struct ehca_mw *me; me = kmem_cache_zalloc(mw_cache, GFP_KERNEL); - if (me) { + if (me) spin_lock_init(&me->mwlock); - } else + else ehca_gen_err("alloc failed"); return me; @@ -106,11 +111,12 @@ struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags) goto get_dma_mr_exit0; } - ret = ehca_reg_maxmr(shca, e_maxmr, (u64*)KERNELBASE, + ret = ehca_reg_maxmr(shca, e_maxmr, (u64 *)KERNELBASE, mr_access_flags, e_pd, &e_maxmr->ib.ib_mr.lkey, &e_maxmr->ib.ib_mr.rkey); if (ret) { + ehca_mr_delete(e_maxmr); ib_mr = ERR_PTR(ret); goto get_dma_mr_exit0; } @@ -144,9 +150,6 @@ struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd, struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd); u64 size; - struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0}; - u32 num_pages_mr; - u32 num_pages_4k; /* 4k portion "pages" */ if ((num_phys_buf <= 0) || !phys_buf_array) { ehca_err(pd->device, "bad input values: num_phys_buf=%x " @@ -190,12 +193,6 @@ struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd, goto reg_phys_mr_exit0; } - /* determine number of MR pages */ - num_pages_mr = ((((u64)iova_start % PAGE_SIZE) + size + - PAGE_SIZE - 1) / PAGE_SIZE); - num_pages_4k = ((((u64)iova_start % EHCA_PAGESIZE) + size + - EHCA_PAGESIZE - 1) / EHCA_PAGESIZE); - /* register MR on HCA */ if (ehca_mr_is_maxmr(size, iova_start)) { e_mr->flags |= EHCA_MR_FLAG_MAXMR; @@ -207,13 +204,22 @@ struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd, goto reg_phys_mr_exit1; } } else { - pginfo.type = EHCA_MR_PGI_PHYS; - pginfo.num_pages = num_pages_mr; - pginfo.num_4k = num_pages_4k; - pginfo.num_phys_buf = num_phys_buf; - pginfo.phys_buf_array = phys_buf_array; - pginfo.next_4k = (((u64)iova_start & ~PAGE_MASK) / - EHCA_PAGESIZE); + struct ehca_mr_pginfo pginfo; + u32 num_kpages; + u32 num_hwpages; + + num_kpages = NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size, + PAGE_SIZE); + num_hwpages = NUM_CHUNKS(((u64)iova_start % EHCA_PAGESIZE) + + size, EHCA_PAGESIZE); + memset(&pginfo, 0, sizeof(pginfo)); + pginfo.type = EHCA_MR_PGI_PHYS; + pginfo.num_kpages = num_kpages; + pginfo.num_hwpages = num_hwpages; + pginfo.u.phy.num_phys_buf = num_phys_buf; + pginfo.u.phy.phys_buf_array = phys_buf_array; + pginfo.next_hwpage = (((u64)iova_start & ~PAGE_MASK) / + EHCA_PAGESIZE); ret = ehca_reg_mr(shca, e_mr, iova_start, size, mr_access_flags, e_pd, &pginfo, &e_mr->ib.ib_mr.lkey, @@ -240,18 +246,19 @@ reg_phys_mr_exit0: /*----------------------------------------------------------------------*/ -struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt, - int mr_access_flags, struct ib_udata *udata) +struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, + u64 virt, int mr_access_flags, + struct ib_udata *udata) { struct ib_mr *ib_mr; struct ehca_mr *e_mr; struct ehca_shca *shca = container_of(pd->device, struct ehca_shca, ib_device); struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd); - struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0}; + struct ehca_mr_pginfo pginfo; int ret; - u32 num_pages_mr; - u32 num_pages_4k; /* 4k portion "pages" */ + u32 num_kpages; + u32 num_hwpages; if (!pd) { ehca_gen_err("bad pd=%p", pd); @@ -289,7 +296,7 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt e_mr->umem = ib_umem_get(pd->uobject->context, start, length, mr_access_flags); if (IS_ERR(e_mr->umem)) { - ib_mr = (void *) e_mr->umem; + ib_mr = (void *)e_mr->umem; goto reg_user_mr_exit1; } @@ -301,23 +308,24 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt } /* determine number of MR pages */ - num_pages_mr = (((virt % PAGE_SIZE) + length + PAGE_SIZE - 1) / - PAGE_SIZE); - num_pages_4k = (((virt % EHCA_PAGESIZE) + length + EHCA_PAGESIZE - 1) / - EHCA_PAGESIZE); + num_kpages = NUM_CHUNKS((virt % PAGE_SIZE) + length, PAGE_SIZE); + num_hwpages = NUM_CHUNKS((virt % EHCA_PAGESIZE) + length, + EHCA_PAGESIZE); /* register MR on HCA */ - pginfo.type = EHCA_MR_PGI_USER; - pginfo.num_pages = num_pages_mr; - pginfo.num_4k = num_pages_4k; - pginfo.region = e_mr->umem; - pginfo.next_4k = e_mr->umem->offset / EHCA_PAGESIZE; - pginfo.next_chunk = list_prepare_entry(pginfo.next_chunk, - (&e_mr->umem->chunk_list), - list); - - ret = ehca_reg_mr(shca, e_mr, (u64*) virt, length, mr_access_flags, e_pd, - &pginfo, &e_mr->ib.ib_mr.lkey, &e_mr->ib.ib_mr.rkey); + memset(&pginfo, 0, sizeof(pginfo)); + pginfo.type = EHCA_MR_PGI_USER; + pginfo.num_kpages = num_kpages; + pginfo.num_hwpages = num_hwpages; + pginfo.u.usr.region = e_mr->umem; + pginfo.next_hwpage = e_mr->umem->offset / EHCA_PAGESIZE; + pginfo.u.usr.next_chunk = list_prepare_entry(pginfo.u.usr.next_chunk, + (&e_mr->umem->chunk_list), + list); + + ret = ehca_reg_mr(shca, e_mr, (u64 *)virt, length, mr_access_flags, + e_pd, &pginfo, &e_mr->ib.ib_mr.lkey, + &e_mr->ib.ib_mr.rkey); if (ret) { ib_mr = ERR_PTR(ret); goto reg_user_mr_exit2; @@ -360,9 +368,9 @@ int ehca_rereg_phys_mr(struct ib_mr *mr, struct ehca_pd *new_pd; u32 tmp_lkey, tmp_rkey; unsigned long sl_flags; - u32 num_pages_mr = 0; - u32 num_pages_4k = 0; /* 4k portion "pages" */ - struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0}; + u32 num_kpages = 0; + u32 num_hwpages = 0; + struct ehca_mr_pginfo pginfo; u32 cur_pid = current->tgid; if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context && @@ -414,7 +422,7 @@ int ehca_rereg_phys_mr(struct ib_mr *mr, goto rereg_phys_mr_exit0; } if (!phys_buf_array || num_phys_buf <= 0) { - ehca_err(mr->device, "bad input values: mr_rereg_mask=%x" + ehca_err(mr->device, "bad input values mr_rereg_mask=%x" " phys_buf_array=%p num_phys_buf=%x", mr_rereg_mask, phys_buf_array, num_phys_buf); ret = -EINVAL; @@ -438,10 +446,10 @@ int ehca_rereg_phys_mr(struct ib_mr *mr, /* set requested values dependent on rereg request */ spin_lock_irqsave(&e_mr->mrlock, sl_flags); - new_start = e_mr->start; /* new == old address */ - new_size = e_mr->size; /* new == old length */ - new_acl = e_mr->acl; /* new == old access control */ - new_pd = container_of(mr->pd,struct ehca_pd,ib_pd); /*new == old PD*/ + new_start = e_mr->start; + new_size = e_mr->size; + new_acl = e_mr->acl; + new_pd = container_of(mr->pd, struct ehca_pd, ib_pd); if (mr_rereg_mask & IB_MR_REREG_TRANS) { new_start = iova_start; /* change address */ @@ -458,17 +466,18 @@ int ehca_rereg_phys_mr(struct ib_mr *mr, ret = -EINVAL; goto rereg_phys_mr_exit1; } - num_pages_mr = ((((u64)new_start % PAGE_SIZE) + new_size + - PAGE_SIZE - 1) / PAGE_SIZE); - num_pages_4k = ((((u64)new_start % EHCA_PAGESIZE) + new_size + - EHCA_PAGESIZE - 1) / EHCA_PAGESIZE); - pginfo.type = EHCA_MR_PGI_PHYS; - pginfo.num_pages = num_pages_mr; - pginfo.num_4k = num_pages_4k; - pginfo.num_phys_buf = num_phys_buf; - pginfo.phys_buf_array = phys_buf_array; - pginfo.next_4k = (((u64)iova_start & ~PAGE_MASK) / - EHCA_PAGESIZE); + num_kpages = NUM_CHUNKS(((u64)new_start % PAGE_SIZE) + + new_size, PAGE_SIZE); + num_hwpages = NUM_CHUNKS(((u64)new_start % EHCA_PAGESIZE) + + new_size, EHCA_PAGESIZE); + memset(&pginfo, 0, sizeof(pginfo)); + pginfo.type = EHCA_MR_PGI_PHYS; + pginfo.num_kpages = num_kpages; + pginfo.num_hwpages = num_hwpages; + pginfo.u.phy.num_phys_buf = num_phys_buf; + pginfo.u.phy.phys_buf_array = phys_buf_array; + pginfo.next_hwpage = (((u64)iova_start & ~PAGE_MASK) / + EHCA_PAGESIZE); } if (mr_rereg_mask & IB_MR_REREG_ACCESS) new_acl = mr_access_flags; @@ -510,7 +519,7 @@ int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr) struct ehca_pd *my_pd = container_of(mr->pd, struct ehca_pd, ib_pd); u32 cur_pid = current->tgid; unsigned long sl_flags; - struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0}; + struct ehca_mr_hipzout_parms hipzout; if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context && (my_pd->ownpid != cur_pid)) { @@ -536,14 +545,14 @@ int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr) "hca_hndl=%lx mr_hndl=%lx lkey=%x", h_ret, mr, shca->ipz_hca_handle.handle, e_mr->ipz_mr_handle.handle, mr->lkey); - ret = ehca_mrmw_map_hrc_query_mr(h_ret); + ret = ehca2ib_return_code(h_ret); goto query_mr_exit1; } - mr_attr->pd = mr->pd; + mr_attr->pd = mr->pd; mr_attr->device_virt_addr = hipzout.vaddr; - mr_attr->size = hipzout.len; - mr_attr->lkey = hipzout.lkey; - mr_attr->rkey = hipzout.rkey; + mr_attr->size = hipzout.len; + mr_attr->lkey = hipzout.lkey; + mr_attr->rkey = hipzout.rkey; ehca_mrmw_reverse_map_acl(&hipzout.acl, &mr_attr->mr_access_flags); query_mr_exit1: @@ -596,7 +605,7 @@ int ehca_dereg_mr(struct ib_mr *mr) "e_mr=%p hca_hndl=%lx mr_hndl=%lx mr->lkey=%x", h_ret, shca, e_mr, shca->ipz_hca_handle.handle, e_mr->ipz_mr_handle.handle, mr->lkey); - ret = ehca_mrmw_map_hrc_free_mr(h_ret); + ret = ehca2ib_return_code(h_ret); goto dereg_mr_exit0; } @@ -622,7 +631,7 @@ struct ib_mw *ehca_alloc_mw(struct ib_pd *pd) struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd); struct ehca_shca *shca = container_of(pd->device, struct ehca_shca, ib_device); - struct ehca_mw_hipzout_parms hipzout = {{0},0}; + struct ehca_mw_hipzout_parms hipzout; e_mw = ehca_mw_new(); if (!e_mw) { @@ -636,7 +645,7 @@ struct ib_mw *ehca_alloc_mw(struct ib_pd *pd) ehca_err(pd->device, "hipz_mw_allocate failed, h_ret=%lx " "shca=%p hca_hndl=%lx mw=%p", h_ret, shca, shca->ipz_hca_handle.handle, e_mw); - ib_mw = ERR_PTR(ehca_mrmw_map_hrc_alloc(h_ret)); + ib_mw = ERR_PTR(ehca2ib_return_code(h_ret)); goto alloc_mw_exit1; } /* successful MW allocation */ @@ -679,7 +688,7 @@ int ehca_dealloc_mw(struct ib_mw *mw) "mw=%p rkey=%x hca_hndl=%lx mw_hndl=%lx", h_ret, shca, mw, mw->rkey, shca->ipz_hca_handle.handle, e_mw->ipz_mw_handle.handle); - return ehca_mrmw_map_hrc_free_mw(h_ret); + return ehca2ib_return_code(h_ret); } /* successful deallocation */ ehca_mw_delete(e_mw); @@ -699,7 +708,7 @@ struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd, struct ehca_mr *e_fmr; int ret; u32 tmp_lkey, tmp_rkey; - struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0}; + struct ehca_mr_pginfo pginfo; /* check other parameters */ if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) && @@ -745,6 +754,7 @@ struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd, e_fmr->flags |= EHCA_MR_FLAG_FMR; /* register MR on HCA */ + memset(&pginfo, 0, sizeof(pginfo)); ret = ehca_reg_mr(shca, e_fmr, NULL, fmr_attr->max_pages * (1 << fmr_attr->page_shift), mr_access_flags, e_pd, &pginfo, @@ -783,7 +793,7 @@ int ehca_map_phys_fmr(struct ib_fmr *fmr, container_of(fmr->device, struct ehca_shca, ib_device); struct ehca_mr *e_fmr = container_of(fmr, struct ehca_mr, ib.ib_fmr); struct ehca_pd *e_pd = container_of(fmr->pd, struct ehca_pd, ib_pd); - struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0}; + struct ehca_mr_pginfo pginfo; u32 tmp_lkey, tmp_rkey; if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) { @@ -809,14 +819,16 @@ int ehca_map_phys_fmr(struct ib_fmr *fmr, fmr, e_fmr->fmr_map_cnt, e_fmr->fmr_max_maps); } - pginfo.type = EHCA_MR_PGI_FMR; - pginfo.num_pages = list_len; - pginfo.num_4k = list_len * (e_fmr->fmr_page_size / EHCA_PAGESIZE); - pginfo.page_list = page_list; - pginfo.next_4k = ((iova & (e_fmr->fmr_page_size-1)) / - EHCA_PAGESIZE); + memset(&pginfo, 0, sizeof(pginfo)); + pginfo.type = EHCA_MR_PGI_FMR; + pginfo.num_kpages = list_len; + pginfo.num_hwpages = list_len * (e_fmr->fmr_page_size / EHCA_PAGESIZE); + pginfo.u.fmr.page_list = page_list; + pginfo.next_hwpage = ((iova & (e_fmr->fmr_page_size-1)) / + EHCA_PAGESIZE); + pginfo.u.fmr.fmr_pgsize = e_fmr->fmr_page_size; - ret = ehca_rereg_mr(shca, e_fmr, (u64*)iova, + ret = ehca_rereg_mr(shca, e_fmr, (u64 *)iova, list_len * e_fmr->fmr_page_size, e_fmr->acl, e_pd, &pginfo, &tmp_lkey, &tmp_rkey); if (ret) @@ -831,8 +843,7 @@ int ehca_map_phys_fmr(struct ib_fmr *fmr, map_phys_fmr_exit0: if (ret) ehca_err(fmr->device, "ret=%x fmr=%p page_list=%p list_len=%x " - "iova=%lx", - ret, fmr, page_list, list_len, iova); + "iova=%lx", ret, fmr, page_list, list_len, iova); return ret; } /* end ehca_map_phys_fmr() */ @@ -922,7 +933,7 @@ int ehca_dealloc_fmr(struct ib_fmr *fmr) "hca_hndl=%lx fmr_hndl=%lx fmr->lkey=%x", h_ret, e_fmr, shca->ipz_hca_handle.handle, e_fmr->ipz_mr_handle.handle, fmr->lkey); - ret = ehca_mrmw_map_hrc_free_mr(h_ret); + ret = ehca2ib_return_code(h_ret); goto free_fmr_exit0; } /* successful deregistration */ @@ -950,12 +961,12 @@ int ehca_reg_mr(struct ehca_shca *shca, int ret; u64 h_ret; u32 hipz_acl; - struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0}; + struct ehca_mr_hipzout_parms hipzout; ehca_mrmw_map_acl(acl, &hipz_acl); ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl); if (ehca_use_hp_mr == 1) - hipz_acl |= 0x00000001; + hipz_acl |= 0x00000001; h_ret = hipz_h_alloc_resource_mr(shca->ipz_hca_handle, e_mr, (u64)iova_start, size, hipz_acl, @@ -963,7 +974,7 @@ int ehca_reg_mr(struct ehca_shca *shca, if (h_ret != H_SUCCESS) { ehca_err(&shca->ib_device, "hipz_alloc_mr failed, h_ret=%lx " "hca_hndl=%lx", h_ret, shca->ipz_hca_handle.handle); - ret = ehca_mrmw_map_hrc_alloc(h_ret); + ret = ehca2ib_return_code(h_ret); goto ehca_reg_mr_exit0; } @@ -974,11 +985,11 @@ int ehca_reg_mr(struct ehca_shca *shca, goto ehca_reg_mr_exit1; /* successful registration */ - e_mr->num_pages = pginfo->num_pages; - e_mr->num_4k = pginfo->num_4k; - e_mr->start = iova_start; - e_mr->size = size; - e_mr->acl = acl; + e_mr->num_kpages = pginfo->num_kpages; + e_mr->num_hwpages = pginfo->num_hwpages; + e_mr->start = iova_start; + e_mr->size = size; + e_mr->acl = acl; *lkey = hipzout.lkey; *rkey = hipzout.rkey; return 0; @@ -988,10 +999,10 @@ ehca_reg_mr_exit1: if (h_ret != H_SUCCESS) { ehca_err(&shca->ib_device, "h_ret=%lx shca=%p e_mr=%p " "iova_start=%p size=%lx acl=%x e_pd=%p lkey=%x " - "pginfo=%p num_pages=%lx num_4k=%lx ret=%x", + "pginfo=%p num_kpages=%lx num_hwpages=%lx ret=%x", h_ret, shca, e_mr, iova_start, size, acl, e_pd, - hipzout.lkey, pginfo, pginfo->num_pages, - pginfo->num_4k, ret); + hipzout.lkey, pginfo, pginfo->num_kpages, + pginfo->num_hwpages, ret); ehca_err(&shca->ib_device, "internal error in ehca_reg_mr, " "not recoverable"); } @@ -999,9 +1010,9 @@ ehca_reg_mr_exit0: if (ret) ehca_err(&shca->ib_device, "ret=%x shca=%p e_mr=%p " "iova_start=%p size=%lx acl=%x e_pd=%p pginfo=%p " - "num_pages=%lx num_4k=%lx", + "num_kpages=%lx num_hwpages=%lx", ret, shca, e_mr, iova_start, size, acl, e_pd, pginfo, - pginfo->num_pages, pginfo->num_4k); + pginfo->num_kpages, pginfo->num_hwpages); return ret; } /* end ehca_reg_mr() */ @@ -1026,24 +1037,24 @@ int ehca_reg_mr_rpages(struct ehca_shca *shca, } /* max 512 pages per shot */ - for (i = 0; i < ((pginfo->num_4k + 512 - 1) / 512); i++) { + for (i = 0; i < NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES); i++) { - if (i == ((pginfo->num_4k + 512 - 1) / 512) - 1) { - rnum = pginfo->num_4k % 512; /* last shot */ + if (i == NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES) - 1) { + rnum = pginfo->num_hwpages % MAX_RPAGES; /* last shot */ if (rnum == 0) - rnum = 512; /* last shot is full */ + rnum = MAX_RPAGES; /* last shot is full */ } else - rnum = 512; + rnum = MAX_RPAGES; - if (rnum > 1) { - ret = ehca_set_pagebuf(e_mr, pginfo, rnum, kpage); - if (ret) { - ehca_err(&shca->ib_device, "ehca_set_pagebuf " + ret = ehca_set_pagebuf(pginfo, rnum, kpage); + if (ret) { + ehca_err(&shca->ib_device, "ehca_set_pagebuf " "bad rc, ret=%x rnum=%x kpage=%p", ret, rnum, kpage); - ret = -EFAULT; - goto ehca_reg_mr_rpages_exit1; - } + goto ehca_reg_mr_rpages_exit1; + } + + if (rnum > 1) { rpage = virt_to_abs(kpage); if (!rpage) { ehca_err(&shca->ib_device, "kpage=%p i=%x", @@ -1051,21 +1062,14 @@ int ehca_reg_mr_rpages(struct ehca_shca *shca, ret = -EFAULT; goto ehca_reg_mr_rpages_exit1; } - } else { /* rnum==1 */ - ret = ehca_set_pagebuf_1(e_mr, pginfo, &rpage); - if (ret) { - ehca_err(&shca->ib_device, "ehca_set_pagebuf_1 " - "bad rc, ret=%x i=%x", ret, i); - ret = -EFAULT; - goto ehca_reg_mr_rpages_exit1; - } - } + } else + rpage = *kpage; h_ret = hipz_h_register_rpage_mr(shca->ipz_hca_handle, e_mr, 0, /* pagesize 4k */ 0, rpage, rnum); - if (i == ((pginfo->num_4k + 512 - 1) / 512) - 1) { + if (i == NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES) - 1) { /* * check for 'registration complete'==H_SUCCESS * and for 'page registered'==H_PAGE_REGISTERED @@ -1078,7 +1082,7 @@ int ehca_reg_mr_rpages(struct ehca_shca *shca, shca->ipz_hca_handle.handle, e_mr->ipz_mr_handle.handle, e_mr->ib.ib_mr.lkey); - ret = ehca_mrmw_map_hrc_rrpg_last(h_ret); + ret = ehca2ib_return_code(h_ret); break; } else ret = 0; @@ -1089,7 +1093,7 @@ int ehca_reg_mr_rpages(struct ehca_shca *shca, e_mr->ib.ib_mr.lkey, shca->ipz_hca_handle.handle, e_mr->ipz_mr_handle.handle); - ret = ehca_mrmw_map_hrc_rrpg_notlast(h_ret); + ret = ehca2ib_return_code(h_ret); break; } else ret = 0; @@ -1101,8 +1105,8 @@ ehca_reg_mr_rpages_exit1: ehca_reg_mr_rpages_exit0: if (ret) ehca_err(&shca->ib_device, "ret=%x shca=%p e_mr=%p pginfo=%p " - "num_pages=%lx num_4k=%lx", ret, shca, e_mr, pginfo, - pginfo->num_pages, pginfo->num_4k); + "num_kpages=%lx num_hwpages=%lx", ret, shca, e_mr, + pginfo, pginfo->num_kpages, pginfo->num_hwpages); return ret; } /* end ehca_reg_mr_rpages() */ @@ -1124,7 +1128,7 @@ inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca, u64 *kpage; u64 rpage; struct ehca_mr_pginfo pginfo_save; - struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0}; + struct ehca_mr_hipzout_parms hipzout; ehca_mrmw_map_acl(acl, &hipz_acl); ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl); @@ -1137,12 +1141,12 @@ inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca, } pginfo_save = *pginfo; - ret = ehca_set_pagebuf(e_mr, pginfo, pginfo->num_4k, kpage); + ret = ehca_set_pagebuf(pginfo, pginfo->num_hwpages, kpage); if (ret) { ehca_err(&shca->ib_device, "set pagebuf failed, e_mr=%p " - "pginfo=%p type=%x num_pages=%lx num_4k=%lx kpage=%p", - e_mr, pginfo, pginfo->type, pginfo->num_pages, - pginfo->num_4k,kpage); + "pginfo=%p type=%x num_kpages=%lx num_hwpages=%lx " + "kpage=%p", e_mr, pginfo, pginfo->type, + pginfo->num_kpages, pginfo->num_hwpages, kpage); goto ehca_rereg_mr_rereg1_exit1; } rpage = virt_to_abs(kpage); @@ -1164,7 +1168,7 @@ inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca, "(Rereg1), h_ret=%lx e_mr=%p", h_ret, e_mr); *pginfo = pginfo_save; ret = -EAGAIN; - } else if ((u64*)hipzout.vaddr != iova_start) { + } else if ((u64 *)hipzout.vaddr != iova_start) { ehca_err(&shca->ib_device, "PHYP changed iova_start in " "rereg_pmr, iova_start=%p iova_start_out=%lx e_mr=%p " "mr_handle=%lx lkey=%x lkey_out=%x", iova_start, @@ -1176,11 +1180,11 @@ inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca, * successful reregistration * note: start and start_out are identical for eServer HCAs */ - e_mr->num_pages = pginfo->num_pages; - e_mr->num_4k = pginfo->num_4k; - e_mr->start = iova_start; - e_mr->size = size; - e_mr->acl = acl; + e_mr->num_kpages = pginfo->num_kpages; + e_mr->num_hwpages = pginfo->num_hwpages; + e_mr->start = iova_start; + e_mr->size = size; + e_mr->acl = acl; *lkey = hipzout.lkey; *rkey = hipzout.rkey; } @@ -1190,9 +1194,9 @@ ehca_rereg_mr_rereg1_exit1: ehca_rereg_mr_rereg1_exit0: if ( ret && (ret != -EAGAIN) ) ehca_err(&shca->ib_device, "ret=%x lkey=%x rkey=%x " - "pginfo=%p num_pages=%lx num_4k=%lx", - ret, *lkey, *rkey, pginfo, pginfo->num_pages, - pginfo->num_4k); + "pginfo=%p num_kpages=%lx num_hwpages=%lx", + ret, *lkey, *rkey, pginfo, pginfo->num_kpages, + pginfo->num_hwpages); return ret; } /* end ehca_rereg_mr_rereg1() */ @@ -1214,10 +1218,12 @@ int ehca_rereg_mr(struct ehca_shca *shca, int rereg_3_hcall = 0; /* 1: use 3 hipz calls for reregistration */ /* first determine reregistration hCall(s) */ - if ((pginfo->num_4k > 512) || (e_mr->num_4k > 512) || - (pginfo->num_4k > e_mr->num_4k)) { - ehca_dbg(&shca->ib_device, "Rereg3 case, pginfo->num_4k=%lx " - "e_mr->num_4k=%x", pginfo->num_4k, e_mr->num_4k); + if ((pginfo->num_hwpages > MAX_RPAGES) || + (e_mr->num_hwpages > MAX_RPAGES) || + (pginfo->num_hwpages > e_mr->num_hwpages)) { + ehca_dbg(&shca->ib_device, "Rereg3 case, " + "pginfo->num_hwpages=%lx e_mr->num_hwpages=%x", + pginfo->num_hwpages, e_mr->num_hwpages); rereg_1_hcall = 0; rereg_3_hcall = 1; } @@ -1253,7 +1259,7 @@ int ehca_rereg_mr(struct ehca_shca *shca, h_ret, e_mr, shca->ipz_hca_handle.handle, e_mr->ipz_mr_handle.handle, e_mr->ib.ib_mr.lkey); - ret = ehca_mrmw_map_hrc_free_mr(h_ret); + ret = ehca2ib_return_code(h_ret); goto ehca_rereg_mr_exit0; } /* clean ehca_mr_t, without changing struct ib_mr and lock */ @@ -1281,9 +1287,9 @@ ehca_rereg_mr_exit0: if (ret) ehca_err(&shca->ib_device, "ret=%x shca=%p e_mr=%p " "iova_start=%p size=%lx acl=%x e_pd=%p pginfo=%p " - "num_pages=%lx lkey=%x rkey=%x rereg_1_hcall=%x " + "num_kpages=%lx lkey=%x rkey=%x rereg_1_hcall=%x " "rereg_3_hcall=%x", ret, shca, e_mr, iova_start, size, - acl, e_pd, pginfo, pginfo->num_pages, *lkey, *rkey, + acl, e_pd, pginfo, pginfo->num_kpages, *lkey, *rkey, rereg_1_hcall, rereg_3_hcall); return ret; } /* end ehca_rereg_mr() */ @@ -1295,97 +1301,86 @@ int ehca_unmap_one_fmr(struct ehca_shca *shca, { int ret = 0; u64 h_ret; - int rereg_1_hcall = 1; /* 1: use hipz_mr_reregister directly */ - int rereg_3_hcall = 0; /* 1: use 3 hipz calls for unmapping */ struct ehca_pd *e_pd = container_of(e_fmr->ib.ib_fmr.pd, struct ehca_pd, ib_pd); struct ehca_mr save_fmr; u32 tmp_lkey, tmp_rkey; - struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0}; - struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0}; + struct ehca_mr_pginfo pginfo; + struct ehca_mr_hipzout_parms hipzout; + struct ehca_mr save_mr; - /* first check if reregistration hCall can be used for unmap */ - if (e_fmr->fmr_max_pages > 512) { - rereg_1_hcall = 0; - rereg_3_hcall = 1; - } - - if (rereg_1_hcall) { + if (e_fmr->fmr_max_pages <= MAX_RPAGES) { /* * note: after using rereg hcall with len=0, * rereg hcall must be used again for registering pages */ h_ret = hipz_h_reregister_pmr(shca->ipz_hca_handle, e_fmr, 0, 0, 0, e_pd->fw_pd, 0, &hipzout); - if (h_ret != H_SUCCESS) { - /* - * should not happen, because length checked above, - * FMRs are not shared and no MW bound to FMRs - */ - ehca_err(&shca->ib_device, "hipz_reregister_pmr failed " - "(Rereg1), h_ret=%lx e_fmr=%p hca_hndl=%lx " - "mr_hndl=%lx lkey=%x lkey_out=%x", - h_ret, e_fmr, shca->ipz_hca_handle.handle, - e_fmr->ipz_mr_handle.handle, - e_fmr->ib.ib_fmr.lkey, hipzout.lkey); - rereg_3_hcall = 1; - } else { + if (h_ret == H_SUCCESS) { /* successful reregistration */ e_fmr->start = NULL; e_fmr->size = 0; tmp_lkey = hipzout.lkey; tmp_rkey = hipzout.rkey; + return 0; } + /* + * should not happen, because length checked above, + * FMRs are not shared and no MW bound to FMRs + */ + ehca_err(&shca->ib_device, "hipz_reregister_pmr failed " + "(Rereg1), h_ret=%lx e_fmr=%p hca_hndl=%lx " + "mr_hndl=%lx lkey=%x lkey_out=%x", + h_ret, e_fmr, shca->ipz_hca_handle.handle, + e_fmr->ipz_mr_handle.handle, + e_fmr->ib.ib_fmr.lkey, hipzout.lkey); + /* try free and rereg */ } - if (rereg_3_hcall) { - struct ehca_mr save_mr; - - /* first free old FMR */ - h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr); - if (h_ret != H_SUCCESS) { - ehca_err(&shca->ib_device, "hipz_free_mr failed, " - "h_ret=%lx e_fmr=%p hca_hndl=%lx mr_hndl=%lx " - "lkey=%x", - h_ret, e_fmr, shca->ipz_hca_handle.handle, - e_fmr->ipz_mr_handle.handle, - e_fmr->ib.ib_fmr.lkey); - ret = ehca_mrmw_map_hrc_free_mr(h_ret); - goto ehca_unmap_one_fmr_exit0; - } - /* clean ehca_mr_t, without changing lock */ - save_fmr = *e_fmr; - ehca_mr_deletenew(e_fmr); - - /* set some MR values */ - e_fmr->flags = save_fmr.flags; - e_fmr->fmr_page_size = save_fmr.fmr_page_size; - e_fmr->fmr_max_pages = save_fmr.fmr_max_pages; - e_fmr->fmr_max_maps = save_fmr.fmr_max_maps; - e_fmr->fmr_map_cnt = save_fmr.fmr_map_cnt; - e_fmr->acl = save_fmr.acl; - - pginfo.type = EHCA_MR_PGI_FMR; - pginfo.num_pages = 0; - pginfo.num_4k = 0; - ret = ehca_reg_mr(shca, e_fmr, NULL, - (e_fmr->fmr_max_pages * e_fmr->fmr_page_size), - e_fmr->acl, e_pd, &pginfo, &tmp_lkey, - &tmp_rkey); - if (ret) { - u32 offset = (u64)(&e_fmr->flags) - (u64)e_fmr; - memcpy(&e_fmr->flags, &(save_mr.flags), - sizeof(struct ehca_mr) - offset); - goto ehca_unmap_one_fmr_exit0; - } + /* first free old FMR */ + h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr); + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "hipz_free_mr failed, " + "h_ret=%lx e_fmr=%p hca_hndl=%lx mr_hndl=%lx " + "lkey=%x", + h_ret, e_fmr, shca->ipz_hca_handle.handle, + e_fmr->ipz_mr_handle.handle, + e_fmr->ib.ib_fmr.lkey); + ret = ehca2ib_return_code(h_ret); + goto ehca_unmap_one_fmr_exit0; + } + /* clean ehca_mr_t, without changing lock */ + save_fmr = *e_fmr; + ehca_mr_deletenew(e_fmr); + + /* set some MR values */ + e_fmr->flags = save_fmr.flags; + e_fmr->fmr_page_size = save_fmr.fmr_page_size; + e_fmr->fmr_max_pages = save_fmr.fmr_max_pages; + e_fmr->fmr_max_maps = save_fmr.fmr_max_maps; + e_fmr->fmr_map_cnt = save_fmr.fmr_map_cnt; + e_fmr->acl = save_fmr.acl; + + memset(&pginfo, 0, sizeof(pginfo)); + pginfo.type = EHCA_MR_PGI_FMR; + pginfo.num_kpages = 0; + pginfo.num_hwpages = 0; + ret = ehca_reg_mr(shca, e_fmr, NULL, + (e_fmr->fmr_max_pages * e_fmr->fmr_page_size), + e_fmr->acl, e_pd, &pginfo, &tmp_lkey, + &tmp_rkey); + if (ret) { + u32 offset = (u64)(&e_fmr->flags) - (u64)e_fmr; + memcpy(&e_fmr->flags, &(save_mr.flags), + sizeof(struct ehca_mr) - offset); + goto ehca_unmap_one_fmr_exit0; } ehca_unmap_one_fmr_exit0: if (ret) ehca_err(&shca->ib_device, "ret=%x tmp_lkey=%x tmp_rkey=%x " - "fmr_max_pages=%x rereg_1_hcall=%x rereg_3_hcall=%x", - ret, tmp_lkey, tmp_rkey, e_fmr->fmr_max_pages, - rereg_1_hcall, rereg_3_hcall); + "fmr_max_pages=%x", + ret, tmp_lkey, tmp_rkey, e_fmr->fmr_max_pages); return ret; } /* end ehca_unmap_one_fmr() */ @@ -1403,7 +1398,7 @@ int ehca_reg_smr(struct ehca_shca *shca, int ret = 0; u64 h_ret; u32 hipz_acl; - struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0}; + struct ehca_mr_hipzout_parms hipzout; ehca_mrmw_map_acl(acl, &hipz_acl); ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl); @@ -1419,15 +1414,15 @@ int ehca_reg_smr(struct ehca_shca *shca, shca->ipz_hca_handle.handle, e_origmr->ipz_mr_handle.handle, e_origmr->ib.ib_mr.lkey); - ret = ehca_mrmw_map_hrc_reg_smr(h_ret); + ret = ehca2ib_return_code(h_ret); goto ehca_reg_smr_exit0; } /* successful registration */ - e_newmr->num_pages = e_origmr->num_pages; - e_newmr->num_4k = e_origmr->num_4k; - e_newmr->start = iova_start; - e_newmr->size = e_origmr->size; - e_newmr->acl = acl; + e_newmr->num_kpages = e_origmr->num_kpages; + e_newmr->num_hwpages = e_origmr->num_hwpages; + e_newmr->start = iova_start; + e_newmr->size = e_origmr->size; + e_newmr->acl = acl; e_newmr->ipz_mr_handle = hipzout.handle; *lkey = hipzout.lkey; *rkey = hipzout.rkey; @@ -1453,10 +1448,10 @@ int ehca_reg_internal_maxmr( struct ehca_mr *e_mr; u64 *iova_start; u64 size_maxmr; - struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0}; + struct ehca_mr_pginfo pginfo; struct ib_phys_buf ib_pbuf; - u32 num_pages_mr; - u32 num_pages_4k; /* 4k portion "pages" */ + u32 num_kpages; + u32 num_hwpages; e_mr = ehca_mr_new(); if (!e_mr) { @@ -1468,28 +1463,29 @@ int ehca_reg_internal_maxmr( /* register internal max-MR on HCA */ size_maxmr = (u64)high_memory - PAGE_OFFSET; - iova_start = (u64*)KERNELBASE; + iova_start = (u64 *)KERNELBASE; ib_pbuf.addr = 0; ib_pbuf.size = size_maxmr; - num_pages_mr = ((((u64)iova_start % PAGE_SIZE) + size_maxmr + - PAGE_SIZE - 1) / PAGE_SIZE); - num_pages_4k = ((((u64)iova_start % EHCA_PAGESIZE) + size_maxmr + - EHCA_PAGESIZE - 1) / EHCA_PAGESIZE); - - pginfo.type = EHCA_MR_PGI_PHYS; - pginfo.num_pages = num_pages_mr; - pginfo.num_4k = num_pages_4k; - pginfo.num_phys_buf = 1; - pginfo.phys_buf_array = &ib_pbuf; + num_kpages = NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size_maxmr, + PAGE_SIZE); + num_hwpages = NUM_CHUNKS(((u64)iova_start % EHCA_PAGESIZE) + size_maxmr, + EHCA_PAGESIZE); + + memset(&pginfo, 0, sizeof(pginfo)); + pginfo.type = EHCA_MR_PGI_PHYS; + pginfo.num_kpages = num_kpages; + pginfo.num_hwpages = num_hwpages; + pginfo.u.phy.num_phys_buf = 1; + pginfo.u.phy.phys_buf_array = &ib_pbuf; ret = ehca_reg_mr(shca, e_mr, iova_start, size_maxmr, 0, e_pd, &pginfo, &e_mr->ib.ib_mr.lkey, &e_mr->ib.ib_mr.rkey); if (ret) { ehca_err(&shca->ib_device, "reg of internal max MR failed, " - "e_mr=%p iova_start=%p size_maxmr=%lx num_pages_mr=%x " - "num_pages_4k=%x", e_mr, iova_start, size_maxmr, - num_pages_mr, num_pages_4k); + "e_mr=%p iova_start=%p size_maxmr=%lx num_kpages=%x " + "num_hwpages=%x", e_mr, iova_start, size_maxmr, + num_kpages, num_hwpages); goto ehca_reg_internal_maxmr_exit1; } @@ -1524,7 +1520,7 @@ int ehca_reg_maxmr(struct ehca_shca *shca, u64 h_ret; struct ehca_mr *e_origmr = shca->maxmr; u32 hipz_acl; - struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0}; + struct ehca_mr_hipzout_parms hipzout; ehca_mrmw_map_acl(acl, &hipz_acl); ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl); @@ -1538,14 +1534,14 @@ int ehca_reg_maxmr(struct ehca_shca *shca, h_ret, e_origmr, shca->ipz_hca_handle.handle, e_origmr->ipz_mr_handle.handle, e_origmr->ib.ib_mr.lkey); - return ehca_mrmw_map_hrc_reg_smr(h_ret); + return ehca2ib_return_code(h_ret); } /* successful registration */ - e_newmr->num_pages = e_origmr->num_pages; - e_newmr->num_4k = e_origmr->num_4k; - e_newmr->start = iova_start; - e_newmr->size = e_origmr->size; - e_newmr->acl = acl; + e_newmr->num_kpages = e_origmr->num_kpages; + e_newmr->num_hwpages = e_origmr->num_hwpages; + e_newmr->start = iova_start; + e_newmr->size = e_origmr->size; + e_newmr->acl = acl; e_newmr->ipz_mr_handle = hipzout.handle; *lkey = hipzout.lkey; *rkey = hipzout.rkey; @@ -1677,299 +1673,187 @@ int ehca_fmr_check_page_list(struct ehca_mr *e_fmr, /*----------------------------------------------------------------------*/ -/* setup page buffer from page info */ -int ehca_set_pagebuf(struct ehca_mr *e_mr, - struct ehca_mr_pginfo *pginfo, - u32 number, - u64 *kpage) +/* PAGE_SIZE >= pginfo->hwpage_size */ +static int ehca_set_pagebuf_user1(struct ehca_mr_pginfo *pginfo, + u32 number, + u64 *kpage) { int ret = 0; struct ib_umem_chunk *prev_chunk; struct ib_umem_chunk *chunk; - struct ib_phys_buf *pbuf; - u64 *fmrlist; - u64 num4k, pgaddr, offs4k; + u64 pgaddr; u32 i = 0; u32 j = 0; - if (pginfo->type == EHCA_MR_PGI_PHYS) { - /* loop over desired phys_buf_array entries */ - while (i < number) { - pbuf = pginfo->phys_buf_array + pginfo->next_buf; - num4k = ((pbuf->addr % EHCA_PAGESIZE) + pbuf->size + - EHCA_PAGESIZE - 1) / EHCA_PAGESIZE; - offs4k = (pbuf->addr & ~PAGE_MASK) / EHCA_PAGESIZE; - while (pginfo->next_4k < offs4k + num4k) { - /* sanity check */ - if ((pginfo->page_cnt >= pginfo->num_pages) || - (pginfo->page_4k_cnt >= pginfo->num_4k)) { - ehca_gen_err("page_cnt >= num_pages, " - "page_cnt=%lx " - "num_pages=%lx " - "page_4k_cnt=%lx " - "num_4k=%lx i=%x", - pginfo->page_cnt, - pginfo->num_pages, - pginfo->page_4k_cnt, - pginfo->num_4k, i); - ret = -EFAULT; - goto ehca_set_pagebuf_exit0; - } - *kpage = phys_to_abs( - (pbuf->addr & EHCA_PAGEMASK) - + (pginfo->next_4k * EHCA_PAGESIZE)); - if ( !(*kpage) && pbuf->addr ) { - ehca_gen_err("pbuf->addr=%lx " - "pbuf->size=%lx " - "next_4k=%lx", pbuf->addr, - pbuf->size, - pginfo->next_4k); - ret = -EFAULT; - goto ehca_set_pagebuf_exit0; - } - (pginfo->page_4k_cnt)++; - (pginfo->next_4k)++; - if (pginfo->next_4k % - (PAGE_SIZE / EHCA_PAGESIZE) == 0) - (pginfo->page_cnt)++; - kpage++; - i++; - if (i >= number) break; - } - if (pginfo->next_4k >= offs4k + num4k) { - (pginfo->next_buf)++; - pginfo->next_4k = 0; - } - } - } else if (pginfo->type == EHCA_MR_PGI_USER) { - /* loop over desired chunk entries */ - chunk = pginfo->next_chunk; - prev_chunk = pginfo->next_chunk; - list_for_each_entry_continue(chunk, - (&(pginfo->region->chunk_list)), - list) { - for (i = pginfo->next_nmap; i < chunk->nmap; ) { - pgaddr = ( page_to_pfn(chunk->page_list[i].page) - << PAGE_SHIFT ); - *kpage = phys_to_abs(pgaddr + - (pginfo->next_4k * - EHCA_PAGESIZE)); - if ( !(*kpage) ) { - ehca_gen_err("pgaddr=%lx " - "chunk->page_list[i]=%lx " - "i=%x next_4k=%lx mr=%p", - pgaddr, - (u64)sg_dma_address( - &chunk-> - page_list[i]), - i, pginfo->next_4k, e_mr); - ret = -EFAULT; - goto ehca_set_pagebuf_exit0; - } - (pginfo->page_4k_cnt)++; - (pginfo->next_4k)++; - kpage++; - if (pginfo->next_4k % - (PAGE_SIZE / EHCA_PAGESIZE) == 0) { - (pginfo->page_cnt)++; - (pginfo->next_nmap)++; - pginfo->next_4k = 0; - i++; - } - j++; - if (j >= number) break; - } - if ((pginfo->next_nmap >= chunk->nmap) && - (j >= number)) { - pginfo->next_nmap = 0; - prev_chunk = chunk; - break; - } else if (pginfo->next_nmap >= chunk->nmap) { - pginfo->next_nmap = 0; - prev_chunk = chunk; - } else if (j >= number) - break; - else - prev_chunk = chunk; - } - pginfo->next_chunk = - list_prepare_entry(prev_chunk, - (&(pginfo->region->chunk_list)), - list); - } else if (pginfo->type == EHCA_MR_PGI_FMR) { - /* loop over desired page_list entries */ - fmrlist = pginfo->page_list + pginfo->next_listelem; - for (i = 0; i < number; i++) { - *kpage = phys_to_abs((*fmrlist & EHCA_PAGEMASK) + - pginfo->next_4k * EHCA_PAGESIZE); + /* loop over desired chunk entries */ + chunk = pginfo->u.usr.next_chunk; + prev_chunk = pginfo->u.usr.next_chunk; + list_for_each_entry_continue( + chunk, (&(pginfo->u.usr.region->chunk_list)), list) { + for (i = pginfo->u.usr.next_nmap; i < chunk->nmap; ) { + pgaddr = page_to_pfn(chunk->page_list[i].page) + << PAGE_SHIFT ; + *kpage = phys_to_abs(pgaddr + + (pginfo->next_hwpage * + EHCA_PAGESIZE)); if ( !(*kpage) ) { - ehca_gen_err("*fmrlist=%lx fmrlist=%p " - "next_listelem=%lx next_4k=%lx", - *fmrlist, fmrlist, - pginfo->next_listelem, - pginfo->next_4k); - ret = -EFAULT; - goto ehca_set_pagebuf_exit0; + ehca_gen_err("pgaddr=%lx " + "chunk->page_list[i]=%lx " + "i=%x next_hwpage=%lx", + pgaddr, (u64)sg_dma_address( + &chunk->page_list[i]), + i, pginfo->next_hwpage); + return -EFAULT; } - (pginfo->page_4k_cnt)++; - (pginfo->next_4k)++; + (pginfo->hwpage_cnt)++; + (pginfo->next_hwpage)++; kpage++; - if (pginfo->next_4k % - (e_mr->fmr_page_size / EHCA_PAGESIZE) == 0) { - (pginfo->page_cnt)++; - (pginfo->next_listelem)++; - fmrlist++; - pginfo->next_4k = 0; + if (pginfo->next_hwpage % + (PAGE_SIZE / EHCA_PAGESIZE) == 0) { + (pginfo->kpage_cnt)++; + (pginfo->u.usr.next_nmap)++; + pginfo->next_hwpage = 0; + i++; } + j++; + if (j >= number) break; } - } else { - ehca_gen_err("bad pginfo->type=%x", pginfo->type); - ret = -EFAULT; - goto ehca_set_pagebuf_exit0; + if ((pginfo->u.usr.next_nmap >= chunk->nmap) && + (j >= number)) { + pginfo->u.usr.next_nmap = 0; + prev_chunk = chunk; + break; + } else if (pginfo->u.usr.next_nmap >= chunk->nmap) { + pginfo->u.usr.next_nmap = 0; + prev_chunk = chunk; + } else if (j >= number) + break; + else + prev_chunk = chunk; } - -ehca_set_pagebuf_exit0: - if (ret) - ehca_gen_err("ret=%x e_mr=%p pginfo=%p type=%x num_pages=%lx " - "num_4k=%lx next_buf=%lx next_4k=%lx number=%x " - "kpage=%p page_cnt=%lx page_4k_cnt=%lx i=%x " - "next_listelem=%lx region=%p next_chunk=%p " - "next_nmap=%lx", ret, e_mr, pginfo, pginfo->type, - pginfo->num_pages, pginfo->num_4k, - pginfo->next_buf, pginfo->next_4k, number, kpage, - pginfo->page_cnt, pginfo->page_4k_cnt, i, - pginfo->next_listelem, pginfo->region, - pginfo->next_chunk, pginfo->next_nmap); + pginfo->u.usr.next_chunk = + list_prepare_entry(prev_chunk, + (&(pginfo->u.usr.region->chunk_list)), + list); return ret; -} /* end ehca_set_pagebuf() */ - -/*----------------------------------------------------------------------*/ +} -/* setup 1 page from page info page buffer */ -int ehca_set_pagebuf_1(struct ehca_mr *e_mr, - struct ehca_mr_pginfo *pginfo, - u64 *rpage) +int ehca_set_pagebuf_phys(struct ehca_mr_pginfo *pginfo, + u32 number, + u64 *kpage) { int ret = 0; - struct ib_phys_buf *tmp_pbuf; - u64 *fmrlist; - struct ib_umem_chunk *chunk; - struct ib_umem_chunk *prev_chunk; - u64 pgaddr, num4k, offs4k; - - if (pginfo->type == EHCA_MR_PGI_PHYS) { - /* sanity check */ - if ((pginfo->page_cnt >= pginfo->num_pages) || - (pginfo->page_4k_cnt >= pginfo->num_4k)) { - ehca_gen_err("page_cnt >= num_pages, page_cnt=%lx " - "num_pages=%lx page_4k_cnt=%lx num_4k=%lx", - pginfo->page_cnt, pginfo->num_pages, - pginfo->page_4k_cnt, pginfo->num_4k); - ret = -EFAULT; - goto ehca_set_pagebuf_1_exit0; - } - tmp_pbuf = pginfo->phys_buf_array + pginfo->next_buf; - num4k = ((tmp_pbuf->addr % EHCA_PAGESIZE) + tmp_pbuf->size + - EHCA_PAGESIZE - 1) / EHCA_PAGESIZE; - offs4k = (tmp_pbuf->addr & ~PAGE_MASK) / EHCA_PAGESIZE; - *rpage = phys_to_abs((tmp_pbuf->addr & EHCA_PAGEMASK) + - (pginfo->next_4k * EHCA_PAGESIZE)); - if ( !(*rpage) && tmp_pbuf->addr ) { - ehca_gen_err("tmp_pbuf->addr=%lx" - " tmp_pbuf->size=%lx next_4k=%lx", - tmp_pbuf->addr, tmp_pbuf->size, - pginfo->next_4k); - ret = -EFAULT; - goto ehca_set_pagebuf_1_exit0; - } - (pginfo->page_4k_cnt)++; - (pginfo->next_4k)++; - if (pginfo->next_4k % (PAGE_SIZE / EHCA_PAGESIZE) == 0) - (pginfo->page_cnt)++; - if (pginfo->next_4k >= offs4k + num4k) { - (pginfo->next_buf)++; - pginfo->next_4k = 0; - } - } else if (pginfo->type == EHCA_MR_PGI_USER) { - chunk = pginfo->next_chunk; - prev_chunk = pginfo->next_chunk; - list_for_each_entry_continue(chunk, - (&(pginfo->region->chunk_list)), - list) { - pgaddr = ( page_to_pfn(chunk->page_list[ - pginfo->next_nmap].page) - << PAGE_SHIFT); - *rpage = phys_to_abs(pgaddr + - (pginfo->next_4k * EHCA_PAGESIZE)); - if ( !(*rpage) ) { - ehca_gen_err("pgaddr=%lx chunk->page_list[]=%lx" - " next_nmap=%lx next_4k=%lx mr=%p", - pgaddr, (u64)sg_dma_address( - &chunk->page_list[ - pginfo-> - next_nmap]), - pginfo->next_nmap, pginfo->next_4k, - e_mr); - ret = -EFAULT; - goto ehca_set_pagebuf_1_exit0; - } - (pginfo->page_4k_cnt)++; - (pginfo->next_4k)++; - if (pginfo->next_4k % - (PAGE_SIZE / EHCA_PAGESIZE) == 0) { - (pginfo->page_cnt)++; - (pginfo->next_nmap)++; - pginfo->next_4k = 0; + struct ib_phys_buf *pbuf; + u64 num_hw, offs_hw; + u32 i = 0; + + /* loop over desired phys_buf_array entries */ + while (i < number) { + pbuf = pginfo->u.phy.phys_buf_array + pginfo->u.phy.next_buf; + num_hw = NUM_CHUNKS((pbuf->addr % EHCA_PAGESIZE) + + pbuf->size, EHCA_PAGESIZE); + offs_hw = (pbuf->addr & ~PAGE_MASK) / EHCA_PAGESIZE; + while (pginfo->next_hwpage < offs_hw + num_hw) { + /* sanity check */ + if ((pginfo->kpage_cnt >= pginfo->num_kpages) || + (pginfo->hwpage_cnt >= pginfo->num_hwpages)) { + ehca_gen_err("kpage_cnt >= num_kpages, " + "kpage_cnt=%lx num_kpages=%lx " + "hwpage_cnt=%lx " + "num_hwpages=%lx i=%x", + pginfo->kpage_cnt, + pginfo->num_kpages, + pginfo->hwpage_cnt, + pginfo->num_hwpages, i); + return -EFAULT; } - if (pginfo->next_nmap >= chunk->nmap) { - pginfo->next_nmap = 0; - prev_chunk = chunk; + *kpage = phys_to_abs( + (pbuf->addr & EHCA_PAGEMASK) + + (pginfo->next_hwpage * EHCA_PAGESIZE)); + if ( !(*kpage) && pbuf->addr ) { + ehca_gen_err("pbuf->addr=%lx " + "pbuf->size=%lx " + "next_hwpage=%lx", pbuf->addr, + pbuf->size, + pginfo->next_hwpage); + return -EFAULT; } - break; + (pginfo->hwpage_cnt)++; + (pginfo->next_hwpage)++; + if (pginfo->next_hwpage % + (PAGE_SIZE / EHCA_PAGESIZE) == 0) + (pginfo->kpage_cnt)++; + kpage++; + i++; + if (i >= number) break; + } + if (pginfo->next_hwpage >= offs_hw + num_hw) { + (pginfo->u.phy.next_buf)++; + pginfo->next_hwpage = 0; } - pginfo->next_chunk = - list_prepare_entry(prev_chunk, - (&(pginfo->region->chunk_list)), - list); - } else if (pginfo->type == EHCA_MR_PGI_FMR) { - fmrlist = pginfo->page_list + pginfo->next_listelem; - *rpage = phys_to_abs((*fmrlist & EHCA_PAGEMASK) + - pginfo->next_4k * EHCA_PAGESIZE); - if ( !(*rpage) ) { + } + return ret; +} + +int ehca_set_pagebuf_fmr(struct ehca_mr_pginfo *pginfo, + u32 number, + u64 *kpage) +{ + int ret = 0; + u64 *fmrlist; + u32 i; + + /* loop over desired page_list entries */ + fmrlist = pginfo->u.fmr.page_list + pginfo->u.fmr.next_listelem; + for (i = 0; i < number; i++) { + *kpage = phys_to_abs((*fmrlist & EHCA_PAGEMASK) + + pginfo->next_hwpage * EHCA_PAGESIZE); + if ( !(*kpage) ) { ehca_gen_err("*fmrlist=%lx fmrlist=%p " - "next_listelem=%lx next_4k=%lx", - *fmrlist, fmrlist, pginfo->next_listelem, - pginfo->next_4k); - ret = -EFAULT; - goto ehca_set_pagebuf_1_exit0; + "next_listelem=%lx next_hwpage=%lx", + *fmrlist, fmrlist, + pginfo->u.fmr.next_listelem, + pginfo->next_hwpage); + return -EFAULT; } - (pginfo->page_4k_cnt)++; - (pginfo->next_4k)++; - if (pginfo->next_4k % - (e_mr->fmr_page_size / EHCA_PAGESIZE) == 0) { - (pginfo->page_cnt)++; - (pginfo->next_listelem)++; - pginfo->next_4k = 0; + (pginfo->hwpage_cnt)++; + (pginfo->next_hwpage)++; + kpage++; + if (pginfo->next_hwpage % + (pginfo->u.fmr.fmr_pgsize / EHCA_PAGESIZE) == 0) { + (pginfo->kpage_cnt)++; + (pginfo->u.fmr.next_listelem)++; + fmrlist++; + pginfo->next_hwpage = 0; } - } else { + } + return ret; +} + +/* setup page buffer from page info */ +int ehca_set_pagebuf(struct ehca_mr_pginfo *pginfo, + u32 number, + u64 *kpage) +{ + int ret; + + switch (pginfo->type) { + case EHCA_MR_PGI_PHYS: + ret = ehca_set_pagebuf_phys(pginfo, number, kpage); + break; + case EHCA_MR_PGI_USER: + ret = ehca_set_pagebuf_user1(pginfo, number, kpage); + break; + case EHCA_MR_PGI_FMR: + ret = ehca_set_pagebuf_fmr(pginfo, number, kpage); + break; + default: ehca_gen_err("bad pginfo->type=%x", pginfo->type); ret = -EFAULT; - goto ehca_set_pagebuf_1_exit0; + break; } - -ehca_set_pagebuf_1_exit0: - if (ret) - ehca_gen_err("ret=%x e_mr=%p pginfo=%p type=%x num_pages=%lx " - "num_4k=%lx next_buf=%lx next_4k=%lx rpage=%p " - "page_cnt=%lx page_4k_cnt=%lx next_listelem=%lx " - "region=%p next_chunk=%p next_nmap=%lx", ret, e_mr, - pginfo, pginfo->type, pginfo->num_pages, - pginfo->num_4k, pginfo->next_buf, pginfo->next_4k, - rpage, pginfo->page_cnt, pginfo->page_4k_cnt, - pginfo->next_listelem, pginfo->region, - pginfo->next_chunk, pginfo->next_nmap); return ret; -} /* end ehca_set_pagebuf_1() */ +} /* end ehca_set_pagebuf() */ /*----------------------------------------------------------------------*/ @@ -1982,7 +1866,7 @@ int ehca_mr_is_maxmr(u64 size, { /* a MR is treated as max-MR only if it fits following: */ if ((size == ((u64)high_memory - PAGE_OFFSET)) && - (iova_start == (void*)KERNELBASE)) { + (iova_start == (void *)KERNELBASE)) { ehca_gen_dbg("this is a max-MR"); return 1; } else @@ -2042,196 +1926,23 @@ void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl, /*----------------------------------------------------------------------*/ /* - * map HIPZ rc to IB retcodes for MR/MW allocations - * Used for hipz_mr_reg_alloc and hipz_mw_alloc. - */ -int ehca_mrmw_map_hrc_alloc(const u64 hipz_rc) -{ - switch (hipz_rc) { - case H_SUCCESS: /* successful completion */ - return 0; - case H_NOT_ENOUGH_RESOURCES: /* insufficient resources */ - case H_CONSTRAINED: /* resource constraint */ - case H_NO_MEM: - return -ENOMEM; - case H_BUSY: /* long busy */ - return -EBUSY; - default: - return -EINVAL; - } -} /* end ehca_mrmw_map_hrc_alloc() */ - -/*----------------------------------------------------------------------*/ - -/* - * map HIPZ rc to IB retcodes for MR register rpage - * Used for hipz_h_register_rpage_mr at registering last page - */ -int ehca_mrmw_map_hrc_rrpg_last(const u64 hipz_rc) -{ - switch (hipz_rc) { - case H_SUCCESS: /* registration complete */ - return 0; - case H_PAGE_REGISTERED: /* page registered */ - case H_ADAPTER_PARM: /* invalid adapter handle */ - case H_RH_PARM: /* invalid resource handle */ -/* case H_QT_PARM: invalid queue type */ - case H_PARAMETER: /* - * invalid logical address, - * or count zero or greater 512 - */ - case H_TABLE_FULL: /* page table full */ - case H_HARDWARE: /* HCA not operational */ - return -EINVAL; - case H_BUSY: /* long busy */ - return -EBUSY; - default: - return -EINVAL; - } -} /* end ehca_mrmw_map_hrc_rrpg_last() */ - -/*----------------------------------------------------------------------*/ - -/* - * map HIPZ rc to IB retcodes for MR register rpage - * Used for hipz_h_register_rpage_mr at registering one page, but not last page - */ -int ehca_mrmw_map_hrc_rrpg_notlast(const u64 hipz_rc) -{ - switch (hipz_rc) { - case H_PAGE_REGISTERED: /* page registered */ - return 0; - case H_SUCCESS: /* registration complete */ - case H_ADAPTER_PARM: /* invalid adapter handle */ - case H_RH_PARM: /* invalid resource handle */ -/* case H_QT_PARM: invalid queue type */ - case H_PARAMETER: /* - * invalid logical address, - * or count zero or greater 512 - */ - case H_TABLE_FULL: /* page table full */ - case H_HARDWARE: /* HCA not operational */ - return -EINVAL; - case H_BUSY: /* long busy */ - return -EBUSY; - default: - return -EINVAL; - } -} /* end ehca_mrmw_map_hrc_rrpg_notlast() */ - -/*----------------------------------------------------------------------*/ - -/* map HIPZ rc to IB retcodes for MR query. Used for hipz_mr_query. */ -int ehca_mrmw_map_hrc_query_mr(const u64 hipz_rc) -{ - switch (hipz_rc) { - case H_SUCCESS: /* successful completion */ - return 0; - case H_ADAPTER_PARM: /* invalid adapter handle */ - case H_RH_PARM: /* invalid resource handle */ - return -EINVAL; - case H_BUSY: /* long busy */ - return -EBUSY; - default: - return -EINVAL; - } -} /* end ehca_mrmw_map_hrc_query_mr() */ - -/*----------------------------------------------------------------------*/ -/*----------------------------------------------------------------------*/ - -/* - * map HIPZ rc to IB retcodes for freeing MR resource - * Used for hipz_h_free_resource_mr - */ -int ehca_mrmw_map_hrc_free_mr(const u64 hipz_rc) -{ - switch (hipz_rc) { - case H_SUCCESS: /* resource freed */ - return 0; - case H_ADAPTER_PARM: /* invalid adapter handle */ - case H_RH_PARM: /* invalid resource handle */ - case H_R_STATE: /* invalid resource state */ - case H_HARDWARE: /* HCA not operational */ - return -EINVAL; - case H_RESOURCE: /* Resource in use */ - case H_BUSY: /* long busy */ - return -EBUSY; - default: - return -EINVAL; - } -} /* end ehca_mrmw_map_hrc_free_mr() */ - -/*----------------------------------------------------------------------*/ - -/* - * map HIPZ rc to IB retcodes for freeing MW resource - * Used for hipz_h_free_resource_mw - */ -int ehca_mrmw_map_hrc_free_mw(const u64 hipz_rc) -{ - switch (hipz_rc) { - case H_SUCCESS: /* resource freed */ - return 0; - case H_ADAPTER_PARM: /* invalid adapter handle */ - case H_RH_PARM: /* invalid resource handle */ - case H_R_STATE: /* invalid resource state */ - case H_HARDWARE: /* HCA not operational */ - return -EINVAL; - case H_RESOURCE: /* Resource in use */ - case H_BUSY: /* long busy */ - return -EBUSY; - default: - return -EINVAL; - } -} /* end ehca_mrmw_map_hrc_free_mw() */ - -/*----------------------------------------------------------------------*/ - -/* - * map HIPZ rc to IB retcodes for SMR registrations - * Used for hipz_h_register_smr. - */ -int ehca_mrmw_map_hrc_reg_smr(const u64 hipz_rc) -{ - switch (hipz_rc) { - case H_SUCCESS: /* successful completion */ - return 0; - case H_ADAPTER_PARM: /* invalid adapter handle */ - case H_RH_PARM: /* invalid resource handle */ - case H_MEM_PARM: /* invalid MR virtual address */ - case H_MEM_ACCESS_PARM: /* invalid access controls */ - case H_NOT_ENOUGH_RESOURCES: /* insufficient resources */ - return -EINVAL; - case H_BUSY: /* long busy */ - return -EBUSY; - default: - return -EINVAL; - } -} /* end ehca_mrmw_map_hrc_reg_smr() */ - -/*----------------------------------------------------------------------*/ - -/* * MR destructor and constructor * used in Reregister MR verb, sets all fields in ehca_mr_t to 0, * except struct ib_mr and spinlock */ void ehca_mr_deletenew(struct ehca_mr *mr) { - mr->flags = 0; - mr->num_pages = 0; - mr->num_4k = 0; - mr->acl = 0; - mr->start = NULL; + mr->flags = 0; + mr->num_kpages = 0; + mr->num_hwpages = 0; + mr->acl = 0; + mr->start = NULL; mr->fmr_page_size = 0; mr->fmr_max_pages = 0; - mr->fmr_max_maps = 0; - mr->fmr_map_cnt = 0; + mr->fmr_max_maps = 0; + mr->fmr_map_cnt = 0; memset(&mr->ipz_mr_handle, 0, sizeof(mr->ipz_mr_handle)); memset(&mr->galpas, 0, sizeof(mr->galpas)); - mr->nr_of_pages = 0; - mr->pagearray = NULL; } /* end ehca_mr_deletenew() */ int ehca_init_mrmw_cache(void) diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.h b/drivers/infiniband/hw/ehca/ehca_mrmw.h index d936e40a5748..24f13fe3708b 100644 --- a/drivers/infiniband/hw/ehca/ehca_mrmw.h +++ b/drivers/infiniband/hw/ehca/ehca_mrmw.h @@ -101,15 +101,10 @@ int ehca_fmr_check_page_list(struct ehca_mr *e_fmr, u64 *page_list, int list_len); -int ehca_set_pagebuf(struct ehca_mr *e_mr, - struct ehca_mr_pginfo *pginfo, +int ehca_set_pagebuf(struct ehca_mr_pginfo *pginfo, u32 number, u64 *kpage); -int ehca_set_pagebuf_1(struct ehca_mr *e_mr, - struct ehca_mr_pginfo *pginfo, - u64 *rpage); - int ehca_mr_is_maxmr(u64 size, u64 *iova_start); @@ -121,20 +116,6 @@ void ehca_mrmw_set_pgsize_hipz_acl(u32 *hipz_acl); void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl, int *ib_acl); -int ehca_mrmw_map_hrc_alloc(const u64 hipz_rc); - -int ehca_mrmw_map_hrc_rrpg_last(const u64 hipz_rc); - -int ehca_mrmw_map_hrc_rrpg_notlast(const u64 hipz_rc); - -int ehca_mrmw_map_hrc_query_mr(const u64 hipz_rc); - -int ehca_mrmw_map_hrc_free_mr(const u64 hipz_rc); - -int ehca_mrmw_map_hrc_free_mw(const u64 hipz_rc); - -int ehca_mrmw_map_hrc_reg_smr(const u64 hipz_rc); - void ehca_mr_deletenew(struct ehca_mr *mr); #endif /*_EHCA_MRMW_H_*/ diff --git a/drivers/infiniband/hw/ehca/ehca_qes.h b/drivers/infiniband/hw/ehca/ehca_qes.h index 8707d297ce4c..818803057ebf 100644 --- a/drivers/infiniband/hw/ehca/ehca_qes.h +++ b/drivers/infiniband/hw/ehca/ehca_qes.h @@ -53,13 +53,13 @@ struct ehca_vsgentry { u32 length; }; -#define GRH_FLAG_MASK EHCA_BMASK_IBM(7,7) -#define GRH_IPVERSION_MASK EHCA_BMASK_IBM(0,3) -#define GRH_TCLASS_MASK EHCA_BMASK_IBM(4,12) -#define GRH_FLOWLABEL_MASK EHCA_BMASK_IBM(13,31) -#define GRH_PAYLEN_MASK EHCA_BMASK_IBM(32,47) -#define GRH_NEXTHEADER_MASK EHCA_BMASK_IBM(48,55) -#define GRH_HOPLIMIT_MASK EHCA_BMASK_IBM(56,63) +#define GRH_FLAG_MASK EHCA_BMASK_IBM( 7, 7) +#define GRH_IPVERSION_MASK EHCA_BMASK_IBM( 0, 3) +#define GRH_TCLASS_MASK EHCA_BMASK_IBM( 4, 12) +#define GRH_FLOWLABEL_MASK EHCA_BMASK_IBM(13, 31) +#define GRH_PAYLEN_MASK EHCA_BMASK_IBM(32, 47) +#define GRH_NEXTHEADER_MASK EHCA_BMASK_IBM(48, 55) +#define GRH_HOPLIMIT_MASK EHCA_BMASK_IBM(56, 63) /* * Unreliable Datagram Address Vector Format @@ -206,10 +206,10 @@ struct ehca_wqe { }; -#define WC_SEND_RECEIVE EHCA_BMASK_IBM(0,0) -#define WC_IMM_DATA EHCA_BMASK_IBM(1,1) -#define WC_GRH_PRESENT EHCA_BMASK_IBM(2,2) -#define WC_SE_BIT EHCA_BMASK_IBM(3,3) +#define WC_SEND_RECEIVE EHCA_BMASK_IBM(0, 0) +#define WC_IMM_DATA EHCA_BMASK_IBM(1, 1) +#define WC_GRH_PRESENT EHCA_BMASK_IBM(2, 2) +#define WC_SE_BIT EHCA_BMASK_IBM(3, 3) #define WC_STATUS_ERROR_BIT 0x80000000 #define WC_STATUS_REMOTE_ERROR_FLAGS 0x0000F800 #define WC_STATUS_PURGE_BIT 0x10 diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c index 74671250303f..48e9ceacd6fa 100644 --- a/drivers/infiniband/hw/ehca/ehca_qp.c +++ b/drivers/infiniband/hw/ehca/ehca_qp.c @@ -602,10 +602,10 @@ struct ehca_qp *internal_create_qp(struct ib_pd *pd, /* UD circumvention */ parms.act_nr_send_sges -= 2; parms.act_nr_recv_sges -= 2; - swqe_size = offsetof(struct ehca_wqe, - u.ud_av.sg_list[parms.act_nr_send_sges]); - rwqe_size = offsetof(struct ehca_wqe, - u.ud_av.sg_list[parms.act_nr_recv_sges]); + swqe_size = offsetof(struct ehca_wqe, u.ud_av.sg_list[ + parms.act_nr_send_sges]); + rwqe_size = offsetof(struct ehca_wqe, u.ud_av.sg_list[ + parms.act_nr_recv_sges]); } if (IB_QPT_GSI == qp_type || IB_QPT_SMI == qp_type) { @@ -690,8 +690,8 @@ struct ehca_qp *internal_create_qp(struct ib_pd *pd, if (my_qp->send_cq) { ret = ehca_cq_assign_qp(my_qp->send_cq, my_qp); if (ret) { - ehca_err(pd->device, "Couldn't assign qp to send_cq ret=%x", - ret); + ehca_err(pd->device, + "Couldn't assign qp to send_cq ret=%x", ret); goto create_qp_exit4; } } @@ -749,7 +749,7 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd, struct ehca_qp *ret; ret = internal_create_qp(pd, qp_init_attr, NULL, udata, 0); - return IS_ERR(ret) ? (struct ib_qp *) ret : &ret->ib_qp; + return IS_ERR(ret) ? (struct ib_qp *)ret : &ret->ib_qp; } int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp, @@ -780,7 +780,7 @@ struct ib_srq *ehca_create_srq(struct ib_pd *pd, my_qp = internal_create_qp(pd, &qp_init_attr, srq_init_attr, udata, 1); if (IS_ERR(my_qp)) - return (struct ib_srq *) my_qp; + return (struct ib_srq *)my_qp; /* copy back return values */ srq_init_attr->attr.max_wr = qp_init_attr.cap.max_recv_wr; @@ -875,7 +875,7 @@ static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca, my_qp, qp_num, h_ret); return ehca2ib_return_code(h_ret); } - bad_send_wqe_p = (void*)((u64)bad_send_wqe_p & (~(1L<<63))); + bad_send_wqe_p = (void *)((u64)bad_send_wqe_p & (~(1L << 63))); ehca_dbg(&shca->ib_device, "qp_num=%x bad_send_wqe_p=%p", qp_num, bad_send_wqe_p); /* convert wqe pointer to vadr */ @@ -890,7 +890,7 @@ static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca, } /* loop sets wqe's purge bit */ - wqe = (struct ehca_wqe*)ipz_qeit_calc(squeue, q_ofs); + wqe = (struct ehca_wqe *)ipz_qeit_calc(squeue, q_ofs); *bad_wqe_cnt = 0; while (wqe->optype != 0xff && wqe->wqef != 0xff) { if (ehca_debug_level) @@ -898,7 +898,7 @@ static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca, wqe->nr_of_data_seg = 0; /* suppress data access */ wqe->wqef = WQEF_PURGE; /* WQE to be purged */ q_ofs = ipz_queue_advance_offset(squeue, q_ofs); - wqe = (struct ehca_wqe*)ipz_qeit_calc(squeue, q_ofs); + wqe = (struct ehca_wqe *)ipz_qeit_calc(squeue, q_ofs); *bad_wqe_cnt = (*bad_wqe_cnt)+1; } /* @@ -1003,7 +1003,7 @@ static int internal_modify_qp(struct ib_qp *ibqp, goto modify_qp_exit1; } - ehca_dbg(ibqp->device,"ehca_qp=%p qp_num=%x current qp_state=%x " + ehca_dbg(ibqp->device, "ehca_qp=%p qp_num=%x current qp_state=%x " "new qp_state=%x attribute_mask=%x", my_qp, ibqp->qp_num, qp_cur_state, attr->qp_state, attr_mask); @@ -1019,7 +1019,8 @@ static int internal_modify_qp(struct ib_qp *ibqp, goto modify_qp_exit1; } - if ((mqpcb->qp_state = ib2ehca_qp_state(qp_new_state))) + mqpcb->qp_state = ib2ehca_qp_state(qp_new_state); + if (mqpcb->qp_state) update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1); else { ret = -EINVAL; @@ -1077,7 +1078,7 @@ static int internal_modify_qp(struct ib_qp *ibqp, spin_lock_irqsave(&my_qp->spinlock_s, flags); squeue_locked = 1; /* mark next free wqe */ - wqe = (struct ehca_wqe*) + wqe = (struct ehca_wqe *) ipz_qeit_get(&my_qp->ipz_squeue); wqe->optype = wqe->wqef = 0xff; ehca_dbg(ibqp->device, "qp_num=%x next_free_wqe=%p", @@ -1312,7 +1313,7 @@ static int internal_modify_qp(struct ib_qp *ibqp, if (h_ret != H_SUCCESS) { ret = ehca2ib_return_code(h_ret); ehca_err(ibqp->device, "hipz_h_modify_qp() failed rc=%lx " - "ehca_qp=%p qp_num=%x",h_ret, my_qp, ibqp->qp_num); + "ehca_qp=%p qp_num=%x", h_ret, my_qp, ibqp->qp_num); goto modify_qp_exit2; } @@ -1411,7 +1412,7 @@ int ehca_query_qp(struct ib_qp *qp, } if (qp_attr_mask & QP_ATTR_QUERY_NOT_SUPPORTED) { - ehca_err(qp->device,"Invalid attribute mask " + ehca_err(qp->device, "Invalid attribute mask " "ehca_qp=%p qp_num=%x qp_attr_mask=%x ", my_qp, qp->qp_num, qp_attr_mask); return -EINVAL; @@ -1419,7 +1420,7 @@ int ehca_query_qp(struct ib_qp *qp, qpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL); if (!qpcb) { - ehca_err(qp->device,"Out of memory for qpcb " + ehca_err(qp->device, "Out of memory for qpcb " "ehca_qp=%p qp_num=%x", my_qp, qp->qp_num); return -ENOMEM; } @@ -1431,7 +1432,7 @@ int ehca_query_qp(struct ib_qp *qp, if (h_ret != H_SUCCESS) { ret = ehca2ib_return_code(h_ret); - ehca_err(qp->device,"hipz_h_query_qp() failed " + ehca_err(qp->device, "hipz_h_query_qp() failed " "ehca_qp=%p qp_num=%x h_ret=%lx", my_qp, qp->qp_num, h_ret); goto query_qp_exit1; @@ -1442,7 +1443,7 @@ int ehca_query_qp(struct ib_qp *qp, if (qp_attr->cur_qp_state == -EINVAL) { ret = -EINVAL; - ehca_err(qp->device,"Got invalid ehca_qp_state=%x " + ehca_err(qp->device, "Got invalid ehca_qp_state=%x " "ehca_qp=%p qp_num=%x", qpcb->qp_state, my_qp, qp->qp_num); goto query_qp_exit1; diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c index 61da65e6e5e0..94eed70fedf5 100644 --- a/drivers/infiniband/hw/ehca/ehca_reqs.c +++ b/drivers/infiniband/hw/ehca/ehca_reqs.c @@ -79,7 +79,8 @@ static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue, } if (ehca_debug_level) { - ehca_gen_dbg("RECEIVE WQE written into ipz_rqueue=%p", ipz_rqueue); + ehca_gen_dbg("RECEIVE WQE written into ipz_rqueue=%p", + ipz_rqueue); ehca_dmp( wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "recv wqe"); } @@ -99,7 +100,7 @@ static void trace_send_wr_ud(const struct ib_send_wr *send_wr) struct ib_mad_hdr *mad_hdr = send_wr->wr.ud.mad_hdr; struct ib_sge *sge = send_wr->sg_list; ehca_gen_dbg("send_wr#%x wr_id=%lx num_sge=%x " - "send_flags=%x opcode=%x",idx, send_wr->wr_id, + "send_flags=%x opcode=%x", idx, send_wr->wr_id, send_wr->num_sge, send_wr->send_flags, send_wr->opcode); if (mad_hdr) { @@ -116,7 +117,7 @@ static void trace_send_wr_ud(const struct ib_send_wr *send_wr) mad_hdr->attr_mod); } for (j = 0; j < send_wr->num_sge; j++) { - u8 *data = (u8 *) abs_to_virt(sge->addr); + u8 *data = (u8 *)abs_to_virt(sge->addr); ehca_gen_dbg("send_wr#%x sge#%x addr=%p length=%x " "lkey=%x", idx, j, data, sge->length, sge->lkey); @@ -534,9 +535,11 @@ poll_cq_one_read_cqe: cqe_count++; if (unlikely(cqe->status & WC_STATUS_PURGE_BIT)) { - struct ehca_qp *qp=ehca_cq_get_qp(my_cq, cqe->local_qp_number); + struct ehca_qp *qp; int purgeflag; unsigned long flags; + + qp = ehca_cq_get_qp(my_cq, cqe->local_qp_number); if (!qp) { ehca_err(cq->device, "cq_num=%x qp_num=%x " "could not find qp -> ignore cqe", @@ -551,8 +554,8 @@ poll_cq_one_read_cqe: spin_unlock_irqrestore(&qp->spinlock_s, flags); if (purgeflag) { - ehca_dbg(cq->device, "Got CQE with purged bit qp_num=%x " - "src_qp=%x", + ehca_dbg(cq->device, + "Got CQE with purged bit qp_num=%x src_qp=%x", cqe->local_qp_number, cqe->remote_qp_number); if (ehca_debug_level) ehca_dmp(cqe, 64, "qp_num=%x src_qp=%x", diff --git a/drivers/infiniband/hw/ehca/ehca_tools.h b/drivers/infiniband/hw/ehca/ehca_tools.h index 03b185f873da..678b81391861 100644 --- a/drivers/infiniband/hw/ehca/ehca_tools.h +++ b/drivers/infiniband/hw/ehca/ehca_tools.h @@ -93,14 +93,14 @@ extern int ehca_debug_level; #define ehca_gen_dbg(format, arg...) \ do { \ if (unlikely(ehca_debug_level)) \ - printk(KERN_DEBUG "PU%04x EHCA_DBG:%s " format "\n",\ + printk(KERN_DEBUG "PU%04x EHCA_DBG:%s " format "\n", \ get_paca()->paca_index, __FUNCTION__, ## arg); \ } while (0) #define ehca_gen_warn(format, arg...) \ do { \ if (unlikely(ehca_debug_level)) \ - printk(KERN_INFO "PU%04x EHCA_WARN:%s " format "\n",\ + printk(KERN_INFO "PU%04x EHCA_WARN:%s " format "\n", \ get_paca()->paca_index, __FUNCTION__, ## arg); \ } while (0) @@ -114,12 +114,12 @@ extern int ehca_debug_level; * <format string> adr=X ofs=Y <8 bytes hex> <8 bytes hex> */ #define ehca_dmp(adr, len, format, args...) \ - do { \ - unsigned int x; \ + do { \ + unsigned int x; \ unsigned int l = (unsigned int)(len); \ - unsigned char *deb = (unsigned char*)(adr); \ + unsigned char *deb = (unsigned char *)(adr); \ for (x = 0; x < l; x += 16) { \ - printk("EHCA_DMP:%s " format \ + printk(KERN_INFO "EHCA_DMP:%s " format \ " adr=%p ofs=%04x %016lx %016lx\n", \ __FUNCTION__, ##args, deb, x, \ *((u64 *)&deb[0]), *((u64 *)&deb[8])); \ @@ -128,16 +128,16 @@ extern int ehca_debug_level; } while (0) /* define a bitmask, little endian version */ -#define EHCA_BMASK(pos,length) (((pos)<<16)+(length)) +#define EHCA_BMASK(pos, length) (((pos) << 16) + (length)) /* define a bitmask, the ibm way... */ -#define EHCA_BMASK_IBM(from,to) (((63-to)<<16)+((to)-(from)+1)) +#define EHCA_BMASK_IBM(from, to) (((63 - to) << 16) + ((to) - (from) + 1)) /* internal function, don't use */ -#define EHCA_BMASK_SHIFTPOS(mask) (((mask)>>16)&0xffff) +#define EHCA_BMASK_SHIFTPOS(mask) (((mask) >> 16) & 0xffff) /* internal function, don't use */ -#define EHCA_BMASK_MASK(mask) (0xffffffffffffffffULL >> ((64-(mask))&0xffff)) +#define EHCA_BMASK_MASK(mask) (~0ULL >> ((64 - (mask)) & 0xffff)) /** * EHCA_BMASK_SET - return value shifted and masked by mask @@ -145,14 +145,14 @@ extern int ehca_debug_level; * variable&=~EHCA_BMASK_SET(MY_MASK,-1) clears the bits from the mask * in variable */ -#define EHCA_BMASK_SET(mask,value) \ - ((EHCA_BMASK_MASK(mask) & ((u64)(value)))<<EHCA_BMASK_SHIFTPOS(mask)) +#define EHCA_BMASK_SET(mask, value) \ + ((EHCA_BMASK_MASK(mask) & ((u64)(value))) << EHCA_BMASK_SHIFTPOS(mask)) /** * EHCA_BMASK_GET - extract a parameter from value by mask */ -#define EHCA_BMASK_GET(mask,value) \ - (EHCA_BMASK_MASK(mask)& (((u64)(value))>>EHCA_BMASK_SHIFTPOS(mask))) +#define EHCA_BMASK_GET(mask, value) \ + (EHCA_BMASK_MASK(mask) & (((u64)(value)) >> EHCA_BMASK_SHIFTPOS(mask))) /* Converts ehca to ib return code */ @@ -161,8 +161,11 @@ static inline int ehca2ib_return_code(u64 ehca_rc) switch (ehca_rc) { case H_SUCCESS: return 0; + case H_RESOURCE: /* Resource in use */ case H_BUSY: return -EBUSY; + case H_NOT_ENOUGH_RESOURCES: /* insufficient resources */ + case H_CONSTRAINED: /* resource constraint */ case H_NO_MEM: return -ENOMEM; default: diff --git a/drivers/infiniband/hw/ehca/ehca_uverbs.c b/drivers/infiniband/hw/ehca/ehca_uverbs.c index 3031b3bb56f9..05c415744e3b 100644 --- a/drivers/infiniband/hw/ehca/ehca_uverbs.c +++ b/drivers/infiniband/hw/ehca/ehca_uverbs.c @@ -70,7 +70,7 @@ int ehca_dealloc_ucontext(struct ib_ucontext *context) static void ehca_mm_open(struct vm_area_struct *vma) { - u32 *count = (u32*)vma->vm_private_data; + u32 *count = (u32 *)vma->vm_private_data; if (!count) { ehca_gen_err("Invalid vma struct vm_start=%lx vm_end=%lx", vma->vm_start, vma->vm_end); @@ -86,7 +86,7 @@ static void ehca_mm_open(struct vm_area_struct *vma) static void ehca_mm_close(struct vm_area_struct *vma) { - u32 *count = (u32*)vma->vm_private_data; + u32 *count = (u32 *)vma->vm_private_data; if (!count) { ehca_gen_err("Invalid vma struct vm_start=%lx vm_end=%lx", vma->vm_start, vma->vm_end); @@ -215,7 +215,8 @@ static int ehca_mmap_qp(struct vm_area_struct *vma, struct ehca_qp *qp, case 2: /* qp rqueue_addr */ ehca_dbg(qp->ib_qp.device, "qp_num=%x rqueue", qp->ib_qp.qp_num); - ret = ehca_mmap_queue(vma, &qp->ipz_rqueue, &qp->mm_count_rqueue); + ret = ehca_mmap_queue(vma, &qp->ipz_rqueue, + &qp->mm_count_rqueue); if (unlikely(ret)) { ehca_err(qp->ib_qp.device, "ehca_mmap_queue(rq) failed rc=%x qp_num=%x", @@ -227,7 +228,8 @@ static int ehca_mmap_qp(struct vm_area_struct *vma, struct ehca_qp *qp, case 3: /* qp squeue_addr */ ehca_dbg(qp->ib_qp.device, "qp_num=%x squeue", qp->ib_qp.qp_num); - ret = ehca_mmap_queue(vma, &qp->ipz_squeue, &qp->mm_count_squeue); + ret = ehca_mmap_queue(vma, &qp->ipz_squeue, + &qp->mm_count_squeue); if (unlikely(ret)) { ehca_err(qp->ib_qp.device, "ehca_mmap_queue(sq) failed rc=%x qp_num=%x", diff --git a/drivers/infiniband/hw/ehca/hcp_if.c b/drivers/infiniband/hw/ehca/hcp_if.c index 4776a8b0feec..3394e05f4b4f 100644 --- a/drivers/infiniband/hw/ehca/hcp_if.c +++ b/drivers/infiniband/hw/ehca/hcp_if.c @@ -501,8 +501,8 @@ u64 hipz_h_register_rpage_qp(const struct ipz_adapter_handle adapter_handle, return H_PARAMETER; } - return hipz_h_register_rpage(adapter_handle,pagesize,queue_type, - qp_handle.handle,logical_address_of_page, + return hipz_h_register_rpage(adapter_handle, pagesize, queue_type, + qp_handle.handle, logical_address_of_page, count); } @@ -522,9 +522,9 @@ u64 hipz_h_disable_and_get_wqe(const struct ipz_adapter_handle adapter_handle, qp_handle.handle, /* r6 */ 0, 0, 0, 0, 0, 0); if (log_addr_next_sq_wqe2processed) - *log_addr_next_sq_wqe2processed = (void*)outs[0]; + *log_addr_next_sq_wqe2processed = (void *)outs[0]; if (log_addr_next_rq_wqe2processed) - *log_addr_next_rq_wqe2processed = (void*)outs[1]; + *log_addr_next_rq_wqe2processed = (void *)outs[1]; return ret; } diff --git a/drivers/infiniband/hw/ehca/hcp_phyp.c b/drivers/infiniband/hw/ehca/hcp_phyp.c index 0b1a4772c78a..214821095cb1 100644 --- a/drivers/infiniband/hw/ehca/hcp_phyp.c +++ b/drivers/infiniband/hw/ehca/hcp_phyp.c @@ -50,7 +50,7 @@ int hcall_map_page(u64 physaddr, u64 *mapaddr) int hcall_unmap_page(u64 mapaddr) { - iounmap((volatile void __iomem*)mapaddr); + iounmap((volatile void __iomem *) mapaddr); return 0; } diff --git a/drivers/infiniband/hw/ehca/hipz_fns_core.h b/drivers/infiniband/hw/ehca/hipz_fns_core.h index 20898a153446..868735fd3187 100644 --- a/drivers/infiniband/hw/ehca/hipz_fns_core.h +++ b/drivers/infiniband/hw/ehca/hipz_fns_core.h @@ -53,10 +53,10 @@ #define hipz_galpa_load_cq(gal, offset) \ hipz_galpa_load(gal, CQTEMM_OFFSET(offset)) -#define hipz_galpa_store_qp(gal,offset, value) \ +#define hipz_galpa_store_qp(gal, offset, value) \ hipz_galpa_store(gal, QPTEMM_OFFSET(offset), value) #define hipz_galpa_load_qp(gal, offset) \ - hipz_galpa_load(gal,QPTEMM_OFFSET(offset)) + hipz_galpa_load(gal, QPTEMM_OFFSET(offset)) static inline void hipz_update_sqa(struct ehca_qp *qp, u16 nr_wqes) { diff --git a/drivers/infiniband/hw/ehca/hipz_hw.h b/drivers/infiniband/hw/ehca/hipz_hw.h index dad6dea5636b..d9739e554515 100644 --- a/drivers/infiniband/hw/ehca/hipz_hw.h +++ b/drivers/infiniband/hw/ehca/hipz_hw.h @@ -161,11 +161,11 @@ struct hipz_qptemm { /* 0x1000 */ }; -#define QPX_SQADDER EHCA_BMASK_IBM(48,63) -#define QPX_RQADDER EHCA_BMASK_IBM(48,63) -#define QPX_AAELOG_RESET_SRQ_LIMIT EHCA_BMASK_IBM(3,3) +#define QPX_SQADDER EHCA_BMASK_IBM(48, 63) +#define QPX_RQADDER EHCA_BMASK_IBM(48, 63) +#define QPX_AAELOG_RESET_SRQ_LIMIT EHCA_BMASK_IBM(3, 3) -#define QPTEMM_OFFSET(x) offsetof(struct hipz_qptemm,x) +#define QPTEMM_OFFSET(x) offsetof(struct hipz_qptemm, x) /* MRMWPT Entry Memory Map */ struct hipz_mrmwmm { @@ -187,7 +187,7 @@ struct hipz_mrmwmm { }; -#define MRMWMM_OFFSET(x) offsetof(struct hipz_mrmwmm,x) +#define MRMWMM_OFFSET(x) offsetof(struct hipz_mrmwmm, x) struct hipz_qpedmm { /* 0x00 */ @@ -238,7 +238,7 @@ struct hipz_qpedmm { u64 qpedx_rrva3; }; -#define QPEDMM_OFFSET(x) offsetof(struct hipz_qpedmm,x) +#define QPEDMM_OFFSET(x) offsetof(struct hipz_qpedmm, x) /* CQ Table Entry Memory Map */ struct hipz_cqtemm { @@ -263,12 +263,12 @@ struct hipz_cqtemm { /* 0x1000 */ }; -#define CQX_FEC_CQE_CNT EHCA_BMASK_IBM(32,63) -#define CQX_FECADDER EHCA_BMASK_IBM(32,63) -#define CQX_N0_GENERATE_SOLICITED_COMP_EVENT EHCA_BMASK_IBM(0,0) -#define CQX_N1_GENERATE_COMP_EVENT EHCA_BMASK_IBM(0,0) +#define CQX_FEC_CQE_CNT EHCA_BMASK_IBM(32, 63) +#define CQX_FECADDER EHCA_BMASK_IBM(32, 63) +#define CQX_N0_GENERATE_SOLICITED_COMP_EVENT EHCA_BMASK_IBM(0, 0) +#define CQX_N1_GENERATE_COMP_EVENT EHCA_BMASK_IBM(0, 0) -#define CQTEMM_OFFSET(x) offsetof(struct hipz_cqtemm,x) +#define CQTEMM_OFFSET(x) offsetof(struct hipz_cqtemm, x) /* EQ Table Entry Memory Map */ struct hipz_eqtemm { @@ -293,7 +293,7 @@ struct hipz_eqtemm { }; -#define EQTEMM_OFFSET(x) offsetof(struct hipz_eqtemm,x) +#define EQTEMM_OFFSET(x) offsetof(struct hipz_eqtemm, x) /* access control defines for MR/MW */ #define HIPZ_ACCESSCTRL_L_WRITE 0x00800000 diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.c b/drivers/infiniband/hw/ehca/ipz_pt_fn.c index bf7a40088f61..9606f13ed092 100644 --- a/drivers/infiniband/hw/ehca/ipz_pt_fn.c +++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.c @@ -114,7 +114,7 @@ int ipz_queue_ctor(struct ipz_queue *queue, */ f = 0; while (f < nr_of_pages) { - u8 *kpage = (u8*)get_zeroed_page(GFP_KERNEL); + u8 *kpage = (u8 *)get_zeroed_page(GFP_KERNEL); int k; if (!kpage) goto ipz_queue_ctor_exit0; /*NOMEM*/ diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.h b/drivers/infiniband/hw/ehca/ipz_pt_fn.h index 007f0882fd40..39a4f64aff41 100644 --- a/drivers/infiniband/hw/ehca/ipz_pt_fn.h +++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.h @@ -240,7 +240,7 @@ void *ipz_qeit_eq_get_inc(struct ipz_queue *queue); static inline void *ipz_eqit_eq_get_inc_valid(struct ipz_queue *queue) { void *ret = ipz_qeit_get(queue); - u32 qe = *(u8 *) ret; + u32 qe = *(u8 *)ret; if ((qe >> 7) != (queue->toggle_state & 1)) return NULL; ipz_qeit_eq_get_inc(queue); /* this is a good one */ @@ -250,7 +250,7 @@ static inline void *ipz_eqit_eq_get_inc_valid(struct ipz_queue *queue) static inline void *ipz_eqit_eq_peek_valid(struct ipz_queue *queue) { void *ret = ipz_qeit_get(queue); - u32 qe = *(u8 *) ret; + u32 qe = *(u8 *)ret; if ((qe >> 7) != (queue->toggle_state & 1)) return NULL; return ret; diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c index 9361f5ab8bd6..09c5fd84b1e3 100644 --- a/drivers/infiniband/hw/ipath/ipath_driver.c +++ b/drivers/infiniband/hw/ipath/ipath_driver.c @@ -1889,7 +1889,7 @@ void ipath_write_kreg_port(const struct ipath_devdata *dd, ipath_kreg regno, /* Below is "non-zero" to force override, but both actual LEDs are off */ #define LED_OVER_BOTH_OFF (8) -void ipath_run_led_override(unsigned long opaque) +static void ipath_run_led_override(unsigned long opaque) { struct ipath_devdata *dd = (struct ipath_devdata *)opaque; int timeoff; diff --git a/drivers/infiniband/hw/ipath/ipath_eeprom.c b/drivers/infiniband/hw/ipath/ipath_eeprom.c index 6b9147964a4f..b4503e9c1e95 100644 --- a/drivers/infiniband/hw/ipath/ipath_eeprom.c +++ b/drivers/infiniband/hw/ipath/ipath_eeprom.c @@ -426,8 +426,8 @@ bail: * @buffer: data to write * @len: number of bytes to write */ -int ipath_eeprom_internal_write(struct ipath_devdata *dd, u8 eeprom_offset, - const void *buffer, int len) +static int ipath_eeprom_internal_write(struct ipath_devdata *dd, u8 eeprom_offset, + const void *buffer, int len) { u8 single_byte; int sub_len; diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c index 47aa43428fbf..1fd91c59f246 100644 --- a/drivers/infiniband/hw/ipath/ipath_intr.c +++ b/drivers/infiniband/hw/ipath/ipath_intr.c @@ -70,7 +70,7 @@ static void ipath_clrpiobuf(struct ipath_devdata *dd, u32 pnum) * If rewrite is true, and bits are set in the sendbufferror registers, * we'll write to the buffer, for error recovery on parity errors. */ -void ipath_disarm_senderrbufs(struct ipath_devdata *dd, int rewrite) +static void ipath_disarm_senderrbufs(struct ipath_devdata *dd, int rewrite) { u32 piobcnt; unsigned long sbuf[4]; diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h index 3105005fc9d2..ace63ef78e6f 100644 --- a/drivers/infiniband/hw/ipath/ipath_kernel.h +++ b/drivers/infiniband/hw/ipath/ipath_kernel.h @@ -776,7 +776,6 @@ void ipath_get_eeprom_info(struct ipath_devdata *); int ipath_update_eeprom_log(struct ipath_devdata *dd); void ipath_inc_eeprom_err(struct ipath_devdata *dd, u32 eidx, u32 incr); u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg); -void ipath_disarm_senderrbufs(struct ipath_devdata *, int); /* * Set LED override, only the two LSBs have "public" meaning, but @@ -820,7 +819,6 @@ static inline u64 ipath_mdio_req(int cmd, int dev, int reg, int data) #define IPATH_MDIO_CTRL_8355_REG_10 0x1D int ipath_get_user_pages(unsigned long, size_t, struct page **); -int ipath_get_user_pages_nocopy(unsigned long, struct page **); void ipath_release_user_pages(struct page **, size_t); void ipath_release_user_pages_on_close(struct page **, size_t); int ipath_eeprom_read(struct ipath_devdata *, u8, void *, int); diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c index 85256747d8a1..c69c25239443 100644 --- a/drivers/infiniband/hw/ipath/ipath_ruc.c +++ b/drivers/infiniband/hw/ipath/ipath_ruc.c @@ -507,7 +507,7 @@ static int want_buffer(struct ipath_devdata *dd) * * Called when we run out of PIO buffers. */ -void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev) +static void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev) { unsigned long flags; diff --git a/drivers/infiniband/hw/ipath/ipath_user_pages.c b/drivers/infiniband/hw/ipath/ipath_user_pages.c index 27034d38b3dd..0190edc8044e 100644 --- a/drivers/infiniband/hw/ipath/ipath_user_pages.c +++ b/drivers/infiniband/hw/ipath/ipath_user_pages.c @@ -171,32 +171,6 @@ int ipath_get_user_pages(unsigned long start_page, size_t num_pages, return ret; } -/** - * ipath_get_user_pages_nocopy - lock a single page for I/O and mark shared - * @start_page: the page to lock - * @p: the output page structure - * - * This is similar to ipath_get_user_pages, but it's always one page, and we - * mark the page as locked for I/O, and shared. This is used for the user - * process page that contains the destination address for the rcvhdrq tail - * update, so we need to have the vma. If we don't do this, the page can be - * taken away from us on fork, even if the child never touches it, and then - * the user process never sees the tail register updates. - */ -int ipath_get_user_pages_nocopy(unsigned long page, struct page **p) -{ - struct vm_area_struct *vma; - int ret; - - down_write(¤t->mm->mmap_sem); - - ret = __get_user_pages(page, 1, p, &vma); - - up_write(¤t->mm->mmap_sem); - - return ret; -} - void ipath_release_user_pages(struct page **p, size_t num_pages) { down_write(¤t->mm->mmap_sem); diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c index 65f7181e9cf8..16aa61fd8085 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs.c @@ -488,7 +488,7 @@ bail:; * This is called from ipath_do_rcv_timer() at interrupt level to check for * QPs which need retransmits and to collect performance numbers. */ -void ipath_ib_timer(struct ipath_ibdev *dev) +static void ipath_ib_timer(struct ipath_ibdev *dev) { struct ipath_qp *resend = NULL; struct list_head *last; diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h index f3d1f2cee6f8..9bbe81967f14 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.h +++ b/drivers/infiniband/hw/ipath/ipath_verbs.h @@ -782,8 +782,6 @@ void ipath_update_mmap_info(struct ipath_ibdev *dev, int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); -void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev); - void ipath_insert_rnr_queue(struct ipath_qp *qp); int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only); @@ -807,8 +805,6 @@ void ipath_ib_rcv(struct ipath_ibdev *, void *, void *, u32); int ipath_ib_piobufavail(struct ipath_ibdev *); -void ipath_ib_timer(struct ipath_ibdev *); - unsigned ipath_get_npkeys(struct ipath_devdata *); u32 ipath_get_cr_errpkey(struct ipath_devdata *); diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 40042184ad58..b5a24fbef70d 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1183,6 +1183,43 @@ static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq, struct ib_cq *ib_cq return cur + nreq >= wq->max_post; } +static __always_inline void set_raddr_seg(struct mlx4_wqe_raddr_seg *rseg, + u64 remote_addr, u32 rkey) +{ + rseg->raddr = cpu_to_be64(remote_addr); + rseg->rkey = cpu_to_be32(rkey); + rseg->reserved = 0; +} + +static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, struct ib_send_wr *wr) +{ + if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) { + aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap); + aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add); + } else { + aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add); + aseg->compare = 0; + } + +} + +static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg, + struct ib_send_wr *wr) +{ + memcpy(dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof (struct mlx4_av)); + dseg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn); + dseg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey); + +} + +static void set_data_seg(struct mlx4_wqe_data_seg *dseg, + struct ib_sge *sg) +{ + dseg->byte_count = cpu_to_be32(sg->length); + dseg->lkey = cpu_to_be32(sg->lkey); + dseg->addr = cpu_to_be64(sg->addr); +} + int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr) { @@ -1238,26 +1275,13 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, switch (wr->opcode) { case IB_WR_ATOMIC_CMP_AND_SWP: case IB_WR_ATOMIC_FETCH_AND_ADD: - ((struct mlx4_wqe_raddr_seg *) wqe)->raddr = - cpu_to_be64(wr->wr.atomic.remote_addr); - ((struct mlx4_wqe_raddr_seg *) wqe)->rkey = - cpu_to_be32(wr->wr.atomic.rkey); - ((struct mlx4_wqe_raddr_seg *) wqe)->reserved = 0; - + set_raddr_seg(wqe, wr->wr.atomic.remote_addr, + wr->wr.atomic.rkey); wqe += sizeof (struct mlx4_wqe_raddr_seg); - if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) { - ((struct mlx4_wqe_atomic_seg *) wqe)->swap_add = - cpu_to_be64(wr->wr.atomic.swap); - ((struct mlx4_wqe_atomic_seg *) wqe)->compare = - cpu_to_be64(wr->wr.atomic.compare_add); - } else { - ((struct mlx4_wqe_atomic_seg *) wqe)->swap_add = - cpu_to_be64(wr->wr.atomic.compare_add); - ((struct mlx4_wqe_atomic_seg *) wqe)->compare = 0; - } - + set_atomic_seg(wqe, wr); wqe += sizeof (struct mlx4_wqe_atomic_seg); + size += (sizeof (struct mlx4_wqe_raddr_seg) + sizeof (struct mlx4_wqe_atomic_seg)) / 16; @@ -1266,15 +1290,10 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, case IB_WR_RDMA_READ: case IB_WR_RDMA_WRITE: case IB_WR_RDMA_WRITE_WITH_IMM: - ((struct mlx4_wqe_raddr_seg *) wqe)->raddr = - cpu_to_be64(wr->wr.rdma.remote_addr); - ((struct mlx4_wqe_raddr_seg *) wqe)->rkey = - cpu_to_be32(wr->wr.rdma.rkey); - ((struct mlx4_wqe_raddr_seg *) wqe)->reserved = 0; - + set_raddr_seg(wqe, wr->wr.rdma.remote_addr, + wr->wr.rdma.rkey); wqe += sizeof (struct mlx4_wqe_raddr_seg); size += sizeof (struct mlx4_wqe_raddr_seg) / 16; - break; default: @@ -1284,13 +1303,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, break; case IB_QPT_UD: - memcpy(((struct mlx4_wqe_datagram_seg *) wqe)->av, - &to_mah(wr->wr.ud.ah)->av, sizeof (struct mlx4_av)); - ((struct mlx4_wqe_datagram_seg *) wqe)->dqpn = - cpu_to_be32(wr->wr.ud.remote_qpn); - ((struct mlx4_wqe_datagram_seg *) wqe)->qkey = - cpu_to_be32(wr->wr.ud.remote_qkey); - + set_datagram_seg(wqe, wr); wqe += sizeof (struct mlx4_wqe_datagram_seg); size += sizeof (struct mlx4_wqe_datagram_seg) / 16; break; @@ -1313,12 +1326,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, } for (i = 0; i < wr->num_sge; ++i) { - ((struct mlx4_wqe_data_seg *) wqe)->byte_count = - cpu_to_be32(wr->sg_list[i].length); - ((struct mlx4_wqe_data_seg *) wqe)->lkey = - cpu_to_be32(wr->sg_list[i].lkey); - ((struct mlx4_wqe_data_seg *) wqe)->addr = - cpu_to_be64(wr->sg_list[i].addr); + set_data_seg(wqe, wr->sg_list + i); wqe += sizeof (struct mlx4_wqe_data_seg); size += sizeof (struct mlx4_wqe_data_seg) / 16; @@ -1498,7 +1506,7 @@ static int to_ib_qp_access_flags(int mlx4_flags) static void to_ib_ah_attr(struct mlx4_dev *dev, struct ib_ah_attr *ib_ah_attr, struct mlx4_qp_path *path) { - memset(ib_ah_attr, 0, sizeof *path); + memset(ib_ah_attr, 0, sizeof *ib_ah_attr); ib_ah_attr->port_num = path->sched_queue & 0x40 ? 2 : 1; if (ib_ah_attr->port_num == 0 || ib_ah_attr->port_num > dev->caps.num_ports) @@ -1515,7 +1523,7 @@ static void to_ib_ah_attr(struct mlx4_dev *dev, struct ib_ah_attr *ib_ah_attr, ib_ah_attr->grh.traffic_class = (be32_to_cpu(path->tclass_flowlabel) >> 20) & 0xff; ib_ah_attr->grh.flow_label = - be32_to_cpu(path->tclass_flowlabel) & 0xffffff; + be32_to_cpu(path->tclass_flowlabel) & 0xfffff; memcpy(ib_ah_attr->grh.dgid.raw, path->rgid, sizeof ib_ah_attr->grh.dgid.raw); } @@ -1560,7 +1568,10 @@ int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr } qp_attr->pkey_index = context.pri_path.pkey_index & 0x7f; - qp_attr->port_num = context.pri_path.sched_queue & 0x40 ? 2 : 1; + if (qp_attr->qp_state == IB_QPS_INIT) + qp_attr->port_num = qp->port; + else + qp_attr->port_num = context.pri_path.sched_queue & 0x40 ? 2 : 1; /* qp_attr->en_sqd_async_notify is only applicable in modify qp */ qp_attr->sq_draining = mlx4_state == MLX4_QP_STATE_SQ_DRAINING; @@ -1578,17 +1589,25 @@ int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr done: qp_attr->cur_qp_state = qp_attr->qp_state; + qp_attr->cap.max_recv_wr = qp->rq.wqe_cnt; + qp_attr->cap.max_recv_sge = qp->rq.max_gs; + if (!ibqp->uobject) { - qp_attr->cap.max_send_wr = qp->sq.wqe_cnt; - qp_attr->cap.max_recv_wr = qp->rq.wqe_cnt; - qp_attr->cap.max_send_sge = qp->sq.max_gs; - qp_attr->cap.max_recv_sge = qp->rq.max_gs; - qp_attr->cap.max_inline_data = (1 << qp->sq.wqe_shift) - - send_wqe_overhead(qp->ibqp.qp_type) - - sizeof (struct mlx4_wqe_inline_seg); - qp_init_attr->cap = qp_attr->cap; + qp_attr->cap.max_send_wr = qp->sq.wqe_cnt; + qp_attr->cap.max_send_sge = qp->sq.max_gs; + } else { + qp_attr->cap.max_send_wr = 0; + qp_attr->cap.max_send_sge = 0; } + /* + * We don't support inline sends for kernel QPs (yet), and we + * don't know what userspace's value should be. + */ + qp_attr->cap.max_inline_data = 0; + + qp_init_attr->cap = qp_attr->cap; + return 0; } diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c index aa563e61de65..76fed7545c53 100644 --- a/drivers/infiniband/hw/mthca/mthca_main.c +++ b/drivers/infiniband/hw/mthca/mthca_main.c @@ -67,7 +67,7 @@ MODULE_PARM_DESC(msi_x, "attempt to use MSI-X if nonzero"); static int msi = 0; module_param(msi, int, 0444); -MODULE_PARM_DESC(msi, "attempt to use MSI if nonzero"); +MODULE_PARM_DESC(msi, "attempt to use MSI if nonzero (deprecated, use MSI-X instead)"); #else /* CONFIG_PCI_MSI */ @@ -1117,9 +1117,21 @@ static int __mthca_init_one(struct pci_dev *pdev, int hca_type) if (msi_x && !mthca_enable_msi_x(mdev)) mdev->mthca_flags |= MTHCA_FLAG_MSI_X; - if (msi && !(mdev->mthca_flags & MTHCA_FLAG_MSI_X) && - !pci_enable_msi(pdev)) - mdev->mthca_flags |= MTHCA_FLAG_MSI; + else if (msi) { + static int warned; + + if (!warned) { + printk(KERN_WARNING PFX "WARNING: MSI support will be " + "removed from the ib_mthca driver in January 2008.\n"); + printk(KERN_WARNING " If you are using MSI and cannot " + "switch to MSI-X, please tell " + "<general@lists.openfabrics.org>.\n"); + ++warned; + } + + if (!pci_enable_msi(pdev)) + mdev->mthca_flags |= MTHCA_FLAG_MSI; + } if (mthca_cmd_init(mdev)) { mthca_err(mdev, "Failed to init command interface, aborting.\n"); @@ -1135,7 +1147,7 @@ static int __mthca_init_one(struct pci_dev *pdev, int hca_type) goto err_cmd; if (mdev->fw_ver < mthca_hca_table[hca_type].latest_fw) { - mthca_warn(mdev, "HCA FW version %d.%d.%3d is old (%d.%d.%3d is current).\n", + mthca_warn(mdev, "HCA FW version %d.%d.%03d is old (%d.%d.%03d is current).\n", (int) (mdev->fw_ver >> 32), (int) (mdev->fw_ver >> 16) & 0xffff, (int) (mdev->fw_ver & 0xffff), (int) (mthca_hca_table[hca_type].latest_fw >> 32), diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index 11f1d99db40b..df01b2026a64 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -1578,6 +1578,45 @@ static inline int mthca_wq_overflow(struct mthca_wq *wq, int nreq, return cur + nreq >= wq->max; } +static __always_inline void set_raddr_seg(struct mthca_raddr_seg *rseg, + u64 remote_addr, u32 rkey) +{ + rseg->raddr = cpu_to_be64(remote_addr); + rseg->rkey = cpu_to_be32(rkey); + rseg->reserved = 0; +} + +static __always_inline void set_atomic_seg(struct mthca_atomic_seg *aseg, + struct ib_send_wr *wr) +{ + if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) { + aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap); + aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add); + } else { + aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add); + aseg->compare = 0; + } + +} + +static void set_tavor_ud_seg(struct mthca_tavor_ud_seg *useg, + struct ib_send_wr *wr) +{ + useg->lkey = cpu_to_be32(to_mah(wr->wr.ud.ah)->key); + useg->av_addr = cpu_to_be64(to_mah(wr->wr.ud.ah)->avdma); + useg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn); + useg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey); + +} + +static void set_arbel_ud_seg(struct mthca_arbel_ud_seg *useg, + struct ib_send_wr *wr) +{ + memcpy(useg->av, to_mah(wr->wr.ud.ah)->av, MTHCA_AV_SIZE); + useg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn); + useg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey); +} + int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr) { @@ -1590,8 +1629,15 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, int nreq; int i; int size; - int size0 = 0; - u32 f0 = 0; + /* + * f0 and size0 are only used if nreq != 0, and they will + * always be initialized the first time through the main loop + * before nreq is incremented. So nreq cannot become non-zero + * without initializing f0 and size0, and they are in fact + * never used uninitialized. + */ + int uninitialized_var(size0); + u32 uninitialized_var(f0); int ind; u8 op0 = 0; @@ -1636,25 +1682,11 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, switch (wr->opcode) { case IB_WR_ATOMIC_CMP_AND_SWP: case IB_WR_ATOMIC_FETCH_AND_ADD: - ((struct mthca_raddr_seg *) wqe)->raddr = - cpu_to_be64(wr->wr.atomic.remote_addr); - ((struct mthca_raddr_seg *) wqe)->rkey = - cpu_to_be32(wr->wr.atomic.rkey); - ((struct mthca_raddr_seg *) wqe)->reserved = 0; - + set_raddr_seg(wqe, wr->wr.atomic.remote_addr, + wr->wr.atomic.rkey); wqe += sizeof (struct mthca_raddr_seg); - if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) { - ((struct mthca_atomic_seg *) wqe)->swap_add = - cpu_to_be64(wr->wr.atomic.swap); - ((struct mthca_atomic_seg *) wqe)->compare = - cpu_to_be64(wr->wr.atomic.compare_add); - } else { - ((struct mthca_atomic_seg *) wqe)->swap_add = - cpu_to_be64(wr->wr.atomic.compare_add); - ((struct mthca_atomic_seg *) wqe)->compare = 0; - } - + set_atomic_seg(wqe, wr); wqe += sizeof (struct mthca_atomic_seg); size += (sizeof (struct mthca_raddr_seg) + sizeof (struct mthca_atomic_seg)) / 16; @@ -1663,12 +1695,9 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, case IB_WR_RDMA_WRITE: case IB_WR_RDMA_WRITE_WITH_IMM: case IB_WR_RDMA_READ: - ((struct mthca_raddr_seg *) wqe)->raddr = - cpu_to_be64(wr->wr.rdma.remote_addr); - ((struct mthca_raddr_seg *) wqe)->rkey = - cpu_to_be32(wr->wr.rdma.rkey); - ((struct mthca_raddr_seg *) wqe)->reserved = 0; - wqe += sizeof (struct mthca_raddr_seg); + set_raddr_seg(wqe, wr->wr.rdma.remote_addr, + wr->wr.rdma.rkey); + wqe += sizeof (struct mthca_raddr_seg); size += sizeof (struct mthca_raddr_seg) / 16; break; @@ -1683,12 +1712,9 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, switch (wr->opcode) { case IB_WR_RDMA_WRITE: case IB_WR_RDMA_WRITE_WITH_IMM: - ((struct mthca_raddr_seg *) wqe)->raddr = - cpu_to_be64(wr->wr.rdma.remote_addr); - ((struct mthca_raddr_seg *) wqe)->rkey = - cpu_to_be32(wr->wr.rdma.rkey); - ((struct mthca_raddr_seg *) wqe)->reserved = 0; - wqe += sizeof (struct mthca_raddr_seg); + set_raddr_seg(wqe, wr->wr.rdma.remote_addr, + wr->wr.rdma.rkey); + wqe += sizeof (struct mthca_raddr_seg); size += sizeof (struct mthca_raddr_seg) / 16; break; @@ -1700,16 +1726,8 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, break; case UD: - ((struct mthca_tavor_ud_seg *) wqe)->lkey = - cpu_to_be32(to_mah(wr->wr.ud.ah)->key); - ((struct mthca_tavor_ud_seg *) wqe)->av_addr = - cpu_to_be64(to_mah(wr->wr.ud.ah)->avdma); - ((struct mthca_tavor_ud_seg *) wqe)->dqpn = - cpu_to_be32(wr->wr.ud.remote_qpn); - ((struct mthca_tavor_ud_seg *) wqe)->qkey = - cpu_to_be32(wr->wr.ud.remote_qkey); - - wqe += sizeof (struct mthca_tavor_ud_seg); + set_tavor_ud_seg(wqe, wr); + wqe += sizeof (struct mthca_tavor_ud_seg); size += sizeof (struct mthca_tavor_ud_seg) / 16; break; @@ -1734,13 +1752,8 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, } for (i = 0; i < wr->num_sge; ++i) { - ((struct mthca_data_seg *) wqe)->byte_count = - cpu_to_be32(wr->sg_list[i].length); - ((struct mthca_data_seg *) wqe)->lkey = - cpu_to_be32(wr->sg_list[i].lkey); - ((struct mthca_data_seg *) wqe)->addr = - cpu_to_be64(wr->sg_list[i].addr); - wqe += sizeof (struct mthca_data_seg); + mthca_set_data_seg(wqe, wr->sg_list + i); + wqe += sizeof (struct mthca_data_seg); size += sizeof (struct mthca_data_seg) / 16; } @@ -1768,11 +1781,11 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, mthca_opcode[wr->opcode]); wmb(); ((struct mthca_next_seg *) prev_wqe)->ee_nds = - cpu_to_be32((size0 ? 0 : MTHCA_NEXT_DBD) | size | + cpu_to_be32((nreq ? 0 : MTHCA_NEXT_DBD) | size | ((wr->send_flags & IB_SEND_FENCE) ? MTHCA_NEXT_FENCE : 0)); - if (!size0) { + if (!nreq) { size0 = size; op0 = mthca_opcode[wr->opcode]; f0 = wr->send_flags & IB_SEND_FENCE ? @@ -1822,7 +1835,14 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, int nreq; int i; int size; - int size0 = 0; + /* + * size0 is only used if nreq != 0, and it will always be + * initialized the first time through the main loop before + * nreq is incremented. So nreq cannot become non-zero + * without initializing size0, and it is in fact never used + * uninitialized. + */ + int uninitialized_var(size0); int ind; void *wqe; void *prev_wqe; @@ -1863,13 +1883,8 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, } for (i = 0; i < wr->num_sge; ++i) { - ((struct mthca_data_seg *) wqe)->byte_count = - cpu_to_be32(wr->sg_list[i].length); - ((struct mthca_data_seg *) wqe)->lkey = - cpu_to_be32(wr->sg_list[i].lkey); - ((struct mthca_data_seg *) wqe)->addr = - cpu_to_be64(wr->sg_list[i].addr); - wqe += sizeof (struct mthca_data_seg); + mthca_set_data_seg(wqe, wr->sg_list + i); + wqe += sizeof (struct mthca_data_seg); size += sizeof (struct mthca_data_seg) / 16; } @@ -1881,7 +1896,7 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, ((struct mthca_next_seg *) prev_wqe)->ee_nds = cpu_to_be32(MTHCA_NEXT_DBD | size); - if (!size0) + if (!nreq) size0 = size; ++ind; @@ -1903,7 +1918,6 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, qp->rq.next_ind = ind; qp->rq.head += MTHCA_TAVOR_MAX_WQES_PER_RECV_DB; - size0 = 0; } } @@ -1945,8 +1959,15 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, int nreq; int i; int size; - int size0 = 0; - u32 f0 = 0; + /* + * f0 and size0 are only used if nreq != 0, and they will + * always be initialized the first time through the main loop + * before nreq is incremented. So nreq cannot become non-zero + * without initializing f0 and size0, and they are in fact + * never used uninitialized. + */ + int uninitialized_var(size0); + u32 uninitialized_var(f0); int ind; u8 op0 = 0; @@ -1966,7 +1987,6 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, doorbell[1] = cpu_to_be32((qp->qpn << 8) | size0); qp->sq.head += MTHCA_ARBEL_MAX_WQES_PER_SEND_DB; - size0 = 0; /* * Make sure that descriptors are written before @@ -2017,26 +2037,12 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, switch (wr->opcode) { case IB_WR_ATOMIC_CMP_AND_SWP: case IB_WR_ATOMIC_FETCH_AND_ADD: - ((struct mthca_raddr_seg *) wqe)->raddr = - cpu_to_be64(wr->wr.atomic.remote_addr); - ((struct mthca_raddr_seg *) wqe)->rkey = - cpu_to_be32(wr->wr.atomic.rkey); - ((struct mthca_raddr_seg *) wqe)->reserved = 0; - + set_raddr_seg(wqe, wr->wr.atomic.remote_addr, + wr->wr.atomic.rkey); wqe += sizeof (struct mthca_raddr_seg); - if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) { - ((struct mthca_atomic_seg *) wqe)->swap_add = - cpu_to_be64(wr->wr.atomic.swap); - ((struct mthca_atomic_seg *) wqe)->compare = - cpu_to_be64(wr->wr.atomic.compare_add); - } else { - ((struct mthca_atomic_seg *) wqe)->swap_add = - cpu_to_be64(wr->wr.atomic.compare_add); - ((struct mthca_atomic_seg *) wqe)->compare = 0; - } - - wqe += sizeof (struct mthca_atomic_seg); + set_atomic_seg(wqe, wr); + wqe += sizeof (struct mthca_atomic_seg); size += (sizeof (struct mthca_raddr_seg) + sizeof (struct mthca_atomic_seg)) / 16; break; @@ -2044,12 +2050,9 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, case IB_WR_RDMA_READ: case IB_WR_RDMA_WRITE: case IB_WR_RDMA_WRITE_WITH_IMM: - ((struct mthca_raddr_seg *) wqe)->raddr = - cpu_to_be64(wr->wr.rdma.remote_addr); - ((struct mthca_raddr_seg *) wqe)->rkey = - cpu_to_be32(wr->wr.rdma.rkey); - ((struct mthca_raddr_seg *) wqe)->reserved = 0; - wqe += sizeof (struct mthca_raddr_seg); + set_raddr_seg(wqe, wr->wr.rdma.remote_addr, + wr->wr.rdma.rkey); + wqe += sizeof (struct mthca_raddr_seg); size += sizeof (struct mthca_raddr_seg) / 16; break; @@ -2064,12 +2067,9 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, switch (wr->opcode) { case IB_WR_RDMA_WRITE: case IB_WR_RDMA_WRITE_WITH_IMM: - ((struct mthca_raddr_seg *) wqe)->raddr = - cpu_to_be64(wr->wr.rdma.remote_addr); - ((struct mthca_raddr_seg *) wqe)->rkey = - cpu_to_be32(wr->wr.rdma.rkey); - ((struct mthca_raddr_seg *) wqe)->reserved = 0; - wqe += sizeof (struct mthca_raddr_seg); + set_raddr_seg(wqe, wr->wr.rdma.remote_addr, + wr->wr.rdma.rkey); + wqe += sizeof (struct mthca_raddr_seg); size += sizeof (struct mthca_raddr_seg) / 16; break; @@ -2081,14 +2081,8 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, break; case UD: - memcpy(((struct mthca_arbel_ud_seg *) wqe)->av, - to_mah(wr->wr.ud.ah)->av, MTHCA_AV_SIZE); - ((struct mthca_arbel_ud_seg *) wqe)->dqpn = - cpu_to_be32(wr->wr.ud.remote_qpn); - ((struct mthca_arbel_ud_seg *) wqe)->qkey = - cpu_to_be32(wr->wr.ud.remote_qkey); - - wqe += sizeof (struct mthca_arbel_ud_seg); + set_arbel_ud_seg(wqe, wr); + wqe += sizeof (struct mthca_arbel_ud_seg); size += sizeof (struct mthca_arbel_ud_seg) / 16; break; @@ -2113,13 +2107,8 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, } for (i = 0; i < wr->num_sge; ++i) { - ((struct mthca_data_seg *) wqe)->byte_count = - cpu_to_be32(wr->sg_list[i].length); - ((struct mthca_data_seg *) wqe)->lkey = - cpu_to_be32(wr->sg_list[i].lkey); - ((struct mthca_data_seg *) wqe)->addr = - cpu_to_be64(wr->sg_list[i].addr); - wqe += sizeof (struct mthca_data_seg); + mthca_set_data_seg(wqe, wr->sg_list + i); + wqe += sizeof (struct mthca_data_seg); size += sizeof (struct mthca_data_seg) / 16; } @@ -2151,7 +2140,7 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, ((wr->send_flags & IB_SEND_FENCE) ? MTHCA_NEXT_FENCE : 0)); - if (!size0) { + if (!nreq) { size0 = size; op0 = mthca_opcode[wr->opcode]; f0 = wr->send_flags & IB_SEND_FENCE ? @@ -2241,20 +2230,12 @@ int mthca_arbel_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, } for (i = 0; i < wr->num_sge; ++i) { - ((struct mthca_data_seg *) wqe)->byte_count = - cpu_to_be32(wr->sg_list[i].length); - ((struct mthca_data_seg *) wqe)->lkey = - cpu_to_be32(wr->sg_list[i].lkey); - ((struct mthca_data_seg *) wqe)->addr = - cpu_to_be64(wr->sg_list[i].addr); + mthca_set_data_seg(wqe, wr->sg_list + i); wqe += sizeof (struct mthca_data_seg); } - if (i < qp->rq.max_gs) { - ((struct mthca_data_seg *) wqe)->byte_count = 0; - ((struct mthca_data_seg *) wqe)->lkey = cpu_to_be32(MTHCA_INVAL_LKEY); - ((struct mthca_data_seg *) wqe)->addr = 0; - } + if (i < qp->rq.max_gs) + mthca_set_data_seg_inval(wqe); qp->wrid[ind] = wr->wr_id; diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c index b8f05a526673..88d219e730ad 100644 --- a/drivers/infiniband/hw/mthca/mthca_srq.c +++ b/drivers/infiniband/hw/mthca/mthca_srq.c @@ -543,20 +543,12 @@ int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, } for (i = 0; i < wr->num_sge; ++i) { - ((struct mthca_data_seg *) wqe)->byte_count = - cpu_to_be32(wr->sg_list[i].length); - ((struct mthca_data_seg *) wqe)->lkey = - cpu_to_be32(wr->sg_list[i].lkey); - ((struct mthca_data_seg *) wqe)->addr = - cpu_to_be64(wr->sg_list[i].addr); + mthca_set_data_seg(wqe, wr->sg_list + i); wqe += sizeof (struct mthca_data_seg); } - if (i < srq->max_gs) { - ((struct mthca_data_seg *) wqe)->byte_count = 0; - ((struct mthca_data_seg *) wqe)->lkey = cpu_to_be32(MTHCA_INVAL_LKEY); - ((struct mthca_data_seg *) wqe)->addr = 0; - } + if (i < srq->max_gs) + mthca_set_data_seg_inval(wqe); ((struct mthca_next_seg *) prev_wqe)->nda_op = cpu_to_be32((ind << srq->wqe_shift) | 1); @@ -662,20 +654,12 @@ int mthca_arbel_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, } for (i = 0; i < wr->num_sge; ++i) { - ((struct mthca_data_seg *) wqe)->byte_count = - cpu_to_be32(wr->sg_list[i].length); - ((struct mthca_data_seg *) wqe)->lkey = - cpu_to_be32(wr->sg_list[i].lkey); - ((struct mthca_data_seg *) wqe)->addr = - cpu_to_be64(wr->sg_list[i].addr); + mthca_set_data_seg(wqe, wr->sg_list + i); wqe += sizeof (struct mthca_data_seg); } - if (i < srq->max_gs) { - ((struct mthca_data_seg *) wqe)->byte_count = 0; - ((struct mthca_data_seg *) wqe)->lkey = cpu_to_be32(MTHCA_INVAL_LKEY); - ((struct mthca_data_seg *) wqe)->addr = 0; - } + if (i < srq->max_gs) + mthca_set_data_seg_inval(wqe); srq->wrid[ind] = wr->wr_id; srq->first_free = next_ind; diff --git a/drivers/infiniband/hw/mthca/mthca_wqe.h b/drivers/infiniband/hw/mthca/mthca_wqe.h index e7d2c1e86199..f6a66fe78e48 100644 --- a/drivers/infiniband/hw/mthca/mthca_wqe.h +++ b/drivers/infiniband/hw/mthca/mthca_wqe.h @@ -113,4 +113,19 @@ struct mthca_mlx_seg { __be16 vcrc; }; +static __always_inline void mthca_set_data_seg(struct mthca_data_seg *dseg, + struct ib_sge *sg) +{ + dseg->byte_count = cpu_to_be32(sg->length); + dseg->lkey = cpu_to_be32(sg->lkey); + dseg->addr = cpu_to_be64(sg->addr); +} + +static __always_inline void mthca_set_data_seg_inval(struct mthca_data_seg *dseg) +{ + dseg->byte_count = 0; + dseg->lkey = cpu_to_be32(MTHCA_INVAL_LKEY); + dseg->addr = 0; +} + #endif /* MTHCA_WQE_H */ diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index e2353701e8bb..1ee867b1b341 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -310,8 +310,6 @@ int iser_conn_init(struct iser_conn **ib_conn); void iser_conn_terminate(struct iser_conn *ib_conn); -void iser_conn_release(struct iser_conn *ib_conn); - void iser_rcv_completion(struct iser_desc *desc, unsigned long dto_xfer_len); @@ -329,9 +327,6 @@ void iser_reg_single(struct iser_device *device, struct iser_regd_buf *regd_buf, enum dma_data_direction direction); -int iser_start_rdma_unaligned_sg(struct iscsi_iser_cmd_task *ctask, - enum iser_data_dir cmd_dir); - void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_cmd_task *ctask, enum iser_data_dir cmd_dir); diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index fc9f1fd0ae54..36cdf77ae92a 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -103,8 +103,8 @@ void iser_reg_single(struct iser_device *device, /** * iser_start_rdma_unaligned_sg */ -int iser_start_rdma_unaligned_sg(struct iscsi_iser_cmd_task *iser_ctask, - enum iser_data_dir cmd_dir) +static int iser_start_rdma_unaligned_sg(struct iscsi_iser_cmd_task *iser_ctask, + enum iser_data_dir cmd_dir) { int dma_nents; struct ib_device *dev; diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 2044de1164ac..d42ec0156eec 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -311,6 +311,29 @@ static int iser_conn_state_comp_exch(struct iser_conn *ib_conn, } /** + * Frees all conn objects and deallocs conn descriptor + */ +static void iser_conn_release(struct iser_conn *ib_conn) +{ + struct iser_device *device = ib_conn->device; + + BUG_ON(ib_conn->state != ISER_CONN_DOWN); + + mutex_lock(&ig.connlist_mutex); + list_del(&ib_conn->conn_list); + mutex_unlock(&ig.connlist_mutex); + + iser_free_ib_conn_res(ib_conn); + ib_conn->device = NULL; + /* on EVENT_ADDR_ERROR there's no device yet for this conn */ + if (device != NULL) + iser_device_try_release(device); + if (ib_conn->iser_conn) + ib_conn->iser_conn->ib_conn = NULL; + kfree(ib_conn); +} + +/** * triggers start of the disconnect procedures and wait for them to be done */ void iser_conn_terminate(struct iser_conn *ib_conn) @@ -550,30 +573,6 @@ connect_failure: } /** - * Frees all conn objects and deallocs conn descriptor - */ -void iser_conn_release(struct iser_conn *ib_conn) -{ - struct iser_device *device = ib_conn->device; - - BUG_ON(ib_conn->state != ISER_CONN_DOWN); - - mutex_lock(&ig.connlist_mutex); - list_del(&ib_conn->conn_list); - mutex_unlock(&ig.connlist_mutex); - - iser_free_ib_conn_res(ib_conn); - ib_conn->device = NULL; - /* on EVENT_ADDR_ERROR there's no device yet for this conn */ - if (device != NULL) - iser_device_try_release(device); - if (ib_conn->iser_conn) - ib_conn->iser_conn->ib_conn = NULL; - kfree(ib_conn); -} - - -/** * iser_reg_page_vec - Register physical memory * * returns: 0 on success, errno code on failure diff --git a/drivers/input/serio/ambakmi.c b/drivers/input/serio/ambakmi.c index 5a7b49c35539..b10ffae7c39b 100644 --- a/drivers/input/serio/ambakmi.c +++ b/drivers/input/serio/ambakmi.c @@ -117,15 +117,13 @@ static int amba_kmi_probe(struct amba_device *dev, void *id) if (ret) return ret; - kmi = kmalloc(sizeof(struct amba_kmi_port), GFP_KERNEL); - io = kmalloc(sizeof(struct serio), GFP_KERNEL); + kmi = kzalloc(sizeof(struct amba_kmi_port), GFP_KERNEL); + io = kzalloc(sizeof(struct serio), GFP_KERNEL); if (!kmi || !io) { ret = -ENOMEM; goto out; } - memset(kmi, 0, sizeof(struct amba_kmi_port)); - memset(io, 0, sizeof(struct serio)); io->id.type = SERIO_8042; io->write = amba_kmi_write; diff --git a/drivers/input/serio/pcips2.c b/drivers/input/serio/pcips2.c index ea5e3c6ddb62..1b404f9e3bff 100644 --- a/drivers/input/serio/pcips2.c +++ b/drivers/input/serio/pcips2.c @@ -140,15 +140,13 @@ static int __devinit pcips2_probe(struct pci_dev *dev, const struct pci_device_i if (ret) goto disable; - ps2if = kmalloc(sizeof(struct pcips2_data), GFP_KERNEL); - serio = kmalloc(sizeof(struct serio), GFP_KERNEL); + ps2if = kzalloc(sizeof(struct pcips2_data), GFP_KERNEL); + serio = kzalloc(sizeof(struct serio), GFP_KERNEL); if (!ps2if || !serio) { ret = -ENOMEM; goto release; } - memset(ps2if, 0, sizeof(struct pcips2_data)); - memset(serio, 0, sizeof(struct serio)); serio->id.type = SERIO_8042; serio->write = pcips2_write; diff --git a/drivers/input/serio/sa1111ps2.c b/drivers/input/serio/sa1111ps2.c index d31ece8f68e9..2ad88780a170 100644 --- a/drivers/input/serio/sa1111ps2.c +++ b/drivers/input/serio/sa1111ps2.c @@ -234,15 +234,13 @@ static int __devinit ps2_probe(struct sa1111_dev *dev) struct serio *serio; int ret; - ps2if = kmalloc(sizeof(struct ps2if), GFP_KERNEL); - serio = kmalloc(sizeof(struct serio), GFP_KERNEL); + ps2if = kzalloc(sizeof(struct ps2if), GFP_KERNEL); + serio = kzalloc(sizeof(struct serio), GFP_KERNEL); if (!ps2if || !serio) { ret = -ENOMEM; goto free; } - memset(ps2if, 0, sizeof(struct ps2if)); - memset(serio, 0, sizeof(struct serio)); serio->id.type = SERIO_8042; serio->write = ps2_write; diff --git a/drivers/isdn/hisax/config.c b/drivers/isdn/hisax/config.c index 5f7907e57090..97097ef3491e 100644 --- a/drivers/isdn/hisax/config.c +++ b/drivers/isdn/hisax/config.c @@ -1146,14 +1146,12 @@ static int hisax_cs_setup(int cardnr, struct IsdnCard *card, } if (ret) { closecard(cardnr); - ret = 0; goto outf_cs; } init_tei(cs, cs->protocol); ret = CallcNewChan(cs); if (ret) { closecard(cardnr); - ret = 0; goto outf_cs; } /* ISAR needs firmware download first */ @@ -1165,7 +1163,7 @@ static int hisax_cs_setup(int cardnr, struct IsdnCard *card, outf_cs: kfree(cs); card->cs = NULL; - return ret; + return 0; } static int checkcard(int cardnr, char *id, int *busy_flag, struct module *lockowner) diff --git a/drivers/isdn/sc/card.h b/drivers/isdn/sc/card.h index 4fbfa825c3a2..5992f63c383e 100644 --- a/drivers/isdn/sc/card.h +++ b/drivers/isdn/sc/card.h @@ -125,7 +125,7 @@ int sendmessage(int card, unsigned int procid, unsigned int type, int receivemessage(int card, RspMessage *rspmsg); int sc_ioctl(int card, scs_ioctl *data); int setup_buffers(int card, int c); -void check_reset(unsigned long data); +void sc_check_reset(unsigned long data); void check_phystat(unsigned long data); #endif /* CARD_H */ diff --git a/drivers/isdn/sc/command.c b/drivers/isdn/sc/command.c index b7bb7cbcf503..0e4969c2ef95 100644 --- a/drivers/isdn/sc/command.c +++ b/drivers/isdn/sc/command.c @@ -344,7 +344,7 @@ int reset(int card) spin_lock_irqsave(&sc_adapter[card]->lock, flags); init_timer(&sc_adapter[card]->reset_timer); - sc_adapter[card]->reset_timer.function = check_reset; + sc_adapter[card]->reset_timer.function = sc_check_reset; sc_adapter[card]->reset_timer.data = card; sc_adapter[card]->reset_timer.expires = jiffies + CHECKRESET_TIME; add_timer(&sc_adapter[card]->reset_timer); diff --git a/drivers/isdn/sc/timer.c b/drivers/isdn/sc/timer.c index cc1b8861be2a..91fbe0dc28ec 100644 --- a/drivers/isdn/sc/timer.c +++ b/drivers/isdn/sc/timer.c @@ -43,7 +43,7 @@ static void setup_ports(int card) * Then, check to see if the signate has been set. Next, set the * signature to a known value and issue a startproc if needed. */ -void check_reset(unsigned long data) +void sc_check_reset(unsigned long data) { unsigned long flags; unsigned long sig; diff --git a/drivers/lguest/Kconfig b/drivers/lguest/Kconfig new file mode 100644 index 000000000000..43d901fdc77f --- /dev/null +++ b/drivers/lguest/Kconfig @@ -0,0 +1,20 @@ +config LGUEST + tristate "Linux hypervisor example code" + depends on X86 && PARAVIRT && NET && EXPERIMENTAL && !X86_PAE + select LGUEST_GUEST + select HVC_DRIVER + ---help--- + This is a very simple module which allows you to run + multiple instances of the same Linux kernel, using the + "lguest" command found in the Documentation/lguest directory. + Note that "lguest" is pronounced to rhyme with "fell quest", + not "rustyvisor". See Documentation/lguest/lguest.txt. + + If unsure, say N. If curious, say M. If masochistic, say Y. + +config LGUEST_GUEST + bool + help + The guest needs code built-in, even if the host has lguest + support as a module. The drivers are tiny, so we build them + in too. diff --git a/drivers/lguest/Makefile b/drivers/lguest/Makefile new file mode 100644 index 000000000000..55382c7d799c --- /dev/null +++ b/drivers/lguest/Makefile @@ -0,0 +1,7 @@ +# Guest requires the paravirt_ops replacement and the bus driver. +obj-$(CONFIG_LGUEST_GUEST) += lguest.o lguest_asm.o lguest_bus.o + +# Host requires the other files, which can be a module. +obj-$(CONFIG_LGUEST) += lg.o +lg-y := core.o hypercalls.o page_tables.o interrupts_and_traps.o \ + segments.o io.o lguest_user.o switcher.o diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c new file mode 100644 index 000000000000..ce909ec57499 --- /dev/null +++ b/drivers/lguest/core.c @@ -0,0 +1,462 @@ +/* World's simplest hypervisor, to test paravirt_ops and show + * unbelievers that virtualization is the future. Plus, it's fun! */ +#include <linux/module.h> +#include <linux/stringify.h> +#include <linux/stddef.h> +#include <linux/io.h> +#include <linux/mm.h> +#include <linux/vmalloc.h> +#include <linux/cpu.h> +#include <linux/freezer.h> +#include <asm/paravirt.h> +#include <asm/desc.h> +#include <asm/pgtable.h> +#include <asm/uaccess.h> +#include <asm/poll.h> +#include <asm/highmem.h> +#include <asm/asm-offsets.h> +#include <asm/i387.h> +#include "lg.h" + +/* Found in switcher.S */ +extern char start_switcher_text[], end_switcher_text[], switch_to_guest[]; +extern unsigned long default_idt_entries[]; + +/* Every guest maps the core switcher code. */ +#define SHARED_SWITCHER_PAGES \ + DIV_ROUND_UP(end_switcher_text - start_switcher_text, PAGE_SIZE) +/* Pages for switcher itself, then two pages per cpu */ +#define TOTAL_SWITCHER_PAGES (SHARED_SWITCHER_PAGES + 2 * NR_CPUS) + +/* We map at -4M for ease of mapping into the guest (one PTE page). */ +#define SWITCHER_ADDR 0xFFC00000 + +static struct vm_struct *switcher_vma; +static struct page **switcher_page; + +static int cpu_had_pge; +static struct { + unsigned long offset; + unsigned short segment; +} lguest_entry; + +/* This One Big lock protects all inter-guest data structures. */ +DEFINE_MUTEX(lguest_lock); +static DEFINE_PER_CPU(struct lguest *, last_guest); + +/* FIXME: Make dynamic. */ +#define MAX_LGUEST_GUESTS 16 +struct lguest lguests[MAX_LGUEST_GUESTS]; + +/* Offset from where switcher.S was compiled to where we've copied it */ +static unsigned long switcher_offset(void) +{ + return SWITCHER_ADDR - (unsigned long)start_switcher_text; +} + +/* This cpu's struct lguest_pages. */ +static struct lguest_pages *lguest_pages(unsigned int cpu) +{ + return &(((struct lguest_pages *) + (SWITCHER_ADDR + SHARED_SWITCHER_PAGES*PAGE_SIZE))[cpu]); +} + +static __init int map_switcher(void) +{ + int i, err; + struct page **pagep; + + switcher_page = kmalloc(sizeof(switcher_page[0])*TOTAL_SWITCHER_PAGES, + GFP_KERNEL); + if (!switcher_page) { + err = -ENOMEM; + goto out; + } + + for (i = 0; i < TOTAL_SWITCHER_PAGES; i++) { + unsigned long addr = get_zeroed_page(GFP_KERNEL); + if (!addr) { + err = -ENOMEM; + goto free_some_pages; + } + switcher_page[i] = virt_to_page(addr); + } + + switcher_vma = __get_vm_area(TOTAL_SWITCHER_PAGES * PAGE_SIZE, + VM_ALLOC, SWITCHER_ADDR, VMALLOC_END); + if (!switcher_vma) { + err = -ENOMEM; + printk("lguest: could not map switcher pages high\n"); + goto free_pages; + } + + pagep = switcher_page; + err = map_vm_area(switcher_vma, PAGE_KERNEL, &pagep); + if (err) { + printk("lguest: map_vm_area failed: %i\n", err); + goto free_vma; + } + memcpy(switcher_vma->addr, start_switcher_text, + end_switcher_text - start_switcher_text); + + /* Fix up IDT entries to point into copied text. */ + for (i = 0; i < IDT_ENTRIES; i++) + default_idt_entries[i] += switcher_offset(); + + for_each_possible_cpu(i) { + struct lguest_pages *pages = lguest_pages(i); + struct lguest_ro_state *state = &pages->state; + + /* These fields are static: rest done in copy_in_guest_info */ + state->host_gdt_desc.size = GDT_SIZE-1; + state->host_gdt_desc.address = (long)get_cpu_gdt_table(i); + store_idt(&state->host_idt_desc); + state->guest_idt_desc.size = sizeof(state->guest_idt)-1; + state->guest_idt_desc.address = (long)&state->guest_idt; + state->guest_gdt_desc.size = sizeof(state->guest_gdt)-1; + state->guest_gdt_desc.address = (long)&state->guest_gdt; + state->guest_tss.esp0 = (long)(&pages->regs + 1); + state->guest_tss.ss0 = LGUEST_DS; + /* No I/O for you! */ + state->guest_tss.io_bitmap_base = sizeof(state->guest_tss); + setup_default_gdt_entries(state); + setup_default_idt_entries(state, default_idt_entries); + + /* Setup LGUEST segments on all cpus */ + get_cpu_gdt_table(i)[GDT_ENTRY_LGUEST_CS] = FULL_EXEC_SEGMENT; + get_cpu_gdt_table(i)[GDT_ENTRY_LGUEST_DS] = FULL_SEGMENT; + } + + /* Initialize entry point into switcher. */ + lguest_entry.offset = (long)switch_to_guest + switcher_offset(); + lguest_entry.segment = LGUEST_CS; + + printk(KERN_INFO "lguest: mapped switcher at %p\n", + switcher_vma->addr); + return 0; + +free_vma: + vunmap(switcher_vma->addr); +free_pages: + i = TOTAL_SWITCHER_PAGES; +free_some_pages: + for (--i; i >= 0; i--) + __free_pages(switcher_page[i], 0); + kfree(switcher_page); +out: + return err; +} + +static void unmap_switcher(void) +{ + unsigned int i; + + vunmap(switcher_vma->addr); + for (i = 0; i < TOTAL_SWITCHER_PAGES; i++) + __free_pages(switcher_page[i], 0); +} + +/* IN/OUT insns: enough to get us past boot-time probing. */ +static int emulate_insn(struct lguest *lg) +{ + u8 insn; + unsigned int insnlen = 0, in = 0, shift = 0; + unsigned long physaddr = guest_pa(lg, lg->regs->eip); + + /* This only works for addresses in linear mapping... */ + if (lg->regs->eip < lg->page_offset) + return 0; + lgread(lg, &insn, physaddr, 1); + + /* Operand size prefix means it's actually for ax. */ + if (insn == 0x66) { + shift = 16; + insnlen = 1; + lgread(lg, &insn, physaddr + insnlen, 1); + } + + switch (insn & 0xFE) { + case 0xE4: /* in <next byte>,%al */ + insnlen += 2; + in = 1; + break; + case 0xEC: /* in (%dx),%al */ + insnlen += 1; + in = 1; + break; + case 0xE6: /* out %al,<next byte> */ + insnlen += 2; + break; + case 0xEE: /* out %al,(%dx) */ + insnlen += 1; + break; + default: + return 0; + } + + if (in) { + /* Lower bit tells is whether it's a 16 or 32 bit access */ + if (insn & 0x1) + lg->regs->eax = 0xFFFFFFFF; + else + lg->regs->eax |= (0xFFFF << shift); + } + lg->regs->eip += insnlen; + return 1; +} + +int lguest_address_ok(const struct lguest *lg, + unsigned long addr, unsigned long len) +{ + return (addr+len) / PAGE_SIZE < lg->pfn_limit && (addr+len >= addr); +} + +/* Just like get_user, but don't let guest access lguest binary. */ +u32 lgread_u32(struct lguest *lg, unsigned long addr) +{ + u32 val = 0; + + /* Don't let them access lguest binary */ + if (!lguest_address_ok(lg, addr, sizeof(val)) + || get_user(val, (u32 __user *)addr) != 0) + kill_guest(lg, "bad read address %#lx", addr); + return val; +} + +void lgwrite_u32(struct lguest *lg, unsigned long addr, u32 val) +{ + if (!lguest_address_ok(lg, addr, sizeof(val)) + || put_user(val, (u32 __user *)addr) != 0) + kill_guest(lg, "bad write address %#lx", addr); +} + +void lgread(struct lguest *lg, void *b, unsigned long addr, unsigned bytes) +{ + if (!lguest_address_ok(lg, addr, bytes) + || copy_from_user(b, (void __user *)addr, bytes) != 0) { + /* copy_from_user should do this, but as we rely on it... */ + memset(b, 0, bytes); + kill_guest(lg, "bad read address %#lx len %u", addr, bytes); + } +} + +void lgwrite(struct lguest *lg, unsigned long addr, const void *b, + unsigned bytes) +{ + if (!lguest_address_ok(lg, addr, bytes) + || copy_to_user((void __user *)addr, b, bytes) != 0) + kill_guest(lg, "bad write address %#lx len %u", addr, bytes); +} + +static void set_ts(void) +{ + u32 cr0; + + cr0 = read_cr0(); + if (!(cr0 & 8)) + write_cr0(cr0|8); +} + +static void copy_in_guest_info(struct lguest *lg, struct lguest_pages *pages) +{ + if (__get_cpu_var(last_guest) != lg || lg->last_pages != pages) { + __get_cpu_var(last_guest) = lg; + lg->last_pages = pages; + lg->changed = CHANGED_ALL; + } + + /* These are pretty cheap, so we do them unconditionally. */ + pages->state.host_cr3 = __pa(current->mm->pgd); + map_switcher_in_guest(lg, pages); + pages->state.guest_tss.esp1 = lg->esp1; + pages->state.guest_tss.ss1 = lg->ss1; + + /* Copy direct trap entries. */ + if (lg->changed & CHANGED_IDT) + copy_traps(lg, pages->state.guest_idt, default_idt_entries); + + /* Copy all GDT entries but the TSS. */ + if (lg->changed & CHANGED_GDT) + copy_gdt(lg, pages->state.guest_gdt); + /* If only the TLS entries have changed, copy them. */ + else if (lg->changed & CHANGED_GDT_TLS) + copy_gdt_tls(lg, pages->state.guest_gdt); + + lg->changed = 0; +} + +static void run_guest_once(struct lguest *lg, struct lguest_pages *pages) +{ + unsigned int clobber; + + copy_in_guest_info(lg, pages); + + /* Put eflags on stack, lcall does rest: suitable for iret return. */ + asm volatile("pushf; lcall *lguest_entry" + : "=a"(clobber), "=b"(clobber) + : "0"(pages), "1"(__pa(lg->pgdirs[lg->pgdidx].pgdir)) + : "memory", "%edx", "%ecx", "%edi", "%esi"); +} + +int run_guest(struct lguest *lg, unsigned long __user *user) +{ + while (!lg->dead) { + unsigned int cr2 = 0; /* Damn gcc */ + + /* Hypercalls first: we might have been out to userspace */ + do_hypercalls(lg); + if (lg->dma_is_pending) { + if (put_user(lg->pending_dma, user) || + put_user(lg->pending_key, user+1)) + return -EFAULT; + return sizeof(unsigned long)*2; + } + + if (signal_pending(current)) + return -ERESTARTSYS; + + /* If Waker set break_out, return to Launcher. */ + if (lg->break_out) + return -EAGAIN; + + maybe_do_interrupt(lg); + + try_to_freeze(); + + if (lg->dead) + break; + + if (lg->halted) { + set_current_state(TASK_INTERRUPTIBLE); + schedule(); + continue; + } + + local_irq_disable(); + + /* Even if *we* don't want FPU trap, guest might... */ + if (lg->ts) + set_ts(); + + /* Don't let Guest do SYSENTER: we can't handle it. */ + if (boot_cpu_has(X86_FEATURE_SEP)) + wrmsr(MSR_IA32_SYSENTER_CS, 0, 0); + + run_guest_once(lg, lguest_pages(raw_smp_processor_id())); + + /* Save cr2 now if we page-faulted. */ + if (lg->regs->trapnum == 14) + cr2 = read_cr2(); + else if (lg->regs->trapnum == 7) + math_state_restore(); + + if (boot_cpu_has(X86_FEATURE_SEP)) + wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0); + local_irq_enable(); + + switch (lg->regs->trapnum) { + case 13: /* We've intercepted a GPF. */ + if (lg->regs->errcode == 0) { + if (emulate_insn(lg)) + continue; + } + break; + case 14: /* We've intercepted a page fault. */ + if (demand_page(lg, cr2, lg->regs->errcode)) + continue; + + /* If lguest_data is NULL, this won't hurt. */ + if (put_user(cr2, &lg->lguest_data->cr2)) + kill_guest(lg, "Writing cr2"); + break; + case 7: /* We've intercepted a Device Not Available fault. */ + /* If they don't want to know, just absorb it. */ + if (!lg->ts) + continue; + break; + case 32 ... 255: /* Real interrupt, fall thru */ + cond_resched(); + case LGUEST_TRAP_ENTRY: /* Handled at top of loop */ + continue; + } + + if (deliver_trap(lg, lg->regs->trapnum)) + continue; + + kill_guest(lg, "unhandled trap %li at %#lx (%#lx)", + lg->regs->trapnum, lg->regs->eip, + lg->regs->trapnum == 14 ? cr2 : lg->regs->errcode); + } + return -ENOENT; +} + +int find_free_guest(void) +{ + unsigned int i; + for (i = 0; i < MAX_LGUEST_GUESTS; i++) + if (!lguests[i].tsk) + return i; + return -1; +} + +static void adjust_pge(void *on) +{ + if (on) + write_cr4(read_cr4() | X86_CR4_PGE); + else + write_cr4(read_cr4() & ~X86_CR4_PGE); +} + +static int __init init(void) +{ + int err; + + if (paravirt_enabled()) { + printk("lguest is afraid of %s\n", paravirt_ops.name); + return -EPERM; + } + + err = map_switcher(); + if (err) + return err; + + err = init_pagetables(switcher_page, SHARED_SWITCHER_PAGES); + if (err) { + unmap_switcher(); + return err; + } + lguest_io_init(); + + err = lguest_device_init(); + if (err) { + free_pagetables(); + unmap_switcher(); + return err; + } + lock_cpu_hotplug(); + if (cpu_has_pge) { /* We have a broader idea of "global". */ + cpu_had_pge = 1; + on_each_cpu(adjust_pge, (void *)0, 0, 1); + clear_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability); + } + unlock_cpu_hotplug(); + return 0; +} + +static void __exit fini(void) +{ + lguest_device_remove(); + free_pagetables(); + unmap_switcher(); + lock_cpu_hotplug(); + if (cpu_had_pge) { + set_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability); + on_each_cpu(adjust_pge, (void *)1, 0, 1); + } + unlock_cpu_hotplug(); +} + +module_init(init); +module_exit(fini); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>"); diff --git a/drivers/lguest/hypercalls.c b/drivers/lguest/hypercalls.c new file mode 100644 index 000000000000..ea52ca451f74 --- /dev/null +++ b/drivers/lguest/hypercalls.c @@ -0,0 +1,192 @@ +/* Actual hypercalls, which allow guests to actually do something. + Copyright (C) 2006 Rusty Russell IBM Corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +*/ +#include <linux/uaccess.h> +#include <linux/syscalls.h> +#include <linux/mm.h> +#include <asm/page.h> +#include <asm/pgtable.h> +#include <irq_vectors.h> +#include "lg.h" + +static void do_hcall(struct lguest *lg, struct lguest_regs *regs) +{ + switch (regs->eax) { + case LHCALL_FLUSH_ASYNC: + break; + case LHCALL_LGUEST_INIT: + kill_guest(lg, "already have lguest_data"); + break; + case LHCALL_CRASH: { + char msg[128]; + lgread(lg, msg, regs->edx, sizeof(msg)); + msg[sizeof(msg)-1] = '\0'; + kill_guest(lg, "CRASH: %s", msg); + break; + } + case LHCALL_FLUSH_TLB: + if (regs->edx) + guest_pagetable_clear_all(lg); + else + guest_pagetable_flush_user(lg); + break; + case LHCALL_GET_WALLCLOCK: { + struct timespec ts; + ktime_get_real_ts(&ts); + regs->eax = ts.tv_sec; + break; + } + case LHCALL_BIND_DMA: + regs->eax = bind_dma(lg, regs->edx, regs->ebx, + regs->ecx >> 8, regs->ecx & 0xFF); + break; + case LHCALL_SEND_DMA: + send_dma(lg, regs->edx, regs->ebx); + break; + case LHCALL_LOAD_GDT: + load_guest_gdt(lg, regs->edx, regs->ebx); + break; + case LHCALL_LOAD_IDT_ENTRY: + load_guest_idt_entry(lg, regs->edx, regs->ebx, regs->ecx); + break; + case LHCALL_NEW_PGTABLE: + guest_new_pagetable(lg, regs->edx); + break; + case LHCALL_SET_STACK: + guest_set_stack(lg, regs->edx, regs->ebx, regs->ecx); + break; + case LHCALL_SET_PTE: + guest_set_pte(lg, regs->edx, regs->ebx, mkgpte(regs->ecx)); + break; + case LHCALL_SET_PMD: + guest_set_pmd(lg, regs->edx, regs->ebx); + break; + case LHCALL_LOAD_TLS: + guest_load_tls(lg, regs->edx); + break; + case LHCALL_SET_CLOCKEVENT: + guest_set_clockevent(lg, regs->edx); + break; + case LHCALL_TS: + lg->ts = regs->edx; + break; + case LHCALL_HALT: + lg->halted = 1; + break; + default: + kill_guest(lg, "Bad hypercall %li\n", regs->eax); + } +} + +/* We always do queued calls before actual hypercall. */ +static void do_async_hcalls(struct lguest *lg) +{ + unsigned int i; + u8 st[LHCALL_RING_SIZE]; + + if (copy_from_user(&st, &lg->lguest_data->hcall_status, sizeof(st))) + return; + + for (i = 0; i < ARRAY_SIZE(st); i++) { + struct lguest_regs regs; + unsigned int n = lg->next_hcall; + + if (st[n] == 0xFF) + break; + + if (++lg->next_hcall == LHCALL_RING_SIZE) + lg->next_hcall = 0; + + if (get_user(regs.eax, &lg->lguest_data->hcalls[n].eax) + || get_user(regs.edx, &lg->lguest_data->hcalls[n].edx) + || get_user(regs.ecx, &lg->lguest_data->hcalls[n].ecx) + || get_user(regs.ebx, &lg->lguest_data->hcalls[n].ebx)) { + kill_guest(lg, "Fetching async hypercalls"); + break; + } + + do_hcall(lg, ®s); + if (put_user(0xFF, &lg->lguest_data->hcall_status[n])) { + kill_guest(lg, "Writing result for async hypercall"); + break; + } + + if (lg->dma_is_pending) + break; + } +} + +static void initialize(struct lguest *lg) +{ + u32 tsc_speed; + + if (lg->regs->eax != LHCALL_LGUEST_INIT) { + kill_guest(lg, "hypercall %li before LGUEST_INIT", + lg->regs->eax); + return; + } + + /* We only tell the guest to use the TSC if it's reliable. */ + if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC) && !check_tsc_unstable()) + tsc_speed = tsc_khz; + else + tsc_speed = 0; + + lg->lguest_data = (struct lguest_data __user *)lg->regs->edx; + /* We check here so we can simply copy_to_user/from_user */ + if (!lguest_address_ok(lg, lg->regs->edx, sizeof(*lg->lguest_data))) { + kill_guest(lg, "bad guest page %p", lg->lguest_data); + return; + } + if (get_user(lg->noirq_start, &lg->lguest_data->noirq_start) + || get_user(lg->noirq_end, &lg->lguest_data->noirq_end) + /* We reserve the top pgd entry. */ + || put_user(4U*1024*1024, &lg->lguest_data->reserve_mem) + || put_user(tsc_speed, &lg->lguest_data->tsc_khz) + || put_user(lg->guestid, &lg->lguest_data->guestid)) + kill_guest(lg, "bad guest page %p", lg->lguest_data); + + /* This is the one case where the above accesses might have + * been the first write to a Guest page. This may have caused + * a copy-on-write fault, but the Guest might be referring to + * the old (read-only) page. */ + guest_pagetable_clear_all(lg); +} + +/* Even if we go out to userspace and come back, we don't want to do + * the hypercall again. */ +static void clear_hcall(struct lguest *lg) +{ + lg->regs->trapnum = 255; +} + +void do_hypercalls(struct lguest *lg) +{ + if (unlikely(!lg->lguest_data)) { + if (lg->regs->trapnum == LGUEST_TRAP_ENTRY) { + initialize(lg); + clear_hcall(lg); + } + return; + } + + do_async_hcalls(lg); + if (!lg->dma_is_pending && lg->regs->trapnum == LGUEST_TRAP_ENTRY) { + do_hcall(lg, lg->regs); + clear_hcall(lg); + } +} diff --git a/drivers/lguest/interrupts_and_traps.c b/drivers/lguest/interrupts_and_traps.c new file mode 100644 index 000000000000..d9de5bbc613f --- /dev/null +++ b/drivers/lguest/interrupts_and_traps.c @@ -0,0 +1,268 @@ +#include <linux/uaccess.h> +#include "lg.h" + +static unsigned long idt_address(u32 lo, u32 hi) +{ + return (lo & 0x0000FFFF) | (hi & 0xFFFF0000); +} + +static int idt_type(u32 lo, u32 hi) +{ + return (hi >> 8) & 0xF; +} + +static int idt_present(u32 lo, u32 hi) +{ + return (hi & 0x8000); +} + +static void push_guest_stack(struct lguest *lg, unsigned long *gstack, u32 val) +{ + *gstack -= 4; + lgwrite_u32(lg, *gstack, val); +} + +static void set_guest_interrupt(struct lguest *lg, u32 lo, u32 hi, int has_err) +{ + unsigned long gstack; + u32 eflags, ss, irq_enable; + + /* If they want a ring change, we use new stack and push old ss/esp */ + if ((lg->regs->ss&0x3) != GUEST_PL) { + gstack = guest_pa(lg, lg->esp1); + ss = lg->ss1; + push_guest_stack(lg, &gstack, lg->regs->ss); + push_guest_stack(lg, &gstack, lg->regs->esp); + } else { + gstack = guest_pa(lg, lg->regs->esp); + ss = lg->regs->ss; + } + + /* We use IF bit in eflags to indicate whether irqs were disabled + (it's always 0, since irqs are enabled when guest is running). */ + eflags = lg->regs->eflags; + if (get_user(irq_enable, &lg->lguest_data->irq_enabled)) + irq_enable = 0; + eflags |= (irq_enable & X86_EFLAGS_IF); + + push_guest_stack(lg, &gstack, eflags); + push_guest_stack(lg, &gstack, lg->regs->cs); + push_guest_stack(lg, &gstack, lg->regs->eip); + + if (has_err) + push_guest_stack(lg, &gstack, lg->regs->errcode); + + /* Change the real stack so switcher returns to trap handler */ + lg->regs->ss = ss; + lg->regs->esp = gstack + lg->page_offset; + lg->regs->cs = (__KERNEL_CS|GUEST_PL); + lg->regs->eip = idt_address(lo, hi); + + /* Disable interrupts for an interrupt gate. */ + if (idt_type(lo, hi) == 0xE) + if (put_user(0, &lg->lguest_data->irq_enabled)) + kill_guest(lg, "Disabling interrupts"); +} + +void maybe_do_interrupt(struct lguest *lg) +{ + unsigned int irq; + DECLARE_BITMAP(blk, LGUEST_IRQS); + struct desc_struct *idt; + + if (!lg->lguest_data) + return; + + /* Mask out any interrupts they have blocked. */ + if (copy_from_user(&blk, lg->lguest_data->blocked_interrupts, + sizeof(blk))) + return; + + bitmap_andnot(blk, lg->irqs_pending, blk, LGUEST_IRQS); + + irq = find_first_bit(blk, LGUEST_IRQS); + if (irq >= LGUEST_IRQS) + return; + + if (lg->regs->eip >= lg->noirq_start && lg->regs->eip < lg->noirq_end) + return; + + /* If they're halted, we re-enable interrupts. */ + if (lg->halted) { + /* Re-enable interrupts. */ + if (put_user(X86_EFLAGS_IF, &lg->lguest_data->irq_enabled)) + kill_guest(lg, "Re-enabling interrupts"); + lg->halted = 0; + } else { + /* Maybe they have interrupts disabled? */ + u32 irq_enabled; + if (get_user(irq_enabled, &lg->lguest_data->irq_enabled)) + irq_enabled = 0; + if (!irq_enabled) + return; + } + + idt = &lg->idt[FIRST_EXTERNAL_VECTOR+irq]; + if (idt_present(idt->a, idt->b)) { + clear_bit(irq, lg->irqs_pending); + set_guest_interrupt(lg, idt->a, idt->b, 0); + } +} + +static int has_err(unsigned int trap) +{ + return (trap == 8 || (trap >= 10 && trap <= 14) || trap == 17); +} + +int deliver_trap(struct lguest *lg, unsigned int num) +{ + u32 lo = lg->idt[num].a, hi = lg->idt[num].b; + + if (!idt_present(lo, hi)) + return 0; + set_guest_interrupt(lg, lo, hi, has_err(num)); + return 1; +} + +static int direct_trap(const struct lguest *lg, + const struct desc_struct *trap, + unsigned int num) +{ + /* Hardware interrupts don't go to guest (except syscall). */ + if (num >= FIRST_EXTERNAL_VECTOR && num != SYSCALL_VECTOR) + return 0; + + /* We intercept page fault (demand shadow paging & cr2 saving) + protection fault (in/out emulation) and device not + available (TS handling), and hypercall */ + if (num == 14 || num == 13 || num == 7 || num == LGUEST_TRAP_ENTRY) + return 0; + + /* Interrupt gates (0xE) or not present (0x0) can't go direct. */ + return idt_type(trap->a, trap->b) == 0xF; +} + +void pin_stack_pages(struct lguest *lg) +{ + unsigned int i; + + for (i = 0; i < lg->stack_pages; i++) + pin_page(lg, lg->esp1 - i * PAGE_SIZE); +} + +void guest_set_stack(struct lguest *lg, u32 seg, u32 esp, unsigned int pages) +{ + /* You cannot have a stack segment with priv level 0. */ + if ((seg & 0x3) != GUEST_PL) + kill_guest(lg, "bad stack segment %i", seg); + if (pages > 2) + kill_guest(lg, "bad stack pages %u", pages); + lg->ss1 = seg; + lg->esp1 = esp; + lg->stack_pages = pages; + pin_stack_pages(lg); +} + +/* Set up trap in IDT. */ +static void set_trap(struct lguest *lg, struct desc_struct *trap, + unsigned int num, u32 lo, u32 hi) +{ + u8 type = idt_type(lo, hi); + + if (!idt_present(lo, hi)) { + trap->a = trap->b = 0; + return; + } + + if (type != 0xE && type != 0xF) + kill_guest(lg, "bad IDT type %i", type); + + trap->a = ((__KERNEL_CS|GUEST_PL)<<16) | (lo&0x0000FFFF); + trap->b = (hi&0xFFFFEF00); +} + +void load_guest_idt_entry(struct lguest *lg, unsigned int num, u32 lo, u32 hi) +{ + /* Guest never handles: NMI, doublefault, hypercall, spurious irq. */ + if (num == 2 || num == 8 || num == 15 || num == LGUEST_TRAP_ENTRY) + return; + + lg->changed |= CHANGED_IDT; + if (num < ARRAY_SIZE(lg->idt)) + set_trap(lg, &lg->idt[num], num, lo, hi); + else if (num == SYSCALL_VECTOR) + set_trap(lg, &lg->syscall_idt, num, lo, hi); +} + +static void default_idt_entry(struct desc_struct *idt, + int trap, + const unsigned long handler) +{ + u32 flags = 0x8e00; + + /* They can't "int" into any of them except hypercall. */ + if (trap == LGUEST_TRAP_ENTRY) + flags |= (GUEST_PL << 13); + + idt->a = (LGUEST_CS<<16) | (handler&0x0000FFFF); + idt->b = (handler&0xFFFF0000) | flags; +} + +void setup_default_idt_entries(struct lguest_ro_state *state, + const unsigned long *def) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(state->guest_idt); i++) + default_idt_entry(&state->guest_idt[i], i, def[i]); +} + +void copy_traps(const struct lguest *lg, struct desc_struct *idt, + const unsigned long *def) +{ + unsigned int i; + + /* All hardware interrupts are same whatever the guest: only the + * traps might be different. */ + for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++) { + if (direct_trap(lg, &lg->idt[i], i)) + idt[i] = lg->idt[i]; + else + default_idt_entry(&idt[i], i, def[i]); + } + i = SYSCALL_VECTOR; + if (direct_trap(lg, &lg->syscall_idt, i)) + idt[i] = lg->syscall_idt; + else + default_idt_entry(&idt[i], i, def[i]); +} + +void guest_set_clockevent(struct lguest *lg, unsigned long delta) +{ + ktime_t expires; + + if (unlikely(delta == 0)) { + /* Clock event device is shutting down. */ + hrtimer_cancel(&lg->hrt); + return; + } + + expires = ktime_add_ns(ktime_get_real(), delta); + hrtimer_start(&lg->hrt, expires, HRTIMER_MODE_ABS); +} + +static enum hrtimer_restart clockdev_fn(struct hrtimer *timer) +{ + struct lguest *lg = container_of(timer, struct lguest, hrt); + + set_bit(0, lg->irqs_pending); + if (lg->halted) + wake_up_process(lg->tsk); + return HRTIMER_NORESTART; +} + +void init_clockdev(struct lguest *lg) +{ + hrtimer_init(&lg->hrt, CLOCK_REALTIME, HRTIMER_MODE_ABS); + lg->hrt.function = clockdev_fn; +} diff --git a/drivers/lguest/io.c b/drivers/lguest/io.c new file mode 100644 index 000000000000..06bdba2337ef --- /dev/null +++ b/drivers/lguest/io.c @@ -0,0 +1,399 @@ +/* Simple I/O model for guests, based on shared memory. + * Copyright (C) 2006 Rusty Russell IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include <linux/types.h> +#include <linux/futex.h> +#include <linux/jhash.h> +#include <linux/mm.h> +#include <linux/highmem.h> +#include <linux/uaccess.h> +#include "lg.h" + +static struct list_head dma_hash[61]; + +void lguest_io_init(void) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(dma_hash); i++) + INIT_LIST_HEAD(&dma_hash[i]); +} + +/* FIXME: allow multi-page lengths. */ +static int check_dma_list(struct lguest *lg, const struct lguest_dma *dma) +{ + unsigned int i; + + for (i = 0; i < LGUEST_MAX_DMA_SECTIONS; i++) { + if (!dma->len[i]) + return 1; + if (!lguest_address_ok(lg, dma->addr[i], dma->len[i])) + goto kill; + if (dma->len[i] > PAGE_SIZE) + goto kill; + /* We could do over a page, but is it worth it? */ + if ((dma->addr[i] % PAGE_SIZE) + dma->len[i] > PAGE_SIZE) + goto kill; + } + return 1; + +kill: + kill_guest(lg, "bad DMA entry: %u@%#lx", dma->len[i], dma->addr[i]); + return 0; +} + +static unsigned int hash(const union futex_key *key) +{ + return jhash2((u32*)&key->both.word, + (sizeof(key->both.word)+sizeof(key->both.ptr))/4, + key->both.offset) + % ARRAY_SIZE(dma_hash); +} + +static inline int key_eq(const union futex_key *a, const union futex_key *b) +{ + return (a->both.word == b->both.word + && a->both.ptr == b->both.ptr + && a->both.offset == b->both.offset); +} + +/* Must hold read lock on dmainfo owner's current->mm->mmap_sem */ +static void unlink_dma(struct lguest_dma_info *dmainfo) +{ + BUG_ON(!mutex_is_locked(&lguest_lock)); + dmainfo->interrupt = 0; + list_del(&dmainfo->list); + drop_futex_key_refs(&dmainfo->key); +} + +static int unbind_dma(struct lguest *lg, + const union futex_key *key, + unsigned long dmas) +{ + int i, ret = 0; + + for (i = 0; i < LGUEST_MAX_DMA; i++) { + if (key_eq(key, &lg->dma[i].key) && dmas == lg->dma[i].dmas) { + unlink_dma(&lg->dma[i]); + ret = 1; + break; + } + } + return ret; +} + +int bind_dma(struct lguest *lg, + unsigned long ukey, unsigned long dmas, u16 numdmas, u8 interrupt) +{ + unsigned int i; + int ret = 0; + union futex_key key; + struct rw_semaphore *fshared = ¤t->mm->mmap_sem; + + if (interrupt >= LGUEST_IRQS) + return 0; + + mutex_lock(&lguest_lock); + down_read(fshared); + if (get_futex_key((u32 __user *)ukey, fshared, &key) != 0) { + kill_guest(lg, "bad dma key %#lx", ukey); + goto unlock; + } + get_futex_key_refs(&key); + + if (interrupt == 0) + ret = unbind_dma(lg, &key, dmas); + else { + for (i = 0; i < LGUEST_MAX_DMA; i++) { + if (lg->dma[i].interrupt) + continue; + + lg->dma[i].dmas = dmas; + lg->dma[i].num_dmas = numdmas; + lg->dma[i].next_dma = 0; + lg->dma[i].key = key; + lg->dma[i].guestid = lg->guestid; + lg->dma[i].interrupt = interrupt; + list_add(&lg->dma[i].list, &dma_hash[hash(&key)]); + ret = 1; + goto unlock; + } + } + drop_futex_key_refs(&key); +unlock: + up_read(fshared); + mutex_unlock(&lguest_lock); + return ret; +} + +/* lgread from another guest */ +static int lgread_other(struct lguest *lg, + void *buf, u32 addr, unsigned bytes) +{ + if (!lguest_address_ok(lg, addr, bytes) + || access_process_vm(lg->tsk, addr, buf, bytes, 0) != bytes) { + memset(buf, 0, bytes); + kill_guest(lg, "bad address in registered DMA struct"); + return 0; + } + return 1; +} + +/* lgwrite to another guest */ +static int lgwrite_other(struct lguest *lg, u32 addr, + const void *buf, unsigned bytes) +{ + if (!lguest_address_ok(lg, addr, bytes) + || (access_process_vm(lg->tsk, addr, (void *)buf, bytes, 1) + != bytes)) { + kill_guest(lg, "bad address writing to registered DMA"); + return 0; + } + return 1; +} + +static u32 copy_data(struct lguest *srclg, + const struct lguest_dma *src, + const struct lguest_dma *dst, + struct page *pages[]) +{ + unsigned int totlen, si, di, srcoff, dstoff; + void *maddr = NULL; + + totlen = 0; + si = di = 0; + srcoff = dstoff = 0; + while (si < LGUEST_MAX_DMA_SECTIONS && src->len[si] + && di < LGUEST_MAX_DMA_SECTIONS && dst->len[di]) { + u32 len = min(src->len[si] - srcoff, dst->len[di] - dstoff); + + if (!maddr) + maddr = kmap(pages[di]); + + /* FIXME: This is not completely portable, since + archs do different things for copy_to_user_page. */ + if (copy_from_user(maddr + (dst->addr[di] + dstoff)%PAGE_SIZE, + (void *__user)src->addr[si], len) != 0) { + kill_guest(srclg, "bad address in sending DMA"); + totlen = 0; + break; + } + + totlen += len; + srcoff += len; + dstoff += len; + if (srcoff == src->len[si]) { + si++; + srcoff = 0; + } + if (dstoff == dst->len[di]) { + kunmap(pages[di]); + maddr = NULL; + di++; + dstoff = 0; + } + } + + if (maddr) + kunmap(pages[di]); + + return totlen; +} + +/* Src is us, ie. current. */ +static u32 do_dma(struct lguest *srclg, const struct lguest_dma *src, + struct lguest *dstlg, const struct lguest_dma *dst) +{ + int i; + u32 ret; + struct page *pages[LGUEST_MAX_DMA_SECTIONS]; + + if (!check_dma_list(dstlg, dst) || !check_dma_list(srclg, src)) + return 0; + + /* First get the destination pages */ + for (i = 0; i < LGUEST_MAX_DMA_SECTIONS; i++) { + if (dst->len[i] == 0) + break; + if (get_user_pages(dstlg->tsk, dstlg->mm, + dst->addr[i], 1, 1, 1, pages+i, NULL) + != 1) { + kill_guest(dstlg, "Error mapping DMA pages"); + ret = 0; + goto drop_pages; + } + } + + /* Now copy until we run out of src or dst. */ + ret = copy_data(srclg, src, dst, pages); + +drop_pages: + while (--i >= 0) + put_page(pages[i]); + return ret; +} + +static int dma_transfer(struct lguest *srclg, + unsigned long udma, + struct lguest_dma_info *dst) +{ + struct lguest_dma dst_dma, src_dma; + struct lguest *dstlg; + u32 i, dma = 0; + + dstlg = &lguests[dst->guestid]; + /* Get our dma list. */ + lgread(srclg, &src_dma, udma, sizeof(src_dma)); + + /* We can't deadlock against them dmaing to us, because this + * is all under the lguest_lock. */ + down_read(&dstlg->mm->mmap_sem); + + for (i = 0; i < dst->num_dmas; i++) { + dma = (dst->next_dma + i) % dst->num_dmas; + if (!lgread_other(dstlg, &dst_dma, + dst->dmas + dma * sizeof(struct lguest_dma), + sizeof(dst_dma))) { + goto fail; + } + if (!dst_dma.used_len) + break; + } + if (i != dst->num_dmas) { + unsigned long used_lenp; + unsigned int ret; + + ret = do_dma(srclg, &src_dma, dstlg, &dst_dma); + /* Put used length in src. */ + lgwrite_u32(srclg, + udma+offsetof(struct lguest_dma, used_len), ret); + if (ret == 0 && src_dma.len[0] != 0) + goto fail; + + /* Make sure destination sees contents before length. */ + wmb(); + used_lenp = dst->dmas + + dma * sizeof(struct lguest_dma) + + offsetof(struct lguest_dma, used_len); + lgwrite_other(dstlg, used_lenp, &ret, sizeof(ret)); + dst->next_dma++; + } + up_read(&dstlg->mm->mmap_sem); + + /* Do this last so dst doesn't simply sleep on lock. */ + set_bit(dst->interrupt, dstlg->irqs_pending); + wake_up_process(dstlg->tsk); + return i == dst->num_dmas; + +fail: + up_read(&dstlg->mm->mmap_sem); + return 0; +} + +void send_dma(struct lguest *lg, unsigned long ukey, unsigned long udma) +{ + union futex_key key; + int empty = 0; + struct rw_semaphore *fshared = ¤t->mm->mmap_sem; + +again: + mutex_lock(&lguest_lock); + down_read(fshared); + if (get_futex_key((u32 __user *)ukey, fshared, &key) != 0) { + kill_guest(lg, "bad sending DMA key"); + goto unlock; + } + /* Shared mapping? Look for other guests... */ + if (key.shared.offset & 1) { + struct lguest_dma_info *i; + list_for_each_entry(i, &dma_hash[hash(&key)], list) { + if (i->guestid == lg->guestid) + continue; + if (!key_eq(&key, &i->key)) + continue; + + empty += dma_transfer(lg, udma, i); + break; + } + if (empty == 1) { + /* Give any recipients one chance to restock. */ + up_read(¤t->mm->mmap_sem); + mutex_unlock(&lguest_lock); + empty++; + goto again; + } + } else { + /* Private mapping: tell our userspace. */ + lg->dma_is_pending = 1; + lg->pending_dma = udma; + lg->pending_key = ukey; + } +unlock: + up_read(fshared); + mutex_unlock(&lguest_lock); +} + +void release_all_dma(struct lguest *lg) +{ + unsigned int i; + + BUG_ON(!mutex_is_locked(&lguest_lock)); + + down_read(&lg->mm->mmap_sem); + for (i = 0; i < LGUEST_MAX_DMA; i++) { + if (lg->dma[i].interrupt) + unlink_dma(&lg->dma[i]); + } + up_read(&lg->mm->mmap_sem); +} + +/* Userspace wants a dma buffer from this guest. */ +unsigned long get_dma_buffer(struct lguest *lg, + unsigned long ukey, unsigned long *interrupt) +{ + unsigned long ret = 0; + union futex_key key; + struct lguest_dma_info *i; + struct rw_semaphore *fshared = ¤t->mm->mmap_sem; + + mutex_lock(&lguest_lock); + down_read(fshared); + if (get_futex_key((u32 __user *)ukey, fshared, &key) != 0) { + kill_guest(lg, "bad registered DMA buffer"); + goto unlock; + } + list_for_each_entry(i, &dma_hash[hash(&key)], list) { + if (key_eq(&key, &i->key) && i->guestid == lg->guestid) { + unsigned int j; + for (j = 0; j < i->num_dmas; j++) { + struct lguest_dma dma; + + ret = i->dmas + j * sizeof(struct lguest_dma); + lgread(lg, &dma, ret, sizeof(dma)); + if (dma.used_len == 0) + break; + } + *interrupt = i->interrupt; + break; + } + } +unlock: + up_read(fshared); + mutex_unlock(&lguest_lock); + return ret; +} + diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h new file mode 100644 index 000000000000..3e2ddfbc816e --- /dev/null +++ b/drivers/lguest/lg.h @@ -0,0 +1,261 @@ +#ifndef _LGUEST_H +#define _LGUEST_H + +#include <asm/desc.h> + +#define GDT_ENTRY_LGUEST_CS 10 +#define GDT_ENTRY_LGUEST_DS 11 +#define LGUEST_CS (GDT_ENTRY_LGUEST_CS * 8) +#define LGUEST_DS (GDT_ENTRY_LGUEST_DS * 8) + +#ifndef __ASSEMBLY__ +#include <linux/types.h> +#include <linux/init.h> +#include <linux/stringify.h> +#include <linux/binfmts.h> +#include <linux/futex.h> +#include <linux/lguest.h> +#include <linux/lguest_launcher.h> +#include <linux/wait.h> +#include <linux/err.h> +#include <asm/semaphore.h> +#include "irq_vectors.h" + +#define GUEST_PL 1 + +struct lguest_regs +{ + /* Manually saved part. */ + unsigned long ebx, ecx, edx; + unsigned long esi, edi, ebp; + unsigned long gs; + unsigned long eax; + unsigned long fs, ds, es; + unsigned long trapnum, errcode; + /* Trap pushed part */ + unsigned long eip; + unsigned long cs; + unsigned long eflags; + unsigned long esp; + unsigned long ss; +}; + +void free_pagetables(void); +int init_pagetables(struct page **switcher_page, unsigned int pages); + +/* Full 4G segment descriptors, suitable for CS and DS. */ +#define FULL_EXEC_SEGMENT ((struct desc_struct){0x0000ffff, 0x00cf9b00}) +#define FULL_SEGMENT ((struct desc_struct){0x0000ffff, 0x00cf9300}) + +struct lguest_dma_info +{ + struct list_head list; + union futex_key key; + unsigned long dmas; + u16 next_dma; + u16 num_dmas; + u16 guestid; + u8 interrupt; /* 0 when not registered */ +}; + +/* We have separate types for the guest's ptes & pgds and the shadow ptes & + * pgds. Since this host might use three-level pagetables and the guest and + * shadow pagetables don't, we can't use the normal pte_t/pgd_t. */ +typedef union { + struct { unsigned flags:12, pfn:20; }; + struct { unsigned long val; } raw; +} spgd_t; +typedef union { + struct { unsigned flags:12, pfn:20; }; + struct { unsigned long val; } raw; +} spte_t; +typedef union { + struct { unsigned flags:12, pfn:20; }; + struct { unsigned long val; } raw; +} gpgd_t; +typedef union { + struct { unsigned flags:12, pfn:20; }; + struct { unsigned long val; } raw; +} gpte_t; +#define mkgpte(_val) ((gpte_t){.raw.val = _val}) +#define mkgpgd(_val) ((gpgd_t){.raw.val = _val}) + +struct pgdir +{ + unsigned long cr3; + spgd_t *pgdir; +}; + +/* This is a guest-specific page (mapped ro) into the guest. */ +struct lguest_ro_state +{ + /* Host information we need to restore when we switch back. */ + u32 host_cr3; + struct Xgt_desc_struct host_idt_desc; + struct Xgt_desc_struct host_gdt_desc; + u32 host_sp; + + /* Fields which are used when guest is running. */ + struct Xgt_desc_struct guest_idt_desc; + struct Xgt_desc_struct guest_gdt_desc; + struct i386_hw_tss guest_tss; + struct desc_struct guest_idt[IDT_ENTRIES]; + struct desc_struct guest_gdt[GDT_ENTRIES]; +}; + +/* We have two pages shared with guests, per cpu. */ +struct lguest_pages +{ + /* This is the stack page mapped rw in guest */ + char spare[PAGE_SIZE - sizeof(struct lguest_regs)]; + struct lguest_regs regs; + + /* This is the host state & guest descriptor page, ro in guest */ + struct lguest_ro_state state; +} __attribute__((aligned(PAGE_SIZE))); + +#define CHANGED_IDT 1 +#define CHANGED_GDT 2 +#define CHANGED_GDT_TLS 4 /* Actually a subset of CHANGED_GDT */ +#define CHANGED_ALL 3 + +/* The private info the thread maintains about the guest. */ +struct lguest +{ + /* At end of a page shared mapped over lguest_pages in guest. */ + unsigned long regs_page; + struct lguest_regs *regs; + struct lguest_data __user *lguest_data; + struct task_struct *tsk; + struct mm_struct *mm; /* == tsk->mm, but that becomes NULL on exit */ + u16 guestid; + u32 pfn_limit; + u32 page_offset; + u32 cr2; + int halted; + int ts; + u32 next_hcall; + u32 esp1; + u8 ss1; + + /* Do we need to stop what we're doing and return to userspace? */ + int break_out; + wait_queue_head_t break_wq; + + /* Bitmap of what has changed: see CHANGED_* above. */ + int changed; + struct lguest_pages *last_pages; + + /* We keep a small number of these. */ + u32 pgdidx; + struct pgdir pgdirs[4]; + + /* Cached wakeup: we hold a reference to this task. */ + struct task_struct *wake; + + unsigned long noirq_start, noirq_end; + int dma_is_pending; + unsigned long pending_dma; /* struct lguest_dma */ + unsigned long pending_key; /* address they're sending to */ + + unsigned int stack_pages; + u32 tsc_khz; + + struct lguest_dma_info dma[LGUEST_MAX_DMA]; + + /* Dead? */ + const char *dead; + + /* The GDT entries copied into lguest_ro_state when running. */ + struct desc_struct gdt[GDT_ENTRIES]; + + /* The IDT entries: some copied into lguest_ro_state when running. */ + struct desc_struct idt[FIRST_EXTERNAL_VECTOR+LGUEST_IRQS]; + struct desc_struct syscall_idt; + + /* Virtual clock device */ + struct hrtimer hrt; + + /* Pending virtual interrupts */ + DECLARE_BITMAP(irqs_pending, LGUEST_IRQS); +}; + +extern struct lguest lguests[]; +extern struct mutex lguest_lock; + +/* core.c: */ +u32 lgread_u32(struct lguest *lg, unsigned long addr); +void lgwrite_u32(struct lguest *lg, unsigned long addr, u32 val); +void lgread(struct lguest *lg, void *buf, unsigned long addr, unsigned len); +void lgwrite(struct lguest *lg, unsigned long, const void *buf, unsigned len); +int find_free_guest(void); +int lguest_address_ok(const struct lguest *lg, + unsigned long addr, unsigned long len); +int run_guest(struct lguest *lg, unsigned long __user *user); + + +/* interrupts_and_traps.c: */ +void maybe_do_interrupt(struct lguest *lg); +int deliver_trap(struct lguest *lg, unsigned int num); +void load_guest_idt_entry(struct lguest *lg, unsigned int i, u32 low, u32 hi); +void guest_set_stack(struct lguest *lg, u32 seg, u32 esp, unsigned int pages); +void pin_stack_pages(struct lguest *lg); +void setup_default_idt_entries(struct lguest_ro_state *state, + const unsigned long *def); +void copy_traps(const struct lguest *lg, struct desc_struct *idt, + const unsigned long *def); +void guest_set_clockevent(struct lguest *lg, unsigned long delta); +void init_clockdev(struct lguest *lg); + +/* segments.c: */ +void setup_default_gdt_entries(struct lguest_ro_state *state); +void setup_guest_gdt(struct lguest *lg); +void load_guest_gdt(struct lguest *lg, unsigned long table, u32 num); +void guest_load_tls(struct lguest *lg, unsigned long tls_array); +void copy_gdt(const struct lguest *lg, struct desc_struct *gdt); +void copy_gdt_tls(const struct lguest *lg, struct desc_struct *gdt); + +/* page_tables.c: */ +int init_guest_pagetable(struct lguest *lg, unsigned long pgtable); +void free_guest_pagetable(struct lguest *lg); +void guest_new_pagetable(struct lguest *lg, unsigned long pgtable); +void guest_set_pmd(struct lguest *lg, unsigned long cr3, u32 i); +void guest_pagetable_clear_all(struct lguest *lg); +void guest_pagetable_flush_user(struct lguest *lg); +void guest_set_pte(struct lguest *lg, unsigned long cr3, + unsigned long vaddr, gpte_t val); +void map_switcher_in_guest(struct lguest *lg, struct lguest_pages *pages); +int demand_page(struct lguest *info, unsigned long cr2, int errcode); +void pin_page(struct lguest *lg, unsigned long vaddr); + +/* lguest_user.c: */ +int lguest_device_init(void); +void lguest_device_remove(void); + +/* io.c: */ +void lguest_io_init(void); +int bind_dma(struct lguest *lg, + unsigned long key, unsigned long udma, u16 numdmas, u8 interrupt); +void send_dma(struct lguest *info, unsigned long key, unsigned long udma); +void release_all_dma(struct lguest *lg); +unsigned long get_dma_buffer(struct lguest *lg, unsigned long key, + unsigned long *interrupt); + +/* hypercalls.c: */ +void do_hypercalls(struct lguest *lg); + +#define kill_guest(lg, fmt...) \ +do { \ + if (!(lg)->dead) { \ + (lg)->dead = kasprintf(GFP_ATOMIC, fmt); \ + if (!(lg)->dead) \ + (lg)->dead = ERR_PTR(-ENOMEM); \ + } \ +} while(0) + +static inline unsigned long guest_pa(struct lguest *lg, unsigned long vaddr) +{ + return vaddr - lg->page_offset; +} +#endif /* __ASSEMBLY__ */ +#endif /* _LGUEST_H */ diff --git a/drivers/lguest/lguest.c b/drivers/lguest/lguest.c new file mode 100644 index 000000000000..b9a58b78c990 --- /dev/null +++ b/drivers/lguest/lguest.c @@ -0,0 +1,621 @@ +/* + * Lguest specific paravirt-ops implementation + * + * Copyright (C) 2006, Rusty Russell <rusty@rustcorp.com.au> IBM Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#include <linux/kernel.h> +#include <linux/start_kernel.h> +#include <linux/string.h> +#include <linux/console.h> +#include <linux/screen_info.h> +#include <linux/irq.h> +#include <linux/interrupt.h> +#include <linux/clocksource.h> +#include <linux/clockchips.h> +#include <linux/lguest.h> +#include <linux/lguest_launcher.h> +#include <linux/lguest_bus.h> +#include <asm/paravirt.h> +#include <asm/param.h> +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/desc.h> +#include <asm/setup.h> +#include <asm/e820.h> +#include <asm/mce.h> +#include <asm/io.h> +//#include <asm/sched-clock.h> + +/* Declarations for definitions in lguest_guest.S */ +extern char lguest_noirq_start[], lguest_noirq_end[]; +extern const char lgstart_cli[], lgend_cli[]; +extern const char lgstart_sti[], lgend_sti[]; +extern const char lgstart_popf[], lgend_popf[]; +extern const char lgstart_pushf[], lgend_pushf[]; +extern const char lgstart_iret[], lgend_iret[]; +extern void lguest_iret(void); + +struct lguest_data lguest_data = { + .hcall_status = { [0 ... LHCALL_RING_SIZE-1] = 0xFF }, + .noirq_start = (u32)lguest_noirq_start, + .noirq_end = (u32)lguest_noirq_end, + .blocked_interrupts = { 1 }, /* Block timer interrupts */ +}; +struct lguest_device_desc *lguest_devices; + +static enum paravirt_lazy_mode lazy_mode; +static void lguest_lazy_mode(enum paravirt_lazy_mode mode) +{ + if (mode == PARAVIRT_LAZY_FLUSH) { + if (unlikely(lazy_mode != PARAVIRT_LAZY_NONE)) + hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0); + } else { + lazy_mode = mode; + if (mode == PARAVIRT_LAZY_NONE) + hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0); + } +} + +static void lazy_hcall(unsigned long call, + unsigned long arg1, + unsigned long arg2, + unsigned long arg3) +{ + if (lazy_mode == PARAVIRT_LAZY_NONE) + hcall(call, arg1, arg2, arg3); + else + async_hcall(call, arg1, arg2, arg3); +} + +void async_hcall(unsigned long call, + unsigned long arg1, unsigned long arg2, unsigned long arg3) +{ + /* Note: This code assumes we're uniprocessor. */ + static unsigned int next_call; + unsigned long flags; + + local_irq_save(flags); + if (lguest_data.hcall_status[next_call] != 0xFF) { + /* Table full, so do normal hcall which will flush table. */ + hcall(call, arg1, arg2, arg3); + } else { + lguest_data.hcalls[next_call].eax = call; + lguest_data.hcalls[next_call].edx = arg1; + lguest_data.hcalls[next_call].ebx = arg2; + lguest_data.hcalls[next_call].ecx = arg3; + /* Make sure host sees arguments before "valid" flag. */ + wmb(); + lguest_data.hcall_status[next_call] = 0; + if (++next_call == LHCALL_RING_SIZE) + next_call = 0; + } + local_irq_restore(flags); +} + +void lguest_send_dma(unsigned long key, struct lguest_dma *dma) +{ + dma->used_len = 0; + hcall(LHCALL_SEND_DMA, key, __pa(dma), 0); +} + +int lguest_bind_dma(unsigned long key, struct lguest_dma *dmas, + unsigned int num, u8 irq) +{ + if (!hcall(LHCALL_BIND_DMA, key, __pa(dmas), (num << 8) | irq)) + return -ENOMEM; + return 0; +} + +void lguest_unbind_dma(unsigned long key, struct lguest_dma *dmas) +{ + hcall(LHCALL_BIND_DMA, key, __pa(dmas), 0); +} + +/* For guests, device memory can be used as normal memory, so we cast away the + * __iomem to quieten sparse. */ +void *lguest_map(unsigned long phys_addr, unsigned long pages) +{ + return (__force void *)ioremap(phys_addr, PAGE_SIZE*pages); +} + +void lguest_unmap(void *addr) +{ + iounmap((__force void __iomem *)addr); +} + +static unsigned long save_fl(void) +{ + return lguest_data.irq_enabled; +} + +static void restore_fl(unsigned long flags) +{ + /* FIXME: Check if interrupt pending... */ + lguest_data.irq_enabled = flags; +} + +static void irq_disable(void) +{ + lguest_data.irq_enabled = 0; +} + +static void irq_enable(void) +{ + /* FIXME: Check if interrupt pending... */ + lguest_data.irq_enabled = X86_EFLAGS_IF; +} + +static void lguest_write_idt_entry(struct desc_struct *dt, + int entrynum, u32 low, u32 high) +{ + write_dt_entry(dt, entrynum, low, high); + hcall(LHCALL_LOAD_IDT_ENTRY, entrynum, low, high); +} + +static void lguest_load_idt(const struct Xgt_desc_struct *desc) +{ + unsigned int i; + struct desc_struct *idt = (void *)desc->address; + + for (i = 0; i < (desc->size+1)/8; i++) + hcall(LHCALL_LOAD_IDT_ENTRY, i, idt[i].a, idt[i].b); +} + +static void lguest_load_gdt(const struct Xgt_desc_struct *desc) +{ + BUG_ON((desc->size+1)/8 != GDT_ENTRIES); + hcall(LHCALL_LOAD_GDT, __pa(desc->address), GDT_ENTRIES, 0); +} + +static void lguest_write_gdt_entry(struct desc_struct *dt, + int entrynum, u32 low, u32 high) +{ + write_dt_entry(dt, entrynum, low, high); + hcall(LHCALL_LOAD_GDT, __pa(dt), GDT_ENTRIES, 0); +} + +static void lguest_load_tls(struct thread_struct *t, unsigned int cpu) +{ + lazy_hcall(LHCALL_LOAD_TLS, __pa(&t->tls_array), cpu, 0); +} + +static void lguest_set_ldt(const void *addr, unsigned entries) +{ +} + +static void lguest_load_tr_desc(void) +{ +} + +static void lguest_cpuid(unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) +{ + int function = *eax; + + native_cpuid(eax, ebx, ecx, edx); + switch (function) { + case 1: /* Basic feature request. */ + /* We only allow kernel to see SSE3, CMPXCHG16B and SSSE3 */ + *ecx &= 0x00002201; + /* SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, FPU. */ + *edx &= 0x07808101; + /* Host wants to know when we flush kernel pages: set PGE. */ + *edx |= 0x00002000; + break; + case 0x80000000: + /* Futureproof this a little: if they ask how much extended + * processor information, limit it to known fields. */ + if (*eax > 0x80000008) + *eax = 0x80000008; + break; + } +} + +static unsigned long current_cr0, current_cr3; +static void lguest_write_cr0(unsigned long val) +{ + lazy_hcall(LHCALL_TS, val & 8, 0, 0); + current_cr0 = val; +} + +static unsigned long lguest_read_cr0(void) +{ + return current_cr0; +} + +static void lguest_clts(void) +{ + lazy_hcall(LHCALL_TS, 0, 0, 0); + current_cr0 &= ~8U; +} + +static unsigned long lguest_read_cr2(void) +{ + return lguest_data.cr2; +} + +static void lguest_write_cr3(unsigned long cr3) +{ + lazy_hcall(LHCALL_NEW_PGTABLE, cr3, 0, 0); + current_cr3 = cr3; +} + +static unsigned long lguest_read_cr3(void) +{ + return current_cr3; +} + +/* Used to enable/disable PGE, but we don't care. */ +static unsigned long lguest_read_cr4(void) +{ + return 0; +} + +static void lguest_write_cr4(unsigned long val) +{ +} + +static void lguest_set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pteval) +{ + *ptep = pteval; + lazy_hcall(LHCALL_SET_PTE, __pa(mm->pgd), addr, pteval.pte_low); +} + +/* We only support two-level pagetables at the moment. */ +static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval) +{ + *pmdp = pmdval; + lazy_hcall(LHCALL_SET_PMD, __pa(pmdp)&PAGE_MASK, + (__pa(pmdp)&(PAGE_SIZE-1))/4, 0); +} + +/* FIXME: Eliminate all callers of this. */ +static void lguest_set_pte(pte_t *ptep, pte_t pteval) +{ + *ptep = pteval; + /* Don't bother with hypercall before initial setup. */ + if (current_cr3) + lazy_hcall(LHCALL_FLUSH_TLB, 1, 0, 0); +} + +static void lguest_flush_tlb_single(unsigned long addr) +{ + /* Simply set it to zero, and it will fault back in. */ + lazy_hcall(LHCALL_SET_PTE, current_cr3, addr, 0); +} + +static void lguest_flush_tlb_user(void) +{ + lazy_hcall(LHCALL_FLUSH_TLB, 0, 0, 0); +} + +static void lguest_flush_tlb_kernel(void) +{ + lazy_hcall(LHCALL_FLUSH_TLB, 1, 0, 0); +} + +static void disable_lguest_irq(unsigned int irq) +{ + set_bit(irq, lguest_data.blocked_interrupts); +} + +static void enable_lguest_irq(unsigned int irq) +{ + clear_bit(irq, lguest_data.blocked_interrupts); + /* FIXME: If it's pending? */ +} + +static struct irq_chip lguest_irq_controller = { + .name = "lguest", + .mask = disable_lguest_irq, + .mask_ack = disable_lguest_irq, + .unmask = enable_lguest_irq, +}; + +static void __init lguest_init_IRQ(void) +{ + unsigned int i; + + for (i = 0; i < LGUEST_IRQS; i++) { + int vector = FIRST_EXTERNAL_VECTOR + i; + if (vector != SYSCALL_VECTOR) { + set_intr_gate(vector, interrupt[i]); + set_irq_chip_and_handler(i, &lguest_irq_controller, + handle_level_irq); + } + } + irq_ctx_init(smp_processor_id()); +} + +static unsigned long lguest_get_wallclock(void) +{ + return hcall(LHCALL_GET_WALLCLOCK, 0, 0, 0); +} + +static cycle_t lguest_clock_read(void) +{ + if (lguest_data.tsc_khz) + return native_read_tsc(); + else + return jiffies; +} + +/* This is what we tell the kernel is our clocksource. */ +static struct clocksource lguest_clock = { + .name = "lguest", + .rating = 400, + .read = lguest_clock_read, +}; + +/* We also need a "struct clock_event_device": Linux asks us to set it to go + * off some time in the future. Actually, James Morris figured all this out, I + * just applied the patch. */ +static int lguest_clockevent_set_next_event(unsigned long delta, + struct clock_event_device *evt) +{ + if (delta < LG_CLOCK_MIN_DELTA) { + if (printk_ratelimit()) + printk(KERN_DEBUG "%s: small delta %lu ns\n", + __FUNCTION__, delta); + return -ETIME; + } + hcall(LHCALL_SET_CLOCKEVENT, delta, 0, 0); + return 0; +} + +static void lguest_clockevent_set_mode(enum clock_event_mode mode, + struct clock_event_device *evt) +{ + switch (mode) { + case CLOCK_EVT_MODE_UNUSED: + case CLOCK_EVT_MODE_SHUTDOWN: + /* A 0 argument shuts the clock down. */ + hcall(LHCALL_SET_CLOCKEVENT, 0, 0, 0); + break; + case CLOCK_EVT_MODE_ONESHOT: + /* This is what we expect. */ + break; + case CLOCK_EVT_MODE_PERIODIC: + BUG(); + } +} + +/* This describes our primitive timer chip. */ +static struct clock_event_device lguest_clockevent = { + .name = "lguest", + .features = CLOCK_EVT_FEAT_ONESHOT, + .set_next_event = lguest_clockevent_set_next_event, + .set_mode = lguest_clockevent_set_mode, + .rating = INT_MAX, + .mult = 1, + .shift = 0, + .min_delta_ns = LG_CLOCK_MIN_DELTA, + .max_delta_ns = LG_CLOCK_MAX_DELTA, +}; + +/* This is the Guest timer interrupt handler (hardware interrupt 0). We just + * call the clockevent infrastructure and it does whatever needs doing. */ +static void lguest_time_irq(unsigned int irq, struct irq_desc *desc) +{ + unsigned long flags; + + /* Don't interrupt us while this is running. */ + local_irq_save(flags); + lguest_clockevent.event_handler(&lguest_clockevent); + local_irq_restore(flags); +} + +static void lguest_time_init(void) +{ + set_irq_handler(0, lguest_time_irq); + + /* We use the TSC if the Host tells us we can, otherwise a dumb + * jiffies-based clock. */ + if (lguest_data.tsc_khz) { + lguest_clock.shift = 22; + lguest_clock.mult = clocksource_khz2mult(lguest_data.tsc_khz, + lguest_clock.shift); + lguest_clock.mask = CLOCKSOURCE_MASK(64); + lguest_clock.flags = CLOCK_SOURCE_IS_CONTINUOUS; + } else { + /* To understand this, start at kernel/time/jiffies.c... */ + lguest_clock.shift = 8; + lguest_clock.mult = (((u64)NSEC_PER_SEC<<8)/ACTHZ) << 8; + lguest_clock.mask = CLOCKSOURCE_MASK(32); + } + clocksource_register(&lguest_clock); + + /* We can't set cpumask in the initializer: damn C limitations! */ + lguest_clockevent.cpumask = cpumask_of_cpu(0); + clockevents_register_device(&lguest_clockevent); + + enable_lguest_irq(0); +} + +static void lguest_load_esp0(struct tss_struct *tss, + struct thread_struct *thread) +{ + lazy_hcall(LHCALL_SET_STACK, __KERNEL_DS|0x1, thread->esp0, + THREAD_SIZE/PAGE_SIZE); +} + +static void lguest_set_debugreg(int regno, unsigned long value) +{ + /* FIXME: Implement */ +} + +static void lguest_wbinvd(void) +{ +} + +#ifdef CONFIG_X86_LOCAL_APIC +static void lguest_apic_write(unsigned long reg, unsigned long v) +{ +} + +static unsigned long lguest_apic_read(unsigned long reg) +{ + return 0; +} +#endif + +static void lguest_safe_halt(void) +{ + hcall(LHCALL_HALT, 0, 0, 0); +} + +static void lguest_power_off(void) +{ + hcall(LHCALL_CRASH, __pa("Power down"), 0, 0); +} + +static int lguest_panic(struct notifier_block *nb, unsigned long l, void *p) +{ + hcall(LHCALL_CRASH, __pa(p), 0, 0); + return NOTIFY_DONE; +} + +static struct notifier_block paniced = { + .notifier_call = lguest_panic +}; + +static __init char *lguest_memory_setup(void) +{ + /* We do this here because lockcheck barfs if before start_kernel */ + atomic_notifier_chain_register(&panic_notifier_list, &paniced); + + add_memory_region(E820_MAP->addr, E820_MAP->size, E820_MAP->type); + return "LGUEST"; +} + +static const struct lguest_insns +{ + const char *start, *end; +} lguest_insns[] = { + [PARAVIRT_PATCH(irq_disable)] = { lgstart_cli, lgend_cli }, + [PARAVIRT_PATCH(irq_enable)] = { lgstart_sti, lgend_sti }, + [PARAVIRT_PATCH(restore_fl)] = { lgstart_popf, lgend_popf }, + [PARAVIRT_PATCH(save_fl)] = { lgstart_pushf, lgend_pushf }, +}; +static unsigned lguest_patch(u8 type, u16 clobber, void *insns, unsigned len) +{ + unsigned int insn_len; + + /* Don't touch it if we don't have a replacement */ + if (type >= ARRAY_SIZE(lguest_insns) || !lguest_insns[type].start) + return paravirt_patch_default(type, clobber, insns, len); + + insn_len = lguest_insns[type].end - lguest_insns[type].start; + + /* Similarly if we can't fit replacement. */ + if (len < insn_len) + return paravirt_patch_default(type, clobber, insns, len); + + memcpy(insns, lguest_insns[type].start, insn_len); + return insn_len; +} + +__init void lguest_init(void *boot) +{ + /* Copy boot parameters first. */ + memcpy(&boot_params, boot, PARAM_SIZE); + memcpy(boot_command_line, __va(boot_params.hdr.cmd_line_ptr), + COMMAND_LINE_SIZE); + + paravirt_ops.name = "lguest"; + paravirt_ops.paravirt_enabled = 1; + paravirt_ops.kernel_rpl = 1; + + paravirt_ops.save_fl = save_fl; + paravirt_ops.restore_fl = restore_fl; + paravirt_ops.irq_disable = irq_disable; + paravirt_ops.irq_enable = irq_enable; + paravirt_ops.load_gdt = lguest_load_gdt; + paravirt_ops.memory_setup = lguest_memory_setup; + paravirt_ops.cpuid = lguest_cpuid; + paravirt_ops.write_cr3 = lguest_write_cr3; + paravirt_ops.flush_tlb_user = lguest_flush_tlb_user; + paravirt_ops.flush_tlb_single = lguest_flush_tlb_single; + paravirt_ops.flush_tlb_kernel = lguest_flush_tlb_kernel; + paravirt_ops.set_pte = lguest_set_pte; + paravirt_ops.set_pte_at = lguest_set_pte_at; + paravirt_ops.set_pmd = lguest_set_pmd; +#ifdef CONFIG_X86_LOCAL_APIC + paravirt_ops.apic_write = lguest_apic_write; + paravirt_ops.apic_write_atomic = lguest_apic_write; + paravirt_ops.apic_read = lguest_apic_read; +#endif + paravirt_ops.load_idt = lguest_load_idt; + paravirt_ops.iret = lguest_iret; + paravirt_ops.load_esp0 = lguest_load_esp0; + paravirt_ops.load_tr_desc = lguest_load_tr_desc; + paravirt_ops.set_ldt = lguest_set_ldt; + paravirt_ops.load_tls = lguest_load_tls; + paravirt_ops.set_debugreg = lguest_set_debugreg; + paravirt_ops.clts = lguest_clts; + paravirt_ops.read_cr0 = lguest_read_cr0; + paravirt_ops.write_cr0 = lguest_write_cr0; + paravirt_ops.init_IRQ = lguest_init_IRQ; + paravirt_ops.read_cr2 = lguest_read_cr2; + paravirt_ops.read_cr3 = lguest_read_cr3; + paravirt_ops.read_cr4 = lguest_read_cr4; + paravirt_ops.write_cr4 = lguest_write_cr4; + paravirt_ops.write_gdt_entry = lguest_write_gdt_entry; + paravirt_ops.write_idt_entry = lguest_write_idt_entry; + paravirt_ops.patch = lguest_patch; + paravirt_ops.safe_halt = lguest_safe_halt; + paravirt_ops.get_wallclock = lguest_get_wallclock; + paravirt_ops.time_init = lguest_time_init; + paravirt_ops.set_lazy_mode = lguest_lazy_mode; + paravirt_ops.wbinvd = lguest_wbinvd; + + hcall(LHCALL_LGUEST_INIT, __pa(&lguest_data), 0, 0); + + /* We use top of mem for initial pagetables. */ + init_pg_tables_end = __pa(pg0); + + asm volatile ("mov %0, %%fs" : : "r" (__KERNEL_DS) : "memory"); + + reserve_top_address(lguest_data.reserve_mem); + + lockdep_init(); + + paravirt_disable_iospace(); + + cpu_detect(&new_cpu_data); + /* head.S usually sets up the first capability word, so do it here. */ + new_cpu_data.x86_capability[0] = cpuid_edx(1); + + /* Math is always hard! */ + new_cpu_data.hard_math = 1; + +#ifdef CONFIG_X86_MCE + mce_disabled = 1; +#endif + +#ifdef CONFIG_ACPI + acpi_disabled = 1; + acpi_ht = 0; +#endif + + add_preferred_console("hvc", 0, NULL); + + pm_power_off = lguest_power_off; + start_kernel(); +} diff --git a/drivers/lguest/lguest_asm.S b/drivers/lguest/lguest_asm.S new file mode 100644 index 000000000000..00046c57b5ba --- /dev/null +++ b/drivers/lguest/lguest_asm.S @@ -0,0 +1,56 @@ +#include <linux/linkage.h> +#include <linux/lguest.h> +#include <asm/asm-offsets.h> +#include <asm/thread_info.h> + +/* FIXME: Once asm/processor-flags.h goes in, include that */ +#define X86_EFLAGS_IF 0x00000200 + +/* + * This is where we begin: we have a magic signature which the launcher looks + * for. The plan is that the Linux boot protocol will be extended with a + * "platform type" field which will guide us here from the normal entry point, + * but for the moment this suffices. We pass the virtual address of the boot + * info to lguest_init(). + * + * We put it in .init.text will be discarded after boot. + */ +.section .init.text, "ax", @progbits +.ascii "GenuineLguest" + /* Set up initial stack. */ + movl $(init_thread_union+THREAD_SIZE),%esp + movl %esi, %eax + addl $__PAGE_OFFSET, %eax + jmp lguest_init + +/* The templates for inline patching. */ +#define LGUEST_PATCH(name, insns...) \ + lgstart_##name: insns; lgend_##name:; \ + .globl lgstart_##name; .globl lgend_##name + +LGUEST_PATCH(cli, movl $0, lguest_data+LGUEST_DATA_irq_enabled) +LGUEST_PATCH(sti, movl $X86_EFLAGS_IF, lguest_data+LGUEST_DATA_irq_enabled) +LGUEST_PATCH(popf, movl %eax, lguest_data+LGUEST_DATA_irq_enabled) +LGUEST_PATCH(pushf, movl lguest_data+LGUEST_DATA_irq_enabled, %eax) + +.text +/* These demark the EIP range where host should never deliver interrupts. */ +.global lguest_noirq_start +.global lguest_noirq_end + +/* + * We move eflags word to lguest_data.irq_enabled to restore interrupt state. + * For page faults, gpfs and virtual interrupts, the hypervisor has saved + * eflags manually, otherwise it was delivered directly and so eflags reflects + * the real machine IF state, ie. interrupts on. Since the kernel always dies + * if it takes such a trap with interrupts disabled anyway, turning interrupts + * back on unconditionally here is OK. + */ +ENTRY(lguest_iret) + pushl %eax + movl 12(%esp), %eax +lguest_noirq_start: + movl %eax,%ss:lguest_data+LGUEST_DATA_irq_enabled + popl %eax + iret +lguest_noirq_end: diff --git a/drivers/lguest/lguest_bus.c b/drivers/lguest/lguest_bus.c new file mode 100644 index 000000000000..18d6ab21a43b --- /dev/null +++ b/drivers/lguest/lguest_bus.c @@ -0,0 +1,148 @@ +#include <linux/init.h> +#include <linux/bootmem.h> +#include <linux/lguest_bus.h> +#include <asm/io.h> + +static ssize_t type_show(struct device *_dev, + struct device_attribute *attr, char *buf) +{ + struct lguest_device *dev = container_of(_dev,struct lguest_device,dev); + return sprintf(buf, "%hu", lguest_devices[dev->index].type); +} +static ssize_t features_show(struct device *_dev, + struct device_attribute *attr, char *buf) +{ + struct lguest_device *dev = container_of(_dev,struct lguest_device,dev); + return sprintf(buf, "%hx", lguest_devices[dev->index].features); +} +static ssize_t pfn_show(struct device *_dev, + struct device_attribute *attr, char *buf) +{ + struct lguest_device *dev = container_of(_dev,struct lguest_device,dev); + return sprintf(buf, "%u", lguest_devices[dev->index].pfn); +} +static ssize_t status_show(struct device *_dev, + struct device_attribute *attr, char *buf) +{ + struct lguest_device *dev = container_of(_dev,struct lguest_device,dev); + return sprintf(buf, "%hx", lguest_devices[dev->index].status); +} +static ssize_t status_store(struct device *_dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct lguest_device *dev = container_of(_dev,struct lguest_device,dev); + if (sscanf(buf, "%hi", &lguest_devices[dev->index].status) != 1) + return -EINVAL; + return count; +} +static struct device_attribute lguest_dev_attrs[] = { + __ATTR_RO(type), + __ATTR_RO(features), + __ATTR_RO(pfn), + __ATTR(status, 0644, status_show, status_store), + __ATTR_NULL +}; + +static int lguest_dev_match(struct device *_dev, struct device_driver *_drv) +{ + struct lguest_device *dev = container_of(_dev,struct lguest_device,dev); + struct lguest_driver *drv = container_of(_drv,struct lguest_driver,drv); + + return (drv->device_type == lguest_devices[dev->index].type); +} + +struct lguest_bus { + struct bus_type bus; + struct device dev; +}; + +static struct lguest_bus lguest_bus = { + .bus = { + .name = "lguest", + .match = lguest_dev_match, + .dev_attrs = lguest_dev_attrs, + }, + .dev = { + .parent = NULL, + .bus_id = "lguest", + } +}; + +static int lguest_dev_probe(struct device *_dev) +{ + int ret; + struct lguest_device *dev = container_of(_dev,struct lguest_device,dev); + struct lguest_driver *drv = container_of(dev->dev.driver, + struct lguest_driver, drv); + + lguest_devices[dev->index].status |= LGUEST_DEVICE_S_DRIVER; + ret = drv->probe(dev); + if (ret == 0) + lguest_devices[dev->index].status |= LGUEST_DEVICE_S_DRIVER_OK; + return ret; +} + +int register_lguest_driver(struct lguest_driver *drv) +{ + if (!lguest_devices) + return 0; + + drv->drv.bus = &lguest_bus.bus; + drv->drv.name = drv->name; + drv->drv.owner = drv->owner; + drv->drv.probe = lguest_dev_probe; + + return driver_register(&drv->drv); +} +EXPORT_SYMBOL_GPL(register_lguest_driver); + +static void add_lguest_device(unsigned int index) +{ + struct lguest_device *new; + + lguest_devices[index].status |= LGUEST_DEVICE_S_ACKNOWLEDGE; + new = kmalloc(sizeof(struct lguest_device), GFP_KERNEL); + if (!new) { + printk(KERN_EMERG "Cannot allocate lguest device %u\n", index); + lguest_devices[index].status |= LGUEST_DEVICE_S_FAILED; + return; + } + + new->index = index; + new->private = NULL; + memset(&new->dev, 0, sizeof(new->dev)); + new->dev.parent = &lguest_bus.dev; + new->dev.bus = &lguest_bus.bus; + sprintf(new->dev.bus_id, "%u", index); + if (device_register(&new->dev) != 0) { + printk(KERN_EMERG "Cannot register lguest device %u\n", index); + lguest_devices[index].status |= LGUEST_DEVICE_S_FAILED; + kfree(new); + } +} + +static void scan_devices(void) +{ + unsigned int i; + + for (i = 0; i < LGUEST_MAX_DEVICES; i++) + if (lguest_devices[i].type) + add_lguest_device(i); +} + +static int __init lguest_bus_init(void) +{ + if (strcmp(paravirt_ops.name, "lguest") != 0) + return 0; + + /* Devices are in page above top of "normal" mem. */ + lguest_devices = lguest_map(max_pfn<<PAGE_SHIFT, 1); + + if (bus_register(&lguest_bus.bus) != 0 + || device_register(&lguest_bus.dev) != 0) + panic("lguest bus registration failed"); + + scan_devices(); + return 0; +} +postcore_initcall(lguest_bus_init); diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c new file mode 100644 index 000000000000..e90d7a783daf --- /dev/null +++ b/drivers/lguest/lguest_user.c @@ -0,0 +1,236 @@ +/* Userspace control of the guest, via /dev/lguest. */ +#include <linux/uaccess.h> +#include <linux/miscdevice.h> +#include <linux/fs.h> +#include "lg.h" + +static void setup_regs(struct lguest_regs *regs, unsigned long start) +{ + /* Write out stack in format lguest expects, so we can switch to it. */ + regs->ds = regs->es = regs->ss = __KERNEL_DS|GUEST_PL; + regs->cs = __KERNEL_CS|GUEST_PL; + regs->eflags = 0x202; /* Interrupts enabled. */ + regs->eip = start; + /* esi points to our boot information (physical address 0) */ +} + +/* + addr */ +static long user_get_dma(struct lguest *lg, const u32 __user *input) +{ + unsigned long key, udma, irq; + + if (get_user(key, input) != 0) + return -EFAULT; + udma = get_dma_buffer(lg, key, &irq); + if (!udma) + return -ENOENT; + + /* We put irq number in udma->used_len. */ + lgwrite_u32(lg, udma + offsetof(struct lguest_dma, used_len), irq); + return udma; +} + +/* To force the Guest to stop running and return to the Launcher, the + * Waker sets writes LHREQ_BREAK and the value "1" to /dev/lguest. The + * Launcher then writes LHREQ_BREAK and "0" to release the Waker. */ +static int break_guest_out(struct lguest *lg, const u32 __user *input) +{ + unsigned long on; + + /* Fetch whether they're turning break on or off.. */ + if (get_user(on, input) != 0) + return -EFAULT; + + if (on) { + lg->break_out = 1; + /* Pop it out (may be running on different CPU) */ + wake_up_process(lg->tsk); + /* Wait for them to reset it */ + return wait_event_interruptible(lg->break_wq, !lg->break_out); + } else { + lg->break_out = 0; + wake_up(&lg->break_wq); + return 0; + } +} + +/* + irq */ +static int user_send_irq(struct lguest *lg, const u32 __user *input) +{ + u32 irq; + + if (get_user(irq, input) != 0) + return -EFAULT; + if (irq >= LGUEST_IRQS) + return -EINVAL; + set_bit(irq, lg->irqs_pending); + return 0; +} + +static ssize_t read(struct file *file, char __user *user, size_t size,loff_t*o) +{ + struct lguest *lg = file->private_data; + + if (!lg) + return -EINVAL; + + /* If you're not the task which owns the guest, go away. */ + if (current != lg->tsk) + return -EPERM; + + if (lg->dead) { + size_t len; + + if (IS_ERR(lg->dead)) + return PTR_ERR(lg->dead); + + len = min(size, strlen(lg->dead)+1); + if (copy_to_user(user, lg->dead, len) != 0) + return -EFAULT; + return len; + } + + if (lg->dma_is_pending) + lg->dma_is_pending = 0; + + return run_guest(lg, (unsigned long __user *)user); +} + +/* Take: pfnlimit, pgdir, start, pageoffset. */ +static int initialize(struct file *file, const u32 __user *input) +{ + struct lguest *lg; + int err, i; + u32 args[4]; + + /* We grab the Big Lguest lock, which protects the global array + * "lguests" and multiple simultaneous initializations. */ + mutex_lock(&lguest_lock); + + if (file->private_data) { + err = -EBUSY; + goto unlock; + } + + if (copy_from_user(args, input, sizeof(args)) != 0) { + err = -EFAULT; + goto unlock; + } + + i = find_free_guest(); + if (i < 0) { + err = -ENOSPC; + goto unlock; + } + lg = &lguests[i]; + lg->guestid = i; + lg->pfn_limit = args[0]; + lg->page_offset = args[3]; + lg->regs_page = get_zeroed_page(GFP_KERNEL); + if (!lg->regs_page) { + err = -ENOMEM; + goto release_guest; + } + lg->regs = (void *)lg->regs_page + PAGE_SIZE - sizeof(*lg->regs); + + err = init_guest_pagetable(lg, args[1]); + if (err) + goto free_regs; + + setup_regs(lg->regs, args[2]); + setup_guest_gdt(lg); + init_clockdev(lg); + lg->tsk = current; + lg->mm = get_task_mm(lg->tsk); + init_waitqueue_head(&lg->break_wq); + lg->last_pages = NULL; + file->private_data = lg; + + mutex_unlock(&lguest_lock); + + return sizeof(args); + +free_regs: + free_page(lg->regs_page); +release_guest: + memset(lg, 0, sizeof(*lg)); +unlock: + mutex_unlock(&lguest_lock); + return err; +} + +static ssize_t write(struct file *file, const char __user *input, + size_t size, loff_t *off) +{ + struct lguest *lg = file->private_data; + u32 req; + + if (get_user(req, input) != 0) + return -EFAULT; + input += sizeof(req); + + if (req != LHREQ_INITIALIZE && !lg) + return -EINVAL; + if (lg && lg->dead) + return -ENOENT; + + /* If you're not the task which owns the Guest, you can only break */ + if (lg && current != lg->tsk && req != LHREQ_BREAK) + return -EPERM; + + switch (req) { + case LHREQ_INITIALIZE: + return initialize(file, (const u32 __user *)input); + case LHREQ_GETDMA: + return user_get_dma(lg, (const u32 __user *)input); + case LHREQ_IRQ: + return user_send_irq(lg, (const u32 __user *)input); + case LHREQ_BREAK: + return break_guest_out(lg, (const u32 __user *)input); + default: + return -EINVAL; + } +} + +static int close(struct inode *inode, struct file *file) +{ + struct lguest *lg = file->private_data; + + if (!lg) + return 0; + + mutex_lock(&lguest_lock); + /* Cancels the hrtimer set via LHCALL_SET_CLOCKEVENT. */ + hrtimer_cancel(&lg->hrt); + release_all_dma(lg); + free_guest_pagetable(lg); + mmput(lg->mm); + if (!IS_ERR(lg->dead)) + kfree(lg->dead); + free_page(lg->regs_page); + memset(lg, 0, sizeof(*lg)); + mutex_unlock(&lguest_lock); + return 0; +} + +static struct file_operations lguest_fops = { + .owner = THIS_MODULE, + .release = close, + .write = write, + .read = read, +}; +static struct miscdevice lguest_dev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "lguest", + .fops = &lguest_fops, +}; + +int __init lguest_device_init(void) +{ + return misc_register(&lguest_dev); +} + +void __exit lguest_device_remove(void) +{ + misc_deregister(&lguest_dev); +} diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c new file mode 100644 index 000000000000..1b0ba09b1269 --- /dev/null +++ b/drivers/lguest/page_tables.c @@ -0,0 +1,411 @@ +/* Shadow page table operations. + * Copyright (C) Rusty Russell IBM Corporation 2006. + * GPL v2 and any later version */ +#include <linux/mm.h> +#include <linux/types.h> +#include <linux/spinlock.h> +#include <linux/random.h> +#include <linux/percpu.h> +#include <asm/tlbflush.h> +#include "lg.h" + +#define PTES_PER_PAGE_SHIFT 10 +#define PTES_PER_PAGE (1 << PTES_PER_PAGE_SHIFT) +#define SWITCHER_PGD_INDEX (PTES_PER_PAGE - 1) + +static DEFINE_PER_CPU(spte_t *, switcher_pte_pages); +#define switcher_pte_page(cpu) per_cpu(switcher_pte_pages, cpu) + +static unsigned vaddr_to_pgd_index(unsigned long vaddr) +{ + return vaddr >> (PAGE_SHIFT + PTES_PER_PAGE_SHIFT); +} + +/* These access the shadow versions (ie. the ones used by the CPU). */ +static spgd_t *spgd_addr(struct lguest *lg, u32 i, unsigned long vaddr) +{ + unsigned int index = vaddr_to_pgd_index(vaddr); + + if (index >= SWITCHER_PGD_INDEX) { + kill_guest(lg, "attempt to access switcher pages"); + index = 0; + } + return &lg->pgdirs[i].pgdir[index]; +} + +static spte_t *spte_addr(struct lguest *lg, spgd_t spgd, unsigned long vaddr) +{ + spte_t *page = __va(spgd.pfn << PAGE_SHIFT); + BUG_ON(!(spgd.flags & _PAGE_PRESENT)); + return &page[(vaddr >> PAGE_SHIFT) % PTES_PER_PAGE]; +} + +/* These access the guest versions. */ +static unsigned long gpgd_addr(struct lguest *lg, unsigned long vaddr) +{ + unsigned int index = vaddr >> (PAGE_SHIFT + PTES_PER_PAGE_SHIFT); + return lg->pgdirs[lg->pgdidx].cr3 + index * sizeof(gpgd_t); +} + +static unsigned long gpte_addr(struct lguest *lg, + gpgd_t gpgd, unsigned long vaddr) +{ + unsigned long gpage = gpgd.pfn << PAGE_SHIFT; + BUG_ON(!(gpgd.flags & _PAGE_PRESENT)); + return gpage + ((vaddr>>PAGE_SHIFT) % PTES_PER_PAGE) * sizeof(gpte_t); +} + +/* Do a virtual -> physical mapping on a user page. */ +static unsigned long get_pfn(unsigned long virtpfn, int write) +{ + struct page *page; + unsigned long ret = -1UL; + + down_read(¤t->mm->mmap_sem); + if (get_user_pages(current, current->mm, virtpfn << PAGE_SHIFT, + 1, write, 1, &page, NULL) == 1) + ret = page_to_pfn(page); + up_read(¤t->mm->mmap_sem); + return ret; +} + +static spte_t gpte_to_spte(struct lguest *lg, gpte_t gpte, int write) +{ + spte_t spte; + unsigned long pfn; + + /* We ignore the global flag. */ + spte.flags = (gpte.flags & ~_PAGE_GLOBAL); + pfn = get_pfn(gpte.pfn, write); + if (pfn == -1UL) { + kill_guest(lg, "failed to get page %u", gpte.pfn); + /* Must not put_page() bogus page on cleanup. */ + spte.flags = 0; + } + spte.pfn = pfn; + return spte; +} + +static void release_pte(spte_t pte) +{ + if (pte.flags & _PAGE_PRESENT) + put_page(pfn_to_page(pte.pfn)); +} + +static void check_gpte(struct lguest *lg, gpte_t gpte) +{ + if ((gpte.flags & (_PAGE_PWT|_PAGE_PSE)) || gpte.pfn >= lg->pfn_limit) + kill_guest(lg, "bad page table entry"); +} + +static void check_gpgd(struct lguest *lg, gpgd_t gpgd) +{ + if ((gpgd.flags & ~_PAGE_TABLE) || gpgd.pfn >= lg->pfn_limit) + kill_guest(lg, "bad page directory entry"); +} + +/* FIXME: We hold reference to pages, which prevents them from being + swapped. It'd be nice to have a callback when Linux wants to swap out. */ + +/* We fault pages in, which allows us to update accessed/dirty bits. + * Return true if we got page. */ +int demand_page(struct lguest *lg, unsigned long vaddr, int errcode) +{ + gpgd_t gpgd; + spgd_t *spgd; + unsigned long gpte_ptr; + gpte_t gpte; + spte_t *spte; + + gpgd = mkgpgd(lgread_u32(lg, gpgd_addr(lg, vaddr))); + if (!(gpgd.flags & _PAGE_PRESENT)) + return 0; + + spgd = spgd_addr(lg, lg->pgdidx, vaddr); + if (!(spgd->flags & _PAGE_PRESENT)) { + /* Get a page of PTEs for them. */ + unsigned long ptepage = get_zeroed_page(GFP_KERNEL); + /* FIXME: Steal from self in this case? */ + if (!ptepage) { + kill_guest(lg, "out of memory allocating pte page"); + return 0; + } + check_gpgd(lg, gpgd); + spgd->raw.val = (__pa(ptepage) | gpgd.flags); + } + + gpte_ptr = gpte_addr(lg, gpgd, vaddr); + gpte = mkgpte(lgread_u32(lg, gpte_ptr)); + + /* No page? */ + if (!(gpte.flags & _PAGE_PRESENT)) + return 0; + + /* Write to read-only page? */ + if ((errcode & 2) && !(gpte.flags & _PAGE_RW)) + return 0; + + /* User access to a non-user page? */ + if ((errcode & 4) && !(gpte.flags & _PAGE_USER)) + return 0; + + check_gpte(lg, gpte); + gpte.flags |= _PAGE_ACCESSED; + if (errcode & 2) + gpte.flags |= _PAGE_DIRTY; + + /* We're done with the old pte. */ + spte = spte_addr(lg, *spgd, vaddr); + release_pte(*spte); + + /* We don't make it writable if this isn't a write: later + * write will fault so we can set dirty bit in guest. */ + if (gpte.flags & _PAGE_DIRTY) + *spte = gpte_to_spte(lg, gpte, 1); + else { + gpte_t ro_gpte = gpte; + ro_gpte.flags &= ~_PAGE_RW; + *spte = gpte_to_spte(lg, ro_gpte, 0); + } + + /* Now we update dirty/accessed on guest. */ + lgwrite_u32(lg, gpte_ptr, gpte.raw.val); + return 1; +} + +/* This is much faster than the full demand_page logic. */ +static int page_writable(struct lguest *lg, unsigned long vaddr) +{ + spgd_t *spgd; + unsigned long flags; + + spgd = spgd_addr(lg, lg->pgdidx, vaddr); + if (!(spgd->flags & _PAGE_PRESENT)) + return 0; + + flags = spte_addr(lg, *spgd, vaddr)->flags; + return (flags & (_PAGE_PRESENT|_PAGE_RW)) == (_PAGE_PRESENT|_PAGE_RW); +} + +void pin_page(struct lguest *lg, unsigned long vaddr) +{ + if (!page_writable(lg, vaddr) && !demand_page(lg, vaddr, 2)) + kill_guest(lg, "bad stack page %#lx", vaddr); +} + +static void release_pgd(struct lguest *lg, spgd_t *spgd) +{ + if (spgd->flags & _PAGE_PRESENT) { + unsigned int i; + spte_t *ptepage = __va(spgd->pfn << PAGE_SHIFT); + for (i = 0; i < PTES_PER_PAGE; i++) + release_pte(ptepage[i]); + free_page((long)ptepage); + spgd->raw.val = 0; + } +} + +static void flush_user_mappings(struct lguest *lg, int idx) +{ + unsigned int i; + for (i = 0; i < vaddr_to_pgd_index(lg->page_offset); i++) + release_pgd(lg, lg->pgdirs[idx].pgdir + i); +} + +void guest_pagetable_flush_user(struct lguest *lg) +{ + flush_user_mappings(lg, lg->pgdidx); +} + +static unsigned int find_pgdir(struct lguest *lg, unsigned long pgtable) +{ + unsigned int i; + for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++) + if (lg->pgdirs[i].cr3 == pgtable) + break; + return i; +} + +static unsigned int new_pgdir(struct lguest *lg, + unsigned long cr3, + int *blank_pgdir) +{ + unsigned int next; + + next = random32() % ARRAY_SIZE(lg->pgdirs); + if (!lg->pgdirs[next].pgdir) { + lg->pgdirs[next].pgdir = (spgd_t *)get_zeroed_page(GFP_KERNEL); + if (!lg->pgdirs[next].pgdir) + next = lg->pgdidx; + else + /* There are no mappings: you'll need to re-pin */ + *blank_pgdir = 1; + } + lg->pgdirs[next].cr3 = cr3; + /* Release all the non-kernel mappings. */ + flush_user_mappings(lg, next); + + return next; +} + +void guest_new_pagetable(struct lguest *lg, unsigned long pgtable) +{ + int newpgdir, repin = 0; + + newpgdir = find_pgdir(lg, pgtable); + if (newpgdir == ARRAY_SIZE(lg->pgdirs)) + newpgdir = new_pgdir(lg, pgtable, &repin); + lg->pgdidx = newpgdir; + if (repin) + pin_stack_pages(lg); +} + +static void release_all_pagetables(struct lguest *lg) +{ + unsigned int i, j; + + for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++) + if (lg->pgdirs[i].pgdir) + for (j = 0; j < SWITCHER_PGD_INDEX; j++) + release_pgd(lg, lg->pgdirs[i].pgdir + j); +} + +void guest_pagetable_clear_all(struct lguest *lg) +{ + release_all_pagetables(lg); + pin_stack_pages(lg); +} + +static void do_set_pte(struct lguest *lg, int idx, + unsigned long vaddr, gpte_t gpte) +{ + spgd_t *spgd = spgd_addr(lg, idx, vaddr); + if (spgd->flags & _PAGE_PRESENT) { + spte_t *spte = spte_addr(lg, *spgd, vaddr); + release_pte(*spte); + if (gpte.flags & (_PAGE_DIRTY | _PAGE_ACCESSED)) { + check_gpte(lg, gpte); + *spte = gpte_to_spte(lg, gpte, gpte.flags&_PAGE_DIRTY); + } else + spte->raw.val = 0; + } +} + +void guest_set_pte(struct lguest *lg, + unsigned long cr3, unsigned long vaddr, gpte_t gpte) +{ + /* Kernel mappings must be changed on all top levels. */ + if (vaddr >= lg->page_offset) { + unsigned int i; + for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++) + if (lg->pgdirs[i].pgdir) + do_set_pte(lg, i, vaddr, gpte); + } else { + int pgdir = find_pgdir(lg, cr3); + if (pgdir != ARRAY_SIZE(lg->pgdirs)) + do_set_pte(lg, pgdir, vaddr, gpte); + } +} + +void guest_set_pmd(struct lguest *lg, unsigned long cr3, u32 idx) +{ + int pgdir; + + if (idx >= SWITCHER_PGD_INDEX) + return; + + pgdir = find_pgdir(lg, cr3); + if (pgdir < ARRAY_SIZE(lg->pgdirs)) + release_pgd(lg, lg->pgdirs[pgdir].pgdir + idx); +} + +int init_guest_pagetable(struct lguest *lg, unsigned long pgtable) +{ + /* We assume this in flush_user_mappings, so check now */ + if (vaddr_to_pgd_index(lg->page_offset) >= SWITCHER_PGD_INDEX) + return -EINVAL; + lg->pgdidx = 0; + lg->pgdirs[lg->pgdidx].cr3 = pgtable; + lg->pgdirs[lg->pgdidx].pgdir = (spgd_t*)get_zeroed_page(GFP_KERNEL); + if (!lg->pgdirs[lg->pgdidx].pgdir) + return -ENOMEM; + return 0; +} + +void free_guest_pagetable(struct lguest *lg) +{ + unsigned int i; + + release_all_pagetables(lg); + for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++) + free_page((long)lg->pgdirs[i].pgdir); +} + +/* Caller must be preempt-safe */ +void map_switcher_in_guest(struct lguest *lg, struct lguest_pages *pages) +{ + spte_t *switcher_pte_page = __get_cpu_var(switcher_pte_pages); + spgd_t switcher_pgd; + spte_t regs_pte; + + /* Since switcher less that 4MB, we simply mug top pte page. */ + switcher_pgd.pfn = __pa(switcher_pte_page) >> PAGE_SHIFT; + switcher_pgd.flags = _PAGE_KERNEL; + lg->pgdirs[lg->pgdidx].pgdir[SWITCHER_PGD_INDEX] = switcher_pgd; + + /* Map our regs page over stack page. */ + regs_pte.pfn = __pa(lg->regs_page) >> PAGE_SHIFT; + regs_pte.flags = _PAGE_KERNEL; + switcher_pte_page[(unsigned long)pages/PAGE_SIZE%PTES_PER_PAGE] + = regs_pte; +} + +static void free_switcher_pte_pages(void) +{ + unsigned int i; + + for_each_possible_cpu(i) + free_page((long)switcher_pte_page(i)); +} + +static __init void populate_switcher_pte_page(unsigned int cpu, + struct page *switcher_page[], + unsigned int pages) +{ + unsigned int i; + spte_t *pte = switcher_pte_page(cpu); + + for (i = 0; i < pages; i++) { + pte[i].pfn = page_to_pfn(switcher_page[i]); + pte[i].flags = _PAGE_PRESENT|_PAGE_ACCESSED; + } + + /* We only map this CPU's pages, so guest can't see others. */ + i = pages + cpu*2; + + /* First page (regs) is rw, second (state) is ro. */ + pte[i].pfn = page_to_pfn(switcher_page[i]); + pte[i].flags = _PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_RW; + pte[i+1].pfn = page_to_pfn(switcher_page[i+1]); + pte[i+1].flags = _PAGE_PRESENT|_PAGE_ACCESSED; +} + +__init int init_pagetables(struct page **switcher_page, unsigned int pages) +{ + unsigned int i; + + for_each_possible_cpu(i) { + switcher_pte_page(i) = (spte_t *)get_zeroed_page(GFP_KERNEL); + if (!switcher_pte_page(i)) { + free_switcher_pte_pages(); + return -ENOMEM; + } + populate_switcher_pte_page(i, switcher_page, pages); + } + return 0; +} + +void free_pagetables(void) +{ + free_switcher_pte_pages(); +} diff --git a/drivers/lguest/segments.c b/drivers/lguest/segments.c new file mode 100644 index 000000000000..1b2cfe89dcd5 --- /dev/null +++ b/drivers/lguest/segments.c @@ -0,0 +1,125 @@ +#include "lg.h" + +static int desc_ok(const struct desc_struct *gdt) +{ + /* MBZ=0, P=1, DT=1 */ + return ((gdt->b & 0x00209000) == 0x00009000); +} + +static int segment_present(const struct desc_struct *gdt) +{ + return gdt->b & 0x8000; +} + +static int ignored_gdt(unsigned int num) +{ + return (num == GDT_ENTRY_TSS + || num == GDT_ENTRY_LGUEST_CS + || num == GDT_ENTRY_LGUEST_DS + || num == GDT_ENTRY_DOUBLEFAULT_TSS); +} + +/* We don't allow removal of CS, DS or SS; it doesn't make sense. */ +static void check_segment_use(struct lguest *lg, unsigned int desc) +{ + if (lg->regs->gs / 8 == desc) + lg->regs->gs = 0; + if (lg->regs->fs / 8 == desc) + lg->regs->fs = 0; + if (lg->regs->es / 8 == desc) + lg->regs->es = 0; + if (lg->regs->ds / 8 == desc + || lg->regs->cs / 8 == desc + || lg->regs->ss / 8 == desc) + kill_guest(lg, "Removed live GDT entry %u", desc); +} + +static void fixup_gdt_table(struct lguest *lg, unsigned start, unsigned end) +{ + unsigned int i; + + for (i = start; i < end; i++) { + /* We never copy these ones to real gdt */ + if (ignored_gdt(i)) + continue; + + /* We could fault in switch_to_guest if they are using + * a removed segment. */ + if (!segment_present(&lg->gdt[i])) { + check_segment_use(lg, i); + continue; + } + + if (!desc_ok(&lg->gdt[i])) + kill_guest(lg, "Bad GDT descriptor %i", i); + + /* DPL 0 presumably means "for use by guest". */ + if ((lg->gdt[i].b & 0x00006000) == 0) + lg->gdt[i].b |= (GUEST_PL << 13); + + /* Set accessed bit, since gdt isn't writable. */ + lg->gdt[i].b |= 0x00000100; + } +} + +void setup_default_gdt_entries(struct lguest_ro_state *state) +{ + struct desc_struct *gdt = state->guest_gdt; + unsigned long tss = (unsigned long)&state->guest_tss; + + /* Hypervisor segments. */ + gdt[GDT_ENTRY_LGUEST_CS] = FULL_EXEC_SEGMENT; + gdt[GDT_ENTRY_LGUEST_DS] = FULL_SEGMENT; + + /* This is the one which we *cannot* copy from guest, since tss + is depended on this lguest_ro_state, ie. this cpu. */ + gdt[GDT_ENTRY_TSS].a = 0x00000067 | (tss << 16); + gdt[GDT_ENTRY_TSS].b = 0x00008900 | (tss & 0xFF000000) + | ((tss >> 16) & 0x000000FF); +} + +void setup_guest_gdt(struct lguest *lg) +{ + lg->gdt[GDT_ENTRY_KERNEL_CS] = FULL_EXEC_SEGMENT; + lg->gdt[GDT_ENTRY_KERNEL_DS] = FULL_SEGMENT; + lg->gdt[GDT_ENTRY_KERNEL_CS].b |= (GUEST_PL << 13); + lg->gdt[GDT_ENTRY_KERNEL_DS].b |= (GUEST_PL << 13); +} + +/* This is a fast version for the common case where only the three TLS entries + * have changed. */ +void copy_gdt_tls(const struct lguest *lg, struct desc_struct *gdt) +{ + unsigned int i; + + for (i = GDT_ENTRY_TLS_MIN; i <= GDT_ENTRY_TLS_MAX; i++) + gdt[i] = lg->gdt[i]; +} + +void copy_gdt(const struct lguest *lg, struct desc_struct *gdt) +{ + unsigned int i; + + for (i = 0; i < GDT_ENTRIES; i++) + if (!ignored_gdt(i)) + gdt[i] = lg->gdt[i]; +} + +void load_guest_gdt(struct lguest *lg, unsigned long table, u32 num) +{ + if (num > ARRAY_SIZE(lg->gdt)) + kill_guest(lg, "too many gdt entries %i", num); + + lgread(lg, lg->gdt, table, num * sizeof(lg->gdt[0])); + fixup_gdt_table(lg, 0, ARRAY_SIZE(lg->gdt)); + lg->changed |= CHANGED_GDT; +} + +void guest_load_tls(struct lguest *lg, unsigned long gtls) +{ + struct desc_struct *tls = &lg->gdt[GDT_ENTRY_TLS_MIN]; + + lgread(lg, tls, gtls, sizeof(*tls)*GDT_ENTRY_TLS_ENTRIES); + fixup_gdt_table(lg, GDT_ENTRY_TLS_MIN, GDT_ENTRY_TLS_MAX+1); + lg->changed |= CHANGED_GDT_TLS; +} diff --git a/drivers/lguest/switcher.S b/drivers/lguest/switcher.S new file mode 100644 index 000000000000..eadd4cc299d2 --- /dev/null +++ b/drivers/lguest/switcher.S @@ -0,0 +1,159 @@ +/* This code sits at 0xFFC00000 to do the low-level guest<->host switch. + + There is are two pages above us for this CPU (struct lguest_pages). + The second page (struct lguest_ro_state) becomes read-only after the + context switch. The first page (the stack for traps) remains writable, + but while we're in here, the guest cannot be running. +*/ +#include <linux/linkage.h> +#include <asm/asm-offsets.h> +#include "lg.h" + +.text +ENTRY(start_switcher_text) + +/* %eax points to lguest pages for this CPU. %ebx contains cr3 value. + All normal registers can be clobbered! */ +ENTRY(switch_to_guest) + /* Save host segments on host stack. */ + pushl %es + pushl %ds + pushl %gs + pushl %fs + /* With CONFIG_FRAME_POINTER, gcc doesn't let us clobber this! */ + pushl %ebp + /* Save host stack. */ + movl %esp, LGUEST_PAGES_host_sp(%eax) + /* Switch to guest stack: if we get NMI we expect to be there. */ + movl %eax, %edx + addl $LGUEST_PAGES_regs, %edx + movl %edx, %esp + /* Switch to guest's GDT, IDT. */ + lgdt LGUEST_PAGES_guest_gdt_desc(%eax) + lidt LGUEST_PAGES_guest_idt_desc(%eax) + /* Switch to guest's TSS while GDT still writable. */ + movl $(GDT_ENTRY_TSS*8), %edx + ltr %dx + /* Set host's TSS GDT entry to available (clear byte 5 bit 2). */ + movl (LGUEST_PAGES_host_gdt_desc+2)(%eax), %edx + andb $0xFD, (GDT_ENTRY_TSS*8 + 5)(%edx) + /* Switch to guest page tables: lguest_pages->state now read-only. */ + movl %ebx, %cr3 + /* Restore guest regs */ + popl %ebx + popl %ecx + popl %edx + popl %esi + popl %edi + popl %ebp + popl %gs + popl %eax + popl %fs + popl %ds + popl %es + /* Skip error code and trap number */ + addl $8, %esp + iret + +#define SWITCH_TO_HOST \ + /* Save guest state */ \ + pushl %es; \ + pushl %ds; \ + pushl %fs; \ + pushl %eax; \ + pushl %gs; \ + pushl %ebp; \ + pushl %edi; \ + pushl %esi; \ + pushl %edx; \ + pushl %ecx; \ + pushl %ebx; \ + /* Load lguest ds segment for convenience. */ \ + movl $(LGUEST_DS), %eax; \ + movl %eax, %ds; \ + /* Figure out where we are, based on stack (at top of regs). */ \ + movl %esp, %eax; \ + subl $LGUEST_PAGES_regs, %eax; \ + /* Put trap number in %ebx before we switch cr3 and lose it. */ \ + movl LGUEST_PAGES_regs_trapnum(%eax), %ebx; \ + /* Switch to host page tables (host GDT, IDT and stack are in host \ + mem, so need this first) */ \ + movl LGUEST_PAGES_host_cr3(%eax), %edx; \ + movl %edx, %cr3; \ + /* Set guest's TSS to available (clear byte 5 bit 2). */ \ + andb $0xFD, (LGUEST_PAGES_guest_gdt+GDT_ENTRY_TSS*8+5)(%eax); \ + /* Switch to host's GDT & IDT. */ \ + lgdt LGUEST_PAGES_host_gdt_desc(%eax); \ + lidt LGUEST_PAGES_host_idt_desc(%eax); \ + /* Switch to host's stack. */ \ + movl LGUEST_PAGES_host_sp(%eax), %esp; \ + /* Switch to host's TSS */ \ + movl $(GDT_ENTRY_TSS*8), %edx; \ + ltr %dx; \ + popl %ebp; \ + popl %fs; \ + popl %gs; \ + popl %ds; \ + popl %es + +/* Return to run_guest_once. */ +return_to_host: + SWITCH_TO_HOST + iret + +deliver_to_host: + SWITCH_TO_HOST + /* Decode IDT and jump to hosts' irq handler. When that does iret, it + * will return to run_guest_once. This is a feature. */ + movl (LGUEST_PAGES_host_idt_desc+2)(%eax), %edx + leal (%edx,%ebx,8), %eax + movzwl (%eax),%edx + movl 4(%eax), %eax + xorw %ax, %ax + orl %eax, %edx + jmp *%edx + +/* Real hardware interrupts are delivered straight to the host. Others + cause us to return to run_guest_once so it can decide what to do. Note + that some of these are overridden by the guest to deliver directly, and + never enter here (see load_guest_idt_entry). */ +.macro IRQ_STUB N TARGET + .data; .long 1f; .text; 1: + /* Make an error number for most traps, which don't have one. */ + .if (\N <> 8) && (\N < 10 || \N > 14) && (\N <> 17) + pushl $0 + .endif + pushl $\N + jmp \TARGET + ALIGN +.endm + +.macro IRQ_STUBS FIRST LAST TARGET + irq=\FIRST + .rept \LAST-\FIRST+1 + IRQ_STUB irq \TARGET + irq=irq+1 + .endr +.endm + +/* We intercept every interrupt, because we may need to switch back to + * host. Unfortunately we can't tell them apart except by entry + * point, so we need 256 entry points. + */ +.data +.global default_idt_entries +default_idt_entries: +.text + IRQ_STUBS 0 1 return_to_host /* First two traps */ + IRQ_STUB 2 handle_nmi /* NMI */ + IRQ_STUBS 3 31 return_to_host /* Rest of traps */ + IRQ_STUBS 32 127 deliver_to_host /* Real interrupts */ + IRQ_STUB 128 return_to_host /* System call (overridden) */ + IRQ_STUBS 129 255 deliver_to_host /* Other real interrupts */ + +/* We ignore NMI and return. */ +handle_nmi: + addl $8, %esp + iret + +ENTRY(end_switcher_text) diff --git a/drivers/macintosh/macio_asic.c b/drivers/macintosh/macio_asic.c index c96b7fe882a4..ec9e5f32f0ae 100644 --- a/drivers/macintosh/macio_asic.c +++ b/drivers/macintosh/macio_asic.c @@ -365,10 +365,9 @@ static struct macio_dev * macio_add_one_device(struct macio_chip *chip, if (np == NULL) return NULL; - dev = kmalloc(sizeof(*dev), GFP_KERNEL); + dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) return NULL; - memset(dev, 0, sizeof(*dev)); dev->bus = &chip->lbus; dev->media_bay = in_bay; diff --git a/drivers/macintosh/smu.c b/drivers/macintosh/smu.c index f8e1a135bf9d..d409f6759482 100644 --- a/drivers/macintosh/smu.c +++ b/drivers/macintosh/smu.c @@ -1053,10 +1053,9 @@ static int smu_open(struct inode *inode, struct file *file) struct smu_private *pp; unsigned long flags; - pp = kmalloc(sizeof(struct smu_private), GFP_KERNEL); + pp = kzalloc(sizeof(struct smu_private), GFP_KERNEL); if (pp == 0) return -ENOMEM; - memset(pp, 0, sizeof(struct smu_private)); spin_lock_init(&pp->lock); pp->mode = smu_file_commands; init_waitqueue_head(&pp->wait); diff --git a/drivers/macintosh/therm_pm72.c b/drivers/macintosh/therm_pm72.c index 3d90fc002097..e43554e754a4 100644 --- a/drivers/macintosh/therm_pm72.c +++ b/drivers/macintosh/therm_pm72.c @@ -318,10 +318,9 @@ static struct i2c_client *attach_i2c_chip(int id, const char *name) if (adap == NULL) return NULL; - clt = kmalloc(sizeof(struct i2c_client), GFP_KERNEL); + clt = kzalloc(sizeof(struct i2c_client), GFP_KERNEL); if (clt == NULL) return NULL; - memset(clt, 0, sizeof(struct i2c_client)); clt->addr = (id >> 1) & 0x7f; clt->adapter = adap; diff --git a/drivers/macintosh/therm_windtunnel.c b/drivers/macintosh/therm_windtunnel.c index 3d0354e96a97..5452da1bb1a5 100644 --- a/drivers/macintosh/therm_windtunnel.c +++ b/drivers/macintosh/therm_windtunnel.c @@ -431,9 +431,8 @@ do_probe( struct i2c_adapter *adapter, int addr, int kind ) | I2C_FUNC_SMBUS_WRITE_BYTE) ) return 0; - if( !(cl=kmalloc(sizeof(*cl), GFP_KERNEL)) ) + if( !(cl=kzalloc(sizeof(*cl), GFP_KERNEL)) ) return -ENOMEM; - memset( cl, 0, sizeof(struct i2c_client) ); cl->addr = addr; cl->adapter = adapter; diff --git a/drivers/macintosh/windfarm_lm75_sensor.c b/drivers/macintosh/windfarm_lm75_sensor.c index a0fabf3c2008..7e10c3ab4d50 100644 --- a/drivers/macintosh/windfarm_lm75_sensor.c +++ b/drivers/macintosh/windfarm_lm75_sensor.c @@ -117,10 +117,9 @@ static struct wf_lm75_sensor *wf_lm75_create(struct i2c_adapter *adapter, DBG("wf_lm75: creating %s device at address 0x%02x\n", ds1775 ? "ds1775" : "lm75", addr); - lm = kmalloc(sizeof(struct wf_lm75_sensor), GFP_KERNEL); + lm = kzalloc(sizeof(struct wf_lm75_sensor), GFP_KERNEL); if (lm == NULL) return NULL; - memset(lm, 0, sizeof(struct wf_lm75_sensor)); /* Usual rant about sensor names not beeing very consistent in * the device-tree, oh well ... diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 1a876f9965e0..144071e70a93 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -951,13 +951,12 @@ static struct mirror_set *alloc_context(unsigned int nr_mirrors, len = sizeof(*ms) + (sizeof(ms->mirror[0]) * nr_mirrors); - ms = kmalloc(len, GFP_KERNEL); + ms = kzalloc(len, GFP_KERNEL); if (!ms) { ti->error = "Cannot allocate mirror context"; return NULL; } - memset(ms, 0, len); spin_lock_init(&ms->lock); ms->ti = ti; diff --git a/drivers/media/dvb/cinergyT2/cinergyT2.c b/drivers/media/dvb/cinergyT2/cinergyT2.c index 5a1449f485cf..28929b618e20 100644 --- a/drivers/media/dvb/cinergyT2/cinergyT2.c +++ b/drivers/media/dvb/cinergyT2/cinergyT2.c @@ -905,12 +905,11 @@ static int cinergyt2_probe (struct usb_interface *intf, struct cinergyt2 *cinergyt2; int err; - if (!(cinergyt2 = kmalloc (sizeof(struct cinergyt2), GFP_KERNEL))) { + if (!(cinergyt2 = kzalloc (sizeof(struct cinergyt2), GFP_KERNEL))) { dprintk(1, "out of memory?!?\n"); return -ENOMEM; } - memset (cinergyt2, 0, sizeof (struct cinergyt2)); usb_set_intfdata (intf, (void *) cinergyt2); mutex_init(&cinergyt2->sem); diff --git a/drivers/media/video/cpia2/cpia2_core.c b/drivers/media/video/cpia2/cpia2_core.c index 55aab8d38880..a76bd786cf13 100644 --- a/drivers/media/video/cpia2/cpia2_core.c +++ b/drivers/media/video/cpia2/cpia2_core.c @@ -2224,15 +2224,13 @@ struct camera_data *cpia2_init_camera_struct(void) { struct camera_data *cam; - cam = kmalloc(sizeof(*cam), GFP_KERNEL); + cam = kzalloc(sizeof(*cam), GFP_KERNEL); if (!cam) { ERR("couldn't kmalloc cpia2 struct\n"); return NULL; } - /* Default everything to 0 */ - memset(cam, 0, sizeof(struct camera_data)); cam->present = 1; mutex_init(&cam->busy_lock); diff --git a/drivers/media/video/msp3400-driver.c b/drivers/media/video/msp3400-driver.c index 507b1d4260ed..11cfcf18ec34 100644 --- a/drivers/media/video/msp3400-driver.c +++ b/drivers/media/video/msp3400-driver.c @@ -812,10 +812,9 @@ static int msp_attach(struct i2c_adapter *adapter, int address, int kind) int msp_product, msp_prod_hi, msp_prod_lo; int msp_rom; - client = kmalloc(sizeof(*client), GFP_KERNEL); + client = kzalloc(sizeof(*client), GFP_KERNEL); if (client == NULL) return -ENOMEM; - memset(client, 0, sizeof(*client)); client->addr = address; client->adapter = adapter; client->driver = &i2c_driver; diff --git a/drivers/media/video/planb.c b/drivers/media/video/planb.c index 1455a8f4e930..4ab1af74a970 100644 --- a/drivers/media/video/planb.c +++ b/drivers/media/video/planb.c @@ -353,9 +353,8 @@ static int planb_prepare_open(struct planb *pb) * PLANB_DUMMY)*sizeof(struct dbdma_cmd) +(PLANB_MAXLINES*((PLANB_MAXPIXELS+7)& ~7))/8 +MAX_GBUFFERS*sizeof(unsigned int); - if ((pb->priv_space = kmalloc (size, GFP_KERNEL)) == 0) + if ((pb->priv_space = kzalloc (size, GFP_KERNEL)) == 0) return -ENOMEM; - memset ((void *) pb->priv_space, 0, size); pb->overlay_last1 = pb->ch1_cmd = (volatile struct dbdma_cmd *) DBDMA_ALIGN (pb->priv_space); pb->overlay_last2 = pb->ch2_cmd = pb->ch1_cmd + pb->tab_size; diff --git a/drivers/media/video/usbvideo/vicam.c b/drivers/media/video/usbvideo/vicam.c index 2d9c0dd3b733..ff555129c82f 100644 --- a/drivers/media/video/usbvideo/vicam.c +++ b/drivers/media/video/usbvideo/vicam.c @@ -1130,13 +1130,12 @@ vicam_probe( struct usb_interface *intf, const struct usb_device_id *id) } if ((cam = - kmalloc(sizeof (struct vicam_camera), GFP_KERNEL)) == NULL) { + kzalloc(sizeof (struct vicam_camera), GFP_KERNEL)) == NULL) { printk(KERN_WARNING "could not allocate kernel memory for vicam_camera struct\n"); return -ENOMEM; } - memset(cam, 0, sizeof (struct vicam_camera)); cam->shutter_speed = 15; diff --git a/drivers/mfd/mcp-core.c b/drivers/mfd/mcp-core.c index 75f401d52fda..b4ed57e02729 100644 --- a/drivers/mfd/mcp-core.c +++ b/drivers/mfd/mcp-core.c @@ -200,9 +200,8 @@ struct mcp *mcp_host_alloc(struct device *parent, size_t size) { struct mcp *mcp; - mcp = kmalloc(sizeof(struct mcp) + size, GFP_KERNEL); + mcp = kzalloc(sizeof(struct mcp) + size, GFP_KERNEL); if (mcp) { - memset(mcp, 0, sizeof(struct mcp) + size); spin_lock_init(&mcp->lock); mcp->attached_device.parent = parent; mcp->attached_device.bus = &mcp_bus_type; diff --git a/drivers/mfd/ucb1x00-core.c b/drivers/mfd/ucb1x00-core.c index 149810a084f5..e03f1bcd4f9f 100644 --- a/drivers/mfd/ucb1x00-core.c +++ b/drivers/mfd/ucb1x00-core.c @@ -484,12 +484,11 @@ static int ucb1x00_probe(struct mcp *mcp) goto err_disable; } - ucb = kmalloc(sizeof(struct ucb1x00), GFP_KERNEL); + ucb = kzalloc(sizeof(struct ucb1x00), GFP_KERNEL); ret = -ENOMEM; if (!ucb) goto err_disable; - memset(ucb, 0, sizeof(struct ucb1x00)); ucb->cdev.class = &ucb1x00_class; ucb->cdev.dev = &mcp->attached_device; diff --git a/drivers/misc/asus-laptop.c b/drivers/misc/asus-laptop.c index 7798f590e5aa..f75306059971 100644 --- a/drivers/misc/asus-laptop.c +++ b/drivers/misc/asus-laptop.c @@ -979,10 +979,9 @@ static int asus_hotk_add(struct acpi_device *device) printk(ASUS_NOTICE "Asus Laptop Support version %s\n", ASUS_LAPTOP_VERSION); - hotk = kmalloc(sizeof(struct asus_hotk), GFP_KERNEL); + hotk = kzalloc(sizeof(struct asus_hotk), GFP_KERNEL); if (!hotk) return -ENOMEM; - memset(hotk, 0, sizeof(struct asus_hotk)); hotk->handle = device->handle; strcpy(acpi_device_name(device), ASUS_HOTK_DEVICE_NAME); diff --git a/drivers/misc/ibmasm/command.c b/drivers/misc/ibmasm/command.c index b5df347c81b9..6497872df524 100644 --- a/drivers/misc/ibmasm/command.c +++ b/drivers/misc/ibmasm/command.c @@ -41,18 +41,16 @@ struct command *ibmasm_new_command(struct service_processor *sp, size_t buffer_s if (buffer_size > IBMASM_CMD_MAX_BUFFER_SIZE) return NULL; - cmd = kmalloc(sizeof(struct command), GFP_KERNEL); + cmd = kzalloc(sizeof(struct command), GFP_KERNEL); if (cmd == NULL) return NULL; - memset(cmd, 0, sizeof(*cmd)); - cmd->buffer = kmalloc(buffer_size, GFP_KERNEL); + cmd->buffer = kzalloc(buffer_size, GFP_KERNEL); if (cmd->buffer == NULL) { kfree(cmd); return NULL; } - memset(cmd->buffer, 0, buffer_size); cmd->buffer_size = buffer_size; kobject_init(&cmd->kobj); diff --git a/drivers/misc/ibmasm/ibmasmfs.c b/drivers/misc/ibmasm/ibmasmfs.c index eb7b073734b8..22a7e8ba211d 100644 --- a/drivers/misc/ibmasm/ibmasmfs.c +++ b/drivers/misc/ibmasm/ibmasmfs.c @@ -563,11 +563,10 @@ static ssize_t remote_settings_file_write(struct file *file, const char __user * if (*offset != 0) return 0; - buff = kmalloc (count + 1, GFP_KERNEL); + buff = kzalloc (count + 1, GFP_KERNEL); if (!buff) return -ENOMEM; - memset(buff, 0x0, count + 1); if (copy_from_user(buff, ubuff, count)) { kfree(buff); diff --git a/drivers/misc/ibmasm/module.c b/drivers/misc/ibmasm/module.c index fb03a853fac4..4f9d4a9da983 100644 --- a/drivers/misc/ibmasm/module.c +++ b/drivers/misc/ibmasm/module.c @@ -77,13 +77,12 @@ static int __devinit ibmasm_init_one(struct pci_dev *pdev, const struct pci_devi /* vnc client won't work without bus-mastering */ pci_set_master(pdev); - sp = kmalloc(sizeof(struct service_processor), GFP_KERNEL); + sp = kzalloc(sizeof(struct service_processor), GFP_KERNEL); if (sp == NULL) { dev_err(&pdev->dev, "Failed to allocate memory\n"); result = -ENOMEM; goto error_kmalloc; } - memset(sp, 0, sizeof(struct service_processor)); spin_lock_init(&sp->lock); INIT_LIST_HEAD(&sp->command_queue); diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index cbd4b6e3e17c..93fe2e5dd616 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -414,13 +414,12 @@ static struct mmc_blk_data *mmc_blk_alloc(struct mmc_card *card) return ERR_PTR(-ENOSPC); __set_bit(devidx, dev_use); - md = kmalloc(sizeof(struct mmc_blk_data), GFP_KERNEL); + md = kzalloc(sizeof(struct mmc_blk_data), GFP_KERNEL); if (!md) { ret = -ENOMEM; goto out; } - memset(md, 0, sizeof(struct mmc_blk_data)); /* * Set the read-only status based on the supported commands diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 5fb659f8b20e..3073f679584b 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -838,6 +838,50 @@ config ULTRA32 <file:Documentation/networking/net-modules.txt>. The module will be called smc-ultra32. +config BFIN_MAC + tristate "Blackfin 536/537 on-chip mac support" + depends on NET_ETHERNET && (BF537 || BF536) && (!BF537_PORT_H) + select CRC32 + select BFIN_MAC_USE_L1 if DMA_UNCACHED_NONE + help + This is the driver for blackfin on-chip mac device. Say Y if you want it + compiled into the kernel. This driver is also available as a module + ( = code which can be inserted in and removed from the running kernel + whenever you want). The module will be called bfin_mac. + +config BFIN_MAC_USE_L1 + bool "Use L1 memory for rx/tx packets" + depends on BFIN_MAC && BF537 + default y + help + To get maximum network performace, you should use L1 memory as rx/tx buffers. + Say N here if you want to reserve L1 memory for other uses. + +config BFIN_TX_DESC_NUM + int "Number of transmit buffer packets" + depends on BFIN_MAC + range 6 10 if BFIN_MAC_USE_L1 + range 10 100 + default "10" + help + Set the number of buffer packets used in driver. + +config BFIN_RX_DESC_NUM + int "Number of receive buffer packets" + depends on BFIN_MAC + range 20 100 if BFIN_MAC_USE_L1 + range 20 800 + default "20" + help + Set the number of buffer packets used in driver. + +config BFIN_MAC_RMII + bool "RMII PHY Interface (EXPERIMENTAL)" + depends on BFIN_MAC && EXPERIMENTAL + default n + help + Use Reduced PHY MII Interface + config SMC9194 tristate "SMC 9194 support" depends on NET_VENDOR_SMC && (ISA || MAC && BROKEN) diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 0e286ab8855a..336af0635df8 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -177,6 +177,7 @@ obj-$(CONFIG_ZORRO8390) += zorro8390.o obj-$(CONFIG_HPLANCE) += hplance.o 7990.o obj-$(CONFIG_MVME147_NET) += mvme147.o 7990.o obj-$(CONFIG_EQUALIZER) += eql.o +obj-$(CONFIG_LGUEST_GUEST) += lguest_net.o obj-$(CONFIG_MIPS_JAZZ_SONIC) += jazzsonic.o obj-$(CONFIG_MIPS_AU1X00_ENET) += au1000_eth.o obj-$(CONFIG_MIPS_SIM_NET) += mipsnet.o @@ -200,6 +201,7 @@ obj-$(CONFIG_S2IO) += s2io.o obj-$(CONFIG_MYRI10GE) += myri10ge/ obj-$(CONFIG_SMC91X) += smc91x.o obj-$(CONFIG_SMC911X) += smc911x.o +obj-$(CONFIG_BFIN_MAC) += bfin_mac.o obj-$(CONFIG_DM9000) += dm9000.o obj-$(CONFIG_FEC_8XX) += fec_8xx/ obj-$(CONFIG_PASEMI_MAC) += pasemi_mac.o diff --git a/drivers/net/arm/ether3.c b/drivers/net/arm/ether3.c index da713500654d..a7cac695a9bd 100644 --- a/drivers/net/arm/ether3.c +++ b/drivers/net/arm/ether3.c @@ -464,7 +464,7 @@ static void ether3_setmulticastlist(struct net_device *dev) if (dev->flags & IFF_PROMISC) { /* promiscuous mode */ priv(dev)->regs.config1 |= CFG1_RECVPROMISC; - } else if (dev->flags & IFF_ALLMULTI) { + } else if (dev->flags & IFF_ALLMULTI || dev->mc_count) { priv(dev)->regs.config1 |= CFG1_RECVSPECBRMULTI; } else priv(dev)->regs.config1 |= CFG1_RECVSPECBROAD; diff --git a/drivers/net/b44.c b/drivers/net/b44.c index 96fb0ec905a7..37f1b6ff5c12 100644 --- a/drivers/net/b44.c +++ b/drivers/net/b44.c @@ -1519,14 +1519,13 @@ static void b44_setup_pseudo_magicp(struct b44 *bp) u8 *pwol_pattern; u8 pwol_mask[B44_PMASK_SIZE]; - pwol_pattern = kmalloc(B44_PATTERN_SIZE, GFP_KERNEL); + pwol_pattern = kzalloc(B44_PATTERN_SIZE, GFP_KERNEL); if (!pwol_pattern) { printk(KERN_ERR PFX "Memory not available for WOL\n"); return; } /* Ipv4 magic packet pattern - pattern 0.*/ - memset(pwol_pattern, 0, B44_PATTERN_SIZE); memset(pwol_mask, 0, B44_PMASK_SIZE); plen0 = b44_magic_pattern(bp->dev->dev_addr, pwol_pattern, pwol_mask, B44_ETHIPV4UDP_HLEN); diff --git a/drivers/net/bfin_mac.c b/drivers/net/bfin_mac.c new file mode 100644 index 000000000000..9a08d656f1ce --- /dev/null +++ b/drivers/net/bfin_mac.c @@ -0,0 +1,1009 @@ +/* + * File: drivers/net/bfin_mac.c + * Based on: + * Maintainer: + * Bryan Wu <bryan.wu@analog.com> + * + * Original author: + * Luke Yang <luke.yang@analog.com> + * + * Created: + * Description: + * + * Modified: + * Copyright 2004-2006 Analog Devices Inc. + * + * Bugs: Enter bugs at http://blackfin.uclinux.org/ + * + * This program is free software ; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation ; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY ; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program ; see the file COPYING. + * If not, write to the Free Software Foundation, + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/delay.h> +#include <linux/timer.h> +#include <linux/errno.h> +#include <linux/irq.h> +#include <linux/io.h> +#include <linux/ioport.h> +#include <linux/crc32.h> +#include <linux/device.h> +#include <linux/spinlock.h> +#include <linux/ethtool.h> +#include <linux/mii.h> + +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/skbuff.h> + +#include <linux/platform_device.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/skbuff.h> + +#include <asm/dma.h> +#include <linux/dma-mapping.h> + +#include <asm/blackfin.h> +#include <asm/cacheflush.h> +#include <asm/portmux.h> + +#include "bfin_mac.h" + +#define DRV_NAME "bfin_mac" +#define DRV_VERSION "1.1" +#define DRV_AUTHOR "Bryan Wu, Luke Yang" +#define DRV_DESC "Blackfin BF53[67] on-chip Ethernet MAC driver" + +MODULE_AUTHOR(DRV_AUTHOR); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION(DRV_DESC); + +#if defined(CONFIG_BFIN_MAC_USE_L1) +# define bfin_mac_alloc(dma_handle, size) l1_data_sram_zalloc(size) +# define bfin_mac_free(dma_handle, ptr) l1_data_sram_free(ptr) +#else +# define bfin_mac_alloc(dma_handle, size) \ + dma_alloc_coherent(NULL, size, dma_handle, GFP_KERNEL) +# define bfin_mac_free(dma_handle, ptr) \ + dma_free_coherent(NULL, sizeof(*ptr), ptr, dma_handle) +#endif + +#define PKT_BUF_SZ 1580 + +#define MAX_TIMEOUT_CNT 500 + +/* pointers to maintain transmit list */ +static struct net_dma_desc_tx *tx_list_head; +static struct net_dma_desc_tx *tx_list_tail; +static struct net_dma_desc_rx *rx_list_head; +static struct net_dma_desc_rx *rx_list_tail; +static struct net_dma_desc_rx *current_rx_ptr; +static struct net_dma_desc_tx *current_tx_ptr; +static struct net_dma_desc_tx *tx_desc; +static struct net_dma_desc_rx *rx_desc; + +static void desc_list_free(void) +{ + struct net_dma_desc_rx *r; + struct net_dma_desc_tx *t; + int i; +#if !defined(CONFIG_BFIN_MAC_USE_L1) + dma_addr_t dma_handle = 0; +#endif + + if (tx_desc) { + t = tx_list_head; + for (i = 0; i < CONFIG_BFIN_TX_DESC_NUM; i++) { + if (t) { + if (t->skb) { + dev_kfree_skb(t->skb); + t->skb = NULL; + } + t = t->next; + } + } + bfin_mac_free(dma_handle, tx_desc); + } + + if (rx_desc) { + r = rx_list_head; + for (i = 0; i < CONFIG_BFIN_RX_DESC_NUM; i++) { + if (r) { + if (r->skb) { + dev_kfree_skb(r->skb); + r->skb = NULL; + } + r = r->next; + } + } + bfin_mac_free(dma_handle, rx_desc); + } +} + +static int desc_list_init(void) +{ + int i; + struct sk_buff *new_skb; +#if !defined(CONFIG_BFIN_MAC_USE_L1) + /* + * This dma_handle is useless in Blackfin dma_alloc_coherent(). + * The real dma handler is the return value of dma_alloc_coherent(). + */ + dma_addr_t dma_handle; +#endif + + tx_desc = bfin_mac_alloc(&dma_handle, + sizeof(struct net_dma_desc_tx) * + CONFIG_BFIN_TX_DESC_NUM); + if (tx_desc == NULL) + goto init_error; + + rx_desc = bfin_mac_alloc(&dma_handle, + sizeof(struct net_dma_desc_rx) * + CONFIG_BFIN_RX_DESC_NUM); + if (rx_desc == NULL) + goto init_error; + + /* init tx_list */ + tx_list_head = tx_list_tail = tx_desc; + + for (i = 0; i < CONFIG_BFIN_TX_DESC_NUM; i++) { + struct net_dma_desc_tx *t = tx_desc + i; + struct dma_descriptor *a = &(t->desc_a); + struct dma_descriptor *b = &(t->desc_b); + + /* + * disable DMA + * read from memory WNR = 0 + * wordsize is 32 bits + * 6 half words is desc size + * large desc flow + */ + a->config = WDSIZE_32 | NDSIZE_6 | DMAFLOW_LARGE; + a->start_addr = (unsigned long)t->packet; + a->x_count = 0; + a->next_dma_desc = b; + + /* + * enabled DMA + * write to memory WNR = 1 + * wordsize is 32 bits + * disable interrupt + * 6 half words is desc size + * large desc flow + */ + b->config = DMAEN | WNR | WDSIZE_32 | NDSIZE_6 | DMAFLOW_LARGE; + b->start_addr = (unsigned long)(&(t->status)); + b->x_count = 0; + + t->skb = NULL; + tx_list_tail->desc_b.next_dma_desc = a; + tx_list_tail->next = t; + tx_list_tail = t; + } + tx_list_tail->next = tx_list_head; /* tx_list is a circle */ + tx_list_tail->desc_b.next_dma_desc = &(tx_list_head->desc_a); + current_tx_ptr = tx_list_head; + + /* init rx_list */ + rx_list_head = rx_list_tail = rx_desc; + + for (i = 0; i < CONFIG_BFIN_RX_DESC_NUM; i++) { + struct net_dma_desc_rx *r = rx_desc + i; + struct dma_descriptor *a = &(r->desc_a); + struct dma_descriptor *b = &(r->desc_b); + + /* allocate a new skb for next time receive */ + new_skb = dev_alloc_skb(PKT_BUF_SZ + 2); + if (!new_skb) { + printk(KERN_NOTICE DRV_NAME + ": init: low on mem - packet dropped\n"); + goto init_error; + } + skb_reserve(new_skb, 2); + r->skb = new_skb; + + /* + * enabled DMA + * write to memory WNR = 1 + * wordsize is 32 bits + * disable interrupt + * 6 half words is desc size + * large desc flow + */ + a->config = DMAEN | WNR | WDSIZE_32 | NDSIZE_6 | DMAFLOW_LARGE; + /* since RXDWA is enabled */ + a->start_addr = (unsigned long)new_skb->data - 2; + a->x_count = 0; + a->next_dma_desc = b; + + /* + * enabled DMA + * write to memory WNR = 1 + * wordsize is 32 bits + * enable interrupt + * 6 half words is desc size + * large desc flow + */ + b->config = DMAEN | WNR | WDSIZE_32 | DI_EN | + NDSIZE_6 | DMAFLOW_LARGE; + b->start_addr = (unsigned long)(&(r->status)); + b->x_count = 0; + + rx_list_tail->desc_b.next_dma_desc = a; + rx_list_tail->next = r; + rx_list_tail = r; + } + rx_list_tail->next = rx_list_head; /* rx_list is a circle */ + rx_list_tail->desc_b.next_dma_desc = &(rx_list_head->desc_a); + current_rx_ptr = rx_list_head; + + return 0; + +init_error: + desc_list_free(); + printk(KERN_ERR DRV_NAME ": kmalloc failed\n"); + return -ENOMEM; +} + + +/*---PHY CONTROL AND CONFIGURATION-----------------------------------------*/ + +/* Set FER regs to MUX in Ethernet pins */ +static int setup_pin_mux(int action) +{ +#if defined(CONFIG_BFIN_MAC_RMII) + u16 pin_req[] = P_RMII0; +#else + u16 pin_req[] = P_MII0; +#endif + + if (action) { + if (peripheral_request_list(pin_req, DRV_NAME)) { + printk(KERN_ERR DRV_NAME + ": Requesting Peripherals failed\n"); + return -EFAULT; + } + } else + peripheral_free_list(pin_req); + + return 0; +} + +/* Wait until the previous MDC/MDIO transaction has completed */ +static void poll_mdc_done(void) +{ + int timeout_cnt = MAX_TIMEOUT_CNT; + + /* poll the STABUSY bit */ + while ((bfin_read_EMAC_STAADD()) & STABUSY) { + mdelay(10); + if (timeout_cnt-- < 0) { + printk(KERN_ERR DRV_NAME + ": wait MDC/MDIO transaction to complete timeout\n"); + break; + } + } +} + +/* Read an off-chip register in a PHY through the MDC/MDIO port */ +static u16 read_phy_reg(u16 PHYAddr, u16 RegAddr) +{ + poll_mdc_done(); + /* read mode */ + bfin_write_EMAC_STAADD(SET_PHYAD(PHYAddr) | + SET_REGAD(RegAddr) | + STABUSY); + poll_mdc_done(); + + return (u16) bfin_read_EMAC_STADAT(); +} + +/* Write an off-chip register in a PHY through the MDC/MDIO port */ +static void raw_write_phy_reg(u16 PHYAddr, u16 RegAddr, u32 Data) +{ + bfin_write_EMAC_STADAT(Data); + + /* write mode */ + bfin_write_EMAC_STAADD(SET_PHYAD(PHYAddr) | + SET_REGAD(RegAddr) | + STAOP | + STABUSY); + + poll_mdc_done(); +} + +static void write_phy_reg(u16 PHYAddr, u16 RegAddr, u32 Data) +{ + poll_mdc_done(); + raw_write_phy_reg(PHYAddr, RegAddr, Data); +} + +/* set up the phy */ +static void bf537mac_setphy(struct net_device *dev) +{ + u16 phydat; + struct bf537mac_local *lp = netdev_priv(dev); + + /* Program PHY registers */ + pr_debug("start setting up phy\n"); + + /* issue a reset */ + raw_write_phy_reg(lp->PhyAddr, PHYREG_MODECTL, 0x8000); + + /* wait half a second */ + msleep(500); + + phydat = read_phy_reg(lp->PhyAddr, PHYREG_MODECTL); + + /* advertise flow control supported */ + phydat = read_phy_reg(lp->PhyAddr, PHYREG_ANAR); + phydat |= (1 << 10); + write_phy_reg(lp->PhyAddr, PHYREG_ANAR, phydat); + + phydat = 0; + if (lp->Negotiate) + phydat |= 0x1000; /* enable auto negotiation */ + else { + if (lp->FullDuplex) + phydat |= (1 << 8); /* full duplex */ + else + phydat &= (~(1 << 8)); /* half duplex */ + + if (!lp->Port10) + phydat |= (1 << 13); /* 100 Mbps */ + else + phydat &= (~(1 << 13)); /* 10 Mbps */ + } + + if (lp->Loopback) + phydat |= (1 << 14); /* enable TX->RX loopback */ + + write_phy_reg(lp->PhyAddr, PHYREG_MODECTL, phydat); + msleep(500); + + phydat = read_phy_reg(lp->PhyAddr, PHYREG_MODECTL); + /* check for SMSC PHY */ + if ((read_phy_reg(lp->PhyAddr, PHYREG_PHYID1) == 0x7) && + ((read_phy_reg(lp->PhyAddr, PHYREG_PHYID2) & 0xfff0) == 0xC0A0)) { + /* + * we have SMSC PHY so reqest interrupt + * on link down condition + */ + + /* enable interrupts */ + write_phy_reg(lp->PhyAddr, 30, 0x0ff); + } +} + +/**************************************************************************/ +void setup_system_regs(struct net_device *dev) +{ + int phyaddr; + unsigned short sysctl, phydat; + u32 opmode; + struct bf537mac_local *lp = netdev_priv(dev); + int count = 0; + + phyaddr = lp->PhyAddr; + + /* Enable PHY output */ + if (!(bfin_read_VR_CTL() & PHYCLKOE)) + bfin_write_VR_CTL(bfin_read_VR_CTL() | PHYCLKOE); + + /* MDC = 2.5 MHz */ + sysctl = SET_MDCDIV(24); + /* Odd word alignment for Receive Frame DMA word */ + /* Configure checksum support and rcve frame word alignment */ +#if defined(BFIN_MAC_CSUM_OFFLOAD) + sysctl |= RXDWA | RXCKS; +#else + sysctl |= RXDWA; +#endif + bfin_write_EMAC_SYSCTL(sysctl); + /* auto negotiation on */ + /* full duplex */ + /* 100 Mbps */ + phydat = PHY_ANEG_EN | PHY_DUPLEX | PHY_SPD_SET; + write_phy_reg(phyaddr, PHYREG_MODECTL, phydat); + + /* test if full duplex supported */ + do { + msleep(100); + phydat = read_phy_reg(phyaddr, PHYREG_MODESTAT); + if (count > 30) { + printk(KERN_NOTICE DRV_NAME ": Link is down\n"); + printk(KERN_NOTICE DRV_NAME + "please check your network connection\n"); + break; + } + count++; + } while (!(phydat & 0x0004)); + + phydat = read_phy_reg(phyaddr, PHYREG_ANLPAR); + + if ((phydat & 0x0100) || (phydat & 0x0040)) { + opmode = FDMODE; + } else { + opmode = 0; + printk(KERN_INFO DRV_NAME + ": Network is set to half duplex\n"); + } + +#if defined(CONFIG_BFIN_MAC_RMII) + opmode |= RMII; /* For Now only 100MBit are supported */ +#endif + + bfin_write_EMAC_OPMODE(opmode); + + bfin_write_EMAC_MMC_CTL(RSTC | CROLL); + + /* Initialize the TX DMA channel registers */ + bfin_write_DMA2_X_COUNT(0); + bfin_write_DMA2_X_MODIFY(4); + bfin_write_DMA2_Y_COUNT(0); + bfin_write_DMA2_Y_MODIFY(0); + + /* Initialize the RX DMA channel registers */ + bfin_write_DMA1_X_COUNT(0); + bfin_write_DMA1_X_MODIFY(4); + bfin_write_DMA1_Y_COUNT(0); + bfin_write_DMA1_Y_MODIFY(0); +} + +void setup_mac_addr(u8 * mac_addr) +{ + u32 addr_low = le32_to_cpu(*(__le32 *) & mac_addr[0]); + u16 addr_hi = le16_to_cpu(*(__le16 *) & mac_addr[4]); + + /* this depends on a little-endian machine */ + bfin_write_EMAC_ADDRLO(addr_low); + bfin_write_EMAC_ADDRHI(addr_hi); +} + +static void adjust_tx_list(void) +{ + int timeout_cnt = MAX_TIMEOUT_CNT; + + if (tx_list_head->status.status_word != 0 + && current_tx_ptr != tx_list_head) { + goto adjust_head; /* released something, just return; */ + } + + /* + * if nothing released, check wait condition + * current's next can not be the head, + * otherwise the dma will not stop as we want + */ + if (current_tx_ptr->next->next == tx_list_head) { + while (tx_list_head->status.status_word == 0) { + mdelay(10); + if (tx_list_head->status.status_word != 0 + || !(bfin_read_DMA2_IRQ_STATUS() & 0x08)) { + goto adjust_head; + } + if (timeout_cnt-- < 0) { + printk(KERN_ERR DRV_NAME + ": wait for adjust tx list head timeout\n"); + break; + } + } + if (tx_list_head->status.status_word != 0) { + goto adjust_head; + } + } + + return; + +adjust_head: + do { + tx_list_head->desc_a.config &= ~DMAEN; + tx_list_head->status.status_word = 0; + if (tx_list_head->skb) { + dev_kfree_skb(tx_list_head->skb); + tx_list_head->skb = NULL; + } else { + printk(KERN_ERR DRV_NAME + ": no sk_buff in a transmitted frame!\n"); + } + tx_list_head = tx_list_head->next; + } while (tx_list_head->status.status_word != 0 + && current_tx_ptr != tx_list_head); + return; + +} + +static int bf537mac_hard_start_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + struct bf537mac_local *lp = netdev_priv(dev); + unsigned int data; + + current_tx_ptr->skb = skb; + + /* + * Is skb->data always 16-bit aligned? + * Do we need to memcpy((char *)(tail->packet + 2), skb->data, len)? + */ + if ((((unsigned int)(skb->data)) & 0x02) == 2) { + /* move skb->data to current_tx_ptr payload */ + data = (unsigned int)(skb->data) - 2; + *((unsigned short *)data) = (unsigned short)(skb->len); + current_tx_ptr->desc_a.start_addr = (unsigned long)data; + /* this is important! */ + blackfin_dcache_flush_range(data, (data + (skb->len)) + 2); + + } else { + *((unsigned short *)(current_tx_ptr->packet)) = + (unsigned short)(skb->len); + memcpy((char *)(current_tx_ptr->packet + 2), skb->data, + (skb->len)); + current_tx_ptr->desc_a.start_addr = + (unsigned long)current_tx_ptr->packet; + if (current_tx_ptr->status.status_word != 0) + current_tx_ptr->status.status_word = 0; + blackfin_dcache_flush_range((unsigned int)current_tx_ptr-> + packet, + (unsigned int)(current_tx_ptr-> + packet + skb->len) + + 2); + } + + /* enable this packet's dma */ + current_tx_ptr->desc_a.config |= DMAEN; + + /* tx dma is running, just return */ + if (bfin_read_DMA2_IRQ_STATUS() & 0x08) + goto out; + + /* tx dma is not running */ + bfin_write_DMA2_NEXT_DESC_PTR(&(current_tx_ptr->desc_a)); + /* dma enabled, read from memory, size is 6 */ + bfin_write_DMA2_CONFIG(current_tx_ptr->desc_a.config); + /* Turn on the EMAC tx */ + bfin_write_EMAC_OPMODE(bfin_read_EMAC_OPMODE() | TE); + +out: + adjust_tx_list(); + current_tx_ptr = current_tx_ptr->next; + dev->trans_start = jiffies; + lp->stats.tx_packets++; + lp->stats.tx_bytes += (skb->len); + return 0; +} + +static void bf537mac_rx(struct net_device *dev) +{ + struct sk_buff *skb, *new_skb; + struct bf537mac_local *lp = netdev_priv(dev); + unsigned short len; + + /* allocate a new skb for next time receive */ + skb = current_rx_ptr->skb; + new_skb = dev_alloc_skb(PKT_BUF_SZ + 2); + if (!new_skb) { + printk(KERN_NOTICE DRV_NAME + ": rx: low on mem - packet dropped\n"); + lp->stats.rx_dropped++; + goto out; + } + /* reserve 2 bytes for RXDWA padding */ + skb_reserve(new_skb, 2); + current_rx_ptr->skb = new_skb; + current_rx_ptr->desc_a.start_addr = (unsigned long)new_skb->data - 2; + + len = (unsigned short)((current_rx_ptr->status.status_word) & RX_FRLEN); + skb_put(skb, len); + blackfin_dcache_invalidate_range((unsigned long)skb->head, + (unsigned long)skb->tail); + + dev->last_rx = jiffies; + skb->dev = dev; + skb->protocol = eth_type_trans(skb, dev); +#if defined(BFIN_MAC_CSUM_OFFLOAD) + skb->csum = current_rx_ptr->status.ip_payload_csum; + skb->ip_summed = CHECKSUM_PARTIAL; +#endif + + netif_rx(skb); + lp->stats.rx_packets++; + lp->stats.rx_bytes += len; + current_rx_ptr->status.status_word = 0x00000000; + current_rx_ptr = current_rx_ptr->next; + +out: + return; +} + +/* interrupt routine to handle rx and error signal */ +static irqreturn_t bf537mac_interrupt(int irq, void *dev_id) +{ + struct net_device *dev = dev_id; + int number = 0; + +get_one_packet: + if (current_rx_ptr->status.status_word == 0) { + /* no more new packet received */ + if (number == 0) { + if (current_rx_ptr->next->status.status_word != 0) { + current_rx_ptr = current_rx_ptr->next; + goto real_rx; + } + } + bfin_write_DMA1_IRQ_STATUS(bfin_read_DMA1_IRQ_STATUS() | + DMA_DONE | DMA_ERR); + return IRQ_HANDLED; + } + +real_rx: + bf537mac_rx(dev); + number++; + goto get_one_packet; +} + +#ifdef CONFIG_NET_POLL_CONTROLLER +static void bf537mac_poll(struct net_device *dev) +{ + disable_irq(IRQ_MAC_RX); + bf537mac_interrupt(IRQ_MAC_RX, dev); + enable_irq(IRQ_MAC_RX); +} +#endif /* CONFIG_NET_POLL_CONTROLLER */ + +static void bf537mac_reset(void) +{ + unsigned int opmode; + + opmode = bfin_read_EMAC_OPMODE(); + opmode &= (~RE); + opmode &= (~TE); + /* Turn off the EMAC */ + bfin_write_EMAC_OPMODE(opmode); +} + +/* + * Enable Interrupts, Receive, and Transmit + */ +static int bf537mac_enable(struct net_device *dev) +{ + u32 opmode; + + pr_debug("%s: %s\n", dev->name, __FUNCTION__); + + /* Set RX DMA */ + bfin_write_DMA1_NEXT_DESC_PTR(&(rx_list_head->desc_a)); + bfin_write_DMA1_CONFIG(rx_list_head->desc_a.config); + + /* Wait MII done */ + poll_mdc_done(); + + /* We enable only RX here */ + /* ASTP : Enable Automatic Pad Stripping + PR : Promiscuous Mode for test + PSF : Receive frames with total length less than 64 bytes. + FDMODE : Full Duplex Mode + LB : Internal Loopback for test + RE : Receiver Enable */ + opmode = bfin_read_EMAC_OPMODE(); + if (opmode & FDMODE) + opmode |= PSF; + else + opmode |= DRO | DC | PSF; + opmode |= RE; + +#if defined(CONFIG_BFIN_MAC_RMII) + opmode |= RMII; /* For Now only 100MBit are supported */ +#ifdef CONFIG_BF_REV_0_2 + opmode |= TE; +#endif +#endif + /* Turn on the EMAC rx */ + bfin_write_EMAC_OPMODE(opmode); + + return 0; +} + +/* Our watchdog timed out. Called by the networking layer */ +static void bf537mac_timeout(struct net_device *dev) +{ + pr_debug("%s: %s\n", dev->name, __FUNCTION__); + + bf537mac_reset(); + + /* reset tx queue */ + tx_list_tail = tx_list_head->next; + + bf537mac_enable(dev); + + /* We can accept TX packets again */ + dev->trans_start = jiffies; + netif_wake_queue(dev); +} + +/* + * Get the current statistics. + * This may be called with the card open or closed. + */ +static struct net_device_stats *bf537mac_query_statistics(struct net_device + *dev) +{ + struct bf537mac_local *lp = netdev_priv(dev); + + pr_debug("%s: %s\n", dev->name, __FUNCTION__); + + return &lp->stats; +} + +/* + * This routine will, depending on the values passed to it, + * either make it accept multicast packets, go into + * promiscuous mode (for TCPDUMP and cousins) or accept + * a select set of multicast packets + */ +static void bf537mac_set_multicast_list(struct net_device *dev) +{ + u32 sysctl; + + if (dev->flags & IFF_PROMISC) { + printk(KERN_INFO "%s: set to promisc mode\n", dev->name); + sysctl = bfin_read_EMAC_OPMODE(); + sysctl |= RAF; + bfin_write_EMAC_OPMODE(sysctl); + } else if (dev->flags & IFF_ALLMULTI || dev->mc_count) { + /* accept all multicast */ + sysctl = bfin_read_EMAC_OPMODE(); + sysctl |= PAM; + bfin_write_EMAC_OPMODE(sysctl); + } else { + /* clear promisc or multicast mode */ + sysctl = bfin_read_EMAC_OPMODE(); + sysctl &= ~(RAF | PAM); + bfin_write_EMAC_OPMODE(sysctl); + } +} + +/* + * this puts the device in an inactive state + */ +static void bf537mac_shutdown(struct net_device *dev) +{ + /* Turn off the EMAC */ + bfin_write_EMAC_OPMODE(0x00000000); + /* Turn off the EMAC RX DMA */ + bfin_write_DMA1_CONFIG(0x0000); + bfin_write_DMA2_CONFIG(0x0000); +} + +/* + * Open and Initialize the interface + * + * Set up everything, reset the card, etc.. + */ +static int bf537mac_open(struct net_device *dev) +{ + pr_debug("%s: %s\n", dev->name, __FUNCTION__); + + /* + * Check that the address is valid. If its not, refuse + * to bring the device up. The user must specify an + * address using ifconfig eth0 hw ether xx:xx:xx:xx:xx:xx + */ + if (!is_valid_ether_addr(dev->dev_addr)) { + printk(KERN_WARNING DRV_NAME ": no valid ethernet hw addr\n"); + return -EINVAL; + } + + /* initial rx and tx list */ + desc_list_init(); + + bf537mac_setphy(dev); + setup_system_regs(dev); + bf537mac_reset(); + bf537mac_enable(dev); + + pr_debug("hardware init finished\n"); + netif_start_queue(dev); + netif_carrier_on(dev); + + return 0; +} + +/* + * + * this makes the board clean up everything that it can + * and not talk to the outside world. Caused by + * an 'ifconfig ethX down' + */ +static int bf537mac_close(struct net_device *dev) +{ + pr_debug("%s: %s\n", dev->name, __FUNCTION__); + + netif_stop_queue(dev); + netif_carrier_off(dev); + + /* clear everything */ + bf537mac_shutdown(dev); + + /* free the rx/tx buffers */ + desc_list_free(); + + return 0; +} + +static int __init bf537mac_probe(struct net_device *dev) +{ + struct bf537mac_local *lp = netdev_priv(dev); + int retval; + + /* Grab the MAC address in the MAC */ + *(__le32 *) (&(dev->dev_addr[0])) = cpu_to_le32(bfin_read_EMAC_ADDRLO()); + *(__le16 *) (&(dev->dev_addr[4])) = cpu_to_le16((u16) bfin_read_EMAC_ADDRHI()); + + /* probe mac */ + /*todo: how to proble? which is revision_register */ + bfin_write_EMAC_ADDRLO(0x12345678); + if (bfin_read_EMAC_ADDRLO() != 0x12345678) { + pr_debug("can't detect bf537 mac!\n"); + retval = -ENODEV; + goto err_out; + } + + /* set the GPIO pins to Ethernet mode */ + retval = setup_pin_mux(1); + + if (retval) + return retval; + + /*Is it valid? (Did bootloader initialize it?) */ + if (!is_valid_ether_addr(dev->dev_addr)) { + /* Grab the MAC from the board somehow - this is done in the + arch/blackfin/mach-bf537/boards/eth_mac.c */ + get_bf537_ether_addr(dev->dev_addr); + } + + /* If still not valid, get a random one */ + if (!is_valid_ether_addr(dev->dev_addr)) { + random_ether_addr(dev->dev_addr); + } + + setup_mac_addr(dev->dev_addr); + + /* Fill in the fields of the device structure with ethernet values. */ + ether_setup(dev); + + dev->open = bf537mac_open; + dev->stop = bf537mac_close; + dev->hard_start_xmit = bf537mac_hard_start_xmit; + dev->tx_timeout = bf537mac_timeout; + dev->get_stats = bf537mac_query_statistics; + dev->set_multicast_list = bf537mac_set_multicast_list; +#ifdef CONFIG_NET_POLL_CONTROLLER + dev->poll_controller = bf537mac_poll; +#endif + + /* fill in some of the fields */ + lp->version = 1; + lp->PhyAddr = 0x01; + lp->CLKIN = 25; + lp->FullDuplex = 0; + lp->Negotiate = 1; + lp->FlowControl = 0; + spin_lock_init(&lp->lock); + + /* now, enable interrupts */ + /* register irq handler */ + if (request_irq + (IRQ_MAC_RX, bf537mac_interrupt, IRQF_DISABLED | IRQF_SHARED, + "BFIN537_MAC_RX", dev)) { + printk(KERN_WARNING DRV_NAME + ": Unable to attach BlackFin MAC RX interrupt\n"); + return -EBUSY; + } + + /* Enable PHY output early */ + if (!(bfin_read_VR_CTL() & PHYCLKOE)) + bfin_write_VR_CTL(bfin_read_VR_CTL() | PHYCLKOE); + + retval = register_netdev(dev); + if (retval == 0) { + /* now, print out the card info, in a short format.. */ + printk(KERN_INFO "%s: Version %s, %s\n", + DRV_NAME, DRV_VERSION, DRV_DESC); + } + +err_out: + return retval; +} + +static int bfin_mac_probe(struct platform_device *pdev) +{ + struct net_device *ndev; + + ndev = alloc_etherdev(sizeof(struct bf537mac_local)); + if (!ndev) { + printk(KERN_WARNING DRV_NAME ": could not allocate device\n"); + return -ENOMEM; + } + + SET_MODULE_OWNER(ndev); + SET_NETDEV_DEV(ndev, &pdev->dev); + + platform_set_drvdata(pdev, ndev); + + if (bf537mac_probe(ndev) != 0) { + platform_set_drvdata(pdev, NULL); + free_netdev(ndev); + printk(KERN_WARNING DRV_NAME ": not found\n"); + return -ENODEV; + } + + return 0; +} + +static int bfin_mac_remove(struct platform_device *pdev) +{ + struct net_device *ndev = platform_get_drvdata(pdev); + + platform_set_drvdata(pdev, NULL); + + unregister_netdev(ndev); + + free_irq(IRQ_MAC_RX, ndev); + + free_netdev(ndev); + + setup_pin_mux(0); + + return 0; +} + +static int bfin_mac_suspend(struct platform_device *pdev, pm_message_t state) +{ + return 0; +} + +static int bfin_mac_resume(struct platform_device *pdev) +{ + return 0; +} + +static struct platform_driver bfin_mac_driver = { + .probe = bfin_mac_probe, + .remove = bfin_mac_remove, + .resume = bfin_mac_resume, + .suspend = bfin_mac_suspend, + .driver = { + .name = DRV_NAME, + }, +}; + +static int __init bfin_mac_init(void) +{ + return platform_driver_register(&bfin_mac_driver); +} + +module_init(bfin_mac_init); + +static void __exit bfin_mac_cleanup(void) +{ + platform_driver_unregister(&bfin_mac_driver); +} + +module_exit(bfin_mac_cleanup); diff --git a/drivers/net/bfin_mac.h b/drivers/net/bfin_mac.h new file mode 100644 index 000000000000..af87189b85fa --- /dev/null +++ b/drivers/net/bfin_mac.h @@ -0,0 +1,132 @@ +/* + * File: drivers/net/bfin_mac.c + * Based on: + * Maintainer: + * Bryan Wu <bryan.wu@analog.com> + * + * Original author: + * Luke Yang <luke.yang@analog.com> + * + * Created: + * Description: + * + * Modified: + * Copyright 2004-2006 Analog Devices Inc. + * + * Bugs: Enter bugs at http://blackfin.uclinux.org/ + * + * This program is free software ; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation ; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY ; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program ; see the file COPYING. + * If not, write to the Free Software Foundation, + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +/* + * PHY REGISTER NAMES + */ +#define PHYREG_MODECTL 0x0000 +#define PHYREG_MODESTAT 0x0001 +#define PHYREG_PHYID1 0x0002 +#define PHYREG_PHYID2 0x0003 +#define PHYREG_ANAR 0x0004 +#define PHYREG_ANLPAR 0x0005 +#define PHYREG_ANER 0x0006 +#define PHYREG_NSR 0x0010 +#define PHYREG_LBREMR 0x0011 +#define PHYREG_REC 0x0012 +#define PHYREG_10CFG 0x0013 +#define PHYREG_PHY1_1 0x0014 +#define PHYREG_PHY1_2 0x0015 +#define PHYREG_PHY2 0x0016 +#define PHYREG_TW_1 0x0017 +#define PHYREG_TW_2 0x0018 +#define PHYREG_TEST 0x0019 + +#define PHY_RESET 0x8000 +#define PHY_ANEG_EN 0x1000 +#define PHY_DUPLEX 0x0100 +#define PHY_SPD_SET 0x2000 + +#define BFIN_MAC_CSUM_OFFLOAD + +struct dma_descriptor { + struct dma_descriptor *next_dma_desc; + unsigned long start_addr; + unsigned short config; + unsigned short x_count; +}; + +struct status_area_rx { +#if defined(BFIN_MAC_CSUM_OFFLOAD) + unsigned short ip_hdr_csum; /* ip header checksum */ + /* ip payload(udp or tcp or others) checksum */ + unsigned short ip_payload_csum; +#endif + unsigned long status_word; /* the frame status word */ +}; + +struct status_area_tx { + unsigned long status_word; /* the frame status word */ +}; + +/* use two descriptors for a packet */ +struct net_dma_desc_rx { + struct net_dma_desc_rx *next; + struct sk_buff *skb; + struct dma_descriptor desc_a; + struct dma_descriptor desc_b; + struct status_area_rx status; +}; + +/* use two descriptors for a packet */ +struct net_dma_desc_tx { + struct net_dma_desc_tx *next; + struct sk_buff *skb; + struct dma_descriptor desc_a; + struct dma_descriptor desc_b; + unsigned char packet[1560]; + struct status_area_tx status; +}; + +struct bf537mac_local { + /* + * these are things that the kernel wants me to keep, so users + * can find out semi-useless statistics of how well the card is + * performing + */ + struct net_device_stats stats; + + int version; + + int FlowEnabled; /* record if data flow is active */ + int EtherIntIVG; /* IVG for the ethernet interrupt */ + int RXIVG; /* IVG for the RX completion */ + int TXIVG; /* IVG for the TX completion */ + int PhyAddr; /* PHY address */ + int OpMode; /* set these bits n the OPMODE regs */ + int Port10; /* set port speed to 10 Mbit/s */ + int GenChksums; /* IP checksums to be calculated */ + int NoRcveLnth; /* dont insert recv length at start of buffer */ + int StripPads; /* remove trailing pad bytes */ + int FullDuplex; /* set full duplex mode */ + int Negotiate; /* enable auto negotiation */ + int Loopback; /* loopback at the PHY */ + int Cache; /* Buffers may be cached */ + int FlowControl; /* flow control active */ + int CLKIN; /* clock in value in MHZ */ + unsigned short IntMask; /* interrupt mask */ + unsigned char Mac[6]; /* MAC address of the board */ + spinlock_t lock; +}; + +extern void get_bf537_ether_addr(char *addr); diff --git a/drivers/net/bsd_comp.c b/drivers/net/bsd_comp.c index 7845eaf6f29f..202d4a4ef751 100644 --- a/drivers/net/bsd_comp.c +++ b/drivers/net/bsd_comp.c @@ -395,14 +395,13 @@ static void *bsd_alloc (unsigned char *options, int opt_len, int decomp) * Allocate the main control structure for this instance. */ maxmaxcode = MAXCODE(bits); - db = kmalloc(sizeof (struct bsd_db), + db = kzalloc(sizeof (struct bsd_db), GFP_KERNEL); if (!db) { return NULL; } - memset (db, 0, sizeof(struct bsd_db)); /* * Allocate space for the dictionary. This may be more than one page in * length. diff --git a/drivers/net/ehea/ehea.h b/drivers/net/ehea/ehea.h index 6628fa622e2c..489c8b260dd8 100644 --- a/drivers/net/ehea/ehea.h +++ b/drivers/net/ehea/ehea.h @@ -39,7 +39,7 @@ #include <asm/io.h> #define DRV_NAME "ehea" -#define DRV_VERSION "EHEA_0070" +#define DRV_VERSION "EHEA_0071" /* eHEA capability flags */ #define DLPAR_PORT_ADD_REM 1 diff --git a/drivers/net/ehea/ehea_main.c b/drivers/net/ehea/ehea_main.c index 1d1571cf322e..4c70a9301c1b 100644 --- a/drivers/net/ehea/ehea_main.c +++ b/drivers/net/ehea/ehea_main.c @@ -466,6 +466,8 @@ static struct ehea_cqe *ehea_proc_rwqes(struct net_device *dev, cqe->vlan_tag); else netif_receive_skb(skb); + + dev->last_rx = jiffies; } else { pr->p_stats.poll_receive_errors++; port_reset = ehea_treat_poll_error(pr, rq, cqe, @@ -1433,7 +1435,8 @@ static int ehea_broadcast_reg_helper(struct ehea_port *port, u32 hcallid) port->logical_port_id, reg_type, port->mac_addr, 0, hcallid); if (hret != H_SUCCESS) { - ehea_error("reg_dereg_bcmc failed (tagged)"); + ehea_error("%sregistering bc address failed (tagged)", + hcallid == H_REG_BCMC ? "" : "de"); ret = -EIO; goto out_herr; } @@ -1444,7 +1447,8 @@ static int ehea_broadcast_reg_helper(struct ehea_port *port, u32 hcallid) port->logical_port_id, reg_type, port->mac_addr, 0, hcallid); if (hret != H_SUCCESS) { - ehea_error("reg_dereg_bcmc failed (vlan)"); + ehea_error("%sregistering bc address failed (vlan)", + hcallid == H_REG_BCMC ? "" : "de"); ret = -EIO; } out_herr: @@ -2170,7 +2174,6 @@ static int ehea_up(struct net_device *dev) { int ret, i; struct ehea_port *port = netdev_priv(dev); - u64 mac_addr = 0; if (port->state == EHEA_PORT_UP) return 0; @@ -2189,18 +2192,10 @@ static int ehea_up(struct net_device *dev) goto out_clean_pr; } - ret = ehea_broadcast_reg_helper(port, H_REG_BCMC); - if (ret) { - ret = -EIO; - ehea_error("out_clean_pr"); - goto out_clean_pr; - } - mac_addr = (*(u64*)dev->dev_addr) >> 16; - ret = ehea_reg_interrupts(dev); if (ret) { - ehea_error("out_dereg_bc"); - goto out_dereg_bc; + ehea_error("reg_interrupts failed. ret:%d", ret); + goto out_clean_pr; } for(i = 0; i < port->num_def_qps + port->num_add_tx_qps; i++) { @@ -2226,9 +2221,6 @@ static int ehea_up(struct net_device *dev) out_free_irqs: ehea_free_interrupts(dev); -out_dereg_bc: - ehea_broadcast_reg_helper(port, H_DEREG_BCMC); - out_clean_pr: ehea_clean_all_portres(port); out: @@ -2273,7 +2265,6 @@ static int ehea_down(struct net_device *dev) &port->port_res[i].d_netdev->state)) msleep(1); - ehea_broadcast_reg_helper(port, H_DEREG_BCMC); port->state = EHEA_PORT_DOWN; ret = ehea_clean_all_portres(port); @@ -2655,12 +2646,18 @@ struct ehea_port *ehea_setup_single_port(struct ehea_adapter *adapter, INIT_WORK(&port->reset_task, ehea_reset_port); + ret = ehea_broadcast_reg_helper(port, H_REG_BCMC); + if (ret) { + ret = -EIO; + goto out_unreg_port; + } + ehea_set_ethtool_ops(dev); ret = register_netdev(dev); if (ret) { ehea_error("register_netdev failed. ret=%d", ret); - goto out_unreg_port; + goto out_dereg_bc; } ret = ehea_get_jumboframe_status(port, &jumbo); @@ -2675,6 +2672,9 @@ struct ehea_port *ehea_setup_single_port(struct ehea_adapter *adapter, return port; +out_dereg_bc: + ehea_broadcast_reg_helper(port, H_DEREG_BCMC); + out_unreg_port: ehea_unregister_port(port); @@ -2694,6 +2694,7 @@ static void ehea_shutdown_single_port(struct ehea_port *port) { unregister_netdev(port->netdev); ehea_unregister_port(port); + ehea_broadcast_reg_helper(port, H_DEREG_BCMC); kfree(port->mc_list); free_netdev(port->netdev); port->adapter->active_ports--; diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c index 136827f8dc2e..6d1d50a19783 100644 --- a/drivers/net/forcedeth.c +++ b/drivers/net/forcedeth.c @@ -5137,12 +5137,10 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i goto out_unmap; np->tx_ring.ex = &np->rx_ring.ex[np->rx_ring_size]; } - np->rx_skb = kmalloc(sizeof(struct nv_skb_map) * np->rx_ring_size, GFP_KERNEL); - np->tx_skb = kmalloc(sizeof(struct nv_skb_map) * np->tx_ring_size, GFP_KERNEL); + np->rx_skb = kcalloc(np->rx_ring_size, sizeof(struct nv_skb_map), GFP_KERNEL); + np->tx_skb = kcalloc(np->tx_ring_size, sizeof(struct nv_skb_map), GFP_KERNEL); if (!np->rx_skb || !np->tx_skb) goto out_freering; - memset(np->rx_skb, 0, sizeof(struct nv_skb_map) * np->rx_ring_size); - memset(np->tx_skb, 0, sizeof(struct nv_skb_map) * np->tx_ring_size); dev->open = nv_open; dev->stop = nv_close; diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c index d7a1a58de766..f92690555dd9 100644 --- a/drivers/net/gianfar.c +++ b/drivers/net/gianfar.c @@ -420,8 +420,18 @@ static phy_interface_t gfar_get_interface(struct net_device *dev) if (ecntrl & ECNTRL_REDUCED_MODE) { if (ecntrl & ECNTRL_REDUCED_MII_MODE) return PHY_INTERFACE_MODE_RMII; - else + else { + phy_interface_t interface = priv->einfo->interface; + + /* + * This isn't autodetected right now, so it must + * be set by the device tree or platform code. + */ + if (interface == PHY_INTERFACE_MODE_RGMII_ID) + return PHY_INTERFACE_MODE_RGMII_ID; + return PHY_INTERFACE_MODE_RGMII; + } } if (priv->einfo->device_flags & FSL_GIANFAR_DEV_HAS_GIGABIT) diff --git a/drivers/net/hamradio/dmascc.c b/drivers/net/hamradio/dmascc.c index 3be8c5047599..205f09672492 100644 --- a/drivers/net/hamradio/dmascc.c +++ b/drivers/net/hamradio/dmascc.c @@ -453,8 +453,8 @@ static int __init setup_adapter(int card_base, int type, int n) int scc_base = card_base + hw[type].scc_offset; char *chipnames[] = CHIPNAMES; - /* Allocate memory */ - info = kmalloc(sizeof(struct scc_info), GFP_KERNEL | GFP_DMA); + /* Initialize what is necessary for write_scc and write_scc_data */ + info = kzalloc(sizeof(struct scc_info), GFP_KERNEL | GFP_DMA); if (!info) { printk(KERN_ERR "dmascc: " "could not allocate memory for %s at %#3x\n", @@ -462,8 +462,6 @@ static int __init setup_adapter(int card_base, int type, int n) goto out; } - /* Initialize what is necessary for write_scc and write_scc_data */ - memset(info, 0, sizeof(struct scc_info)); info->dev[0] = alloc_netdev(0, "", dev_setup); if (!info->dev[0]) { diff --git a/drivers/net/irda/irport.c b/drivers/net/irda/irport.c index 3078c419cb02..20732458f5ac 100644 --- a/drivers/net/irda/irport.c +++ b/drivers/net/irda/irport.c @@ -164,14 +164,13 @@ irport_open(int i, unsigned int iobase, unsigned int irq) /* Allocate memory if needed */ if (self->tx_buff.truesize > 0) { - self->tx_buff.head = kmalloc(self->tx_buff.truesize, + self->tx_buff.head = kzalloc(self->tx_buff.truesize, GFP_KERNEL); if (self->tx_buff.head == NULL) { IRDA_ERROR("%s(), can't allocate memory for " "transmit buffer!\n", __FUNCTION__); goto err_out4; } - memset(self->tx_buff.head, 0, self->tx_buff.truesize); } self->tx_buff.data = self->tx_buff.head; diff --git a/drivers/net/irda/irtty-sir.c b/drivers/net/irda/irtty-sir.c index ad1857364d51..6f5f697ec9f8 100644 --- a/drivers/net/irda/irtty-sir.c +++ b/drivers/net/irda/irtty-sir.c @@ -505,10 +505,9 @@ static int irtty_open(struct tty_struct *tty) } /* allocate private device info block */ - priv = kmalloc(sizeof(*priv), GFP_KERNEL); + priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) goto out_put; - memset(priv, 0, sizeof(*priv)); priv->magic = IRTTY_MAGIC; priv->tty = tty; diff --git a/drivers/net/iseries_veth.c b/drivers/net/iseries_veth.c index 347d50cd77d4..0433c41f9029 100644 --- a/drivers/net/iseries_veth.c +++ b/drivers/net/iseries_veth.c @@ -822,10 +822,9 @@ static int veth_init_connection(u8 rlp) || ! HvLpConfig_doLpsCommunicateOnVirtualLan(this_lp, rlp) ) return 0; - cnx = kmalloc(sizeof(*cnx), GFP_KERNEL); + cnx = kzalloc(sizeof(*cnx), GFP_KERNEL); if (! cnx) return -ENOMEM; - memset(cnx, 0, sizeof(*cnx)); cnx->remote_lp = rlp; spin_lock_init(&cnx->lock); @@ -852,14 +851,13 @@ static int veth_init_connection(u8 rlp) if (rc != 0) return rc; - msgs = kmalloc(VETH_NUMBUFFERS * sizeof(struct veth_msg), GFP_KERNEL); + msgs = kcalloc(VETH_NUMBUFFERS, sizeof(struct veth_msg), GFP_KERNEL); if (! msgs) { veth_error("Can't allocate buffers for LPAR %d.\n", rlp); return -ENOMEM; } cnx->msgs = msgs; - memset(msgs, 0, VETH_NUMBUFFERS * sizeof(struct veth_msg)); for (i = 0; i < VETH_NUMBUFFERS; i++) { msgs[i].token = i; diff --git a/drivers/net/lance.c b/drivers/net/lance.c index a2f37e52b928..a4e5fab12628 100644 --- a/drivers/net/lance.c +++ b/drivers/net/lance.c @@ -533,11 +533,10 @@ static int __init lance_probe1(struct net_device *dev, int ioaddr, int irq, int dev->base_addr = ioaddr; /* Make certain the data structures used by the LANCE are aligned and DMAble. */ - lp = kmalloc(sizeof(*lp), GFP_DMA | GFP_KERNEL); + lp = kzalloc(sizeof(*lp), GFP_DMA | GFP_KERNEL); if(lp==NULL) return -ENODEV; if (lance_debug > 6) printk(" (#0x%05lx)", (unsigned long)lp); - memset(lp, 0, sizeof(*lp)); dev->priv = lp; lp->name = chipname; lp->rx_buffs = (unsigned long)kmalloc(PKT_BUF_SZ*RX_RING_SIZE, diff --git a/drivers/net/lguest_net.c b/drivers/net/lguest_net.c new file mode 100644 index 000000000000..112778652f7d --- /dev/null +++ b/drivers/net/lguest_net.c @@ -0,0 +1,354 @@ +/* A simple network driver for lguest. + * + * Copyright 2006 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +//#define DEBUG +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/module.h> +#include <linux/mm_types.h> +#include <linux/io.h> +#include <linux/lguest_bus.h> + +#define SHARED_SIZE PAGE_SIZE +#define MAX_LANS 4 +#define NUM_SKBS 8 + +struct lguestnet_info +{ + /* The shared page(s). */ + struct lguest_net *peer; + unsigned long peer_phys; + unsigned long mapsize; + + /* The lguest_device I come from */ + struct lguest_device *lgdev; + + /* My peerid. */ + unsigned int me; + + /* Receive queue. */ + struct sk_buff *skb[NUM_SKBS]; + struct lguest_dma dma[NUM_SKBS]; +}; + +/* How many bytes left in this page. */ +static unsigned int rest_of_page(void *data) +{ + return PAGE_SIZE - ((unsigned long)data % PAGE_SIZE); +} + +/* Simple convention: offset 4 * peernum. */ +static unsigned long peer_key(struct lguestnet_info *info, unsigned peernum) +{ + return info->peer_phys + 4 * peernum; +} + +static void skb_to_dma(const struct sk_buff *skb, unsigned int headlen, + struct lguest_dma *dma) +{ + unsigned int i, seg; + + for (i = seg = 0; i < headlen; seg++, i += rest_of_page(skb->data+i)) { + dma->addr[seg] = virt_to_phys(skb->data + i); + dma->len[seg] = min((unsigned)(headlen - i), + rest_of_page(skb->data + i)); + } + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++, seg++) { + const skb_frag_t *f = &skb_shinfo(skb)->frags[i]; + /* Should not happen with MTU less than 64k - 2 * PAGE_SIZE. */ + if (seg == LGUEST_MAX_DMA_SECTIONS) { + printk("Woah dude! Megapacket!\n"); + break; + } + dma->addr[seg] = page_to_phys(f->page) + f->page_offset; + dma->len[seg] = f->size; + } + if (seg < LGUEST_MAX_DMA_SECTIONS) + dma->len[seg] = 0; +} + +/* We overload multicast bit to show promiscuous mode. */ +#define PROMISC_BIT 0x01 + +static void lguestnet_set_multicast(struct net_device *dev) +{ + struct lguestnet_info *info = netdev_priv(dev); + + if ((dev->flags & (IFF_PROMISC|IFF_ALLMULTI)) || dev->mc_count) + info->peer[info->me].mac[0] |= PROMISC_BIT; + else + info->peer[info->me].mac[0] &= ~PROMISC_BIT; +} + +static int promisc(struct lguestnet_info *info, unsigned int peer) +{ + return info->peer[peer].mac[0] & PROMISC_BIT; +} + +static int mac_eq(const unsigned char mac[ETH_ALEN], + struct lguestnet_info *info, unsigned int peer) +{ + /* Ignore multicast bit, which peer turns on to mean promisc. */ + if ((info->peer[peer].mac[0] & (~PROMISC_BIT)) != mac[0]) + return 0; + return memcmp(mac+1, info->peer[peer].mac+1, ETH_ALEN-1) == 0; +} + +static void transfer_packet(struct net_device *dev, + struct sk_buff *skb, + unsigned int peernum) +{ + struct lguestnet_info *info = netdev_priv(dev); + struct lguest_dma dma; + + skb_to_dma(skb, skb_headlen(skb), &dma); + pr_debug("xfer length %04x (%u)\n", htons(skb->len), skb->len); + + lguest_send_dma(peer_key(info, peernum), &dma); + if (dma.used_len != skb->len) { + dev->stats.tx_carrier_errors++; + pr_debug("Bad xfer to peer %i: %i of %i (dma %p/%i)\n", + peernum, dma.used_len, skb->len, + (void *)dma.addr[0], dma.len[0]); + } else { + dev->stats.tx_bytes += skb->len; + dev->stats.tx_packets++; + } +} + +static int unused_peer(const struct lguest_net peer[], unsigned int num) +{ + return peer[num].mac[0] == 0; +} + +static int lguestnet_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + unsigned int i; + int broadcast; + struct lguestnet_info *info = netdev_priv(dev); + const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest; + + pr_debug("%s: xmit %02x:%02x:%02x:%02x:%02x:%02x\n", + dev->name, dest[0],dest[1],dest[2],dest[3],dest[4],dest[5]); + + broadcast = is_multicast_ether_addr(dest); + for (i = 0; i < info->mapsize/sizeof(struct lguest_net); i++) { + if (i == info->me || unused_peer(info->peer, i)) + continue; + + if (!broadcast && !promisc(info, i) && !mac_eq(dest, info, i)) + continue; + + pr_debug("lguestnet %s: sending from %i to %i\n", + dev->name, info->me, i); + transfer_packet(dev, skb, i); + } + dev_kfree_skb(skb); + return 0; +} + +/* Find a new skb to put in this slot in shared mem. */ +static int fill_slot(struct net_device *dev, unsigned int slot) +{ + struct lguestnet_info *info = netdev_priv(dev); + /* Try to create and register a new one. */ + info->skb[slot] = netdev_alloc_skb(dev, ETH_HLEN + ETH_DATA_LEN); + if (!info->skb[slot]) { + printk("%s: could not fill slot %i\n", dev->name, slot); + return -ENOMEM; + } + + skb_to_dma(info->skb[slot], ETH_HLEN + ETH_DATA_LEN, &info->dma[slot]); + wmb(); + /* Now we tell hypervisor it can use the slot. */ + info->dma[slot].used_len = 0; + return 0; +} + +static irqreturn_t lguestnet_rcv(int irq, void *dev_id) +{ + struct net_device *dev = dev_id; + struct lguestnet_info *info = netdev_priv(dev); + unsigned int i, done = 0; + + for (i = 0; i < ARRAY_SIZE(info->dma); i++) { + unsigned int length; + struct sk_buff *skb; + + length = info->dma[i].used_len; + if (length == 0) + continue; + + done++; + skb = info->skb[i]; + fill_slot(dev, i); + + if (length < ETH_HLEN || length > ETH_HLEN + ETH_DATA_LEN) { + pr_debug(KERN_WARNING "%s: unbelievable skb len: %i\n", + dev->name, length); + dev_kfree_skb(skb); + continue; + } + + skb_put(skb, length); + skb->protocol = eth_type_trans(skb, dev); + /* This is a reliable transport. */ + if (dev->features & NETIF_F_NO_CSUM) + skb->ip_summed = CHECKSUM_UNNECESSARY; + pr_debug("Receiving skb proto 0x%04x len %i type %i\n", + ntohs(skb->protocol), skb->len, skb->pkt_type); + + dev->stats.rx_bytes += skb->len; + dev->stats.rx_packets++; + netif_rx(skb); + } + return done ? IRQ_HANDLED : IRQ_NONE; +} + +static int lguestnet_open(struct net_device *dev) +{ + int i; + struct lguestnet_info *info = netdev_priv(dev); + + /* Set up our MAC address */ + memcpy(info->peer[info->me].mac, dev->dev_addr, ETH_ALEN); + + /* Turn on promisc mode if needed */ + lguestnet_set_multicast(dev); + + for (i = 0; i < ARRAY_SIZE(info->dma); i++) { + if (fill_slot(dev, i) != 0) + goto cleanup; + } + if (lguest_bind_dma(peer_key(info,info->me), info->dma, + NUM_SKBS, lgdev_irq(info->lgdev)) != 0) + goto cleanup; + return 0; + +cleanup: + while (--i >= 0) + dev_kfree_skb(info->skb[i]); + return -ENOMEM; +} + +static int lguestnet_close(struct net_device *dev) +{ + unsigned int i; + struct lguestnet_info *info = netdev_priv(dev); + + /* Clear all trace: others might deliver packets, we'll ignore it. */ + memset(&info->peer[info->me], 0, sizeof(info->peer[info->me])); + + /* Deregister sg lists. */ + lguest_unbind_dma(peer_key(info, info->me), info->dma); + for (i = 0; i < ARRAY_SIZE(info->dma); i++) + dev_kfree_skb(info->skb[i]); + return 0; +} + +static int lguestnet_probe(struct lguest_device *lgdev) +{ + int err, irqf = IRQF_SHARED; + struct net_device *dev; + struct lguestnet_info *info; + struct lguest_device_desc *desc = &lguest_devices[lgdev->index]; + + pr_debug("lguest_net: probing for device %i\n", lgdev->index); + + dev = alloc_etherdev(sizeof(struct lguestnet_info)); + if (!dev) + return -ENOMEM; + + SET_MODULE_OWNER(dev); + + /* Ethernet defaults with some changes */ + ether_setup(dev); + dev->set_mac_address = NULL; + + dev->dev_addr[0] = 0x02; /* set local assignment bit (IEEE802) */ + dev->dev_addr[1] = 0x00; + memcpy(&dev->dev_addr[2], &lguest_data.guestid, 2); + dev->dev_addr[4] = 0x00; + dev->dev_addr[5] = 0x00; + + dev->open = lguestnet_open; + dev->stop = lguestnet_close; + dev->hard_start_xmit = lguestnet_start_xmit; + + /* Turning on/off promisc will call dev->set_multicast_list. + * We don't actually support multicast yet */ + dev->set_multicast_list = lguestnet_set_multicast; + SET_NETDEV_DEV(dev, &lgdev->dev); + if (desc->features & LGUEST_NET_F_NOCSUM) + dev->features = NETIF_F_SG|NETIF_F_NO_CSUM; + + info = netdev_priv(dev); + info->mapsize = PAGE_SIZE * desc->num_pages; + info->peer_phys = ((unsigned long)desc->pfn << PAGE_SHIFT); + info->lgdev = lgdev; + info->peer = lguest_map(info->peer_phys, desc->num_pages); + if (!info->peer) { + err = -ENOMEM; + goto free; + } + + /* This stores our peerid (upper bits reserved for future). */ + info->me = (desc->features & (info->mapsize-1)); + + err = register_netdev(dev); + if (err) { + pr_debug("lguestnet: registering device failed\n"); + goto unmap; + } + + if (lguest_devices[lgdev->index].features & LGUEST_DEVICE_F_RANDOMNESS) + irqf |= IRQF_SAMPLE_RANDOM; + if (request_irq(lgdev_irq(lgdev), lguestnet_rcv, irqf, "lguestnet", + dev) != 0) { + pr_debug("lguestnet: cannot get irq %i\n", lgdev_irq(lgdev)); + goto unregister; + } + + pr_debug("lguestnet: registered device %s\n", dev->name); + lgdev->private = dev; + return 0; + +unregister: + unregister_netdev(dev); +unmap: + lguest_unmap(info->peer); +free: + free_netdev(dev); + return err; +} + +static struct lguest_driver lguestnet_drv = { + .name = "lguestnet", + .owner = THIS_MODULE, + .device_type = LGUEST_DEVICE_T_NET, + .probe = lguestnet_probe, +}; + +static __init int lguestnet_init(void) +{ + return register_lguest_driver(&lguestnet_drv); +} +module_init(lguestnet_init); + +MODULE_DESCRIPTION("Lguest network driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/mlx4/catas.c b/drivers/net/mlx4/catas.c index 1bb088aeaf71..6b32ec94b3a8 100644 --- a/drivers/net/mlx4/catas.c +++ b/drivers/net/mlx4/catas.c @@ -30,41 +30,133 @@ * SOFTWARE. */ +#include <linux/workqueue.h> + #include "mlx4.h" -void mlx4_handle_catas_err(struct mlx4_dev *dev) +enum { + MLX4_CATAS_POLL_INTERVAL = 5 * HZ, +}; + +static DEFINE_SPINLOCK(catas_lock); + +static LIST_HEAD(catas_list); +static struct workqueue_struct *catas_wq; +static struct work_struct catas_work; + +static int internal_err_reset = 1; +module_param(internal_err_reset, int, 0644); +MODULE_PARM_DESC(internal_err_reset, + "Reset device on internal errors if non-zero (default 1)"); + +static void dump_err_buf(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); int i; - mlx4_err(dev, "Catastrophic error detected:\n"); + mlx4_err(dev, "Internal error detected:\n"); for (i = 0; i < priv->fw.catas_size; ++i) mlx4_err(dev, " buf[%02x]: %08x\n", i, swab32(readl(priv->catas_err.map + i))); +} - mlx4_dispatch_event(dev, MLX4_EVENT_TYPE_LOCAL_CATAS_ERROR, 0, 0); +static void poll_catas(unsigned long dev_ptr) +{ + struct mlx4_dev *dev = (struct mlx4_dev *) dev_ptr; + struct mlx4_priv *priv = mlx4_priv(dev); + + if (readl(priv->catas_err.map)) { + dump_err_buf(dev); + + mlx4_dispatch_event(dev, MLX4_EVENT_TYPE_LOCAL_CATAS_ERROR, 0, 0); + + if (internal_err_reset) { + spin_lock(&catas_lock); + list_add(&priv->catas_err.list, &catas_list); + spin_unlock(&catas_lock); + + queue_work(catas_wq, &catas_work); + } + } else + mod_timer(&priv->catas_err.timer, + round_jiffies(jiffies + MLX4_CATAS_POLL_INTERVAL)); } -void mlx4_map_catas_buf(struct mlx4_dev *dev) +static void catas_reset(struct work_struct *work) +{ + struct mlx4_priv *priv, *tmppriv; + struct mlx4_dev *dev; + + LIST_HEAD(tlist); + int ret; + + spin_lock_irq(&catas_lock); + list_splice_init(&catas_list, &tlist); + spin_unlock_irq(&catas_lock); + + list_for_each_entry_safe(priv, tmppriv, &tlist, catas_err.list) { + ret = mlx4_restart_one(priv->dev.pdev); + dev = &priv->dev; + if (ret) + mlx4_err(dev, "Reset failed (%d)\n", ret); + else + mlx4_dbg(dev, "Reset succeeded\n"); + } +} + +void mlx4_start_catas_poll(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); unsigned long addr; + INIT_LIST_HEAD(&priv->catas_err.list); + init_timer(&priv->catas_err.timer); + priv->catas_err.map = NULL; + addr = pci_resource_start(dev->pdev, priv->fw.catas_bar) + priv->fw.catas_offset; priv->catas_err.map = ioremap(addr, priv->fw.catas_size * 4); - if (!priv->catas_err.map) - mlx4_warn(dev, "Failed to map catastrophic error buffer at 0x%lx\n", + if (!priv->catas_err.map) { + mlx4_warn(dev, "Failed to map internal error buffer at 0x%lx\n", addr); + return; + } + priv->catas_err.timer.data = (unsigned long) dev; + priv->catas_err.timer.function = poll_catas; + priv->catas_err.timer.expires = + round_jiffies(jiffies + MLX4_CATAS_POLL_INTERVAL); + add_timer(&priv->catas_err.timer); } -void mlx4_unmap_catas_buf(struct mlx4_dev *dev) +void mlx4_stop_catas_poll(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); + del_timer_sync(&priv->catas_err.timer); + if (priv->catas_err.map) iounmap(priv->catas_err.map); + + spin_lock_irq(&catas_lock); + list_del(&priv->catas_err.list); + spin_unlock_irq(&catas_lock); +} + +int __init mlx4_catas_init(void) +{ + INIT_WORK(&catas_work, catas_reset); + + catas_wq = create_singlethread_workqueue("mlx4_err"); + if (!catas_wq) + return -ENOMEM; + + return 0; +} + +void mlx4_catas_cleanup(void) +{ + destroy_workqueue(catas_wq); } diff --git a/drivers/net/mlx4/eq.c b/drivers/net/mlx4/eq.c index 27a82cecd693..2095c843fa15 100644 --- a/drivers/net/mlx4/eq.c +++ b/drivers/net/mlx4/eq.c @@ -89,14 +89,12 @@ struct mlx4_eq_context { (1ull << MLX4_EVENT_TYPE_PATH_MIG_FAILED) | \ (1ull << MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR) | \ (1ull << MLX4_EVENT_TYPE_WQ_ACCESS_ERROR) | \ - (1ull << MLX4_EVENT_TYPE_LOCAL_CATAS_ERROR) | \ (1ull << MLX4_EVENT_TYPE_PORT_CHANGE) | \ (1ull << MLX4_EVENT_TYPE_ECC_DETECT) | \ (1ull << MLX4_EVENT_TYPE_SRQ_CATAS_ERROR) | \ (1ull << MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE) | \ (1ull << MLX4_EVENT_TYPE_SRQ_LIMIT) | \ (1ull << MLX4_EVENT_TYPE_CMD)) -#define MLX4_CATAS_EVENT_MASK (1ull << MLX4_EVENT_TYPE_LOCAL_CATAS_ERROR) struct mlx4_eqe { u8 reserved1; @@ -264,7 +262,7 @@ static irqreturn_t mlx4_interrupt(int irq, void *dev_ptr) writel(priv->eq_table.clr_mask, priv->eq_table.clr_int); - for (i = 0; i < MLX4_EQ_CATAS; ++i) + for (i = 0; i < MLX4_NUM_EQ; ++i) work |= mlx4_eq_int(dev, &priv->eq_table.eq[i]); return IRQ_RETVAL(work); @@ -281,14 +279,6 @@ static irqreturn_t mlx4_msi_x_interrupt(int irq, void *eq_ptr) return IRQ_HANDLED; } -static irqreturn_t mlx4_catas_interrupt(int irq, void *dev_ptr) -{ - mlx4_handle_catas_err(dev_ptr); - - /* MSI-X vectors always belong to us */ - return IRQ_HANDLED; -} - static int mlx4_MAP_EQ(struct mlx4_dev *dev, u64 event_mask, int unmap, int eq_num) { @@ -490,11 +480,9 @@ static void mlx4_free_irqs(struct mlx4_dev *dev) if (eq_table->have_irq) free_irq(dev->pdev->irq, dev); - for (i = 0; i < MLX4_EQ_CATAS; ++i) + for (i = 0; i < MLX4_NUM_EQ; ++i) if (eq_table->eq[i].have_irq) free_irq(eq_table->eq[i].irq, eq_table->eq + i); - if (eq_table->eq[MLX4_EQ_CATAS].have_irq) - free_irq(eq_table->eq[MLX4_EQ_CATAS].irq, dev); } static int __devinit mlx4_map_clr_int(struct mlx4_dev *dev) @@ -598,32 +586,19 @@ int __devinit mlx4_init_eq_table(struct mlx4_dev *dev) if (dev->flags & MLX4_FLAG_MSI_X) { static const char *eq_name[] = { [MLX4_EQ_COMP] = DRV_NAME " (comp)", - [MLX4_EQ_ASYNC] = DRV_NAME " (async)", - [MLX4_EQ_CATAS] = DRV_NAME " (catas)" + [MLX4_EQ_ASYNC] = DRV_NAME " (async)" }; - err = mlx4_create_eq(dev, 1, MLX4_EQ_CATAS, - &priv->eq_table.eq[MLX4_EQ_CATAS]); - if (err) - goto err_out_async; - - for (i = 0; i < MLX4_EQ_CATAS; ++i) { + for (i = 0; i < MLX4_NUM_EQ; ++i) { err = request_irq(priv->eq_table.eq[i].irq, mlx4_msi_x_interrupt, 0, eq_name[i], priv->eq_table.eq + i); if (err) - goto err_out_catas; + goto err_out_async; priv->eq_table.eq[i].have_irq = 1; } - err = request_irq(priv->eq_table.eq[MLX4_EQ_CATAS].irq, - mlx4_catas_interrupt, 0, - eq_name[MLX4_EQ_CATAS], dev); - if (err) - goto err_out_catas; - - priv->eq_table.eq[MLX4_EQ_CATAS].have_irq = 1; } else { err = request_irq(dev->pdev->irq, mlx4_interrupt, IRQF_SHARED, DRV_NAME, dev); @@ -639,22 +614,11 @@ int __devinit mlx4_init_eq_table(struct mlx4_dev *dev) mlx4_warn(dev, "MAP_EQ for async EQ %d failed (%d)\n", priv->eq_table.eq[MLX4_EQ_ASYNC].eqn, err); - for (i = 0; i < MLX4_EQ_CATAS; ++i) + for (i = 0; i < MLX4_NUM_EQ; ++i) eq_set_ci(&priv->eq_table.eq[i], 1); - if (dev->flags & MLX4_FLAG_MSI_X) { - err = mlx4_MAP_EQ(dev, MLX4_CATAS_EVENT_MASK, 0, - priv->eq_table.eq[MLX4_EQ_CATAS].eqn); - if (err) - mlx4_warn(dev, "MAP_EQ for catas EQ %d failed (%d)\n", - priv->eq_table.eq[MLX4_EQ_CATAS].eqn, err); - } - return 0; -err_out_catas: - mlx4_free_eq(dev, &priv->eq_table.eq[MLX4_EQ_CATAS]); - err_out_async: mlx4_free_eq(dev, &priv->eq_table.eq[MLX4_EQ_ASYNC]); @@ -675,19 +639,13 @@ void mlx4_cleanup_eq_table(struct mlx4_dev *dev) struct mlx4_priv *priv = mlx4_priv(dev); int i; - if (dev->flags & MLX4_FLAG_MSI_X) - mlx4_MAP_EQ(dev, MLX4_CATAS_EVENT_MASK, 1, - priv->eq_table.eq[MLX4_EQ_CATAS].eqn); - mlx4_MAP_EQ(dev, MLX4_ASYNC_EVENT_MASK, 1, priv->eq_table.eq[MLX4_EQ_ASYNC].eqn); mlx4_free_irqs(dev); - for (i = 0; i < MLX4_EQ_CATAS; ++i) + for (i = 0; i < MLX4_NUM_EQ; ++i) mlx4_free_eq(dev, &priv->eq_table.eq[i]); - if (dev->flags & MLX4_FLAG_MSI_X) - mlx4_free_eq(dev, &priv->eq_table.eq[MLX4_EQ_CATAS]); mlx4_unmap_clr_int(dev); diff --git a/drivers/net/mlx4/intf.c b/drivers/net/mlx4/intf.c index 9ae951bf6aa6..be5d9e90ccf2 100644 --- a/drivers/net/mlx4/intf.c +++ b/drivers/net/mlx4/intf.c @@ -142,6 +142,7 @@ int mlx4_register_device(struct mlx4_dev *dev) mlx4_add_device(intf, priv); mutex_unlock(&intf_mutex); + mlx4_start_catas_poll(dev); return 0; } @@ -151,6 +152,7 @@ void mlx4_unregister_device(struct mlx4_dev *dev) struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_interface *intf; + mlx4_stop_catas_poll(dev); mutex_lock(&intf_mutex); list_for_each_entry(intf, &intf_list, list) diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c index a4f2e0475a71..4dc9dc19b716 100644 --- a/drivers/net/mlx4/main.c +++ b/drivers/net/mlx4/main.c @@ -78,7 +78,7 @@ static const char mlx4_version[] __devinitdata = static struct mlx4_profile default_profile = { .num_qp = 1 << 16, .num_srq = 1 << 16, - .rdmarc_per_qp = 4, + .rdmarc_per_qp = 1 << 4, .num_cq = 1 << 16, .num_mcg = 1 << 13, .num_mpt = 1 << 17, @@ -583,13 +583,11 @@ static int __devinit mlx4_setup_hca(struct mlx4_dev *dev) goto err_pd_table_free; } - mlx4_map_catas_buf(dev); - err = mlx4_init_eq_table(dev); if (err) { mlx4_err(dev, "Failed to initialize " "event queue table, aborting.\n"); - goto err_catas_buf; + goto err_mr_table_free; } err = mlx4_cmd_use_events(dev); @@ -659,8 +657,7 @@ err_cmd_poll: err_eq_table_free: mlx4_cleanup_eq_table(dev); -err_catas_buf: - mlx4_unmap_catas_buf(dev); +err_mr_table_free: mlx4_cleanup_mr_table(dev); err_pd_table_free: @@ -836,9 +833,6 @@ err_cleanup: mlx4_cleanup_cq_table(dev); mlx4_cmd_use_polling(dev); mlx4_cleanup_eq_table(dev); - - mlx4_unmap_catas_buf(dev); - mlx4_cleanup_mr_table(dev); mlx4_cleanup_pd_table(dev); mlx4_cleanup_uar_table(dev); @@ -885,9 +879,6 @@ static void __devexit mlx4_remove_one(struct pci_dev *pdev) mlx4_cleanup_cq_table(dev); mlx4_cmd_use_polling(dev); mlx4_cleanup_eq_table(dev); - - mlx4_unmap_catas_buf(dev); - mlx4_cleanup_mr_table(dev); mlx4_cleanup_pd_table(dev); @@ -908,6 +899,12 @@ static void __devexit mlx4_remove_one(struct pci_dev *pdev) } } +int mlx4_restart_one(struct pci_dev *pdev) +{ + mlx4_remove_one(pdev); + return mlx4_init_one(pdev, NULL); +} + static struct pci_device_id mlx4_pci_table[] = { { PCI_VDEVICE(MELLANOX, 0x6340) }, /* MT25408 "Hermon" SDR */ { PCI_VDEVICE(MELLANOX, 0x634a) }, /* MT25408 "Hermon" DDR */ @@ -930,6 +927,10 @@ static int __init mlx4_init(void) { int ret; + ret = mlx4_catas_init(); + if (ret) + return ret; + ret = pci_register_driver(&mlx4_driver); return ret < 0 ? ret : 0; } @@ -937,6 +938,7 @@ static int __init mlx4_init(void) static void __exit mlx4_cleanup(void) { pci_unregister_driver(&mlx4_driver); + mlx4_catas_cleanup(); } module_init(mlx4_init); diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h index d9c91a71fc87..be304a7c2c91 100644 --- a/drivers/net/mlx4/mlx4.h +++ b/drivers/net/mlx4/mlx4.h @@ -39,6 +39,7 @@ #include <linux/mutex.h> #include <linux/radix-tree.h> +#include <linux/timer.h> #include <linux/mlx4/device.h> #include <linux/mlx4/doorbell.h> @@ -67,7 +68,6 @@ enum { enum { MLX4_EQ_ASYNC, MLX4_EQ_COMP, - MLX4_EQ_CATAS, MLX4_NUM_EQ }; @@ -248,7 +248,8 @@ struct mlx4_mcg_table { struct mlx4_catas_err { u32 __iomem *map; - int size; + struct timer_list timer; + struct list_head list; }; struct mlx4_priv { @@ -311,9 +312,11 @@ void mlx4_cleanup_qp_table(struct mlx4_dev *dev); void mlx4_cleanup_srq_table(struct mlx4_dev *dev); void mlx4_cleanup_mcg_table(struct mlx4_dev *dev); -void mlx4_map_catas_buf(struct mlx4_dev *dev); -void mlx4_unmap_catas_buf(struct mlx4_dev *dev); - +void mlx4_start_catas_poll(struct mlx4_dev *dev); +void mlx4_stop_catas_poll(struct mlx4_dev *dev); +int mlx4_catas_init(void); +void mlx4_catas_cleanup(void); +int mlx4_restart_one(struct pci_dev *pdev); int mlx4_register_device(struct mlx4_dev *dev); void mlx4_unregister_device(struct mlx4_dev *dev); void mlx4_dispatch_event(struct mlx4_dev *dev, enum mlx4_event type, diff --git a/drivers/net/ni5010.c b/drivers/net/ni5010.c index 3d5b4232f65f..22a3b3dc7d89 100644 --- a/drivers/net/ni5010.c +++ b/drivers/net/ni5010.c @@ -670,14 +670,10 @@ static void ni5010_set_multicast_list(struct net_device *dev) PRINTK2((KERN_DEBUG "%s: entering set_multicast_list\n", dev->name)); - if (dev->flags&IFF_PROMISC || dev->flags&IFF_ALLMULTI) { + if (dev->flags&IFF_PROMISC || dev->flags&IFF_ALLMULTI || dev->mc_list) { dev->flags |= IFF_PROMISC; outb(RMD_PROMISC, EDLC_RMODE); /* Enable promiscuous mode */ PRINTK((KERN_DEBUG "%s: Entering promiscuous mode\n", dev->name)); - } else if (dev->mc_list) { - /* Sorry, multicast not supported */ - PRINTK((KERN_DEBUG "%s: No multicast, entering broadcast mode\n", dev->name)); - outb(RMD_BROADCAST, EDLC_RMODE); } else { PRINTK((KERN_DEBUG "%s: Entering broadcast mode\n", dev->name)); outb(RMD_BROADCAST, EDLC_RMODE); /* Disable promiscuous mode, use normal mode */ diff --git a/drivers/net/ns83820.c b/drivers/net/ns83820.c index 104aab3c957f..ea80e6cb3dec 100644 --- a/drivers/net/ns83820.c +++ b/drivers/net/ns83820.c @@ -1582,7 +1582,7 @@ static void ns83820_set_multicast(struct net_device *ndev) else and_mask &= ~(RFCR_AAU | RFCR_AAM); - if (ndev->flags & IFF_ALLMULTI) + if (ndev->flags & IFF_ALLMULTI || ndev->mc_count) or_mask |= RFCR_AAM; else and_mask &= ~RFCR_AAM; diff --git a/drivers/net/pcmcia/com20020_cs.c b/drivers/net/pcmcia/com20020_cs.c index 0d1c7a41c9c6..ea9414c4d900 100644 --- a/drivers/net/pcmcia/com20020_cs.c +++ b/drivers/net/pcmcia/com20020_cs.c @@ -147,7 +147,7 @@ static int com20020_probe(struct pcmcia_device *p_dev) DEBUG(0, "com20020_attach()\n"); /* Create new network device */ - info = kmalloc(sizeof(struct com20020_dev_t), GFP_KERNEL); + info = kzalloc(sizeof(struct com20020_dev_t), GFP_KERNEL); if (!info) goto fail_alloc_info; @@ -155,7 +155,6 @@ static int com20020_probe(struct pcmcia_device *p_dev) if (!dev) goto fail_alloc_dev; - memset(info, 0, sizeof(struct com20020_dev_t)); lp = dev->priv; lp->timeout = timeout; lp->backplane = backplane; diff --git a/drivers/net/pcmcia/ibmtr_cs.c b/drivers/net/pcmcia/ibmtr_cs.c index 4ecb8ca5a992..4eafa4f42cff 100644 --- a/drivers/net/pcmcia/ibmtr_cs.c +++ b/drivers/net/pcmcia/ibmtr_cs.c @@ -146,9 +146,8 @@ static int __devinit ibmtr_attach(struct pcmcia_device *link) DEBUG(0, "ibmtr_attach()\n"); /* Create new token-ring device */ - info = kmalloc(sizeof(*info), GFP_KERNEL); + info = kzalloc(sizeof(*info), GFP_KERNEL); if (!info) return -ENOMEM; - memset(info,0,sizeof(*info)); dev = alloc_trdev(sizeof(struct tok_info)); if (!dev) { kfree(info); diff --git a/drivers/net/phy/vitesse.c b/drivers/net/phy/vitesse.c index 596222b260d6..6a5385647911 100644 --- a/drivers/net/phy/vitesse.c +++ b/drivers/net/phy/vitesse.c @@ -21,6 +21,10 @@ /* Vitesse Extended Control Register 1 */ #define MII_VSC8244_EXT_CON1 0x17 #define MII_VSC8244_EXTCON1_INIT 0x0000 +#define MII_VSC8244_EXTCON1_TX_SKEW_MASK 0x0c00 +#define MII_VSC8244_EXTCON1_RX_SKEW_MASK 0x0300 +#define MII_VSC8244_EXTCON1_TX_SKEW 0x0800 +#define MII_VSC8244_EXTCON1_RX_SKEW 0x0200 /* Vitesse Interrupt Mask Register */ #define MII_VSC8244_IMASK 0x19 @@ -39,7 +43,7 @@ /* Vitesse Auxiliary Control/Status Register */ #define MII_VSC8244_AUX_CONSTAT 0x1c -#define MII_VSC8244_AUXCONSTAT_INIT 0x0004 +#define MII_VSC8244_AUXCONSTAT_INIT 0x0000 #define MII_VSC8244_AUXCONSTAT_DUPLEX 0x0020 #define MII_VSC8244_AUXCONSTAT_SPEED 0x0018 #define MII_VSC8244_AUXCONSTAT_GBIT 0x0010 @@ -51,6 +55,7 @@ MODULE_LICENSE("GPL"); static int vsc824x_config_init(struct phy_device *phydev) { + int extcon; int err; err = phy_write(phydev, MII_VSC8244_AUX_CONSTAT, @@ -58,14 +63,34 @@ static int vsc824x_config_init(struct phy_device *phydev) if (err < 0) return err; - err = phy_write(phydev, MII_VSC8244_EXT_CON1, - MII_VSC8244_EXTCON1_INIT); + extcon = phy_read(phydev, MII_VSC8244_EXT_CON1); + + if (extcon < 0) + return err; + + extcon &= ~(MII_VSC8244_EXTCON1_TX_SKEW_MASK | + MII_VSC8244_EXTCON1_RX_SKEW_MASK); + + if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID) + extcon |= (MII_VSC8244_EXTCON1_TX_SKEW | + MII_VSC8244_EXTCON1_RX_SKEW); + + err = phy_write(phydev, MII_VSC8244_EXT_CON1, extcon); + return err; } static int vsc824x_ack_interrupt(struct phy_device *phydev) { - int err = phy_read(phydev, MII_VSC8244_ISTAT); + int err = 0; + + /* + * Don't bother to ACK the interrupts if interrupts + * are disabled. The 824x cannot clear the interrupts + * if they are disabled. + */ + if (phydev->interrupts == PHY_INTERRUPT_ENABLED) + err = phy_read(phydev, MII_VSC8244_ISTAT); return (err < 0) ? err : 0; } @@ -77,8 +102,19 @@ static int vsc824x_config_intr(struct phy_device *phydev) if (phydev->interrupts == PHY_INTERRUPT_ENABLED) err = phy_write(phydev, MII_VSC8244_IMASK, MII_VSC8244_IMASK_MASK); - else + else { + /* + * The Vitesse PHY cannot clear the interrupt + * once it has disabled them, so we clear them first + */ + err = phy_read(phydev, MII_VSC8244_ISTAT); + + if (err) + return err; + err = phy_write(phydev, MII_VSC8244_IMASK, 0); + } + return err; } diff --git a/drivers/net/ppp_async.c b/drivers/net/ppp_async.c index caabbc408c34..27f5b904f48e 100644 --- a/drivers/net/ppp_async.c +++ b/drivers/net/ppp_async.c @@ -159,12 +159,11 @@ ppp_asynctty_open(struct tty_struct *tty) int err; err = -ENOMEM; - ap = kmalloc(sizeof(*ap), GFP_KERNEL); + ap = kzalloc(sizeof(*ap), GFP_KERNEL); if (ap == 0) goto out; /* initialize the asyncppp structure */ - memset(ap, 0, sizeof(*ap)); ap->tty = tty; ap->mru = PPP_MRU; spin_lock_init(&ap->xmit_lock); diff --git a/drivers/net/ppp_deflate.c b/drivers/net/ppp_deflate.c index 72c8d6628f58..eb98b661efba 100644 --- a/drivers/net/ppp_deflate.c +++ b/drivers/net/ppp_deflate.c @@ -121,12 +121,11 @@ static void *z_comp_alloc(unsigned char *options, int opt_len) if (w_size < DEFLATE_MIN_SIZE || w_size > DEFLATE_MAX_SIZE) return NULL; - state = kmalloc(sizeof(*state), + state = kzalloc(sizeof(*state), GFP_KERNEL); if (state == NULL) return NULL; - memset (state, 0, sizeof (struct ppp_deflate_state)); state->strm.next_in = NULL; state->w_size = w_size; state->strm.workspace = vmalloc(zlib_deflate_workspacesize()); @@ -341,11 +340,10 @@ static void *z_decomp_alloc(unsigned char *options, int opt_len) if (w_size < DEFLATE_MIN_SIZE || w_size > DEFLATE_MAX_SIZE) return NULL; - state = kmalloc(sizeof(*state), GFP_KERNEL); + state = kzalloc(sizeof(*state), GFP_KERNEL); if (state == NULL) return NULL; - memset (state, 0, sizeof (struct ppp_deflate_state)); state->w_size = w_size; state->strm.next_out = NULL; state->strm.workspace = kmalloc(zlib_inflate_workspacesize(), diff --git a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c index 3ef0092dc09c..ef3325b69233 100644 --- a/drivers/net/ppp_generic.c +++ b/drivers/net/ppp_generic.c @@ -2684,8 +2684,7 @@ static void __exit ppp_cleanup(void) if (atomic_read(&ppp_unit_count) || atomic_read(&channel_count)) printk(KERN_ERR "PPP: removing module but units remain!\n"); cardmap_destroy(&all_ppp_units); - if (unregister_chrdev(PPP_MAJOR, "ppp") != 0) - printk(KERN_ERR "PPP: failed to unregister PPP device\n"); + unregister_chrdev(PPP_MAJOR, "ppp"); device_destroy(ppp_class, MKDEV(PPP_MAJOR, 0)); class_destroy(ppp_class); } diff --git a/drivers/net/ppp_mppe.c b/drivers/net/ppp_mppe.c index d5bdd2574659..f79cf87a2bff 100644 --- a/drivers/net/ppp_mppe.c +++ b/drivers/net/ppp_mppe.c @@ -200,11 +200,10 @@ static void *mppe_alloc(unsigned char *options, int optlen) || options[0] != CI_MPPE || options[1] != CILEN_MPPE) goto out; - state = kmalloc(sizeof(*state), GFP_KERNEL); + state = kzalloc(sizeof(*state), GFP_KERNEL); if (state == NULL) goto out; - memset(state, 0, sizeof(*state)); state->arc4 = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(state->arc4)) { diff --git a/drivers/net/ppp_synctty.c b/drivers/net/ppp_synctty.c index 5918fab38349..ce64032a465a 100644 --- a/drivers/net/ppp_synctty.c +++ b/drivers/net/ppp_synctty.c @@ -207,13 +207,12 @@ ppp_sync_open(struct tty_struct *tty) struct syncppp *ap; int err; - ap = kmalloc(sizeof(*ap), GFP_KERNEL); + ap = kzalloc(sizeof(*ap), GFP_KERNEL); err = -ENOMEM; if (ap == 0) goto out; /* initialize the syncppp structure */ - memset(ap, 0, sizeof(*ap)); ap->tty = tty; ap->mru = PPP_MRU; spin_lock_init(&ap->xmit_lock); diff --git a/drivers/net/saa9730.c b/drivers/net/saa9730.c index 451486b32f23..7dae4d404978 100644 --- a/drivers/net/saa9730.c +++ b/drivers/net/saa9730.c @@ -940,15 +940,14 @@ static void lan_saa9730_set_multicast(struct net_device *dev) CAM_CONTROL_GROUP_ACC | CAM_CONTROL_BROAD_ACC, &lp->lan_saa9730_regs->CamCtl); } else { - if (dev->flags & IFF_ALLMULTI) { + if (dev->flags & IFF_ALLMULTI || dev->mc_count) { /* accept all multicast packets */ - writel(CAM_CONTROL_COMP_EN | CAM_CONTROL_GROUP_ACC | - CAM_CONTROL_BROAD_ACC, - &lp->lan_saa9730_regs->CamCtl); - } else { /* * Will handle the multicast stuff later. -carstenl */ + writel(CAM_CONTROL_COMP_EN | CAM_CONTROL_GROUP_ACC | + CAM_CONTROL_BROAD_ACC, + &lp->lan_saa9730_regs->CamCtl); } } diff --git a/drivers/net/shaper.c b/drivers/net/shaper.c index e886e8d7cfdf..4c3d98ff4cd4 100644 --- a/drivers/net/shaper.c +++ b/drivers/net/shaper.c @@ -600,10 +600,9 @@ static int __init shaper_init(void) return -ENODEV; alloc_size = sizeof(*dev) * shapers; - devs = kmalloc(alloc_size, GFP_KERNEL); + devs = kzalloc(alloc_size, GFP_KERNEL); if (!devs) return -ENOMEM; - memset(devs, 0, alloc_size); for (i = 0; i < shapers; i++) { diff --git a/drivers/net/tc35815.c b/drivers/net/tc35815.c index 75655add3f34..7f94ca930988 100644 --- a/drivers/net/tc35815.c +++ b/drivers/net/tc35815.c @@ -626,7 +626,7 @@ static int __devinit tc35815_read_plat_dev_addr(struct net_device *dev) return -ENODEV; } #else -static int __devinit tc35815_read_plat_dev_addr(struct device *dev) +static int __devinit tc35815_read_plat_dev_addr(struct net_device *dev) { return -ENODEV; } diff --git a/drivers/net/wan/c101.c b/drivers/net/wan/c101.c index 6b63b350cd52..8ead774d14c8 100644 --- a/drivers/net/wan/c101.c +++ b/drivers/net/wan/c101.c @@ -315,12 +315,11 @@ static int __init c101_run(unsigned long irq, unsigned long winbase) return -ENODEV; } - card = kmalloc(sizeof(card_t), GFP_KERNEL); + card = kzalloc(sizeof(card_t), GFP_KERNEL); if (card == NULL) { printk(KERN_ERR "c101: unable to allocate memory\n"); return -ENOBUFS; } - memset(card, 0, sizeof(card_t)); card->dev = alloc_hdlcdev(card); if (!card->dev) { diff --git a/drivers/net/wan/cosa.c b/drivers/net/wan/cosa.c index 9ef49ce148b2..26058b4f8f36 100644 --- a/drivers/net/wan/cosa.c +++ b/drivers/net/wan/cosa.c @@ -572,13 +572,11 @@ static int cosa_probe(int base, int irq, int dma) sprintf(cosa->name, "cosa%d", cosa->num); /* Initialize the per-channel data */ - cosa->chan = kmalloc(sizeof(struct channel_data)*cosa->nchannels, - GFP_KERNEL); + cosa->chan = kcalloc(cosa->nchannels, sizeof(struct channel_data), GFP_KERNEL); if (!cosa->chan) { err = -ENOMEM; goto err_out3; } - memset(cosa->chan, 0, sizeof(struct channel_data)*cosa->nchannels); for (i=0; i<cosa->nchannels; i++) { cosa->chan[i].cosa = cosa; cosa->chan[i].num = i; diff --git a/drivers/net/wan/cycx_main.c b/drivers/net/wan/cycx_main.c index 6e5f1c898517..a0e8611ad8e8 100644 --- a/drivers/net/wan/cycx_main.c +++ b/drivers/net/wan/cycx_main.c @@ -113,12 +113,10 @@ static int __init cycx_init(void) /* Verify number of cards and allocate adapter data space */ cycx_ncards = min_t(int, cycx_ncards, CYCX_MAX_CARDS); cycx_ncards = max_t(int, cycx_ncards, 1); - cycx_card_array = kmalloc(sizeof(struct cycx_device) * cycx_ncards, - GFP_KERNEL); + cycx_card_array = kcalloc(cycx_ncards, sizeof(struct cycx_device), GFP_KERNEL); if (!cycx_card_array) goto out; - memset(cycx_card_array, 0, sizeof(struct cycx_device) * cycx_ncards); /* Register adapters with WAN router */ for (cnt = 0; cnt < cycx_ncards; ++cnt) { diff --git a/drivers/net/wan/cycx_x25.c b/drivers/net/wan/cycx_x25.c index 016b3ff3ea5e..a8af28b273d3 100644 --- a/drivers/net/wan/cycx_x25.c +++ b/drivers/net/wan/cycx_x25.c @@ -376,11 +376,10 @@ static int cycx_wan_new_if(struct wan_device *wandev, struct net_device *dev, } /* allocate and initialize private data */ - chan = kmalloc(sizeof(struct cycx_x25_channel), GFP_KERNEL); + chan = kzalloc(sizeof(struct cycx_x25_channel), GFP_KERNEL); if (!chan) return -ENOMEM; - memset(chan, 0, sizeof(*chan)); strcpy(chan->name, conf->name); chan->card = card; chan->link = conf->port; diff --git a/drivers/net/wan/dscc4.c b/drivers/net/wan/dscc4.c index dca024471455..50d2f9108dca 100644 --- a/drivers/net/wan/dscc4.c +++ b/drivers/net/wan/dscc4.c @@ -890,12 +890,11 @@ static int dscc4_found1(struct pci_dev *pdev, void __iomem *ioaddr) struct dscc4_dev_priv *root; int i, ret = -ENOMEM; - root = kmalloc(dev_per_card*sizeof(*root), GFP_KERNEL); + root = kcalloc(dev_per_card, sizeof(*root), GFP_KERNEL); if (!root) { printk(KERN_ERR "%s: can't allocate data\n", DRV_NAME); goto err_out; } - memset(root, 0, dev_per_card*sizeof(*root)); for (i = 0; i < dev_per_card; i++) { root[i].dev = alloc_hdlcdev(root + i); @@ -903,12 +902,11 @@ static int dscc4_found1(struct pci_dev *pdev, void __iomem *ioaddr) goto err_free_dev; } - ppriv = kmalloc(sizeof(*ppriv), GFP_KERNEL); + ppriv = kzalloc(sizeof(*ppriv), GFP_KERNEL); if (!ppriv) { printk(KERN_ERR "%s: can't allocate private data\n", DRV_NAME); goto err_free_dev; } - memset(ppriv, 0, sizeof(struct dscc4_pci_priv)); ppriv->root = root; spin_lock_init(&ppriv->lock); diff --git a/drivers/net/wan/farsync.c b/drivers/net/wan/farsync.c index 58a53b6d9b42..12dae8e24844 100644 --- a/drivers/net/wan/farsync.c +++ b/drivers/net/wan/farsync.c @@ -2476,13 +2476,12 @@ fst_add_one(struct pci_dev *pdev, const struct pci_device_id *ent) } /* Allocate driver private data */ - card = kmalloc(sizeof (struct fst_card_info), GFP_KERNEL); + card = kzalloc(sizeof (struct fst_card_info), GFP_KERNEL); if (card == NULL) { printk_err("FarSync card found but insufficient memory for" " driver storage\n"); return -ENOMEM; } - memset(card, 0, sizeof (struct fst_card_info)); /* Try to enable the device */ if ((err = pci_enable_device(pdev)) != 0) { diff --git a/drivers/net/wan/hostess_sv11.c b/drivers/net/wan/hostess_sv11.c index 9ba3e4ee6ec7..bf5f8d9b5c83 100644 --- a/drivers/net/wan/hostess_sv11.c +++ b/drivers/net/wan/hostess_sv11.c @@ -231,11 +231,10 @@ static struct sv11_device *sv11_init(int iobase, int irq) return NULL; } - sv = kmalloc(sizeof(struct sv11_device), GFP_KERNEL); + sv = kzalloc(sizeof(struct sv11_device), GFP_KERNEL); if(!sv) goto fail3; - memset(sv, 0, sizeof(*sv)); sv->if_ptr=&sv->netdev; sv->netdev.dev = alloc_netdev(0, "hdlc%d", sv11_setup); diff --git a/drivers/net/wan/n2.c b/drivers/net/wan/n2.c index 5c322dfb79f6..cbdf0b748bde 100644 --- a/drivers/net/wan/n2.c +++ b/drivers/net/wan/n2.c @@ -351,12 +351,11 @@ static int __init n2_run(unsigned long io, unsigned long irq, return -ENODEV; } - card = kmalloc(sizeof(card_t), GFP_KERNEL); + card = kzalloc(sizeof(card_t), GFP_KERNEL); if (card == NULL) { printk(KERN_ERR "n2: unable to allocate memory\n"); return -ENOBUFS; } - memset(card, 0, sizeof(card_t)); card->ports[0].dev = alloc_hdlcdev(&card->ports[0]); card->ports[1].dev = alloc_hdlcdev(&card->ports[1]); diff --git a/drivers/net/wan/pc300_drv.c b/drivers/net/wan/pc300_drv.c index 5d8c78ee2cd9..99fee2f1d019 100644 --- a/drivers/net/wan/pc300_drv.c +++ b/drivers/net/wan/pc300_drv.c @@ -3456,7 +3456,7 @@ cpc_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if ((err = pci_enable_device(pdev)) < 0) return err; - card = kmalloc(sizeof(pc300_t), GFP_KERNEL); + card = kzalloc(sizeof(pc300_t), GFP_KERNEL); if (card == NULL) { printk("PC300 found at RAM 0x%016llx, " "but could not allocate card structure.\n", @@ -3464,7 +3464,6 @@ cpc_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) err = -ENOMEM; goto err_disable_dev; } - memset(card, 0, sizeof(pc300_t)); err = -ENODEV; diff --git a/drivers/net/wan/pc300too.c b/drivers/net/wan/pc300too.c index dfbd3b00f03b..6353cb5c658d 100644 --- a/drivers/net/wan/pc300too.c +++ b/drivers/net/wan/pc300too.c @@ -334,14 +334,13 @@ static int __devinit pc300_pci_init_one(struct pci_dev *pdev, return i; } - card = kmalloc(sizeof(card_t), GFP_KERNEL); + card = kzalloc(sizeof(card_t), GFP_KERNEL); if (card == NULL) { printk(KERN_ERR "pc300: unable to allocate memory\n"); pci_release_regions(pdev); pci_disable_device(pdev); return -ENOBUFS; } - memset(card, 0, sizeof(card_t)); pci_set_drvdata(pdev, card); if (pdev->device == PCI_DEVICE_ID_PC300_TE_1 || diff --git a/drivers/net/wan/pci200syn.c b/drivers/net/wan/pci200syn.c index 7f720de2e9f0..092e51d89036 100644 --- a/drivers/net/wan/pci200syn.c +++ b/drivers/net/wan/pci200syn.c @@ -312,14 +312,13 @@ static int __devinit pci200_pci_init_one(struct pci_dev *pdev, return i; } - card = kmalloc(sizeof(card_t), GFP_KERNEL); + card = kzalloc(sizeof(card_t), GFP_KERNEL); if (card == NULL) { printk(KERN_ERR "pci200syn: unable to allocate memory\n"); pci_release_regions(pdev); pci_disable_device(pdev); return -ENOBUFS; } - memset(card, 0, sizeof(card_t)); pci_set_drvdata(pdev, card); card->ports[0].dev = alloc_hdlcdev(&card->ports[0]); card->ports[1].dev = alloc_hdlcdev(&card->ports[1]); diff --git a/drivers/net/wan/sdla.c b/drivers/net/wan/sdla.c index 6a485f0556f4..792e588d7d61 100644 --- a/drivers/net/wan/sdla.c +++ b/drivers/net/wan/sdla.c @@ -1196,10 +1196,9 @@ static int sdla_xfer(struct net_device *dev, struct sdla_mem __user *info, int r if (read) { - temp = kmalloc(mem.len, GFP_KERNEL); + temp = kzalloc(mem.len, GFP_KERNEL); if (!temp) return(-ENOMEM); - memset(temp, 0, mem.len); sdla_read(dev, mem.addr, temp, mem.len); if(copy_to_user(mem.data, temp, mem.len)) { diff --git a/drivers/net/wan/sealevel.c b/drivers/net/wan/sealevel.c index 131358108c5a..11276bf3149f 100644 --- a/drivers/net/wan/sealevel.c +++ b/drivers/net/wan/sealevel.c @@ -270,11 +270,10 @@ static __init struct slvl_board *slvl_init(int iobase, int irq, return NULL; } - b = kmalloc(sizeof(struct slvl_board), GFP_KERNEL); + b = kzalloc(sizeof(struct slvl_board), GFP_KERNEL); if(!b) goto fail3; - memset(b, 0, sizeof(*b)); if (!(b->dev[0]= slvl_alloc(iobase, irq))) goto fail2; diff --git a/drivers/net/wan/wanxl.c b/drivers/net/wan/wanxl.c index c73601574334..3c78f9856380 100644 --- a/drivers/net/wan/wanxl.c +++ b/drivers/net/wan/wanxl.c @@ -599,7 +599,7 @@ static int __devinit wanxl_pci_init_one(struct pci_dev *pdev, } alloc_size = sizeof(card_t) + ports * sizeof(port_t); - card = kmalloc(alloc_size, GFP_KERNEL); + card = kzalloc(alloc_size, GFP_KERNEL); if (card == NULL) { printk(KERN_ERR "wanXL %s: unable to allocate memory\n", pci_name(pdev)); @@ -607,7 +607,6 @@ static int __devinit wanxl_pci_init_one(struct pci_dev *pdev, pci_disable_device(pdev); return -ENOBUFS; } - memset(card, 0, alloc_size); pci_set_drvdata(pdev, card); card->pdev = pdev; diff --git a/drivers/net/wan/x25_asy.c b/drivers/net/wan/x25_asy.c index 1c9edd97accd..c48b1cc63fd5 100644 --- a/drivers/net/wan/x25_asy.c +++ b/drivers/net/wan/x25_asy.c @@ -786,14 +786,12 @@ static int __init init_x25_asy(void) printk(KERN_INFO "X.25 async: version 0.00 ALPHA " "(dynamic channels, max=%d).\n", x25_asy_maxdev ); - x25_asy_devs = kmalloc(sizeof(struct net_device *)*x25_asy_maxdev, - GFP_KERNEL); + x25_asy_devs = kcalloc(x25_asy_maxdev, sizeof(struct net_device*), GFP_KERNEL); if (!x25_asy_devs) { printk(KERN_WARNING "X25 async: Can't allocate x25_asy_ctrls[] " "array! Uaargh! (-> No X.25 available)\n"); return -ENOMEM; } - memset(x25_asy_devs, 0, sizeof(struct net_device *)*x25_asy_maxdev); return tty_register_ldisc(N_X25, &x25_ldisc); } diff --git a/drivers/net/wireless/ipw2100.c b/drivers/net/wireless/ipw2100.c index 072ede71e575..8990585bd228 100644 --- a/drivers/net/wireless/ipw2100.c +++ b/drivers/net/wireless/ipw2100.c @@ -7868,10 +7868,10 @@ static int ipw2100_wx_set_powermode(struct net_device *dev, goto done; } - if ((mode < 1) || (mode > POWER_MODES)) + if ((mode < 0) || (mode > POWER_MODES)) mode = IPW_POWER_AUTO; - if (priv->power_mode != mode) + if (IPW_POWER_LEVEL(priv->power_mode) != mode) err = ipw2100_set_power_mode(priv, mode); done: mutex_unlock(&priv->action_mutex); @@ -7902,7 +7902,7 @@ static int ipw2100_wx_get_powermode(struct net_device *dev, break; case IPW_POWER_AUTO: snprintf(extra, MAX_POWER_STRING, - "Power save level: %d (Auto)", 0); + "Power save level: %d (Auto)", level); break; default: timeout = timeout_duration[level - 1] / 1000; diff --git a/drivers/net/wireless/ipw2200.c b/drivers/net/wireless/ipw2200.c index aa32a97380ec..61497c467467 100644 --- a/drivers/net/wireless/ipw2200.c +++ b/drivers/net/wireless/ipw2200.c @@ -70,7 +70,7 @@ #define VQ #endif -#define IPW2200_VERSION "1.2.0" VK VD VM VP VR VQ +#define IPW2200_VERSION "1.2.2" VK VD VM VP VR VQ #define DRV_DESCRIPTION "Intel(R) PRO/Wireless 2200/2915 Network Driver" #define DRV_COPYRIGHT "Copyright(c) 2003-2006 Intel Corporation" #define DRV_VERSION IPW2200_VERSION @@ -2506,7 +2506,7 @@ static int ipw_send_power_mode(struct ipw_priv *priv, u32 mode) break; } - param = cpu_to_le32(mode); + param = cpu_to_le32(param); return ipw_send_cmd_pdu(priv, IPW_CMD_POWER_MODE, sizeof(param), ¶m); } @@ -9568,6 +9568,7 @@ static int ipw_wx_set_power(struct net_device *dev, priv->power_mode = IPW_POWER_ENABLED | IPW_POWER_BATTERY; else priv->power_mode = IPW_POWER_ENABLED | priv->power_mode; + err = ipw_send_power_mode(priv, IPW_POWER_LEVEL(priv->power_mode)); if (err) { IPW_DEBUG_WX("failed setting power mode.\n"); @@ -9604,22 +9605,19 @@ static int ipw_wx_set_powermode(struct net_device *dev, struct ipw_priv *priv = ieee80211_priv(dev); int mode = *(int *)extra; int err; + mutex_lock(&priv->mutex); - if ((mode < 1) || (mode > IPW_POWER_LIMIT)) { + if ((mode < 1) || (mode > IPW_POWER_LIMIT)) mode = IPW_POWER_AC; - priv->power_mode = mode; - } else { - priv->power_mode = IPW_POWER_ENABLED | mode; - } - if (priv->power_mode != mode) { + if (IPW_POWER_LEVEL(priv->power_mode) != mode) { err = ipw_send_power_mode(priv, mode); - if (err) { IPW_DEBUG_WX("failed setting power mode.\n"); mutex_unlock(&priv->mutex); return err; } + priv->power_mode = IPW_POWER_ENABLED | mode; } mutex_unlock(&priv->mutex); return 0; @@ -10555,7 +10553,7 @@ static irqreturn_t ipw_isr(int irq, void *data) spin_lock(&priv->irq_lock); if (!(priv->status & STATUS_INT_ENABLED)) { - /* Shared IRQ */ + /* IRQ is disabled */ goto none; } diff --git a/drivers/net/wireless/zd1211rw/zd_usb.c b/drivers/net/wireless/zd1211rw/zd_usb.c index 28d41a29d7b1..a9c339ef116a 100644 --- a/drivers/net/wireless/zd1211rw/zd_usb.c +++ b/drivers/net/wireless/zd1211rw/zd_usb.c @@ -72,6 +72,8 @@ static struct usb_device_id usb_ids[] = { { USB_DEVICE(0x0586, 0x3413), .driver_info = DEVICE_ZD1211B }, { USB_DEVICE(0x0053, 0x5301), .driver_info = DEVICE_ZD1211B }, { USB_DEVICE(0x0411, 0x00da), .driver_info = DEVICE_ZD1211B }, + { USB_DEVICE(0x2019, 0x5303), .driver_info = DEVICE_ZD1211B }, + { USB_DEVICE(0x129b, 0x1667), .driver_info = DEVICE_ZD1211B }, /* "Driverless" devices that need ejecting */ { USB_DEVICE(0x0ace, 0x2011), .driver_info = DEVICE_INSTALLER }, { USB_DEVICE(0x0ace, 0x20ff), .driver_info = DEVICE_INSTALLER }, diff --git a/drivers/nubus/nubus.c b/drivers/nubus/nubus.c index 3a0a3a734933..e503c9c98032 100644 --- a/drivers/nubus/nubus.c +++ b/drivers/nubus/nubus.c @@ -466,9 +466,8 @@ static struct nubus_dev* __init parent->base, dir.base); /* Actually we should probably panic if this fails */ - if ((dev = kmalloc(sizeof(*dev), GFP_ATOMIC)) == NULL) + if ((dev = kzalloc(sizeof(*dev), GFP_ATOMIC)) == NULL) return NULL; - memset(dev, 0, sizeof(*dev)); dev->resid = parent->type; dev->directory = dir.base; dev->board = board; @@ -800,9 +799,8 @@ static struct nubus_board* __init nubus_add_board(int slot, int bytelanes) nubus_rewind(&rp, FORMAT_BLOCK_SIZE, bytelanes); /* Actually we should probably panic if this fails */ - if ((board = kmalloc(sizeof(*board), GFP_ATOMIC)) == NULL) + if ((board = kzalloc(sizeof(*board), GFP_ATOMIC)) == NULL) return NULL; - memset(board, 0, sizeof(*board)); board->fblock = rp; /* Dump the format block for debugging purposes */ diff --git a/drivers/parport/parport_cs.c b/drivers/parport/parport_cs.c index 8b7d84eca05d..802a81d47367 100644 --- a/drivers/parport/parport_cs.c +++ b/drivers/parport/parport_cs.c @@ -105,9 +105,8 @@ static int parport_probe(struct pcmcia_device *link) DEBUG(0, "parport_attach()\n"); /* Create new parport device */ - info = kmalloc(sizeof(*info), GFP_KERNEL); + info = kzalloc(sizeof(*info), GFP_KERNEL); if (!info) return -ENOMEM; - memset(info, 0, sizeof(*info)); link->priv = info; info->p_dev = link; diff --git a/drivers/parport/parport_serial.c b/drivers/parport/parport_serial.c index 90ea3b8b99b0..bd6ad8b38168 100644 --- a/drivers/parport/parport_serial.c +++ b/drivers/parport/parport_serial.c @@ -324,10 +324,9 @@ static int __devinit parport_serial_pci_probe (struct pci_dev *dev, struct parport_serial_private *priv; int err; - priv = kmalloc (sizeof *priv, GFP_KERNEL); + priv = kzalloc (sizeof *priv, GFP_KERNEL); if (!priv) return -ENOMEM; - memset(priv, 0, sizeof(struct parport_serial_private)); pci_set_drvdata (dev, priv); err = pci_enable_device (dev); diff --git a/drivers/pci/pcie/aer/aerdrv.c b/drivers/pci/pcie/aer/aerdrv.c index 6846fb42b399..ad90a01b0dfc 100644 --- a/drivers/pci/pcie/aer/aerdrv.c +++ b/drivers/pci/pcie/aer/aerdrv.c @@ -148,11 +148,10 @@ static struct aer_rpc* aer_alloc_rpc(struct pcie_device *dev) { struct aer_rpc *rpc; - if (!(rpc = kmalloc(sizeof(struct aer_rpc), + if (!(rpc = kzalloc(sizeof(struct aer_rpc), GFP_KERNEL))) return NULL; - memset(rpc, 0, sizeof(struct aer_rpc)); /* * Initialize Root lock access, e_lock, to Root Error Status Reg, * Root Error ID Reg, and Root error producer/consumer index. diff --git a/drivers/pcmcia/ds.c b/drivers/pcmcia/ds.c index 143c6efc478a..a99607142fc8 100644 --- a/drivers/pcmcia/ds.c +++ b/drivers/pcmcia/ds.c @@ -1127,6 +1127,34 @@ static int pcmcia_bus_uevent(struct device *dev, char **envp, int num_envp, #endif +/************************ runtime PM support ***************************/ + +static int pcmcia_dev_suspend(struct device *dev, pm_message_t state); +static int pcmcia_dev_resume(struct device *dev); + +static int runtime_suspend(struct device *dev) +{ + int rc; + + down(&dev->sem); + rc = pcmcia_dev_suspend(dev, PMSG_SUSPEND); + up(&dev->sem); + if (!rc) + dev->power.power_state.event = PM_EVENT_SUSPEND; + return rc; +} + +static void runtime_resume(struct device *dev) +{ + int rc; + + down(&dev->sem); + rc = pcmcia_dev_resume(dev); + up(&dev->sem); + if (!rc) + dev->power.power_state.event = PM_EVENT_ON; +} + /************************ per-device sysfs output ***************************/ #define pcmcia_device_attr(field, test, format) \ @@ -1173,9 +1201,9 @@ static ssize_t pcmcia_store_pm_state(struct device *dev, struct device_attribute return -EINVAL; if ((!p_dev->suspended) && !strncmp(buf, "off", 3)) - ret = dpm_runtime_suspend(dev, PMSG_SUSPEND); + ret = runtime_suspend(dev); else if (p_dev->suspended && !strncmp(buf, "on", 2)) - dpm_runtime_resume(dev); + runtime_resume(dev); return ret ? ret : count; } @@ -1312,10 +1340,10 @@ static int pcmcia_bus_suspend_callback(struct device *dev, void * _data) struct pcmcia_socket *skt = _data; struct pcmcia_device *p_dev = to_pcmcia_dev(dev); - if (p_dev->socket != skt) + if (p_dev->socket != skt || p_dev->suspended) return 0; - return dpm_runtime_suspend(dev, PMSG_SUSPEND); + return runtime_suspend(dev); } static int pcmcia_bus_resume_callback(struct device *dev, void * _data) @@ -1323,10 +1351,10 @@ static int pcmcia_bus_resume_callback(struct device *dev, void * _data) struct pcmcia_socket *skt = _data; struct pcmcia_device *p_dev = to_pcmcia_dev(dev); - if (p_dev->socket != skt) + if (p_dev->socket != skt || !p_dev->suspended) return 0; - dpm_runtime_resume(dev); + runtime_resume(dev); return 0; } diff --git a/drivers/pnp/core.c b/drivers/pnp/core.c index 3e20b1cc7778..8e7b2dd38810 100644 --- a/drivers/pnp/core.c +++ b/drivers/pnp/core.c @@ -35,12 +35,11 @@ void *pnp_alloc(long size) { void *result; - result = kmalloc(size, GFP_KERNEL); + result = kzalloc(size, GFP_KERNEL); if (!result){ printk(KERN_ERR "pnp: Out of Memory\n"); return NULL; } - memset(result, 0, size); return result; } diff --git a/drivers/rapidio/rio-scan.c b/drivers/rapidio/rio-scan.c index f935c1f71a58..44420723a359 100644 --- a/drivers/rapidio/rio-scan.c +++ b/drivers/rapidio/rio-scan.c @@ -297,11 +297,10 @@ static struct rio_dev *rio_setup_device(struct rio_net *net, struct rio_switch *rswitch; int result, rdid; - rdev = kmalloc(sizeof(struct rio_dev), GFP_KERNEL); + rdev = kzalloc(sizeof(struct rio_dev), GFP_KERNEL); if (!rdev) goto out; - memset(rdev, 0, sizeof(struct rio_dev)); rdev->net = net; rio_mport_read_config_32(port, destid, hopcount, RIO_DEV_ID_CAR, &result); @@ -801,9 +800,8 @@ static struct rio_net __devinit *rio_alloc_net(struct rio_mport *port) { struct rio_net *net; - net = kmalloc(sizeof(struct rio_net), GFP_KERNEL); + net = kzalloc(sizeof(struct rio_net), GFP_KERNEL); if (net) { - memset(net, 0, sizeof(struct rio_net)); INIT_LIST_HEAD(&net->node); INIT_LIST_HEAD(&net->devices); INIT_LIST_HEAD(&net->mports); diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index e24ea82dc35b..5d760bb6c2cd 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -235,7 +235,7 @@ static int cmos_set_alarm(struct device *dev, struct rtc_wkalrm *t) return 0; } -static int cmos_set_freq(struct device *dev, int freq) +static int cmos_irq_set_freq(struct device *dev, int freq) { struct cmos_rtc *cmos = dev_get_drvdata(dev); int f; @@ -259,6 +259,34 @@ static int cmos_set_freq(struct device *dev, int freq) return 0; } +static int cmos_irq_set_state(struct device *dev, int enabled) +{ + struct cmos_rtc *cmos = dev_get_drvdata(dev); + unsigned char rtc_control, rtc_intr; + unsigned long flags; + + if (!is_valid_irq(cmos->irq)) + return -ENXIO; + + spin_lock_irqsave(&rtc_lock, flags); + rtc_control = CMOS_READ(RTC_CONTROL); + + if (enabled) + rtc_control |= RTC_PIE; + else + rtc_control &= ~RTC_PIE; + + CMOS_WRITE(rtc_control, RTC_CONTROL); + + rtc_intr = CMOS_READ(RTC_INTR_FLAGS); + rtc_intr &= (rtc_control & RTC_IRQMASK) | RTC_IRQF; + if (is_intr(rtc_intr)) + rtc_update_irq(cmos->rtc, 1, rtc_intr); + + spin_unlock_irqrestore(&rtc_lock, flags); + return 0; +} + #if defined(CONFIG_RTC_INTF_DEV) || defined(CONFIG_RTC_INTF_DEV_MODULE) static int @@ -360,7 +388,8 @@ static const struct rtc_class_ops cmos_rtc_ops = { .read_alarm = cmos_read_alarm, .set_alarm = cmos_set_alarm, .proc = cmos_procfs, - .irq_set_freq = cmos_set_freq, + .irq_set_freq = cmos_irq_set_freq, + .irq_set_state = cmos_irq_set_state, }; /*----------------------------------------------------------------*/ diff --git a/drivers/s390/char/tape_34xx.c b/drivers/s390/char/tape_34xx.c index e765875e8db2..80e7a537e7d2 100644 --- a/drivers/s390/char/tape_34xx.c +++ b/drivers/s390/char/tape_34xx.c @@ -131,10 +131,9 @@ tape_34xx_schedule_work(struct tape_device *device, enum tape_op op) { struct tape_34xx_work *p; - if ((p = kmalloc(sizeof(*p), GFP_ATOMIC)) == NULL) + if ((p = kzalloc(sizeof(*p), GFP_ATOMIC)) == NULL) return -ENOMEM; - memset(p, 0, sizeof(*p)); INIT_WORK(&p->work, tape_34xx_work_handler); p->device = tape_get_device_reference(device); diff --git a/drivers/s390/net/claw.c b/drivers/s390/net/claw.c index 348bb7b82771..023455a0b34a 100644 --- a/drivers/s390/net/claw.c +++ b/drivers/s390/net/claw.c @@ -317,8 +317,8 @@ claw_probe(struct ccwgroup_device *cgdev) CLAW_DBF_TEXT_(2,setup,"probex%d",-ENOMEM); return -ENOMEM; } - privptr->p_mtc_envelope= kmalloc( MAX_ENVELOPE_SIZE, GFP_KERNEL); - privptr->p_env = kmalloc(sizeof(struct claw_env), GFP_KERNEL); + privptr->p_mtc_envelope= kzalloc( MAX_ENVELOPE_SIZE, GFP_KERNEL); + privptr->p_env = kzalloc(sizeof(struct claw_env), GFP_KERNEL); if ((privptr->p_mtc_envelope==NULL) || (privptr->p_env==NULL)) { probe_error(cgdev); put_device(&cgdev->dev); @@ -327,8 +327,6 @@ claw_probe(struct ccwgroup_device *cgdev) CLAW_DBF_TEXT_(2,setup,"probex%d",-ENOMEM); return -ENOMEM; } - memset(privptr->p_mtc_envelope, 0x00, MAX_ENVELOPE_SIZE); - memset(privptr->p_env, 0x00, sizeof(struct claw_env)); memcpy(privptr->p_env->adapter_name,WS_NAME_NOT_DEF,8); memcpy(privptr->p_env->host_name,WS_NAME_NOT_DEF,8); memcpy(privptr->p_env->api_type,WS_NAME_NOT_DEF,8); @@ -3924,7 +3922,7 @@ add_channel(struct ccw_device *cdev,int i,struct claw_privbk *privptr) snprintf(p_ch->id, CLAW_ID_SIZE, "cl-%s", cdev->dev.bus_id); ccw_device_get_id(cdev, &dev_id); p_ch->devno = dev_id.devno; - if ((p_ch->irb = kmalloc(sizeof (struct irb),GFP_KERNEL)) == NULL) { + if ((p_ch->irb = kzalloc(sizeof (struct irb),GFP_KERNEL)) == NULL) { printk(KERN_WARNING "%s Out of memory in %s for irb\n", p_ch->id,__FUNCTION__); #ifdef FUNCTRACE @@ -3933,7 +3931,6 @@ add_channel(struct ccw_device *cdev,int i,struct claw_privbk *privptr) #endif return -ENOMEM; } - memset(p_ch->irb, 0, sizeof (struct irb)); #ifdef FUNCTRACE printk(KERN_INFO "%s:%s Exit on line %d\n", cdev->dev.bus_id,__FUNCTION__,__LINE__); diff --git a/drivers/sbus/char/bbc_i2c.c b/drivers/sbus/char/bbc_i2c.c index 178155bf9db6..fbadd4d761f3 100644 --- a/drivers/sbus/char/bbc_i2c.c +++ b/drivers/sbus/char/bbc_i2c.c @@ -156,10 +156,9 @@ struct bbc_i2c_client *bbc_i2c_attach(struct linux_ebus_child *echild) if (!bp) return NULL; - client = kmalloc(sizeof(*client), GFP_KERNEL); + client = kzalloc(sizeof(*client), GFP_KERNEL); if (!client) return NULL; - memset(client, 0, sizeof(*client)); client->bp = bp; client->echild = echild; client->bus = echild->resource[0].start; diff --git a/drivers/sbus/char/vfc_dev.c b/drivers/sbus/char/vfc_dev.c index 6afc7e5df0d4..26b1d2a17ed2 100644 --- a/drivers/sbus/char/vfc_dev.c +++ b/drivers/sbus/char/vfc_dev.c @@ -656,12 +656,9 @@ static int vfc_probe(void) if (!cards) return -ENODEV; - vfc_dev_lst = kmalloc(sizeof(struct vfc_dev *) * - (cards+1), - GFP_KERNEL); + vfc_dev_lst = kcalloc(cards + 1, sizeof(struct vfc_dev*), GFP_KERNEL); if (vfc_dev_lst == NULL) return -ENOMEM; - memset(vfc_dev_lst, 0, sizeof(struct vfc_dev *) * (cards + 1)); vfc_dev_lst[cards] = NULL; ret = register_chrdev(VFC_MAJOR, vfcstr, &vfc_fops); diff --git a/drivers/scsi/3w-9xxx.c b/drivers/scsi/3w-9xxx.c index 76c09097175f..6b49f6a2524d 100644 --- a/drivers/scsi/3w-9xxx.c +++ b/drivers/scsi/3w-9xxx.c @@ -1160,13 +1160,12 @@ static int twa_initialize_device_extension(TW_Device_Extension *tw_dev) } /* Allocate event info space */ - tw_dev->event_queue[0] = kmalloc(sizeof(TW_Event) * TW_Q_LENGTH, GFP_KERNEL); + tw_dev->event_queue[0] = kcalloc(TW_Q_LENGTH, sizeof(TW_Event), GFP_KERNEL); if (!tw_dev->event_queue[0]) { TW_PRINTK(tw_dev->host, TW_DRIVER, 0x18, "Event info memory allocation failed"); goto out; } - memset(tw_dev->event_queue[0], 0, sizeof(TW_Event) * TW_Q_LENGTH); for (i = 0; i < TW_Q_LENGTH; i++) { tw_dev->event_queue[i] = (TW_Event *)((unsigned char *)tw_dev->event_queue[0] + (i * sizeof(TW_Event))); diff --git a/drivers/scsi/NCR53C9x.c b/drivers/scsi/NCR53C9x.c index 8b5334c56f0a..773d11dd9953 100644 --- a/drivers/scsi/NCR53C9x.c +++ b/drivers/scsi/NCR53C9x.c @@ -3606,11 +3606,10 @@ out: int esp_slave_alloc(struct scsi_device *SDptr) { struct esp_device *esp_dev = - kmalloc(sizeof(struct esp_device), GFP_ATOMIC); + kzalloc(sizeof(struct esp_device), GFP_ATOMIC); if (!esp_dev) return -ENOMEM; - memset(esp_dev, 0, sizeof(struct esp_device)); SDptr->hostdata = esp_dev; return 0; } diff --git a/drivers/scsi/NCR_D700.c b/drivers/scsi/NCR_D700.c index f12864abed2f..3a8089705feb 100644 --- a/drivers/scsi/NCR_D700.c +++ b/drivers/scsi/NCR_D700.c @@ -181,13 +181,12 @@ NCR_D700_probe_one(struct NCR_D700_private *p, int siop, int irq, struct Scsi_Host *host; int ret; - hostdata = kmalloc(sizeof(*hostdata), GFP_KERNEL); + hostdata = kzalloc(sizeof(*hostdata), GFP_KERNEL); if (!hostdata) { printk(KERN_ERR "NCR D700: SIOP%d: Failed to allocate host" "data, detatching\n", siop); return -ENOMEM; } - memset(hostdata, 0, sizeof(*hostdata)); if (!request_region(region, 64, "NCR_D700")) { printk(KERN_ERR "NCR D700: Failed to reserve IO region 0x%x\n", diff --git a/drivers/scsi/NCR_Q720.c b/drivers/scsi/NCR_Q720.c index 778844c3544a..a8bbdc2273b8 100644 --- a/drivers/scsi/NCR_Q720.c +++ b/drivers/scsi/NCR_Q720.c @@ -148,11 +148,10 @@ NCR_Q720_probe(struct device *dev) __u32 base_addr, mem_size; void __iomem *mem_base; - p = kmalloc(sizeof(*p), GFP_KERNEL); + p = kzalloc(sizeof(*p), GFP_KERNEL); if (!p) return -ENOMEM; - memset(p, 0, sizeof(*p)); pos2 = mca_device_read_pos(mca_dev, 2); /* enable device */ pos2 |= NCR_Q720_POS2_BOARD_ENABLE | NCR_Q720_POS2_INTERRUPT_ENABLE; diff --git a/drivers/scsi/imm.c b/drivers/scsi/imm.c index 0464c182c577..005d2b05f32d 100644 --- a/drivers/scsi/imm.c +++ b/drivers/scsi/imm.c @@ -1159,11 +1159,10 @@ static int __imm_attach(struct parport *pb) init_waitqueue_head(&waiting); - dev = kmalloc(sizeof(imm_struct), GFP_KERNEL); + dev = kzalloc(sizeof(imm_struct), GFP_KERNEL); if (!dev) return -ENOMEM; - memset(dev, 0, sizeof(imm_struct)); dev->base = -1; dev->mode = IMM_AUTODETECT; diff --git a/drivers/scsi/ips.c b/drivers/scsi/ips.c index 9f8ed6b81576..492a51bd6aa8 100644 --- a/drivers/scsi/ips.c +++ b/drivers/scsi/ips.c @@ -7068,14 +7068,13 @@ ips_init_phase1(struct pci_dev *pci_dev, int *indexPtr) subdevice_id = pci_dev->subsystem_device; /* found a controller */ - ha = kmalloc(sizeof (ips_ha_t), GFP_KERNEL); + ha = kzalloc(sizeof (ips_ha_t), GFP_KERNEL); if (ha == NULL) { IPS_PRINTK(KERN_WARNING, pci_dev, "Unable to allocate temporary ha struct\n"); return -1; } - memset(ha, 0, sizeof (ips_ha_t)); ips_sh[index] = NULL; ips_ha[index] = ha; diff --git a/drivers/scsi/lasi700.c b/drivers/scsi/lasi700.c index 5c32a69e41ba..3126824da36d 100644 --- a/drivers/scsi/lasi700.c +++ b/drivers/scsi/lasi700.c @@ -101,13 +101,12 @@ lasi700_probe(struct parisc_device *dev) struct NCR_700_Host_Parameters *hostdata; struct Scsi_Host *host; - hostdata = kmalloc(sizeof(*hostdata), GFP_KERNEL); + hostdata = kzalloc(sizeof(*hostdata), GFP_KERNEL); if (!hostdata) { printk(KERN_ERR "%s: Failed to allocate host data\n", dev->dev.bus_id); return -ENOMEM; } - memset(hostdata, 0, sizeof(struct NCR_700_Host_Parameters)); hostdata->dev = &dev->dev; dma_set_mask(&dev->dev, DMA_32BIT_MASK); diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index f81f85ee190f..07bd0dcdf0d6 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -1830,7 +1830,7 @@ lpfc_pci_probe_one(struct pci_dev *pdev, const struct pci_device_id *pid) /* Initialize and populate the iocb list per host. */ INIT_LIST_HEAD(&phba->lpfc_iocb_list); for (i = 0; i < LPFC_IOCB_LIST_CNT; i++) { - iocbq_entry = kmalloc(sizeof(struct lpfc_iocbq), GFP_KERNEL); + iocbq_entry = kzalloc(sizeof(struct lpfc_iocbq), GFP_KERNEL); if (iocbq_entry == NULL) { printk(KERN_ERR "%s: only allocated %d iocbs of " "expected %d count. Unloading driver.\n", @@ -1839,7 +1839,6 @@ lpfc_pci_probe_one(struct pci_dev *pdev, const struct pci_device_id *pid) goto out_free_iocbq; } - memset(iocbq_entry, 0, sizeof(struct lpfc_iocbq)); iotag = lpfc_sli_next_iotag(phba, iocbq_entry); if (iotag == 0) { kfree (iocbq_entry); diff --git a/drivers/scsi/megaraid/megaraid_mbox.c b/drivers/scsi/megaraid/megaraid_mbox.c index c46685a03a9f..c6a53dccc16a 100644 --- a/drivers/scsi/megaraid/megaraid_mbox.c +++ b/drivers/scsi/megaraid/megaraid_mbox.c @@ -454,7 +454,7 @@ megaraid_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) pci_set_master(pdev); // Allocate the per driver initialization structure - adapter = kmalloc(sizeof(adapter_t), GFP_KERNEL); + adapter = kzalloc(sizeof(adapter_t), GFP_KERNEL); if (adapter == NULL) { con_log(CL_ANN, (KERN_WARNING @@ -462,7 +462,6 @@ megaraid_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) goto out_probe_one; } - memset(adapter, 0, sizeof(adapter_t)); // set up PCI related soft state and other pre-known parameters @@ -746,10 +745,9 @@ megaraid_init_mbox(adapter_t *adapter) * Allocate and initialize the init data structure for mailbox * controllers */ - raid_dev = kmalloc(sizeof(mraid_device_t), GFP_KERNEL); + raid_dev = kzalloc(sizeof(mraid_device_t), GFP_KERNEL); if (raid_dev == NULL) return -1; - memset(raid_dev, 0, sizeof(mraid_device_t)); /* * Attach the adapter soft state to raid device soft state @@ -1050,8 +1048,7 @@ megaraid_alloc_cmd_packets(adapter_t *adapter) * since the calling routine does not yet know the number of available * commands. */ - adapter->kscb_list = kmalloc(sizeof(scb_t) * MBOX_MAX_SCSI_CMDS, - GFP_KERNEL); + adapter->kscb_list = kcalloc(MBOX_MAX_SCSI_CMDS, sizeof(scb_t), GFP_KERNEL); if (adapter->kscb_list == NULL) { con_log(CL_ANN, (KERN_WARNING @@ -1059,7 +1056,6 @@ megaraid_alloc_cmd_packets(adapter_t *adapter) __LINE__)); goto out_free_ibuf; } - memset(adapter->kscb_list, 0, sizeof(scb_t) * MBOX_MAX_SCSI_CMDS); // memory allocation for our command packets if (megaraid_mbox_setup_dma_pools(adapter) != 0) { @@ -3495,8 +3491,7 @@ megaraid_cmm_register(adapter_t *adapter) int i; // Allocate memory for the base list of scb for management module. - adapter->uscb_list = kmalloc(sizeof(scb_t) * MBOX_MAX_USER_CMDS, - GFP_KERNEL); + adapter->uscb_list = kcalloc(MBOX_MAX_USER_CMDS, sizeof(scb_t), GFP_KERNEL); if (adapter->uscb_list == NULL) { con_log(CL_ANN, (KERN_WARNING @@ -3504,7 +3499,6 @@ megaraid_cmm_register(adapter_t *adapter) __LINE__)); return -1; } - memset(adapter->uscb_list, 0, sizeof(scb_t) * MBOX_MAX_USER_CMDS); // Initialize the synchronization parameters for resources for diff --git a/drivers/scsi/megaraid/megaraid_mm.c b/drivers/scsi/megaraid/megaraid_mm.c index 84d9c27133d4..b6587a6d8486 100644 --- a/drivers/scsi/megaraid/megaraid_mm.c +++ b/drivers/scsi/megaraid/megaraid_mm.c @@ -890,12 +890,11 @@ mraid_mm_register_adp(mraid_mmadp_t *lld_adp) if (lld_adp->drvr_type != DRVRTYPE_MBOX) return (-EINVAL); - adapter = kmalloc(sizeof(mraid_mmadp_t), GFP_KERNEL); + adapter = kzalloc(sizeof(mraid_mmadp_t), GFP_KERNEL); if (!adapter) return -ENOMEM; - memset(adapter, 0, sizeof(mraid_mmadp_t)); adapter->unique_id = lld_adp->unique_id; adapter->drvr_type = lld_adp->drvr_type; diff --git a/drivers/scsi/megaraid/megaraid_sas.c b/drivers/scsi/megaraid/megaraid_sas.c index b7f2e613c903..ebb948c016bb 100644 --- a/drivers/scsi/megaraid/megaraid_sas.c +++ b/drivers/scsi/megaraid/megaraid_sas.c @@ -1636,15 +1636,13 @@ static int megasas_alloc_cmds(struct megasas_instance *instance) * Allocate the dynamic array first and then allocate individual * commands. */ - instance->cmd_list = kmalloc(sizeof(struct megasas_cmd *) * max_cmd, - GFP_KERNEL); + instance->cmd_list = kcalloc(max_cmd, sizeof(struct megasas_cmd*), GFP_KERNEL); if (!instance->cmd_list) { printk(KERN_DEBUG "megasas: out of memory\n"); return -ENOMEM; } - memset(instance->cmd_list, 0, sizeof(struct megasas_cmd *) * max_cmd); for (i = 0; i < max_cmd; i++) { instance->cmd_list[i] = kmalloc(sizeof(struct megasas_cmd), diff --git a/drivers/scsi/pcmcia/aha152x_stub.c b/drivers/scsi/pcmcia/aha152x_stub.c index 370802d24acd..2dd0dc9a9aed 100644 --- a/drivers/scsi/pcmcia/aha152x_stub.c +++ b/drivers/scsi/pcmcia/aha152x_stub.c @@ -106,9 +106,8 @@ static int aha152x_probe(struct pcmcia_device *link) DEBUG(0, "aha152x_attach()\n"); /* Create new SCSI device */ - info = kmalloc(sizeof(*info), GFP_KERNEL); + info = kzalloc(sizeof(*info), GFP_KERNEL); if (!info) return -ENOMEM; - memset(info, 0, sizeof(*info)); info->p_dev = link; link->priv = info; diff --git a/drivers/scsi/pcmcia/nsp_cs.c b/drivers/scsi/pcmcia/nsp_cs.c index c6f8c6e65e05..445cfbbca9b3 100644 --- a/drivers/scsi/pcmcia/nsp_cs.c +++ b/drivers/scsi/pcmcia/nsp_cs.c @@ -1602,9 +1602,8 @@ static int nsp_cs_probe(struct pcmcia_device *link) nsp_dbg(NSP_DEBUG_INIT, "in"); /* Create new SCSI device */ - info = kmalloc(sizeof(*info), GFP_KERNEL); + info = kzalloc(sizeof(*info), GFP_KERNEL); if (info == NULL) { return -ENOMEM; } - memset(info, 0, sizeof(*info)); info->p_dev = link; link->priv = info; data->ScsiInfo = info; diff --git a/drivers/scsi/pcmcia/qlogic_stub.c b/drivers/scsi/pcmcia/qlogic_stub.c index 697cfb76c3a4..67c5a58d17df 100644 --- a/drivers/scsi/pcmcia/qlogic_stub.c +++ b/drivers/scsi/pcmcia/qlogic_stub.c @@ -162,10 +162,9 @@ static int qlogic_probe(struct pcmcia_device *link) DEBUG(0, "qlogic_attach()\n"); /* Create new SCSI device */ - info = kmalloc(sizeof(*info), GFP_KERNEL); + info = kzalloc(sizeof(*info), GFP_KERNEL); if (!info) return -ENOMEM; - memset(info, 0, sizeof(*info)); info->p_dev = link; link->priv = info; link->io.NumPorts1 = 16; diff --git a/drivers/scsi/pcmcia/sym53c500_cs.c b/drivers/scsi/pcmcia/sym53c500_cs.c index 2695b7187b2f..961839ecfe86 100644 --- a/drivers/scsi/pcmcia/sym53c500_cs.c +++ b/drivers/scsi/pcmcia/sym53c500_cs.c @@ -875,10 +875,9 @@ SYM53C500_probe(struct pcmcia_device *link) DEBUG(0, "SYM53C500_attach()\n"); /* Create new SCSI device */ - info = kmalloc(sizeof(*info), GFP_KERNEL); + info = kzalloc(sizeof(*info), GFP_KERNEL); if (!info) return -ENOMEM; - memset(info, 0, sizeof(*info)); info->p_dev = link; link->priv = info; link->io.NumPorts1 = 16; diff --git a/drivers/scsi/ppa.c b/drivers/scsi/ppa.c index 2f1fa1eb7e90..67b6d76a6c8d 100644 --- a/drivers/scsi/ppa.c +++ b/drivers/scsi/ppa.c @@ -1014,10 +1014,9 @@ static int __ppa_attach(struct parport *pb) int modes, ppb, ppb_hi; int err = -ENOMEM; - dev = kmalloc(sizeof(ppa_struct), GFP_KERNEL); + dev = kzalloc(sizeof(ppa_struct), GFP_KERNEL); if (!dev) return -ENOMEM; - memset(dev, 0, sizeof(ppa_struct)); dev->base = -1; dev->mode = PPA_AUTODETECT; dev->recon_tmo = PPA_RECON_TMO; diff --git a/drivers/scsi/sim710.c b/drivers/scsi/sim710.c index 018c65f73ac4..710f19de3d40 100644 --- a/drivers/scsi/sim710.c +++ b/drivers/scsi/sim710.c @@ -100,7 +100,7 @@ sim710_probe_common(struct device *dev, unsigned long base_addr, { struct Scsi_Host * host = NULL; struct NCR_700_Host_Parameters *hostdata = - kmalloc(sizeof(struct NCR_700_Host_Parameters), GFP_KERNEL); + kzalloc(sizeof(struct NCR_700_Host_Parameters), GFP_KERNEL); printk(KERN_NOTICE "sim710: %s\n", dev->bus_id); printk(KERN_NOTICE "sim710: irq = %d, clock = %d, base = 0x%lx, scsi_id = %d\n", @@ -110,7 +110,6 @@ sim710_probe_common(struct device *dev, unsigned long base_addr, printk(KERN_ERR "sim710: Failed to allocate host data\n"); goto out; } - memset(hostdata, 0, sizeof(struct NCR_700_Host_Parameters)); if(request_region(base_addr, 64, "sim710") == NULL) { printk(KERN_ERR "sim710: Failed to reserve IO region 0x%lx\n", diff --git a/drivers/scsi/tmscsim.c b/drivers/scsi/tmscsim.c index 14cba1ca38b3..5db1520f8ba9 100644 --- a/drivers/scsi/tmscsim.c +++ b/drivers/scsi/tmscsim.c @@ -2082,10 +2082,9 @@ static int dc390_slave_alloc(struct scsi_device *scsi_device) uint id = scsi_device->id; uint lun = scsi_device->lun; - pDCB = kmalloc(sizeof(struct dc390_dcb), GFP_KERNEL); + pDCB = kzalloc(sizeof(struct dc390_dcb), GFP_KERNEL); if (!pDCB) return -ENOMEM; - memset(pDCB, 0, sizeof(struct dc390_dcb)); if (!pACB->DCBCnt++) { pACB->pLinkDCB = pDCB; diff --git a/drivers/serial/amba-pl011.c b/drivers/serial/amba-pl011.c index 954073c6ce3a..72229df9dc11 100644 --- a/drivers/serial/amba-pl011.c +++ b/drivers/serial/amba-pl011.c @@ -716,7 +716,7 @@ static int pl011_probe(struct amba_device *dev, void *id) goto out; } - uap = kmalloc(sizeof(struct uart_amba_port), GFP_KERNEL); + uap = kzalloc(sizeof(struct uart_amba_port), GFP_KERNEL); if (uap == NULL) { ret = -ENOMEM; goto out; @@ -728,7 +728,6 @@ static int pl011_probe(struct amba_device *dev, void *id) goto free; } - memset(uap, 0, sizeof(struct uart_amba_port)); uap->clk = clk_get(&dev->dev, "UARTCLK"); if (IS_ERR(uap->clk)) { ret = PTR_ERR(uap->clk); diff --git a/drivers/sh/superhyway/superhyway.c b/drivers/sh/superhyway/superhyway.c index 94b229031198..7d873b3b0513 100644 --- a/drivers/sh/superhyway/superhyway.c +++ b/drivers/sh/superhyway/superhyway.c @@ -56,11 +56,10 @@ int superhyway_add_device(unsigned long base, struct superhyway_device *sdev, struct superhyway_device *dev = sdev; if (!dev) { - dev = kmalloc(sizeof(struct superhyway_device), GFP_KERNEL); + dev = kzalloc(sizeof(struct superhyway_device), GFP_KERNEL); if (!dev) return -ENOMEM; - memset(dev, 0, sizeof(struct superhyway_device)); } dev->bus = bus; diff --git a/drivers/sn/ioc3.c b/drivers/sn/ioc3.c index 2dd6eed50aa0..29fcd6d0301d 100644 --- a/drivers/sn/ioc3.c +++ b/drivers/sn/ioc3.c @@ -629,7 +629,7 @@ static int ioc3_probe(struct pci_dev *pdev, const struct pci_device_id *pci_id) #endif /* Set up per-IOC3 data */ - idd = kmalloc(sizeof(struct ioc3_driver_data), GFP_KERNEL); + idd = kzalloc(sizeof(struct ioc3_driver_data), GFP_KERNEL); if (!idd) { printk(KERN_WARNING "%s: Failed to allocate IOC3 data for pci_dev %s.\n", @@ -637,7 +637,6 @@ static int ioc3_probe(struct pci_dev *pdev, const struct pci_device_id *pci_id) ret = -ENODEV; goto out_idd; } - memset(idd, 0, sizeof(struct ioc3_driver_data)); spin_lock_init(&idd->ir_lock); spin_lock_init(&idd->gpio_lock); idd->pdev = pdev; diff --git a/drivers/telephony/ixj_pcmcia.c b/drivers/telephony/ixj_pcmcia.c index 3e658dc7c2d8..ff9a29b76336 100644 --- a/drivers/telephony/ixj_pcmcia.c +++ b/drivers/telephony/ixj_pcmcia.c @@ -45,11 +45,10 @@ static int ixj_probe(struct pcmcia_device *p_dev) p_dev->io.Attributes2 = IO_DATA_PATH_WIDTH_8; p_dev->io.IOAddrLines = 3; p_dev->conf.IntType = INT_MEMORY_AND_IO; - p_dev->priv = kmalloc(sizeof(struct ixj_info_t), GFP_KERNEL); + p_dev->priv = kzalloc(sizeof(struct ixj_info_t), GFP_KERNEL); if (!p_dev->priv) { return -ENOMEM; } - memset(p_dev->priv, 0, sizeof(struct ixj_info_t)); return ixj_config(p_dev); } diff --git a/drivers/uio/Kconfig b/drivers/uio/Kconfig new file mode 100644 index 000000000000..b778ed71f636 --- /dev/null +++ b/drivers/uio/Kconfig @@ -0,0 +1,29 @@ +menu "Userspace I/O" + depends on !S390 + +config UIO + tristate "Userspace I/O drivers" + default n + help + Enable this to allow the userspace driver core code to be + built. This code allows userspace programs easy access to + kernel interrupts and memory locations, allowing some drivers + to be written in userspace. Note that a small kernel driver + is also required for interrupt handling to work properly. + + If you don't know what to do here, say N. + +config UIO_CIF + tristate "generic Hilscher CIF Card driver" + depends on UIO && PCI + default n + help + Driver for Hilscher CIF DeviceNet and Profibus cards. This + driver requires a userspace component that handles all of the + heavy lifting and can be found at: + http://www.osadl.org/projects/downloads/UIO/user/cif-* + + To compile this driver as a module, choose M here: the module + will be called uio_cif. + +endmenu diff --git a/drivers/uio/Makefile b/drivers/uio/Makefile new file mode 100644 index 000000000000..7fecfb459da5 --- /dev/null +++ b/drivers/uio/Makefile @@ -0,0 +1,2 @@ +obj-$(CONFIG_UIO) += uio.o +obj-$(CONFIG_UIO_CIF) += uio_cif.o diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c new file mode 100644 index 000000000000..865f32b63b5c --- /dev/null +++ b/drivers/uio/uio.c @@ -0,0 +1,701 @@ +/* + * drivers/uio/uio.c + * + * Copyright(C) 2005, Benedikt Spranger <b.spranger@linutronix.de> + * Copyright(C) 2005, Thomas Gleixner <tglx@linutronix.de> + * Copyright(C) 2006, Hans J. Koch <hjk@linutronix.de> + * Copyright(C) 2006, Greg Kroah-Hartman <greg@kroah.com> + * + * Userspace IO + * + * Base Functions + * + * Licensed under the GPLv2 only. + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/poll.h> +#include <linux/device.h> +#include <linux/mm.h> +#include <linux/idr.h> +#include <linux/string.h> +#include <linux/kobject.h> +#include <linux/uio_driver.h> + +#define UIO_MAX_DEVICES 255 + +struct uio_device { + struct module *owner; + struct device *dev; + int minor; + atomic_t event; + struct fasync_struct *async_queue; + wait_queue_head_t wait; + int vma_count; + struct uio_info *info; + struct kset map_attr_kset; +}; + +static int uio_major; +static DEFINE_IDR(uio_idr); +static struct file_operations uio_fops; + +/* UIO class infrastructure */ +static struct uio_class { + struct kref kref; + struct class *class; +} *uio_class; + +/* + * attributes + */ + +static struct attribute attr_addr = { + .name = "addr", + .mode = S_IRUGO, +}; + +static struct attribute attr_size = { + .name = "size", + .mode = S_IRUGO, +}; + +static struct attribute* map_attrs[] = { + &attr_addr, &attr_size, NULL +}; + +static ssize_t map_attr_show(struct kobject *kobj, struct attribute *attr, + char *buf) +{ + struct uio_mem *mem = container_of(kobj, struct uio_mem, kobj); + + if (strncmp(attr->name,"addr",4) == 0) + return sprintf(buf, "0x%lx\n", mem->addr); + + if (strncmp(attr->name,"size",4) == 0) + return sprintf(buf, "0x%lx\n", mem->size); + + return -ENODEV; +} + +static void map_attr_release(struct kobject *kobj) +{ + /* TODO ??? */ +} + +static struct sysfs_ops map_attr_ops = { + .show = map_attr_show, +}; + +static struct kobj_type map_attr_type = { + .release = map_attr_release, + .sysfs_ops = &map_attr_ops, + .default_attrs = map_attrs, +}; + +static ssize_t show_name(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct uio_device *idev = dev_get_drvdata(dev); + if (idev) + return sprintf(buf, "%s\n", idev->info->name); + else + return -ENODEV; +} +static DEVICE_ATTR(name, S_IRUGO, show_name, NULL); + +static ssize_t show_version(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct uio_device *idev = dev_get_drvdata(dev); + if (idev) + return sprintf(buf, "%s\n", idev->info->version); + else + return -ENODEV; +} +static DEVICE_ATTR(version, S_IRUGO, show_version, NULL); + +static ssize_t show_event(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct uio_device *idev = dev_get_drvdata(dev); + if (idev) + return sprintf(buf, "%u\n", + (unsigned int)atomic_read(&idev->event)); + else + return -ENODEV; +} +static DEVICE_ATTR(event, S_IRUGO, show_event, NULL); + +static struct attribute *uio_attrs[] = { + &dev_attr_name.attr, + &dev_attr_version.attr, + &dev_attr_event.attr, + NULL, +}; + +static struct attribute_group uio_attr_grp = { + .attrs = uio_attrs, +}; + +/* + * device functions + */ +static int uio_dev_add_attributes(struct uio_device *idev) +{ + int ret; + int mi; + int map_found = 0; + struct uio_mem *mem; + + ret = sysfs_create_group(&idev->dev->kobj, &uio_attr_grp); + if (ret) + goto err_group; + + for (mi = 0; mi < MAX_UIO_MAPS; mi++) { + mem = &idev->info->mem[mi]; + if (mem->size == 0) + break; + if (!map_found) { + map_found = 1; + kobject_set_name(&idev->map_attr_kset.kobj,"maps"); + idev->map_attr_kset.ktype = &map_attr_type; + idev->map_attr_kset.kobj.parent = &idev->dev->kobj; + ret = kset_register(&idev->map_attr_kset); + if (ret) + goto err_remove_group; + } + kobject_init(&mem->kobj); + kobject_set_name(&mem->kobj,"map%d",mi); + mem->kobj.parent = &idev->map_attr_kset.kobj; + mem->kobj.kset = &idev->map_attr_kset; + ret = kobject_add(&mem->kobj); + if (ret) + goto err_remove_maps; + } + + return 0; + +err_remove_maps: + for (mi--; mi>=0; mi--) { + mem = &idev->info->mem[mi]; + kobject_unregister(&mem->kobj); + } + kset_unregister(&idev->map_attr_kset); /* Needed ? */ +err_remove_group: + sysfs_remove_group(&idev->dev->kobj, &uio_attr_grp); +err_group: + dev_err(idev->dev, "error creating sysfs files (%d)\n", ret); + return ret; +} + +static void uio_dev_del_attributes(struct uio_device *idev) +{ + int mi; + struct uio_mem *mem; + for (mi = 0; mi < MAX_UIO_MAPS; mi++) { + mem = &idev->info->mem[mi]; + if (mem->size == 0) + break; + kobject_unregister(&mem->kobj); + } + kset_unregister(&idev->map_attr_kset); + sysfs_remove_group(&idev->dev->kobj, &uio_attr_grp); +} + +static int uio_get_minor(struct uio_device *idev) +{ + static DEFINE_MUTEX(minor_lock); + int retval = -ENOMEM; + int id; + + mutex_lock(&minor_lock); + if (idr_pre_get(&uio_idr, GFP_KERNEL) == 0) + goto exit; + + retval = idr_get_new(&uio_idr, idev, &id); + if (retval < 0) { + if (retval == -EAGAIN) + retval = -ENOMEM; + goto exit; + } + idev->minor = id & MAX_ID_MASK; +exit: + mutex_unlock(&minor_lock); + return retval; +} + +static void uio_free_minor(struct uio_device *idev) +{ + idr_remove(&uio_idr, idev->minor); +} + +/** + * uio_event_notify - trigger an interrupt event + * @info: UIO device capabilities + */ +void uio_event_notify(struct uio_info *info) +{ + struct uio_device *idev = info->uio_dev; + + atomic_inc(&idev->event); + wake_up_interruptible(&idev->wait); + kill_fasync(&idev->async_queue, SIGIO, POLL_IN); +} +EXPORT_SYMBOL_GPL(uio_event_notify); + +/** + * uio_interrupt - hardware interrupt handler + * @irq: IRQ number, can be UIO_IRQ_CYCLIC for cyclic timer + * @dev_id: Pointer to the devices uio_device structure + */ +static irqreturn_t uio_interrupt(int irq, void *dev_id) +{ + struct uio_device *idev = (struct uio_device *)dev_id; + irqreturn_t ret = idev->info->handler(irq, idev->info); + + if (ret == IRQ_HANDLED) + uio_event_notify(idev->info); + + return ret; +} + +struct uio_listener { + struct uio_device *dev; + s32 event_count; +}; + +static int uio_open(struct inode *inode, struct file *filep) +{ + struct uio_device *idev; + struct uio_listener *listener; + int ret = 0; + + idev = idr_find(&uio_idr, iminor(inode)); + if (!idev) + return -ENODEV; + + listener = kmalloc(sizeof(*listener), GFP_KERNEL); + if (!listener) + return -ENOMEM; + + listener->dev = idev; + listener->event_count = atomic_read(&idev->event); + filep->private_data = listener; + + if (idev->info->open) { + if (!try_module_get(idev->owner)) + return -ENODEV; + ret = idev->info->open(idev->info, inode); + module_put(idev->owner); + } + + if (ret) + kfree(listener); + + return ret; +} + +static int uio_fasync(int fd, struct file *filep, int on) +{ + struct uio_listener *listener = filep->private_data; + struct uio_device *idev = listener->dev; + + return fasync_helper(fd, filep, on, &idev->async_queue); +} + +static int uio_release(struct inode *inode, struct file *filep) +{ + int ret = 0; + struct uio_listener *listener = filep->private_data; + struct uio_device *idev = listener->dev; + + if (idev->info->release) { + if (!try_module_get(idev->owner)) + return -ENODEV; + ret = idev->info->release(idev->info, inode); + module_put(idev->owner); + } + if (filep->f_flags & FASYNC) + ret = uio_fasync(-1, filep, 0); + kfree(listener); + return ret; +} + +static unsigned int uio_poll(struct file *filep, poll_table *wait) +{ + struct uio_listener *listener = filep->private_data; + struct uio_device *idev = listener->dev; + + if (idev->info->irq == UIO_IRQ_NONE) + return -EIO; + + poll_wait(filep, &idev->wait, wait); + if (listener->event_count != atomic_read(&idev->event)) + return POLLIN | POLLRDNORM; + return 0; +} + +static ssize_t uio_read(struct file *filep, char __user *buf, + size_t count, loff_t *ppos) +{ + struct uio_listener *listener = filep->private_data; + struct uio_device *idev = listener->dev; + DECLARE_WAITQUEUE(wait, current); + ssize_t retval; + s32 event_count; + + if (idev->info->irq == UIO_IRQ_NONE) + return -EIO; + + if (count != sizeof(s32)) + return -EINVAL; + + add_wait_queue(&idev->wait, &wait); + + do { + set_current_state(TASK_INTERRUPTIBLE); + + event_count = atomic_read(&idev->event); + if (event_count != listener->event_count) { + if (copy_to_user(buf, &event_count, count)) + retval = -EFAULT; + else { + listener->event_count = event_count; + retval = count; + } + break; + } + + if (filep->f_flags & O_NONBLOCK) { + retval = -EAGAIN; + break; + } + + if (signal_pending(current)) { + retval = -ERESTARTSYS; + break; + } + schedule(); + } while (1); + + __set_current_state(TASK_RUNNING); + remove_wait_queue(&idev->wait, &wait); + + return retval; +} + +static int uio_find_mem_index(struct vm_area_struct *vma) +{ + int mi; + struct uio_device *idev = vma->vm_private_data; + + for (mi = 0; mi < MAX_UIO_MAPS; mi++) { + if (idev->info->mem[mi].size == 0) + return -1; + if (vma->vm_pgoff == mi) + return mi; + } + return -1; +} + +static void uio_vma_open(struct vm_area_struct *vma) +{ + struct uio_device *idev = vma->vm_private_data; + idev->vma_count++; +} + +static void uio_vma_close(struct vm_area_struct *vma) +{ + struct uio_device *idev = vma->vm_private_data; + idev->vma_count--; +} + +static struct page *uio_vma_nopage(struct vm_area_struct *vma, + unsigned long address, int *type) +{ + struct uio_device *idev = vma->vm_private_data; + struct page* page = NOPAGE_SIGBUS; + + int mi = uio_find_mem_index(vma); + if (mi < 0) + return page; + + if (idev->info->mem[mi].memtype == UIO_MEM_LOGICAL) + page = virt_to_page(idev->info->mem[mi].addr); + else + page = vmalloc_to_page((void*)idev->info->mem[mi].addr); + get_page(page); + if (type) + *type = VM_FAULT_MINOR; + return page; +} + +static struct vm_operations_struct uio_vm_ops = { + .open = uio_vma_open, + .close = uio_vma_close, + .nopage = uio_vma_nopage, +}; + +static int uio_mmap_physical(struct vm_area_struct *vma) +{ + struct uio_device *idev = vma->vm_private_data; + int mi = uio_find_mem_index(vma); + if (mi < 0) + return -EINVAL; + + vma->vm_flags |= VM_IO | VM_RESERVED; + + return remap_pfn_range(vma, + vma->vm_start, + idev->info->mem[mi].addr >> PAGE_SHIFT, + vma->vm_end - vma->vm_start, + vma->vm_page_prot); +} + +static int uio_mmap_logical(struct vm_area_struct *vma) +{ + vma->vm_flags |= VM_RESERVED; + vma->vm_ops = &uio_vm_ops; + uio_vma_open(vma); + return 0; +} + +static int uio_mmap(struct file *filep, struct vm_area_struct *vma) +{ + struct uio_listener *listener = filep->private_data; + struct uio_device *idev = listener->dev; + int mi; + unsigned long requested_pages, actual_pages; + int ret = 0; + + if (vma->vm_end < vma->vm_start) + return -EINVAL; + + vma->vm_private_data = idev; + + mi = uio_find_mem_index(vma); + if (mi < 0) + return -EINVAL; + + requested_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + actual_pages = (idev->info->mem[mi].size + PAGE_SIZE -1) >> PAGE_SHIFT; + if (requested_pages > actual_pages) + return -EINVAL; + + if (idev->info->mmap) { + if (!try_module_get(idev->owner)) + return -ENODEV; + ret = idev->info->mmap(idev->info, vma); + module_put(idev->owner); + return ret; + } + + switch (idev->info->mem[mi].memtype) { + case UIO_MEM_PHYS: + return uio_mmap_physical(vma); + case UIO_MEM_LOGICAL: + case UIO_MEM_VIRTUAL: + return uio_mmap_logical(vma); + default: + return -EINVAL; + } +} + +static struct file_operations uio_fops = { + .owner = THIS_MODULE, + .open = uio_open, + .release = uio_release, + .read = uio_read, + .mmap = uio_mmap, + .poll = uio_poll, + .fasync = uio_fasync, +}; + +static int uio_major_init(void) +{ + uio_major = register_chrdev(0, "uio", &uio_fops); + if (uio_major < 0) + return uio_major; + return 0; +} + +static void uio_major_cleanup(void) +{ + unregister_chrdev(uio_major, "uio"); +} + +static int init_uio_class(void) +{ + int ret = 0; + + if (uio_class != NULL) { + kref_get(&uio_class->kref); + goto exit; + } + + /* This is the first time in here, set everything up properly */ + ret = uio_major_init(); + if (ret) + goto exit; + + uio_class = kzalloc(sizeof(*uio_class), GFP_KERNEL); + if (!uio_class) { + ret = -ENOMEM; + goto err_kzalloc; + } + + kref_init(&uio_class->kref); + uio_class->class = class_create(THIS_MODULE, "uio"); + if (IS_ERR(uio_class->class)) { + ret = IS_ERR(uio_class->class); + printk(KERN_ERR "class_create failed for uio\n"); + goto err_class_create; + } + return 0; + +err_class_create: + kfree(uio_class); + uio_class = NULL; +err_kzalloc: + uio_major_cleanup(); +exit: + return ret; +} + +static void release_uio_class(struct kref *kref) +{ + /* Ok, we cheat as we know we only have one uio_class */ + class_destroy(uio_class->class); + kfree(uio_class); + uio_major_cleanup(); + uio_class = NULL; +} + +static void uio_class_destroy(void) +{ + if (uio_class) + kref_put(&uio_class->kref, release_uio_class); +} + +/** + * uio_register_device - register a new userspace IO device + * @owner: module that creates the new device + * @parent: parent device + * @info: UIO device capabilities + * + * returns zero on success or a negative error code. + */ +int __uio_register_device(struct module *owner, + struct device *parent, + struct uio_info *info) +{ + struct uio_device *idev; + int ret = 0; + + if (!parent || !info || !info->name || !info->version) + return -EINVAL; + + info->uio_dev = NULL; + + ret = init_uio_class(); + if (ret) + return ret; + + idev = kzalloc(sizeof(*idev), GFP_KERNEL); + if (!idev) { + ret = -ENOMEM; + goto err_kzalloc; + } + + idev->owner = owner; + idev->info = info; + init_waitqueue_head(&idev->wait); + atomic_set(&idev->event, 0); + + ret = uio_get_minor(idev); + if (ret) + goto err_get_minor; + + idev->dev = device_create(uio_class->class, parent, + MKDEV(uio_major, idev->minor), + "uio%d", idev->minor); + if (IS_ERR(idev->dev)) { + printk(KERN_ERR "UIO: device register failed\n"); + ret = PTR_ERR(idev->dev); + goto err_device_create; + } + dev_set_drvdata(idev->dev, idev); + + ret = uio_dev_add_attributes(idev); + if (ret) + goto err_uio_dev_add_attributes; + + info->uio_dev = idev; + + if (idev->info->irq >= 0) { + ret = request_irq(idev->info->irq, uio_interrupt, + idev->info->irq_flags, idev->info->name, idev); + if (ret) + goto err_request_irq; + } + + return 0; + +err_request_irq: + uio_dev_del_attributes(idev); +err_uio_dev_add_attributes: + device_destroy(uio_class->class, MKDEV(uio_major, idev->minor)); +err_device_create: + uio_free_minor(idev); +err_get_minor: + kfree(idev); +err_kzalloc: + uio_class_destroy(); + return ret; +} +EXPORT_SYMBOL_GPL(__uio_register_device); + +/** + * uio_unregister_device - unregister a industrial IO device + * @info: UIO device capabilities + * + */ +void uio_unregister_device(struct uio_info *info) +{ + struct uio_device *idev; + + if (!info || !info->uio_dev) + return; + + idev = info->uio_dev; + + uio_free_minor(idev); + + if (info->irq >= 0) + free_irq(info->irq, idev); + + uio_dev_del_attributes(idev); + + dev_set_drvdata(idev->dev, NULL); + device_destroy(uio_class->class, MKDEV(uio_major, idev->minor)); + kfree(idev); + uio_class_destroy(); + + return; +} +EXPORT_SYMBOL_GPL(uio_unregister_device); + +static int __init uio_init(void) +{ + return 0; +} + +static void __exit uio_exit(void) +{ +} + +module_init(uio_init) +module_exit(uio_exit) +MODULE_LICENSE("GPL v2"); diff --git a/drivers/uio/uio_cif.c b/drivers/uio/uio_cif.c new file mode 100644 index 000000000000..838bae460831 --- /dev/null +++ b/drivers/uio/uio_cif.c @@ -0,0 +1,156 @@ +/* + * UIO Hilscher CIF card driver + * + * (C) 2007 Hans J. Koch <hjk@linutronix.de> + * Original code (C) 2005 Benedikt Spranger <b.spranger@linutronix.de> + * + * Licensed under GPL version 2 only. + * + */ + +#include <linux/device.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/uio_driver.h> + +#include <asm/io.h> + +#ifndef PCI_DEVICE_ID_PLX_9030 +#define PCI_DEVICE_ID_PLX_9030 0x9030 +#endif + +#define PLX9030_INTCSR 0x4C +#define INTSCR_INT1_ENABLE 0x01 +#define INTSCR_INT1_STATUS 0x04 +#define INT1_ENABLED_AND_ACTIVE (INTSCR_INT1_ENABLE | INTSCR_INT1_STATUS) + +#define PCI_SUBVENDOR_ID_PEP 0x1518 +#define CIF_SUBDEVICE_PROFIBUS 0x430 +#define CIF_SUBDEVICE_DEVICENET 0x432 + + +static irqreturn_t hilscher_handler(int irq, struct uio_info *dev_info) +{ + void __iomem *plx_intscr = dev_info->mem[0].internal_addr + + PLX9030_INTCSR; + + if ((ioread8(plx_intscr) & INT1_ENABLED_AND_ACTIVE) + != INT1_ENABLED_AND_ACTIVE) + return IRQ_NONE; + + /* Disable interrupt */ + iowrite8(ioread8(plx_intscr) & ~INTSCR_INT1_ENABLE, plx_intscr); + return IRQ_HANDLED; +} + +static int __devinit hilscher_pci_probe(struct pci_dev *dev, + const struct pci_device_id *id) +{ + struct uio_info *info; + + info = kzalloc(sizeof(struct uio_info), GFP_KERNEL); + if (!info) + return -ENOMEM; + + if (pci_enable_device(dev)) + goto out_free; + + if (pci_request_regions(dev, "hilscher")) + goto out_disable; + + info->mem[0].addr = pci_resource_start(dev, 0); + if (!info->mem[0].addr) + goto out_release; + info->mem[0].internal_addr = ioremap(pci_resource_start(dev, 0), + pci_resource_len(dev, 0)); + if (!info->mem[0].internal_addr) + goto out_release; + + info->mem[0].size = pci_resource_len(dev, 0); + info->mem[0].memtype = UIO_MEM_PHYS; + info->mem[1].addr = pci_resource_start(dev, 2); + info->mem[1].size = pci_resource_len(dev, 2); + info->mem[1].memtype = UIO_MEM_PHYS; + switch (id->subdevice) { + case CIF_SUBDEVICE_PROFIBUS: + info->name = "CIF_Profibus"; + break; + case CIF_SUBDEVICE_DEVICENET: + info->name = "CIF_Devicenet"; + break; + default: + info->name = "CIF_???"; + } + info->version = "0.0.1"; + info->irq = dev->irq; + info->irq_flags = IRQF_DISABLED | IRQF_SHARED; + info->handler = hilscher_handler; + + if (uio_register_device(&dev->dev, info)) + goto out_unmap; + + pci_set_drvdata(dev, info); + + return 0; +out_unmap: + iounmap(info->mem[0].internal_addr); +out_release: + pci_release_regions(dev); +out_disable: + pci_disable_device(dev); +out_free: + kfree (info); + return -ENODEV; +} + +static void hilscher_pci_remove(struct pci_dev *dev) +{ + struct uio_info *info = pci_get_drvdata(dev); + + uio_unregister_device(info); + pci_release_regions(dev); + pci_disable_device(dev); + pci_set_drvdata(dev, NULL); + iounmap(info->mem[0].internal_addr); + + kfree (info); +} + +static struct pci_device_id hilscher_pci_ids[] = { + { + .vendor = PCI_VENDOR_ID_PLX, + .device = PCI_DEVICE_ID_PLX_9030, + .subvendor = PCI_SUBVENDOR_ID_PEP, + .subdevice = CIF_SUBDEVICE_PROFIBUS, + }, + { + .vendor = PCI_VENDOR_ID_PLX, + .device = PCI_DEVICE_ID_PLX_9030, + .subvendor = PCI_SUBVENDOR_ID_PEP, + .subdevice = CIF_SUBDEVICE_DEVICENET, + }, + { 0, } +}; + +static struct pci_driver hilscher_pci_driver = { + .name = "hilscher", + .id_table = hilscher_pci_ids, + .probe = hilscher_pci_probe, + .remove = hilscher_pci_remove, +}; + +static int __init hilscher_init_module(void) +{ + return pci_register_driver(&hilscher_pci_driver); +} + +static void __exit hilscher_exit_module(void) +{ + pci_unregister_driver(&hilscher_pci_driver); +} + +module_init(hilscher_init_module); +module_exit(hilscher_exit_module); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Hans J. Koch, Benedikt Spranger"); diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c index 73c49362cd47..654857493a82 100644 --- a/drivers/usb/core/driver.c +++ b/drivers/usb/core/driver.c @@ -29,13 +29,6 @@ #include "hcd.h" #include "usb.h" -#define VERBOSE_DEBUG 0 - -#if VERBOSE_DEBUG -#define dev_vdbg dev_dbg -#else -#define dev_vdbg(dev, fmt, args...) do { } while (0) -#endif #ifdef CONFIG_HOTPLUG diff --git a/drivers/usb/gadget/goku_udc.c b/drivers/usb/gadget/goku_udc.c index d6c5f1150ae7..349b8166f34a 100644 --- a/drivers/usb/gadget/goku_udc.c +++ b/drivers/usb/gadget/goku_udc.c @@ -1777,14 +1777,13 @@ static int goku_probe(struct pci_dev *pdev, const struct pci_device_id *id) } /* alloc, and start init */ - dev = kmalloc (sizeof *dev, GFP_KERNEL); + dev = kzalloc (sizeof *dev, GFP_KERNEL); if (dev == NULL){ pr_debug("enomem %s\n", pci_name(pdev)); retval = -ENOMEM; goto done; } - memset(dev, 0, sizeof *dev); spin_lock_init(&dev->lock); dev->pdev = pdev; dev->gadget.ops = &goku_ops; diff --git a/drivers/usb/gadget/serial.c b/drivers/usb/gadget/serial.c index dd33ff0ae4ce..38138bb9ddb0 100644 --- a/drivers/usb/gadget/serial.c +++ b/drivers/usb/gadget/serial.c @@ -1427,7 +1427,7 @@ static int __init gs_bind(struct usb_gadget *gadget) gs_acm_config_desc.bmAttributes |= USB_CONFIG_ATT_WAKEUP; } - gs_device = dev = kmalloc(sizeof(struct gs_dev), GFP_KERNEL); + gs_device = dev = kzalloc(sizeof(struct gs_dev), GFP_KERNEL); if (dev == NULL) return -ENOMEM; @@ -1435,7 +1435,6 @@ static int __init gs_bind(struct usb_gadget *gadget) init_utsname()->sysname, init_utsname()->release, gadget->name); - memset(dev, 0, sizeof(struct gs_dev)); dev->dev_gadget = gadget; spin_lock_init(&dev->dev_lock); INIT_LIST_HEAD(&dev->dev_req_list); diff --git a/drivers/usb/host/ohci-hcd.c b/drivers/usb/host/ohci-hcd.c index 2038125b7f8c..6edf4097d2d2 100644 --- a/drivers/usb/host/ohci-hcd.c +++ b/drivers/usb/host/ohci-hcd.c @@ -171,11 +171,10 @@ static int ohci_urb_enqueue ( } /* allocate the private part of the URB */ - urb_priv = kmalloc (sizeof (urb_priv_t) + size * sizeof (struct td *), + urb_priv = kzalloc (sizeof (urb_priv_t) + size * sizeof (struct td *), mem_flags); if (!urb_priv) return -ENOMEM; - memset (urb_priv, 0, sizeof (urb_priv_t) + size * sizeof (struct td *)); INIT_LIST_HEAD (&urb_priv->pending); urb_priv->length = size; urb_priv->ed = ed; diff --git a/drivers/usb/host/sl811_cs.c b/drivers/usb/host/sl811_cs.c index 2d0e73b20099..5da63f535005 100644 --- a/drivers/usb/host/sl811_cs.c +++ b/drivers/usb/host/sl811_cs.c @@ -278,10 +278,9 @@ static int sl811_cs_probe(struct pcmcia_device *link) { local_info_t *local; - local = kmalloc(sizeof(local_info_t), GFP_KERNEL); + local = kzalloc(sizeof(local_info_t), GFP_KERNEL); if (!local) return -ENOMEM; - memset(local, 0, sizeof(local_info_t)); local->p_dev = link; link->priv = local; diff --git a/drivers/video/amba-clcd.c b/drivers/video/amba-clcd.c index 6c9dc2e69c82..a7a1c891bfa2 100644 --- a/drivers/video/amba-clcd.c +++ b/drivers/video/amba-clcd.c @@ -447,13 +447,12 @@ static int clcdfb_probe(struct amba_device *dev, void *id) goto out; } - fb = kmalloc(sizeof(struct clcd_fb), GFP_KERNEL); + fb = kzalloc(sizeof(struct clcd_fb), GFP_KERNEL); if (!fb) { printk(KERN_INFO "CLCD: could not allocate new clcd_fb struct\n"); ret = -ENOMEM; goto free_region; } - memset(fb, 0, sizeof(struct clcd_fb)); fb->dev = dev; fb->board = board; diff --git a/drivers/video/aty/atyfb_base.c b/drivers/video/aty/atyfb_base.c index ef330e34d031..0c7bf75732ea 100644 --- a/drivers/video/aty/atyfb_base.c +++ b/drivers/video/aty/atyfb_base.c @@ -2937,12 +2937,11 @@ static int __devinit atyfb_setup_sparc(struct pci_dev *pdev, /* nothing */ ; j = i + 4; - par->mmap_map = kmalloc(j * sizeof(*par->mmap_map), GFP_ATOMIC); + par->mmap_map = kcalloc(j, sizeof(*par->mmap_map), GFP_ATOMIC); if (!par->mmap_map) { PRINTKE("atyfb_setup_sparc() can't alloc mmap_map\n"); return -ENOMEM; } - memset(par->mmap_map, 0, j * sizeof(*par->mmap_map)); for (i = 0, j = 2; i < 6 && pdev->resource[i].start; i++) { struct resource *rp = &pdev->resource[i]; diff --git a/drivers/video/au1200fb.c b/drivers/video/au1200fb.c index dbf4ec3f6d57..03e57ef88378 100644 --- a/drivers/video/au1200fb.c +++ b/drivers/video/au1200fb.c @@ -1589,11 +1589,10 @@ static int au1200fb_init_fbinfo(struct au1200fb_device *fbdev) return -EFAULT; } - fbi->pseudo_palette = kmalloc(sizeof(u32) * 16, GFP_KERNEL); + fbi->pseudo_palette = kcalloc(16, sizeof(u32), GFP_KERNEL); if (!fbi->pseudo_palette) { return -ENOMEM; } - memset(fbi->pseudo_palette, 0, sizeof(u32) * 16); if (fb_alloc_cmap(&fbi->cmap, AU1200_LCD_NBR_PALETTE_ENTRIES, 0) < 0) { print_err("Fail to allocate colormap (%d entries)", diff --git a/drivers/video/clps711xfb.c b/drivers/video/clps711xfb.c index 50b78af0fa24..dea6579941b7 100644 --- a/drivers/video/clps711xfb.c +++ b/drivers/video/clps711xfb.c @@ -366,11 +366,10 @@ int __init clps711xfb_init(void) if (fb_get_options("clps711xfb", NULL)) return -ENODEV; - cfb = kmalloc(sizeof(*cfb), GFP_KERNEL); + cfb = kzalloc(sizeof(*cfb), GFP_KERNEL); if (!cfb) goto out; - memset(cfb, 0, sizeof(*cfb)); strcpy(cfb->fix.id, "clps711x"); cfb->fbops = &clps7111fb_ops; diff --git a/drivers/video/cyber2000fb.c b/drivers/video/cyber2000fb.c index 7a6eeda5ae9a..30ede6e8830f 100644 --- a/drivers/video/cyber2000fb.c +++ b/drivers/video/cyber2000fb.c @@ -1221,11 +1221,10 @@ cyberpro_alloc_fb_info(unsigned int id, char *name) { struct cfb_info *cfb; - cfb = kmalloc(sizeof(struct cfb_info), GFP_KERNEL); + cfb = kzalloc(sizeof(struct cfb_info), GFP_KERNEL); if (!cfb) return NULL; - memset(cfb, 0, sizeof(struct cfb_info)); cfb->id = id; diff --git a/drivers/video/pvr2fb.c b/drivers/video/pvr2fb.c index 0f88c30f94f8..f9300266044d 100644 --- a/drivers/video/pvr2fb.c +++ b/drivers/video/pvr2fb.c @@ -1082,13 +1082,12 @@ static int __init pvr2fb_init(void) #endif size = sizeof(struct fb_info) + sizeof(struct pvr2fb_par) + 16 * sizeof(u32); - fb_info = kmalloc(size, GFP_KERNEL); + fb_info = kzalloc(size, GFP_KERNEL); if (!fb_info) { printk(KERN_ERR "Failed to allocate memory for fb_info\n"); return -ENOMEM; } - memset(fb_info, 0, size); currentpar = (struct pvr2fb_par *)(fb_info + 1); diff --git a/drivers/video/savage/savagefb_driver.c b/drivers/video/savage/savagefb_driver.c index 3d7507ad55f6..b855f4a34afe 100644 --- a/drivers/video/savage/savagefb_driver.c +++ b/drivers/video/savage/savagefb_driver.c @@ -2174,11 +2174,10 @@ static int __devinit savage_init_fb_info(struct fb_info *info, #if defined(CONFIG_FB_SAVAGE_ACCEL) /* FIFO size + padding for commands */ - info->pixmap.addr = kmalloc(8*1024, GFP_KERNEL); + info->pixmap.addr = kcalloc(8, 1024, GFP_KERNEL); err = -ENOMEM; if (info->pixmap.addr) { - memset(info->pixmap.addr, 0, 8*1024); info->pixmap.size = 8*1024; info->pixmap.scan_align = 4; info->pixmap.buf_align = 4; diff --git a/drivers/video/valkyriefb.c b/drivers/video/valkyriefb.c index ad66f070acb8..7b0cef9ca8f9 100644 --- a/drivers/video/valkyriefb.c +++ b/drivers/video/valkyriefb.c @@ -356,10 +356,9 @@ int __init valkyriefb_init(void) } #endif /* ppc (!CONFIG_MAC) */ - p = kmalloc(sizeof(*p), GFP_ATOMIC); + p = kzalloc(sizeof(*p), GFP_ATOMIC); if (p == 0) return -ENOMEM; - memset(p, 0, sizeof(*p)); /* Map in frame buffer and registers */ if (!request_mem_region(frame_buffer_phys, 0x100000, "valkyriefb")) { diff --git a/drivers/w1/masters/matrox_w1.c b/drivers/w1/masters/matrox_w1.c index 6f9d880ab2e9..d356da5709fc 100644 --- a/drivers/w1/masters/matrox_w1.c +++ b/drivers/w1/masters/matrox_w1.c @@ -164,7 +164,7 @@ static int __devinit matrox_w1_probe(struct pci_dev *pdev, const struct pci_devi if (pdev->vendor != PCI_VENDOR_ID_MATROX || pdev->device != PCI_DEVICE_ID_MATROX_G400) return -ENODEV; - dev = kmalloc(sizeof(struct matrox_device) + + dev = kzalloc(sizeof(struct matrox_device) + sizeof(struct w1_bus_master), GFP_KERNEL); if (!dev) { dev_err(&pdev->dev, @@ -173,7 +173,6 @@ static int __devinit matrox_w1_probe(struct pci_dev *pdev, const struct pci_devi return -ENOMEM; } - memset(dev, 0, sizeof(struct matrox_device) + sizeof(struct w1_bus_master)); dev->bus_master = (struct w1_bus_master *)(dev + 1); diff --git a/drivers/w1/slaves/w1_ds2433.c b/drivers/w1/slaves/w1_ds2433.c index cab56005dd49..858c16a544c2 100644 --- a/drivers/w1/slaves/w1_ds2433.c +++ b/drivers/w1/slaves/w1_ds2433.c @@ -266,10 +266,9 @@ static int w1_f23_add_slave(struct w1_slave *sl) #ifdef CONFIG_W1_SLAVE_DS2433_CRC struct w1_f23_data *data; - data = kmalloc(sizeof(struct w1_f23_data), GFP_KERNEL); + data = kzalloc(sizeof(struct w1_f23_data), GFP_KERNEL); if (!data) return -ENOMEM; - memset(data, 0, sizeof(struct w1_f23_data)); sl->family_data = data; #endif /* CONFIG_W1_SLAVE_DS2433_CRC */ diff --git a/drivers/w1/w1.c b/drivers/w1/w1.c index c6332108f1c5..8d7ab74170d5 100644 --- a/drivers/w1/w1.c +++ b/drivers/w1/w1.c @@ -520,7 +520,7 @@ static int w1_attach_slave_device(struct w1_master *dev, struct w1_reg_num *rn) int err; struct w1_netlink_msg msg; - sl = kmalloc(sizeof(struct w1_slave), GFP_KERNEL); + sl = kzalloc(sizeof(struct w1_slave), GFP_KERNEL); if (!sl) { dev_err(&dev->dev, "%s: failed to allocate new slave device.\n", @@ -528,7 +528,6 @@ static int w1_attach_slave_device(struct w1_master *dev, struct w1_reg_num *rn) return -ENOMEM; } - memset(sl, 0, sizeof(*sl)); sl->owner = THIS_MODULE; sl->master = dev; diff --git a/drivers/w1/w1_int.c b/drivers/w1/w1_int.c index 258defdb2efd..2fbd8dd16df5 100644 --- a/drivers/w1/w1_int.c +++ b/drivers/w1/w1_int.c @@ -41,7 +41,7 @@ static struct w1_master * w1_alloc_dev(u32 id, int slave_count, int slave_ttl, /* * We are in process context(kernel thread), so can sleep. */ - dev = kmalloc(sizeof(struct w1_master) + sizeof(struct w1_bus_master), GFP_KERNEL); + dev = kzalloc(sizeof(struct w1_master) + sizeof(struct w1_bus_master), GFP_KERNEL); if (!dev) { printk(KERN_ERR "Failed to allocate %zd bytes for new w1 device.\n", @@ -49,7 +49,6 @@ static struct w1_master * w1_alloc_dev(u32 id, int slave_count, int slave_ttl, return NULL; } - memset(dev, 0, sizeof(struct w1_master) + sizeof(struct w1_bus_master)); dev->bus_master = (struct w1_bus_master *)(dev + 1); diff --git a/fs/afs/flock.c b/fs/afs/flock.c index 8f07f8d1bfa9..4f77f3caee97 100644 --- a/fs/afs/flock.c +++ b/fs/afs/flock.c @@ -456,7 +456,8 @@ static int afs_do_getlk(struct file *file, struct file_lock *fl) /* check local lock records first */ ret = 0; - if (posix_test_lock(file, fl) == 0) { + posix_test_lock(file, fl); + if (fl->fl_type == F_UNLCK) { /* no local locks; consult the server */ ret = afs_vnode_fetch_status(vnode, NULL, key); if (ret < 0) diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index a27e42bf3400..ba24cb2ff6ce 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -148,6 +148,7 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec, elf_addr_t *elf_info; int ei_index = 0; struct task_struct *tsk = current; + struct vm_area_struct *vma; /* * If this architecture has a platform capability string, copy it @@ -234,6 +235,15 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec, sp = (elf_addr_t __user *)bprm->p; #endif + + /* + * Grow the stack manually; some architectures have a limit on how + * far ahead a user-space access may be in order to grow the stack. + */ + vma = find_extend_vma(current->mm, bprm->p); + if (!vma) + return -EFAULT; + /* Now, let's put argc (and argv, envp if appropriate) on the stack */ if (__put_user(argc, sp++)) return -EFAULT; @@ -254,8 +264,8 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec, size_t len; if (__put_user((elf_addr_t)p, argv++)) return -EFAULT; - len = strnlen_user((void __user *)p, PAGE_SIZE*MAX_ARG_PAGES); - if (!len || len > PAGE_SIZE*MAX_ARG_PAGES) + len = strnlen_user((void __user *)p, MAX_ARG_STRLEN); + if (!len || len > MAX_ARG_STRLEN) return 0; p += len; } @@ -266,8 +276,8 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec, size_t len; if (__put_user((elf_addr_t)p, envp++)) return -EFAULT; - len = strnlen_user((void __user *)p, PAGE_SIZE*MAX_ARG_PAGES); - if (!len || len > PAGE_SIZE*MAX_ARG_PAGES) + len = strnlen_user((void __user *)p, MAX_ARG_STRLEN); + if (!len || len > MAX_ARG_STRLEN) return 0; p += len; } @@ -826,10 +836,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) } /* OK, This is the point of no return */ - current->mm->start_data = 0; - current->mm->end_data = 0; - current->mm->end_code = 0; - current->mm->mmap = NULL; current->flags &= ~PF_FORKNOEXEC; current->mm->def_flags = def_flags; @@ -1051,9 +1057,13 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) compute_creds(bprm); current->flags &= ~PF_FORKNOEXEC; - create_elf_tables(bprm, &loc->elf_ex, + retval = create_elf_tables(bprm, &loc->elf_ex, (interpreter_type == INTERPRETER_AOUT), load_addr, interp_load_addr); + if (retval < 0) { + send_sig(SIGKILL, current, 0); + goto out; + } /* N.B. passed_fileno might not be initialized? */ if (interpreter_type == INTERPRETER_AOUT) current->mm->arg_start += strlen(passed_fileno) + 1; @@ -1252,7 +1262,7 @@ static int dump_seek(struct file *file, loff_t off) * * I think we should skip something. But I am not sure how. H.J. */ -static int maydump(struct vm_area_struct *vma) +static int maydump(struct vm_area_struct *vma, unsigned long mm_flags) { /* The vma can be set up to tell us the answer directly. */ if (vma->vm_flags & VM_ALWAYSDUMP) @@ -1262,15 +1272,19 @@ static int maydump(struct vm_area_struct *vma) if (vma->vm_flags & (VM_IO | VM_RESERVED)) return 0; - /* Dump shared memory only if mapped from an anonymous file. */ - if (vma->vm_flags & VM_SHARED) - return vma->vm_file->f_path.dentry->d_inode->i_nlink == 0; + /* By default, dump shared memory if mapped from an anonymous file. */ + if (vma->vm_flags & VM_SHARED) { + if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0) + return test_bit(MMF_DUMP_ANON_SHARED, &mm_flags); + else + return test_bit(MMF_DUMP_MAPPED_SHARED, &mm_flags); + } - /* If it hasn't been written to, don't write it out */ + /* By default, if it hasn't been written to, don't write it out. */ if (!vma->anon_vma) - return 0; + return test_bit(MMF_DUMP_MAPPED_PRIVATE, &mm_flags); - return 1; + return test_bit(MMF_DUMP_ANON_PRIVATE, &mm_flags); } /* An ELF note in memory */ @@ -1562,6 +1576,7 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file) #endif int thread_status_size = 0; elf_addr_t *auxv; + unsigned long mm_flags; #ifdef ELF_CORE_WRITE_EXTRA_NOTES int extra_notes_size; #endif @@ -1705,6 +1720,13 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file) dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE); + /* + * We must use the same mm->flags while dumping core to avoid + * inconsistency between the program headers and bodies, otherwise an + * unusable core file can be generated. + */ + mm_flags = current->mm->flags; + /* Write program headers for segments dump */ for (vma = first_vma(current, gate_vma); vma != NULL; vma = next_vma(vma, gate_vma)) { @@ -1717,7 +1739,7 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file) phdr.p_offset = offset; phdr.p_vaddr = vma->vm_start; phdr.p_paddr = 0; - phdr.p_filesz = maydump(vma) ? sz : 0; + phdr.p_filesz = maydump(vma, mm_flags) ? sz : 0; phdr.p_memsz = sz; offset += phdr.p_filesz; phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0; @@ -1761,7 +1783,7 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file) vma = next_vma(vma, gate_vma)) { unsigned long addr; - if (!maydump(vma)) + if (!maydump(vma, mm_flags)) continue; for (addr = vma->vm_start; diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 9d62fbad3d4b..2f5d8dbe676d 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -621,8 +621,8 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm, p = (char __user *) current->mm->arg_start; for (loop = bprm->argc; loop > 0; loop--) { __put_user((elf_caddr_t) p, argv++); - len = strnlen_user(p, PAGE_SIZE * MAX_ARG_PAGES); - if (!len || len > PAGE_SIZE * MAX_ARG_PAGES) + len = strnlen_user(p, MAX_ARG_STRLEN); + if (!len || len > MAX_ARG_STRLEN) return -EINVAL; p += len; } @@ -633,8 +633,8 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm, current->mm->env_start = (unsigned long) p; for (loop = bprm->envc; loop > 0; loop--) { __put_user((elf_caddr_t)(unsigned long) p, envp++); - len = strnlen_user(p, PAGE_SIZE * MAX_ARG_PAGES); - if (!len || len > PAGE_SIZE * MAX_ARG_PAGES) + len = strnlen_user(p, MAX_ARG_STRLEN); + if (!len || len > MAX_ARG_STRLEN) return -EINVAL; p += len; } @@ -1181,8 +1181,10 @@ static int dump_seek(struct file *file, loff_t off) * * I think we should skip something. But I am not sure how. H.J. */ -static int maydump(struct vm_area_struct *vma) +static int maydump(struct vm_area_struct *vma, unsigned long mm_flags) { + int dump_ok; + /* Do not dump I/O mapped devices or special mappings */ if (vma->vm_flags & (VM_IO | VM_RESERVED)) { kdcore("%08lx: %08lx: no (IO)", vma->vm_start, vma->vm_flags); @@ -1197,27 +1199,35 @@ static int maydump(struct vm_area_struct *vma) return 0; } - /* Dump shared memory only if mapped from an anonymous file. */ + /* By default, dump shared memory if mapped from an anonymous file. */ if (vma->vm_flags & VM_SHARED) { if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0) { - kdcore("%08lx: %08lx: no (share)", vma->vm_start, vma->vm_flags); - return 1; + dump_ok = test_bit(MMF_DUMP_ANON_SHARED, &mm_flags); + kdcore("%08lx: %08lx: %s (share)", vma->vm_start, + vma->vm_flags, dump_ok ? "yes" : "no"); + return dump_ok; } - kdcore("%08lx: %08lx: no (share)", vma->vm_start, vma->vm_flags); - return 0; + dump_ok = test_bit(MMF_DUMP_MAPPED_SHARED, &mm_flags); + kdcore("%08lx: %08lx: %s (share)", vma->vm_start, + vma->vm_flags, dump_ok ? "yes" : "no"); + return dump_ok; } #ifdef CONFIG_MMU - /* If it hasn't been written to, don't write it out */ + /* By default, if it hasn't been written to, don't write it out */ if (!vma->anon_vma) { - kdcore("%08lx: %08lx: no (!anon)", vma->vm_start, vma->vm_flags); - return 0; + dump_ok = test_bit(MMF_DUMP_MAPPED_PRIVATE, &mm_flags); + kdcore("%08lx: %08lx: %s (!anon)", vma->vm_start, + vma->vm_flags, dump_ok ? "yes" : "no"); + return dump_ok; } #endif - kdcore("%08lx: %08lx: yes", vma->vm_start, vma->vm_flags); - return 1; + dump_ok = test_bit(MMF_DUMP_ANON_PRIVATE, &mm_flags); + kdcore("%08lx: %08lx: %s", vma->vm_start, vma->vm_flags, + dump_ok ? "yes" : "no"); + return dump_ok; } /* An ELF note in memory */ @@ -1456,15 +1466,15 @@ static int elf_dump_thread_status(long signr, struct elf_thread_status *t) * dump the segments for an MMU process */ #ifdef CONFIG_MMU -static int elf_fdpic_dump_segments(struct file *file, struct mm_struct *mm, - size_t *size, unsigned long *limit) +static int elf_fdpic_dump_segments(struct file *file, size_t *size, + unsigned long *limit, unsigned long mm_flags) { struct vm_area_struct *vma; for (vma = current->mm->mmap; vma; vma = vma->vm_next) { unsigned long addr; - if (!maydump(vma)) + if (!maydump(vma, mm_flags)) continue; for (addr = vma->vm_start; @@ -1511,15 +1521,15 @@ end_coredump: * dump the segments for a NOMMU process */ #ifndef CONFIG_MMU -static int elf_fdpic_dump_segments(struct file *file, struct mm_struct *mm, - size_t *size, unsigned long *limit) +static int elf_fdpic_dump_segments(struct file *file, size_t *size, + unsigned long *limit, unsigned long mm_flags) { struct vm_list_struct *vml; for (vml = current->mm->context.vmlist; vml; vml = vml->next) { struct vm_area_struct *vma = vml->vma; - if (!maydump(vma)) + if (!maydump(vma, mm_flags)) continue; if ((*size += PAGE_SIZE) > *limit) @@ -1570,6 +1580,7 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs, struct vm_list_struct *vml; #endif elf_addr_t *auxv; + unsigned long mm_flags; /* * We no longer stop all VM operations. @@ -1707,6 +1718,13 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs, /* Page-align dumped data */ dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE); + /* + * We must use the same mm->flags while dumping core to avoid + * inconsistency between the program headers and bodies, otherwise an + * unusable core file can be generated. + */ + mm_flags = current->mm->flags; + /* write program headers for segments dump */ for ( #ifdef CONFIG_MMU @@ -1728,7 +1746,7 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs, phdr.p_offset = offset; phdr.p_vaddr = vma->vm_start; phdr.p_paddr = 0; - phdr.p_filesz = maydump(vma) ? sz : 0; + phdr.p_filesz = maydump(vma, mm_flags) ? sz : 0; phdr.p_memsz = sz; offset += phdr.p_filesz; phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0; @@ -1762,7 +1780,7 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs, DUMP_SEEK(dataoff); - if (elf_fdpic_dump_segments(file, current->mm, &size, &limit) < 0) + if (elf_fdpic_dump_segments(file, &size, &limit, mm_flags) < 0) goto end_coredump; #ifdef ELF_CORE_WRITE_EXTRA_DATA diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 330fd3fe8546..42e94b3ab7be 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -126,7 +126,9 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs) goto _ret; if (!(fmt->flags & MISC_FMT_PRESERVE_ARGV0)) { - remove_arg_zero(bprm); + retval = remove_arg_zero(bprm); + if (retval) + goto _ret; } if (fmt->flags & MISC_FMT_OPEN_BINARY) { diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c index 304c88544d89..4d0e0f6d3273 100644 --- a/fs/binfmt_script.c +++ b/fs/binfmt_script.c @@ -67,7 +67,9 @@ static int load_script(struct linux_binprm *bprm,struct pt_regs *regs) * This is done in reverse order, because of how the * user environment and arguments are stored. */ - remove_arg_zero(bprm); + retval = remove_arg_zero(bprm); + if (retval) + return retval; retval = copy_strings_kernel(1, &bprm->interp, bprm); if (retval < 0) return retval; bprm->argc++; diff --git a/fs/char_dev.c b/fs/char_dev.c index 164a45cdaf5f..bbbf07baa145 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c @@ -321,14 +321,13 @@ void unregister_chrdev_region(dev_t from, unsigned count) } } -int unregister_chrdev(unsigned int major, const char *name) +void unregister_chrdev(unsigned int major, const char *name) { struct char_device_struct *cd; cd = __unregister_chrdev_region(major, 0, 256); if (cd && cd->cdev) cdev_del(cd->cdev); kfree(cd); - return 0; } static DEFINE_SPINLOCK(cdev_lock); diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES index a9b6bc5157b8..6d84ca2beead 100644 --- a/fs/cifs/CHANGES +++ b/fs/cifs/CHANGES @@ -1,3 +1,10 @@ +Version 1.50 +------------ +Fix NTLMv2 signing. NFS server mounted over cifs works (if cifs mount is +done with "serverino" mount option). Add support for POSIX Unlink +(helps with certain sharing violation cases when server such as +Samba supports newer POSIX CIFS Protocol Extensions). + Version 1.49 ------------ IPv6 support. Enable ipv6 addresses to be passed on mount (put the ipv6 @@ -8,7 +15,11 @@ when Unix Extensions were ignored). This allows users to override the default uid and gid for files when they are certain that the uids or gids on the server do not match those of the client. Make "sec=none" mount override username (so that null user connection is attempted) -to match what documentation said. +to match what documentation said. Support for very large reads, over 127K, +available to some newer servers (such as Samba 3.0.26 and later but +note that it also requires setting CIFSMaxBufSize at module install +time to a larger value which may hurt performance in some cases). +Make sign option force signing (or fail if server does not support it). Version 1.48 ------------ diff --git a/fs/cifs/README b/fs/cifs/README index 4d01697722cc..85f1eb14083e 100644 --- a/fs/cifs/README +++ b/fs/cifs/README @@ -301,10 +301,21 @@ A partial list of the supported mount options follows: during the local client kernel build will be used. If server does not support Unicode, this parameter is unused. - rsize default read size (usually 16K) - wsize default write size (usually 16K, 32K is often better over GigE) - maximum wsize currently allowed by CIFS is 57344 (14 4096 byte - pages) + rsize default read size (usually 16K). The client currently + can not use rsize larger than CIFSMaxBufSize. CIFSMaxBufSize + defaults to 16K and may be changed (from 8K to the maximum + kmalloc size allowed by your kernel) at module install time + for cifs.ko. Setting CIFSMaxBufSize to a very large value + will cause cifs to use more memory and may reduce performance + in some cases. To use rsize greater than 127K (the original + cifs protocol maximum) also requires that the server support + a new Unix Capability flag (for very large read) which some + newer servers (e.g. Samba 3.0.26 or later) do. rsize can be + set from a minimum of 2048 to a maximum of 130048 (127K or + CIFSMaxBufSize, whichever is smaller) + wsize default write size (default 57344) + maximum wsize currently allowed by CIFS is 57344 (fourteen + 4096 byte pages) rw mount the network share read-write (note that the server may still consider the share read-only) ro mount network share read-only @@ -359,7 +370,7 @@ A partial list of the supported mount options follows: Note that this does not affect the normal ACL check on the target machine done by the server software (of the server ACL against the user name provided at mount time). - serverino Use servers inode numbers instead of generating automatically + serverino Use server's inode numbers instead of generating automatically incrementing inode numbers on the client. Although this will make it easier to spot hardlinked files (as they will have the same inode numbers) and inode numbers may be persistent, @@ -367,12 +378,11 @@ A partial list of the supported mount options follows: are unique if multiple server side mounts are exported under a single share (since inode numbers on the servers might not be unique if multiple filesystems are mounted under the same - shared higher level directory). Note that this requires that - the server support the CIFS Unix Extensions as other servers - do not return a unique IndexNumber on SMB FindFirst (most - servers return zero as the IndexNumber). Parameter has no - effect to Windows servers and others which do not support the - CIFS Unix Extensions. + shared higher level directory). Note that some older + (e.g. pre-Windows 2000) do not support returning UniqueIDs + or the CIFS Unix Extensions equivalent and for those + this mount option will have no effect. Exporting cifs mounts + under nfsd requires this mount option on the cifs mount. noserverino Client generates inode numbers (rather than using the actual one from the server) by default. setuids If the CIFS Unix extensions are negotiated with the server @@ -582,10 +592,10 @@ the start of smb requests and responses can be enabled via: echo 1 > /proc/fs/cifs/traceSMB -Two other experimental features are under development and to test -require enabling CONFIG_CIFS_EXPERIMENTAL +Two other experimental features are under development. To test these +requires enabling CONFIG_CIFS_EXPERIMENTAL - More efficient write operations + ipv6 enablement DNOTIFY fcntl: needed for support of directory change notification and perhaps later for file leases) diff --git a/fs/cifs/TODO b/fs/cifs/TODO index 78b620e332bd..d7bd51575fd6 100644 --- a/fs/cifs/TODO +++ b/fs/cifs/TODO @@ -18,9 +18,9 @@ better) d) Kerberos/SPNEGO session setup support - (started) -e) More testing of NTLMv2 authentication (mostly implemented - double check -that NTLMv2 signing works, also need to cleanup now unneeded SessSetup code in -fs/cifs/connect.c) +e) Cleanup now unneeded SessSetup code in +fs/cifs/connect.c and add back in NTLMSSP code if any servers +need it f) MD5-HMAC signing SMB PDUs when SPNEGO style SessionSetup used (Kerberos or NTLMSSP). Signing alreadyimplemented for NTLM @@ -106,6 +106,12 @@ but recognizes them succeed but still return access denied (appears to be Windows server not cifs client problem) and has not been reproduced recently. NTFS partitions do not have this problem. +4) Unix/POSIX capabilities are reset after reconnection, and affect +a few fields in the tree connection but we do do not know which +superblocks to apply these changes to. We should probably walk +the list of superblocks to set these. Also need to check the +flags on the second mount to the same share, and see if we +can do the same trick that NFS does to remount duplicate shares. Misc testing to do ================== diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c index 2e75883b7f54..f50a88d58f78 100644 --- a/fs/cifs/asn1.c +++ b/fs/cifs/asn1.c @@ -1,7 +1,7 @@ -/* +/* * The ASB.1/BER parsing code is derived from ip_nat_snmp_basic.c which was in * turn derived from the gxsnmp package by Gregory McLean & Jochen Friedrich - * + * * Copyright (c) 2000 RP Internet (www.rpi.net.au). * * This program is free software; you can redistribute it and/or modify @@ -80,7 +80,7 @@ static unsigned long SPNEGO_OID[7] = { 1, 3, 6, 1, 5, 5, 2 }; static unsigned long NTLMSSP_OID[10] = { 1, 3, 6, 1, 4, 1, 311, 2, 2, 10 }; -/* +/* * ASN.1 context. */ struct asn1_ctx { @@ -190,7 +190,7 @@ asn1_header_decode(struct asn1_ctx *ctx, unsigned char **eoc, unsigned int *cls, unsigned int *con, unsigned int *tag) { - unsigned int def = 0; + unsigned int def = 0; unsigned int len = 0; if (!asn1_id_decode(ctx, cls, con, tag)) @@ -331,7 +331,7 @@ static unsigned char asn1_ulong_decode(struct asn1_ctx *ctx, *integer |= ch; } return 1; -} +} static unsigned char asn1_octets_decode(struct asn1_ctx *ctx, @@ -376,7 +376,7 @@ asn1_subid_decode(struct asn1_ctx *ctx, unsigned long *subid) return 1; } -static int +static int asn1_oid_decode(struct asn1_ctx *ctx, unsigned char *eoc, unsigned long **oid, unsigned int *len) { @@ -459,7 +459,7 @@ decode_negTokenInit(unsigned char *security_blob, int length, unsigned int cls, con, tag, oidlen, rc; int use_ntlmssp = FALSE; - *secType = NTLM; /* BB eventually make Kerberos or NLTMSSP the default */ + *secType = NTLM; /* BB eventually make Kerberos or NLTMSSP the default*/ /* cifs_dump_mem(" Received SecBlob ", security_blob, length); */ @@ -498,7 +498,8 @@ decode_negTokenInit(unsigned char *security_blob, int length, return 0; } else if ((cls != ASN1_CTX) || (con != ASN1_CON) || (tag != ASN1_EOC)) { - cFYI(1,("cls = %d con = %d tag = %d end = %p (%d) exit 0", + cFYI(1, + ("cls = %d con = %d tag = %d end = %p (%d) exit 0", cls, con, tag, end, *end)); return 0; } @@ -508,7 +509,8 @@ decode_negTokenInit(unsigned char *security_blob, int length, return 0; } else if ((cls != ASN1_UNI) || (con != ASN1_CON) || (tag != ASN1_SEQ)) { - cFYI(1,("cls = %d con = %d tag = %d end = %p (%d) exit 1", + cFYI(1, + ("cls = %d con = %d tag = %d end = %p (%d) exit 1", cls, con, tag, end, *end)); return 0; } @@ -540,32 +542,34 @@ decode_negTokenInit(unsigned char *security_blob, int length, rc = asn1_header_decode(&ctx, &end, &cls, &con, &tag); if (!rc) { cFYI(1, - ("Error 1 decoding negTokenInit header exit 2")); + ("Error decoding negTokenInit hdr exit2")); return 0; } if ((tag == ASN1_OJI) && (con == ASN1_PRI)) { rc = asn1_oid_decode(&ctx, end, &oid, &oidlen); - if(rc) { + if (rc) { cFYI(1, - ("OID len = %d oid = 0x%lx 0x%lx 0x%lx 0x%lx", - oidlen, *oid, *(oid + 1), *(oid + 2), - *(oid + 3))); - rc = compare_oid(oid, oidlen, NTLMSSP_OID, - NTLMSSP_OID_LEN); + ("OID len = %d oid = 0x%lx 0x%lx " + "0x%lx 0x%lx", + oidlen, *oid, *(oid + 1), + *(oid + 2), *(oid + 3))); + rc = compare_oid(oid, oidlen, + NTLMSSP_OID, NTLMSSP_OID_LEN); kfree(oid); if (rc) use_ntlmssp = TRUE; } } else { - cFYI(1,("This should be an oid what is going on? ")); + cFYI(1, ("Should be an oid what is going on?")); } } if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { cFYI(1, - ("Error decoding last part of negTokenInit exit 3")); + ("Error decoding last part negTokenInit exit3")); return 0; - } else if ((cls != ASN1_CTX) || (con != ASN1_CON)) { /* tag = 3 indicating mechListMIC */ + } else if ((cls != ASN1_CTX) || (con != ASN1_CON)) { + /* tag = 3 indicating mechListMIC */ cFYI(1, ("Exit 4 cls = %d con = %d tag = %d end = %p (%d)", cls, con, tag, end, *end)); @@ -573,7 +577,7 @@ decode_negTokenInit(unsigned char *security_blob, int length, } if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { cFYI(1, - ("Error decoding last part of negTokenInit exit 5")); + ("Error decoding last part negTokenInit exit5")); return 0; } else if ((cls != ASN1_UNI) || (con != ASN1_CON) || (tag != ASN1_SEQ)) { @@ -584,7 +588,7 @@ decode_negTokenInit(unsigned char *security_blob, int length, if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { cFYI(1, - ("Error decoding last part of negTokenInit exit 7")); + ("Error decoding last part negTokenInit exit 7")); return 0; } else if ((cls != ASN1_CTX) || (con != ASN1_CON)) { cFYI(1, @@ -594,20 +598,21 @@ decode_negTokenInit(unsigned char *security_blob, int length, } if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { cFYI(1, - ("Error decoding last part of negTokenInit exit 9")); + ("Error decoding last part negTokenInit exit9")); return 0; } else if ((cls != ASN1_UNI) || (con != ASN1_PRI) || (tag != ASN1_GENSTR)) { cFYI(1, - ("Exit 10 cls = %d con = %d tag = %d end = %p (%d)", + ("Exit10 cls = %d con = %d tag = %d end = %p (%d)", cls, con, tag, end, *end)); return 0; } - cFYI(1, ("Need to call asn1_octets_decode() function for this %s", ctx.pointer)); /* is this UTF-8 or ASCII? */ + cFYI(1, ("Need to call asn1_octets_decode() function for %s", + ctx.pointer)); /* is this UTF-8 or ASCII? */ } - /* if (use_kerberos) - *secType = Kerberos + /* if (use_kerberos) + *secType = Kerberos else */ if (use_ntlmssp) { *secType = NTLMSSP; diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index 07838b2ac1ce..1bf8cf522ad6 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -58,7 +58,7 @@ cifs_dump_mem(char *label, void *data, int length) } #ifdef CONFIG_CIFS_DEBUG2 -void cifs_dump_detail(struct smb_hdr * smb) +void cifs_dump_detail(struct smb_hdr *smb) { cERROR(1, ("Cmd: %d Err: 0x%x Flags: 0x%x Flgs2: 0x%x Mid: %d Pid: %d", smb->Command, smb->Status.CifsError, @@ -67,10 +67,10 @@ void cifs_dump_detail(struct smb_hdr * smb) } -void cifs_dump_mids(struct TCP_Server_Info * server) +void cifs_dump_mids(struct TCP_Server_Info *server) { struct list_head *tmp; - struct mid_q_entry * mid_entry; + struct mid_q_entry *mid_entry; if (server == NULL) return; @@ -114,12 +114,12 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset, { struct list_head *tmp; struct list_head *tmp1; - struct mid_q_entry * mid_entry; + struct mid_q_entry *mid_entry; struct cifsSesInfo *ses; struct cifsTconInfo *tcon; int i; int length = 0; - char * original_buf = buf; + char *original_buf = buf; *beginBuffer = buf + offset; @@ -145,7 +145,6 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset, (ses->serverNOS == NULL)) { buf += sprintf(buf, "\nentry for %s not fully " "displayed\n\t", ses->serverName); - } else { length = sprintf(buf, @@ -901,90 +900,14 @@ security_flags_write(struct file *file, const char __user *buffer, } /* flags look ok - update the global security flags for cifs module */ extended_security = flags; + if (extended_security & CIFSSEC_MUST_SIGN) { + /* requiring signing implies signing is allowed */ + extended_security |= CIFSSEC_MAY_SIGN; + cFYI(1, ("packet signing now required")); + } else if ((extended_security & CIFSSEC_MAY_SIGN) == 0) { + cFYI(1, ("packet signing disabled")); + } + /* BB should we turn on MAY flags for other MUST options? */ return count; } - -/* static int -ntlmv2_enabled_read(char *page, char **start, off_t off, - int count, int *eof, void *data) -{ - int len; - - len = sprintf(page, "%d\n", ntlmv2_support); - - len -= off; - *start = page + off; - - if (len > count) - len = count; - else - *eof = 1; - - if (len < 0) - len = 0; - - return len; -} -static int -ntlmv2_enabled_write(struct file *file, const char __user *buffer, - unsigned long count, void *data) -{ - char c; - int rc; - - rc = get_user(c, buffer); - if (rc) - return rc; - if (c == '0' || c == 'n' || c == 'N') - ntlmv2_support = 0; - else if (c == '1' || c == 'y' || c == 'Y') - ntlmv2_support = 1; - else if (c == '2') - ntlmv2_support = 2; - - return count; -} - -static int -packet_signing_enabled_read(char *page, char **start, off_t off, - int count, int *eof, void *data) -{ - int len; - - len = sprintf(page, "%d\n", sign_CIFS_PDUs); - - len -= off; - *start = page + off; - - if (len > count) - len = count; - else - *eof = 1; - - if (len < 0) - len = 0; - - return len; -} -static int -packet_signing_enabled_write(struct file *file, const char __user *buffer, - unsigned long count, void *data) -{ - char c; - int rc; - - rc = get_user(c, buffer); - if (rc) - return rc; - if (c == '0' || c == 'n' || c == 'N') - sign_CIFS_PDUs = 0; - else if (c == '1' || c == 'y' || c == 'Y') - sign_CIFS_PDUs = 1; - else if (c == '2') - sign_CIFS_PDUs = 2; - - return count; -} */ - - #endif diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h index 4cc2012e9322..34af556cdd8d 100644 --- a/fs/cifs/cifs_fs_sb.h +++ b/fs/cifs/cifs_fs_sb.h @@ -43,6 +43,6 @@ struct cifs_sb_info { mode_t mnt_dir_mode; int mnt_cifs_flags; int prepathlen; - char * prepath; + char *prepath; }; #endif /* _CIFS_FS_SB_H */ diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c index 701e9a9185f2..b5903b89250d 100644 --- a/fs/cifs/cifs_unicode.c +++ b/fs/cifs/cifs_unicode.c @@ -66,7 +66,7 @@ cifs_strtoUCS(__le16 * to, const char *from, int len, { int charlen; int i; - wchar_t * wchar_to = (wchar_t *)to; /* needed to quiet sparse */ + wchar_t *wchar_to = (wchar_t *)to; /* needed to quiet sparse */ for (i = 0; len && *from; i++, from += charlen, len -= charlen) { diff --git a/fs/cifs/cifs_unicode.h b/fs/cifs/cifs_unicode.h index 39e5b970325f..614c11fcdcb6 100644 --- a/fs/cifs/cifs_unicode.h +++ b/fs/cifs/cifs_unicode.h @@ -5,20 +5,20 @@ * Convert a unicode character to upper or lower case using * compressed tables. * - * Copyright (c) International Business Machines Corp., 2000,2005555555555555555555555555555555555555555555555555555555 + * Copyright (c) International Business Machines Corp., 2000,2007 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or + * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See * the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software + * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * * @@ -70,7 +70,7 @@ int cifs_strtoUCS(__le16 *, const char *, int, const struct nls_table *); * Address of the first string */ static inline wchar_t * -UniStrcat(wchar_t * ucs1, const wchar_t * ucs2) +UniStrcat(wchar_t *ucs1, const wchar_t *ucs2) { wchar_t *anchor = ucs1; /* save a pointer to start of ucs1 */ @@ -88,7 +88,7 @@ UniStrcat(wchar_t * ucs1, const wchar_t * ucs2) * or NULL if the character is not in the string */ static inline wchar_t * -UniStrchr(const wchar_t * ucs, wchar_t uc) +UniStrchr(const wchar_t *ucs, wchar_t uc) { while ((*ucs != uc) && *ucs) ucs++; @@ -107,7 +107,7 @@ UniStrchr(const wchar_t * ucs, wchar_t uc) * > 0: First string is greater than second */ static inline int -UniStrcmp(const wchar_t * ucs1, const wchar_t * ucs2) +UniStrcmp(const wchar_t *ucs1, const wchar_t *ucs2) { while ((*ucs1 == *ucs2) && *ucs1) { ucs1++; @@ -120,7 +120,7 @@ UniStrcmp(const wchar_t * ucs1, const wchar_t * ucs2) * UniStrcpy: Copy a string */ static inline wchar_t * -UniStrcpy(wchar_t * ucs1, const wchar_t * ucs2) +UniStrcpy(wchar_t *ucs1, const wchar_t *ucs2) { wchar_t *anchor = ucs1; /* save the start of result string */ @@ -132,7 +132,7 @@ UniStrcpy(wchar_t * ucs1, const wchar_t * ucs2) * UniStrlen: Return the length of a string (in 16 bit Unicode chars not bytes) */ static inline size_t -UniStrlen(const wchar_t * ucs1) +UniStrlen(const wchar_t *ucs1) { int i = 0; @@ -142,10 +142,11 @@ UniStrlen(const wchar_t * ucs1) } /* - * UniStrnlen: Return the length (in 16 bit Unicode chars not bytes) of a string (length limited) + * UniStrnlen: Return the length (in 16 bit Unicode chars not bytes) of a + * string (length limited) */ static inline size_t -UniStrnlen(const wchar_t * ucs1, int maxlen) +UniStrnlen(const wchar_t *ucs1, int maxlen) { int i = 0; @@ -161,7 +162,7 @@ UniStrnlen(const wchar_t * ucs1, int maxlen) * UniStrncat: Concatenate length limited string */ static inline wchar_t * -UniStrncat(wchar_t * ucs1, const wchar_t * ucs2, size_t n) +UniStrncat(wchar_t *ucs1, const wchar_t *ucs2, size_t n) { wchar_t *anchor = ucs1; /* save pointer to string 1 */ @@ -179,7 +180,7 @@ UniStrncat(wchar_t * ucs1, const wchar_t * ucs2, size_t n) * UniStrncmp: Compare length limited string */ static inline int -UniStrncmp(const wchar_t * ucs1, const wchar_t * ucs2, size_t n) +UniStrncmp(const wchar_t *ucs1, const wchar_t *ucs2, size_t n) { if (!n) return 0; /* Null strings are equal */ @@ -194,7 +195,7 @@ UniStrncmp(const wchar_t * ucs1, const wchar_t * ucs2, size_t n) * UniStrncmp_le: Compare length limited string - native to little-endian */ static inline int -UniStrncmp_le(const wchar_t * ucs1, const wchar_t * ucs2, size_t n) +UniStrncmp_le(const wchar_t *ucs1, const wchar_t *ucs2, size_t n) { if (!n) return 0; /* Null strings are equal */ @@ -209,7 +210,7 @@ UniStrncmp_le(const wchar_t * ucs1, const wchar_t * ucs2, size_t n) * UniStrncpy: Copy length limited string with pad */ static inline wchar_t * -UniStrncpy(wchar_t * ucs1, const wchar_t * ucs2, size_t n) +UniStrncpy(wchar_t *ucs1, const wchar_t *ucs2, size_t n) { wchar_t *anchor = ucs1; @@ -226,7 +227,7 @@ UniStrncpy(wchar_t * ucs1, const wchar_t * ucs2, size_t n) * UniStrncpy_le: Copy length limited string with pad to little-endian */ static inline wchar_t * -UniStrncpy_le(wchar_t * ucs1, const wchar_t * ucs2, size_t n) +UniStrncpy_le(wchar_t *ucs1, const wchar_t *ucs2, size_t n) { wchar_t *anchor = ucs1; @@ -247,7 +248,7 @@ UniStrncpy_le(wchar_t * ucs1, const wchar_t * ucs2, size_t n) * NULL if no matching string is found */ static inline wchar_t * -UniStrstr(const wchar_t * ucs1, const wchar_t * ucs2) +UniStrstr(const wchar_t *ucs1, const wchar_t *ucs2) { const wchar_t *anchor1 = ucs1; const wchar_t *anchor2 = ucs2; @@ -297,7 +298,7 @@ UniToupper(register wchar_t uc) * UniStrupr: Upper case a unicode string */ static inline wchar_t * -UniStrupr(register wchar_t * upin) +UniStrupr(register wchar_t *upin) { register wchar_t *up; @@ -338,7 +339,7 @@ UniTolower(wchar_t uc) * UniStrlwr: Lower case a unicode string */ static inline wchar_t * -UniStrlwr(register wchar_t * upin) +UniStrlwr(register wchar_t *upin) { register wchar_t *up; diff --git a/fs/cifs/cifs_uniupr.h b/fs/cifs/cifs_uniupr.h index da2ad5b451ac..18a9d978e519 100644 --- a/fs/cifs/cifs_uniupr.h +++ b/fs/cifs/cifs_uniupr.h @@ -3,16 +3,16 @@ * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or + * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See * the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software + * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * * uniupr.h - Unicode compressed case ranges @@ -53,7 +53,7 @@ signed char CifsUniUpperTable[512] = { 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, 0, 0, 0, -1, 0, 0, /* 1a0-1af */ -1, 0, 0, 0, -1, 0, -1, 0, 0, -1, 0, 0, 0, -1, 0, 0, /* 1b0-1bf */ 0, 0, 0, 0, 0, -1, -2, 0, -1, -2, 0, -1, -2, 0, -1, 0, /* 1c0-1cf */ - -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, -79, 0, -1, /* 1d0-1df */ + -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, -79, 0, -1, /* 1d0-1df */ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1e0-1ef */ 0, 0, -1, -2, 0, -1, 0, 0, 0, -1, 0, -1, 0, -1, 0, -1, /* 1f0-1ff */ }; diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index fdeda519eace..36272293027d 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -21,7 +21,7 @@ #include <linux/fs.h> #include "cifspdu.h" -#include "cifsglob.h" +#include "cifsglob.h" #include "cifs_debug.h" #include "md5.h" #include "cifs_unicode.h" @@ -29,54 +29,57 @@ #include <linux/ctype.h> #include <linux/random.h> -/* Calculate and return the CIFS signature based on the mac key and the smb pdu */ +/* Calculate and return the CIFS signature based on the mac key and SMB PDU */ /* the 16 byte signature must be allocated by the caller */ /* Note we only use the 1st eight bytes */ -/* Note that the smb header signature field on input contains the +/* Note that the smb header signature field on input contains the sequence number before this function is called */ extern void mdfour(unsigned char *out, unsigned char *in, int n); extern void E_md4hash(const unsigned char *passwd, unsigned char *p16); extern void SMBencrypt(unsigned char *passwd, unsigned char *c8, - unsigned char *p24); - -static int cifs_calculate_signature(const struct smb_hdr * cifs_pdu, - const char * key, char * signature) + unsigned char *p24); + +static int cifs_calculate_signature(const struct smb_hdr *cifs_pdu, + const struct mac_key *key, char *signature) { struct MD5Context context; - if((cifs_pdu == NULL) || (signature == NULL)) + if ((cifs_pdu == NULL) || (signature == NULL) || (key == NULL)) return -EINVAL; MD5Init(&context); - MD5Update(&context,key,CIFS_SESS_KEY_SIZE+16); - MD5Update(&context,cifs_pdu->Protocol,cifs_pdu->smb_buf_length); - MD5Final(signature,&context); + MD5Update(&context, (char *)&key->data, key->len); + MD5Update(&context, cifs_pdu->Protocol, cifs_pdu->smb_buf_length); + + MD5Final(signature, &context); return 0; } -int cifs_sign_smb(struct smb_hdr * cifs_pdu, struct TCP_Server_Info * server, - __u32 * pexpected_response_sequence_number) +int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server, + __u32 *pexpected_response_sequence_number) { int rc = 0; char smb_signature[20]; - if((cifs_pdu == NULL) || (server == NULL)) + if ((cifs_pdu == NULL) || (server == NULL)) return -EINVAL; - if((cifs_pdu->Flags2 & SMBFLG2_SECURITY_SIGNATURE) == 0) + if ((cifs_pdu->Flags2 & SMBFLG2_SECURITY_SIGNATURE) == 0) return rc; spin_lock(&GlobalMid_Lock); - cifs_pdu->Signature.Sequence.SequenceNumber = cpu_to_le32(server->sequence_number); + cifs_pdu->Signature.Sequence.SequenceNumber = + cpu_to_le32(server->sequence_number); cifs_pdu->Signature.Sequence.Reserved = 0; - + *pexpected_response_sequence_number = server->sequence_number++; server->sequence_number++; spin_unlock(&GlobalMid_Lock); - rc = cifs_calculate_signature(cifs_pdu, server->mac_signing_key,smb_signature); - if(rc) + rc = cifs_calculate_signature(cifs_pdu, &server->mac_signing_key, + smb_signature); + if (rc) memset(cifs_pdu->Signature.SecuritySignature, 0, 8); else memcpy(cifs_pdu->Signature.SecuritySignature, smb_signature, 8); @@ -84,115 +87,119 @@ int cifs_sign_smb(struct smb_hdr * cifs_pdu, struct TCP_Server_Info * server, return rc; } -static int cifs_calc_signature2(const struct kvec * iov, int n_vec, - const char * key, char * signature) +static int cifs_calc_signature2(const struct kvec *iov, int n_vec, + const struct mac_key *key, char *signature) { struct MD5Context context; int i; - if((iov == NULL) || (signature == NULL)) + if ((iov == NULL) || (signature == NULL) || (key == NULL)) return -EINVAL; MD5Init(&context); - MD5Update(&context,key,CIFS_SESS_KEY_SIZE+16); - for(i=0;i<n_vec;i++) { - if(iov[i].iov_base == NULL) { - cERROR(1,("null iovec entry")); + MD5Update(&context, (char *)&key->data, key->len); + for (i = 0; i < n_vec; i++) { + if (iov[i].iov_base == NULL) { + cERROR(1, ("null iovec entry")); return -EIO; - } else if(iov[i].iov_len == 0) + } else if (iov[i].iov_len == 0) break; /* bail out if we are sent nothing to sign */ - /* The first entry includes a length field (which does not get + /* The first entry includes a length field (which does not get signed that occupies the first 4 bytes before the header */ - if(i==0) { + if (i == 0) { if (iov[0].iov_len <= 8 ) /* cmd field at offset 9 */ break; /* nothing to sign or corrupt header */ - MD5Update(&context,iov[0].iov_base+4, iov[0].iov_len-4); + MD5Update(&context, iov[0].iov_base+4, + iov[0].iov_len-4); } else - MD5Update(&context,iov[i].iov_base, iov[i].iov_len); + MD5Update(&context, iov[i].iov_base, iov[i].iov_len); } - MD5Final(signature,&context); + MD5Final(signature, &context); return 0; } -int cifs_sign_smb2(struct kvec * iov, int n_vec, struct TCP_Server_Info *server, +int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server, __u32 * pexpected_response_sequence_number) { int rc = 0; char smb_signature[20]; - struct smb_hdr * cifs_pdu = iov[0].iov_base; + struct smb_hdr *cifs_pdu = iov[0].iov_base; - if((cifs_pdu == NULL) || (server == NULL)) + if ((cifs_pdu == NULL) || (server == NULL)) return -EINVAL; - if((cifs_pdu->Flags2 & SMBFLG2_SECURITY_SIGNATURE) == 0) + if ((cifs_pdu->Flags2 & SMBFLG2_SECURITY_SIGNATURE) == 0) return rc; - spin_lock(&GlobalMid_Lock); - cifs_pdu->Signature.Sequence.SequenceNumber = + spin_lock(&GlobalMid_Lock); + cifs_pdu->Signature.Sequence.SequenceNumber = cpu_to_le32(server->sequence_number); - cifs_pdu->Signature.Sequence.Reserved = 0; + cifs_pdu->Signature.Sequence.Reserved = 0; - *pexpected_response_sequence_number = server->sequence_number++; - server->sequence_number++; - spin_unlock(&GlobalMid_Lock); + *pexpected_response_sequence_number = server->sequence_number++; + server->sequence_number++; + spin_unlock(&GlobalMid_Lock); - rc = cifs_calc_signature2(iov, n_vec, server->mac_signing_key, + rc = cifs_calc_signature2(iov, n_vec, &server->mac_signing_key, smb_signature); - if(rc) - memset(cifs_pdu->Signature.SecuritySignature, 0, 8); - else - memcpy(cifs_pdu->Signature.SecuritySignature, smb_signature, 8); - - return rc; + if (rc) + memset(cifs_pdu->Signature.SecuritySignature, 0, 8); + else + memcpy(cifs_pdu->Signature.SecuritySignature, smb_signature, 8); + return rc; } -int cifs_verify_signature(struct smb_hdr * cifs_pdu, const char * mac_key, - __u32 expected_sequence_number) +int cifs_verify_signature(struct smb_hdr *cifs_pdu, + const struct mac_key *mac_key, + __u32 expected_sequence_number) { unsigned int rc; char server_response_sig[8]; char what_we_think_sig_should_be[20]; - if((cifs_pdu == NULL) || (mac_key == NULL)) + if ((cifs_pdu == NULL) || (mac_key == NULL)) return -EINVAL; if (cifs_pdu->Command == SMB_COM_NEGOTIATE) return 0; if (cifs_pdu->Command == SMB_COM_LOCKING_ANDX) { - struct smb_com_lock_req * pSMB = (struct smb_com_lock_req *)cifs_pdu; - if(pSMB->LockType & LOCKING_ANDX_OPLOCK_RELEASE) + struct smb_com_lock_req *pSMB = + (struct smb_com_lock_req *)cifs_pdu; + if (pSMB->LockType & LOCKING_ANDX_OPLOCK_RELEASE) return 0; } - /* BB what if signatures are supposed to be on for session but server does not - send one? BB */ - + /* BB what if signatures are supposed to be on for session but + server does not send one? BB */ + /* Do not need to verify session setups with signature "BSRSPYL " */ - if(memcmp(cifs_pdu->Signature.SecuritySignature,"BSRSPYL ",8)==0) - cFYI(1,("dummy signature received for smb command 0x%x",cifs_pdu->Command)); + if (memcmp(cifs_pdu->Signature.SecuritySignature, "BSRSPYL ", 8) == 0) + cFYI(1, ("dummy signature received for smb command 0x%x", + cifs_pdu->Command)); /* save off the origiginal signature so we can modify the smb and check its signature against what the server sent */ - memcpy(server_response_sig,cifs_pdu->Signature.SecuritySignature,8); + memcpy(server_response_sig, cifs_pdu->Signature.SecuritySignature, 8); - cifs_pdu->Signature.Sequence.SequenceNumber = cpu_to_le32(expected_sequence_number); + cifs_pdu->Signature.Sequence.SequenceNumber = + cpu_to_le32(expected_sequence_number); cifs_pdu->Signature.Sequence.Reserved = 0; rc = cifs_calculate_signature(cifs_pdu, mac_key, what_we_think_sig_should_be); - if(rc) + if (rc) return rc; - -/* cifs_dump_mem("what we think it should be: ",what_we_think_sig_should_be,16); */ +/* cifs_dump_mem("what we think it should be: ", + what_we_think_sig_should_be, 16); */ - if(memcmp(server_response_sig, what_we_think_sig_should_be, 8)) + if (memcmp(server_response_sig, what_we_think_sig_should_be, 8)) return -EACCES; else return 0; @@ -200,89 +207,94 @@ int cifs_verify_signature(struct smb_hdr * cifs_pdu, const char * mac_key, } /* We fill in key by putting in 40 byte array which was allocated by caller */ -int cifs_calculate_mac_key(char * key, const char * rn, const char * password) +int cifs_calculate_mac_key(struct mac_key *key, const char *rn, + const char *password) { char temp_key[16]; if ((key == NULL) || (rn == NULL)) return -EINVAL; E_md4hash(password, temp_key); - mdfour(key,temp_key,16); - memcpy(key+16,rn, CIFS_SESS_KEY_SIZE); + mdfour(key->data.ntlm, temp_key, 16); + memcpy(key->data.ntlm+16, rn, CIFS_SESS_KEY_SIZE); + key->len = 40; return 0; } -int CalcNTLMv2_partial_mac_key(struct cifsSesInfo * ses, - const struct nls_table * nls_info) +int CalcNTLMv2_partial_mac_key(struct cifsSesInfo *ses, + const struct nls_table *nls_info) { char temp_hash[16]; struct HMACMD5Context ctx; - char * ucase_buf; - __le16 * unicode_buf; - unsigned int i,user_name_len,dom_name_len; + char *ucase_buf; + __le16 *unicode_buf; + unsigned int i, user_name_len, dom_name_len; - if(ses == NULL) + if (ses == NULL) return -EINVAL; E_md4hash(ses->password, temp_hash); hmac_md5_init_limK_to_64(temp_hash, 16, &ctx); user_name_len = strlen(ses->userName); - if(user_name_len > MAX_USERNAME_SIZE) + if (user_name_len > MAX_USERNAME_SIZE) return -EINVAL; - if(ses->domainName == NULL) + if (ses->domainName == NULL) return -EINVAL; /* BB should we use CIFS_LINUX_DOM */ dom_name_len = strlen(ses->domainName); - if(dom_name_len > MAX_USERNAME_SIZE) + if (dom_name_len > MAX_USERNAME_SIZE) return -EINVAL; - + ucase_buf = kmalloc((MAX_USERNAME_SIZE+1), GFP_KERNEL); - if(ucase_buf == NULL) + if (ucase_buf == NULL) return -ENOMEM; unicode_buf = kmalloc((MAX_USERNAME_SIZE+1)*4, GFP_KERNEL); - if(unicode_buf == NULL) { + if (unicode_buf == NULL) { kfree(ucase_buf); return -ENOMEM; } - - for(i=0;i<user_name_len;i++) + + for (i = 0; i < user_name_len; i++) ucase_buf[i] = nls_info->charset2upper[(int)ses->userName[i]]; ucase_buf[i] = 0; - user_name_len = cifs_strtoUCS(unicode_buf, ucase_buf, MAX_USERNAME_SIZE*2, nls_info); + user_name_len = cifs_strtoUCS(unicode_buf, ucase_buf, + MAX_USERNAME_SIZE*2, nls_info); unicode_buf[user_name_len] = 0; user_name_len++; - for(i=0;i<dom_name_len;i++) + for (i = 0; i < dom_name_len; i++) ucase_buf[i] = nls_info->charset2upper[(int)ses->domainName[i]]; ucase_buf[i] = 0; - dom_name_len = cifs_strtoUCS(unicode_buf+user_name_len, ucase_buf, MAX_USERNAME_SIZE*2, nls_info); + dom_name_len = cifs_strtoUCS(unicode_buf+user_name_len, ucase_buf, + MAX_USERNAME_SIZE*2, nls_info); unicode_buf[user_name_len + dom_name_len] = 0; hmac_md5_update((const unsigned char *) unicode_buf, - (user_name_len+dom_name_len)*2,&ctx); + (user_name_len+dom_name_len)*2, &ctx); - hmac_md5_final(ses->server->mac_signing_key,&ctx); + hmac_md5_final(ses->server->ntlmv2_hash, &ctx); kfree(ucase_buf); kfree(unicode_buf); return 0; } #ifdef CONFIG_CIFS_WEAK_PW_HASH -void calc_lanman_hash(struct cifsSesInfo * ses, char * lnm_session_key) +void calc_lanman_hash(struct cifsSesInfo *ses, char *lnm_session_key) { int i; char password_with_pad[CIFS_ENCPWD_SIZE]; - if(ses->server == NULL) + if (ses->server == NULL) return; memset(password_with_pad, 0, CIFS_ENCPWD_SIZE); - if(ses->password) + if (ses->password) strncpy(password_with_pad, ses->password, CIFS_ENCPWD_SIZE); - if((ses->server->secMode & SECMODE_PW_ENCRYPT) == 0) - if(extended_security & CIFSSEC_MAY_PLNTXT) { - memcpy(lnm_session_key, password_with_pad, CIFS_ENCPWD_SIZE); + if ((ses->server->secMode & SECMODE_PW_ENCRYPT) == 0) + if (extended_security & CIFSSEC_MAY_PLNTXT) { + memcpy(lnm_session_key, password_with_pad, + CIFS_ENCPWD_SIZE); return; } @@ -297,7 +309,7 @@ void calc_lanman_hash(struct cifsSesInfo * ses, char * lnm_session_key) utf8 and other multibyte codepages each need their own strupper function since a byte at a time will ont work. */ - for(i = 0; i < CIFS_ENCPWD_SIZE; i++) { + for (i = 0; i < CIFS_ENCPWD_SIZE; i++) { password_with_pad[i] = toupper(password_with_pad[i]); } @@ -307,19 +319,19 @@ void calc_lanman_hash(struct cifsSesInfo * ses, char * lnm_session_key) } #endif /* CIFS_WEAK_PW_HASH */ -static int calc_ntlmv2_hash(struct cifsSesInfo *ses, - const struct nls_table * nls_cp) +static int calc_ntlmv2_hash(struct cifsSesInfo *ses, + const struct nls_table *nls_cp) { int rc = 0; int len; char nt_hash[16]; - struct HMACMD5Context * pctxt; - wchar_t * user; - wchar_t * domain; + struct HMACMD5Context *pctxt; + wchar_t *user; + wchar_t *domain; pctxt = kmalloc(sizeof(struct HMACMD5Context), GFP_KERNEL); - if(pctxt == NULL) + if (pctxt == NULL) return -ENOMEM; /* calculate md4 hash of password */ @@ -331,41 +343,45 @@ static int calc_ntlmv2_hash(struct cifsSesInfo *ses, /* convert ses->userName to unicode and uppercase */ len = strlen(ses->userName); user = kmalloc(2 + (len * 2), GFP_KERNEL); - if(user == NULL) + if (user == NULL) goto calc_exit_2; len = cifs_strtoUCS(user, ses->userName, len, nls_cp); UniStrupr(user); hmac_md5_update((char *)user, 2*len, pctxt); /* convert ses->domainName to unicode and uppercase */ - if(ses->domainName) { + if (ses->domainName) { len = strlen(ses->domainName); - domain = kmalloc(2 + (len * 2), GFP_KERNEL); - if(domain == NULL) + domain = kmalloc(2 + (len * 2), GFP_KERNEL); + if (domain == NULL) goto calc_exit_1; len = cifs_strtoUCS(domain, ses->domainName, len, nls_cp); - UniStrupr(domain); + /* the following line was removed since it didn't work well + with lower cased domain name that passed as an option. + Maybe converting the domain name earlier makes sense */ + /* UniStrupr(domain); */ hmac_md5_update((char *)domain, 2*len, pctxt); - + kfree(domain); } calc_exit_1: kfree(user); calc_exit_2: - /* BB FIXME what about bytes 24 through 40 of the signing key? + /* BB FIXME what about bytes 24 through 40 of the signing key? compare with the NTLM example */ - hmac_md5_final(ses->server->mac_signing_key, pctxt); + hmac_md5_final(ses->server->ntlmv2_hash, pctxt); return rc; } -void setup_ntlmv2_rsp(struct cifsSesInfo * ses, char * resp_buf, - const struct nls_table * nls_cp) +void setup_ntlmv2_rsp(struct cifsSesInfo *ses, char *resp_buf, + const struct nls_table *nls_cp) { int rc; - struct ntlmv2_resp * buf = (struct ntlmv2_resp *)resp_buf; + struct ntlmv2_resp *buf = (struct ntlmv2_resp *)resp_buf; + struct HMACMD5Context context; buf->blob_signature = cpu_to_le32(0x00000101); buf->reserved = 0; @@ -379,21 +395,31 @@ void setup_ntlmv2_rsp(struct cifsSesInfo * ses, char * resp_buf, /* calculate buf->ntlmv2_hash */ rc = calc_ntlmv2_hash(ses, nls_cp); - if(rc) - cERROR(1,("could not get v2 hash rc %d",rc)); + if (rc) + cERROR(1, ("could not get v2 hash rc %d", rc)); CalcNTLMv2_response(ses, resp_buf); + + /* now calculate the MAC key for NTLMv2 */ + hmac_md5_init_limK_to_64(ses->server->ntlmv2_hash, 16, &context); + hmac_md5_update(resp_buf, 16, &context); + hmac_md5_final(ses->server->mac_signing_key.data.ntlmv2.key, &context); + + memcpy(&ses->server->mac_signing_key.data.ntlmv2.resp, resp_buf, + sizeof(struct ntlmv2_resp)); + ses->server->mac_signing_key.len = 16 + sizeof(struct ntlmv2_resp); } -void CalcNTLMv2_response(const struct cifsSesInfo * ses, char * v2_session_response) +void CalcNTLMv2_response(const struct cifsSesInfo *ses, + char *v2_session_response) { struct HMACMD5Context context; /* rest of v2 struct already generated */ - memcpy(v2_session_response + 8, ses->server->cryptKey,8); - hmac_md5_init_limK_to_64(ses->server->mac_signing_key, 16, &context); + memcpy(v2_session_response + 8, ses->server->cryptKey, 8); + hmac_md5_init_limK_to_64(ses->server->ntlmv2_hash, 16, &context); - hmac_md5_update(v2_session_response+8, + hmac_md5_update(v2_session_response+8, sizeof(struct ntlmv2_resp) - 8, &context); - hmac_md5_final(v2_session_response,&context); + hmac_md5_final(v2_session_response, &context); /* cifs_dump_mem("v2_sess_rsp: ", v2_session_response, 32); */ } diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index bd0f2f2353ce..1fd0dc85f53c 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -64,23 +64,27 @@ unsigned int multiuser_mount = 0; unsigned int extended_security = CIFSSEC_DEF; /* unsigned int ntlmv2_support = 0; */ unsigned int sign_CIFS_PDUs = 1; -extern struct task_struct * oplockThread; /* remove sparse warning */ -struct task_struct * oplockThread = NULL; +extern struct task_struct *oplockThread; /* remove sparse warning */ +struct task_struct *oplockThread = NULL; /* extern struct task_struct * dnotifyThread; remove sparse warning */ -static struct task_struct * dnotifyThread = NULL; +static struct task_struct *dnotifyThread = NULL; static const struct super_operations cifs_super_ops; unsigned int CIFSMaxBufSize = CIFS_MAX_MSGSIZE; module_param(CIFSMaxBufSize, int, 0); -MODULE_PARM_DESC(CIFSMaxBufSize,"Network buffer size (not including header). Default: 16384 Range: 8192 to 130048"); +MODULE_PARM_DESC(CIFSMaxBufSize, "Network buffer size (not including header). " + "Default: 16384 Range: 8192 to 130048"); unsigned int cifs_min_rcv = CIFS_MIN_RCV_POOL; module_param(cifs_min_rcv, int, 0); -MODULE_PARM_DESC(cifs_min_rcv,"Network buffers in pool. Default: 4 Range: 1 to 64"); +MODULE_PARM_DESC(cifs_min_rcv, "Network buffers in pool. Default: 4 Range: " + "1 to 64"); unsigned int cifs_min_small = 30; module_param(cifs_min_small, int, 0); -MODULE_PARM_DESC(cifs_min_small,"Small network buffers in pool. Default: 30 Range: 2 to 256"); +MODULE_PARM_DESC(cifs_min_small, "Small network buffers in pool. Default: 30 " + "Range: 2 to 256"); unsigned int cifs_max_pending = CIFS_MAX_REQ; module_param(cifs_max_pending, int, 0); -MODULE_PARM_DESC(cifs_max_pending,"Simultaneous requests to server. Default: 50 Range: 2 to 256"); +MODULE_PARM_DESC(cifs_max_pending, "Simultaneous requests to server. " + "Default: 50 Range: 2 to 256"); extern mempool_t *cifs_sm_req_poolp; extern mempool_t *cifs_req_poolp; @@ -95,10 +99,10 @@ cifs_read_super(struct super_block *sb, void *data, struct inode *inode; struct cifs_sb_info *cifs_sb; int rc = 0; - + /* BB should we make this contingent on mount parm? */ sb->s_flags |= MS_NODIRATIME | MS_NOATIME; - sb->s_fs_info = kzalloc(sizeof(struct cifs_sb_info),GFP_KERNEL); + sb->s_fs_info = kzalloc(sizeof(struct cifs_sb_info), GFP_KERNEL); cifs_sb = CIFS_SB(sb); if (cifs_sb == NULL) return -ENOMEM; @@ -114,12 +118,9 @@ cifs_read_super(struct super_block *sb, void *data, sb->s_magic = CIFS_MAGIC_NUMBER; sb->s_op = &cifs_super_ops; -#ifdef CONFIG_CIFS_EXPERIMENTAL - if (experimEnabled != 0) - sb->s_export_op = &cifs_export_ops; -#endif /* EXPERIMENTAL */ /* if (cifs_sb->tcon->ses->server->maxBuf > MAX_CIFS_HDR_SIZE + 512) - sb->s_blocksize = cifs_sb->tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE; */ + sb->s_blocksize = + cifs_sb->tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE; */ #ifdef CONFIG_CIFS_QUOTA sb->s_qcop = &cifs_quotactl_ops; #endif @@ -139,6 +140,13 @@ cifs_read_super(struct super_block *sb, void *data, goto out_no_root; } +#ifdef CONFIG_CIFS_EXPERIMENTAL + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) { + cFYI(1, ("export ops supported")); + sb->s_export_op = &cifs_export_ops; + } +#endif /* EXPERIMENTAL */ + return 0; out_no_root: @@ -149,7 +157,7 @@ out_no_root: out_mount_failed: if (cifs_sb) { if (cifs_sb->local_nls) - unload_nls(cifs_sb->local_nls); + unload_nls(cifs_sb->local_nls); kfree(cifs_sb); } return rc; @@ -164,10 +172,10 @@ cifs_put_super(struct super_block *sb) cFYI(1, ("In cifs_put_super")); cifs_sb = CIFS_SB(sb); if (cifs_sb == NULL) { - cFYI(1,("Empty cifs superblock info passed to unmount")); + cFYI(1, ("Empty cifs superblock info passed to unmount")); return; } - rc = cifs_umount(sb, cifs_sb); + rc = cifs_umount(sb, cifs_sb); if (rc) { cERROR(1, ("cifs_umount failed with return code %d", rc)); } @@ -180,7 +188,7 @@ static int cifs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct super_block *sb = dentry->d_sb; - int xid; + int xid; int rc = -EOPNOTSUPP; struct cifs_sb_info *cifs_sb; struct cifsTconInfo *pTcon; @@ -193,7 +201,7 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_type = CIFS_MAGIC_NUMBER; /* instead could get the real value via SMB_QUERY_FS_ATTRIBUTE_INFO */ - buf->f_namelen = PATH_MAX; /* PATH_MAX may be too long - it would + buf->f_namelen = PATH_MAX; /* PATH_MAX may be too long - it would presumably be total path, but note that some servers (includinng Samba 3) have a shorter maximum path */ @@ -217,8 +225,7 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf) bypassed it because we detected that this was an older LANMAN sess */ if (rc) rc = SMBOldQFSInfo(xid, pTcon, buf); - /* - int f_type; + /* int f_type; __fsid_t f_fsid; int f_namelen; */ /* BB get from info in tcon struct at mount time call to QFSAttrInfo */ @@ -227,7 +234,7 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf) longer available? */ } -static int cifs_permission(struct inode * inode, int mask, struct nameidata *nd) +static int cifs_permission(struct inode *inode, int mask, struct nameidata *nd) { struct cifs_sb_info *cifs_sb; @@ -235,10 +242,10 @@ static int cifs_permission(struct inode * inode, int mask, struct nameidata *nd) if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) { return 0; - } else /* file mode might have been restricted at mount time - on the client (above and beyond ACL on servers) for + } else /* file mode might have been restricted at mount time + on the client (above and beyond ACL on servers) for servers which do not support setting and viewing mode bits, - so allowing client to check permissions is useful */ + so allowing client to check permissions is useful */ return generic_permission(inode, mask, NULL); } @@ -267,7 +274,7 @@ cifs_alloc_inode(struct super_block *sb) cifs_inode->clientCanCacheRead = FALSE; cifs_inode->clientCanCacheAll = FALSE; cifs_inode->vfs_inode.i_blkbits = 14; /* 2**14 = CIFS_MAX_MSGSIZE */ - + /* Can not set i_flags here - they get immediately overwritten to zero by the VFS */ /* cifs_inode->vfs_inode.i_flags = S_NOATIME | S_NOCMTIME;*/ @@ -309,26 +316,26 @@ cifs_show_options(struct seq_file *s, struct vfsmount *m) if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) seq_printf(s, ",posixpaths"); if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID) || - !(cifs_sb->tcon->ses->capabilities & CAP_UNIX)) + !(cifs_sb->tcon->unix_ext)) seq_printf(s, ",uid=%d", cifs_sb->mnt_uid); if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID) || - !(cifs_sb->tcon->ses->capabilities & CAP_UNIX)) + !(cifs_sb->tcon->unix_ext)) seq_printf(s, ",gid=%d", cifs_sb->mnt_gid); - seq_printf(s, ",rsize=%d",cifs_sb->rsize); - seq_printf(s, ",wsize=%d",cifs_sb->wsize); + seq_printf(s, ",rsize=%d", cifs_sb->rsize); + seq_printf(s, ",wsize=%d", cifs_sb->wsize); } return 0; } #ifdef CONFIG_CIFS_QUOTA -int cifs_xquota_set(struct super_block * sb, int quota_type, qid_t qid, - struct fs_disk_quota * pdquota) +int cifs_xquota_set(struct super_block *sb, int quota_type, qid_t qid, + struct fs_disk_quota *pdquota) { int xid; int rc = 0; struct cifs_sb_info *cifs_sb = CIFS_SB(sb); struct cifsTconInfo *pTcon; - + if (cifs_sb) pTcon = cifs_sb->tcon; else @@ -337,7 +344,7 @@ int cifs_xquota_set(struct super_block * sb, int quota_type, qid_t qid, xid = GetXid(); if (pTcon) { - cFYI(1,("set type: 0x%x id: %d",quota_type,qid)); + cFYI(1, ("set type: 0x%x id: %d", quota_type, qid)); } else { return -EIO; } @@ -346,8 +353,8 @@ int cifs_xquota_set(struct super_block * sb, int quota_type, qid_t qid, return rc; } -int cifs_xquota_get(struct super_block * sb, int quota_type, qid_t qid, - struct fs_disk_quota * pdquota) +int cifs_xquota_get(struct super_block *sb, int quota_type, qid_t qid, + struct fs_disk_quota *pdquota) { int xid; int rc = 0; @@ -361,7 +368,7 @@ int cifs_xquota_get(struct super_block * sb, int quota_type, qid_t qid, xid = GetXid(); if (pTcon) { - cFYI(1,("set type: 0x%x id: %d",quota_type,qid)); + cFYI(1, ("set type: 0x%x id: %d", quota_type, qid)); } else { rc = -EIO; } @@ -370,9 +377,9 @@ int cifs_xquota_get(struct super_block * sb, int quota_type, qid_t qid, return rc; } -int cifs_xstate_set(struct super_block * sb, unsigned int flags, int operation) +int cifs_xstate_set(struct super_block *sb, unsigned int flags, int operation) { - int xid; + int xid; int rc = 0; struct cifs_sb_info *cifs_sb = CIFS_SB(sb); struct cifsTconInfo *pTcon; @@ -384,7 +391,7 @@ int cifs_xstate_set(struct super_block * sb, unsigned int flags, int operation) xid = GetXid(); if (pTcon) { - cFYI(1,("flags: 0x%x operation: 0x%x",flags,operation)); + cFYI(1, ("flags: 0x%x operation: 0x%x", flags, operation)); } else { rc = -EIO; } @@ -393,7 +400,7 @@ int cifs_xstate_set(struct super_block * sb, unsigned int flags, int operation) return rc; } -int cifs_xstate_get(struct super_block * sb, struct fs_quota_stat *qstats) +int cifs_xstate_get(struct super_block *sb, struct fs_quota_stat *qstats) { int xid; int rc = 0; @@ -407,7 +414,7 @@ int cifs_xstate_get(struct super_block * sb, struct fs_quota_stat *qstats) } xid = GetXid(); if (pTcon) { - cFYI(1,("pqstats %p",qstats)); + cFYI(1, ("pqstats %p", qstats)); } else { rc = -EIO; } @@ -424,10 +431,10 @@ static struct quotactl_ops cifs_quotactl_ops = { }; #endif -static void cifs_umount_begin(struct vfsmount * vfsmnt, int flags) +static void cifs_umount_begin(struct vfsmount *vfsmnt, int flags) { struct cifs_sb_info *cifs_sb; - struct cifsTconInfo * tcon; + struct cifsTconInfo *tcon; if (!(flags & MNT_FORCE)) return; @@ -445,9 +452,8 @@ static void cifs_umount_begin(struct vfsmount * vfsmnt, int flags) /* cancel_brl_requests(tcon); */ /* BB mark all brl mids as exiting */ /* cancel_notify_requests(tcon); */ - if (tcon->ses && tcon->ses->server) - { - cFYI(1,("wake up tasks now - umount begin not complete")); + if (tcon->ses && tcon->ses->server) { + cFYI(1, ("wake up tasks now - umount begin not complete")); wake_up_all(&tcon->ses->server->request_q); wake_up_all(&tcon->ses->server->response_q); msleep(1); /* yield */ @@ -480,10 +486,11 @@ static const struct super_operations cifs_super_ops = { .statfs = cifs_statfs, .alloc_inode = cifs_alloc_inode, .destroy_inode = cifs_destroy_inode, -/* .drop_inode = generic_delete_inode, - .delete_inode = cifs_delete_inode, *//* Do not need the above two functions - unless later we add lazy close of inodes or unless the kernel forgets to call - us with the same number of releases (closes) as opens */ +/* .drop_inode = generic_delete_inode, + .delete_inode = cifs_delete_inode, */ /* Do not need above two + functions unless later we add lazy close of inodes or unless the + kernel forgets to call us with the same number of releases (closes) + as opens */ .show_options = cifs_show_options, .umount_begin = cifs_umount_begin, .remount_fs = cifs_remount, @@ -586,11 +593,11 @@ const struct inode_operations cifs_file_inode_ops = { .getxattr = cifs_getxattr, .listxattr = cifs_listxattr, .removexattr = cifs_removexattr, -#endif +#endif }; const struct inode_operations cifs_symlink_inode_ops = { - .readlink = generic_readlink, + .readlink = generic_readlink, .follow_link = cifs_follow_link, .put_link = cifs_put_link, .permission = cifs_permission, @@ -602,7 +609,7 @@ const struct inode_operations cifs_symlink_inode_ops = { .getxattr = cifs_getxattr, .listxattr = cifs_listxattr, .removexattr = cifs_removexattr, -#endif +#endif }; const struct file_operations cifs_file_ops = { @@ -628,7 +635,7 @@ const struct file_operations cifs_file_ops = { }; const struct file_operations cifs_file_direct_ops = { - /* no mmap, no aio, no readv - + /* no mmap, no aio, no readv - BB reevaluate whether they can be done with directio, no cache */ .read = cifs_user_read, .write = cifs_user_write, @@ -668,7 +675,7 @@ const struct file_operations cifs_file_nobrl_ops = { }; const struct file_operations cifs_file_direct_nobrl_ops = { - /* no mmap, no aio, no readv - + /* no mmap, no aio, no readv - BB reevaluate whether they can be done with directio, no cache */ .read = cifs_user_read, .write = cifs_user_write, @@ -693,11 +700,11 @@ const struct file_operations cifs_dir_ops = { #ifdef CONFIG_CIFS_EXPERIMENTAL .dir_notify = cifs_dir_notify, #endif /* CONFIG_CIFS_EXPERIMENTAL */ - .ioctl = cifs_ioctl, + .ioctl = cifs_ioctl, }; static void -cifs_init_once(void *inode, struct kmem_cache * cachep, unsigned long flags) +cifs_init_once(void *inode, struct kmem_cache *cachep, unsigned long flags) { struct cifsInodeInfo *cifsi = inode; @@ -749,7 +756,7 @@ cifs_init_request_bufs(void) cifs_min_rcv = 1; else if (cifs_min_rcv > 64) { cifs_min_rcv = 64; - cERROR(1,("cifs_min_rcv set to maximum (64)")); + cERROR(1, ("cifs_min_rcv set to maximum (64)")); } cifs_req_poolp = mempool_create_slab_pool(cifs_min_rcv, @@ -762,25 +769,25 @@ cifs_init_request_bufs(void) /* MAX_CIFS_SMALL_BUFFER_SIZE bytes is enough for most SMB responses and almost all handle based requests (but not write response, nor is it sufficient for path based requests). A smaller size would have - been more efficient (compacting multiple slab items on one 4k page) + been more efficient (compacting multiple slab items on one 4k page) for the case in which debug was on, but this larger size allows more SMBs to use small buffer alloc and is still much more - efficient to alloc 1 per page off the slab compared to 17K (5page) + efficient to alloc 1 per page off the slab compared to 17K (5page) alloc of large cifs buffers even when page debugging is on */ cifs_sm_req_cachep = kmem_cache_create("cifs_small_rq", - MAX_CIFS_SMALL_BUFFER_SIZE, 0, SLAB_HWCACHE_ALIGN, + MAX_CIFS_SMALL_BUFFER_SIZE, 0, SLAB_HWCACHE_ALIGN, NULL, NULL); if (cifs_sm_req_cachep == NULL) { mempool_destroy(cifs_req_poolp); kmem_cache_destroy(cifs_req_cachep); - return -ENOMEM; + return -ENOMEM; } if (cifs_min_small < 2) cifs_min_small = 2; else if (cifs_min_small > 256) { cifs_min_small = 256; - cFYI(1,("cifs_min_small set to maximum (256)")); + cFYI(1, ("cifs_min_small set to maximum (256)")); } cifs_sm_req_poolp = mempool_create_slab_pool(cifs_min_small, @@ -841,42 +848,43 @@ cifs_destroy_mids(void) kmem_cache_destroy(cifs_oplock_cachep); } -static int cifs_oplock_thread(void * dummyarg) +static int cifs_oplock_thread(void *dummyarg) { - struct oplock_q_entry * oplock_item; + struct oplock_q_entry *oplock_item; struct cifsTconInfo *pTcon; - struct inode * inode; + struct inode *inode; __u16 netfid; int rc; set_freezable(); do { - if (try_to_freeze()) + if (try_to_freeze()) continue; - + spin_lock(&GlobalMid_Lock); if (list_empty(&GlobalOplock_Q)) { spin_unlock(&GlobalMid_Lock); set_current_state(TASK_INTERRUPTIBLE); schedule_timeout(39*HZ); } else { - oplock_item = list_entry(GlobalOplock_Q.next, + oplock_item = list_entry(GlobalOplock_Q.next, struct oplock_q_entry, qhead); if (oplock_item) { - cFYI(1,("found oplock item to write out")); + cFYI(1, ("found oplock item to write out")); pTcon = oplock_item->tcon; inode = oplock_item->pinode; netfid = oplock_item->netfid; spin_unlock(&GlobalMid_Lock); DeleteOplockQEntry(oplock_item); /* can not grab inode sem here since it would - deadlock when oplock received on delete + deadlock when oplock received on delete since vfs_unlink holds the i_mutex across the call */ /* mutex_lock(&inode->i_mutex);*/ if (S_ISREG(inode->i_mode)) { rc = filemap_fdatawrite(inode->i_mapping); - if (CIFS_I(inode)->clientCanCacheRead == 0) { + if (CIFS_I(inode)->clientCanCacheRead + == 0) { filemap_fdatawait(inode->i_mapping); invalidate_remote_inode(inode); } @@ -885,20 +893,22 @@ static int cifs_oplock_thread(void * dummyarg) /* mutex_unlock(&inode->i_mutex);*/ if (rc) CIFS_I(inode)->write_behind_rc = rc; - cFYI(1,("Oplock flush inode %p rc %d",inode,rc)); - - /* releasing a stale oplock after recent reconnection - of smb session using a now incorrect file - handle is not a data integrity issue but do - not bother sending an oplock release if session - to server still is disconnected since oplock + cFYI(1, ("Oplock flush inode %p rc %d", + inode, rc)); + + /* releasing stale oplock after recent reconnect + of smb session using a now incorrect file + handle is not a data integrity issue but do + not bother sending an oplock release if session + to server still is disconnected since oplock already released by the server in that case */ if (pTcon->tidStatus != CifsNeedReconnect) { rc = CIFSSMBLock(0, pTcon, netfid, - 0 /* len */ , 0 /* offset */, 0, + 0 /* len */ , 0 /* offset */, 0, 0, LOCKING_ANDX_OPLOCK_RELEASE, 0 /* wait flag */); - cFYI(1,("Oplock release rc = %d ",rc)); + cFYI(1, + ("Oplock release rc = %d ", rc)); } } else spin_unlock(&GlobalMid_Lock); @@ -910,7 +920,7 @@ static int cifs_oplock_thread(void * dummyarg) return 0; } -static int cifs_dnotify_thread(void * dummyarg) +static int cifs_dnotify_thread(void *dummyarg) { struct list_head *tmp; struct cifsSesInfo *ses; @@ -925,9 +935,9 @@ static int cifs_dnotify_thread(void * dummyarg) to be woken up and wakeq so the thread can wake up and error out */ list_for_each(tmp, &GlobalSMBSessionList) { - ses = list_entry(tmp, struct cifsSesInfo, + ses = list_entry(tmp, struct cifsSesInfo, cifsSessionList); - if (ses && ses->server && + if (ses && ses->server && atomic_read(&ses->server->inFlight)) wake_up_all(&ses->server->response_q); } @@ -951,13 +961,13 @@ init_cifs(void) #ifdef CONFIG_CIFS_EXPERIMENTAL INIT_LIST_HEAD(&GlobalDnotifyReqList); INIT_LIST_HEAD(&GlobalDnotifyRsp_Q); -#endif +#endif /* * Initialize Global counters */ atomic_set(&sesInfoAllocCount, 0); atomic_set(&tconInfoAllocCount, 0); - atomic_set(&tcpSesAllocCount,0); + atomic_set(&tcpSesAllocCount, 0); atomic_set(&tcpSesReconnectCount, 0); atomic_set(&tconInfoReconnectCount, 0); @@ -978,10 +988,10 @@ init_cifs(void) if (cifs_max_pending < 2) { cifs_max_pending = 2; - cFYI(1,("cifs_max_pending set to min of 2")); + cFYI(1, ("cifs_max_pending set to min of 2")); } else if (cifs_max_pending > 256) { cifs_max_pending = 256; - cFYI(1,("cifs_max_pending set to max of 256")); + cFYI(1, ("cifs_max_pending set to max of 256")); } rc = cifs_init_inodecache(); @@ -1003,14 +1013,14 @@ init_cifs(void) oplockThread = kthread_run(cifs_oplock_thread, NULL, "cifsoplockd"); if (IS_ERR(oplockThread)) { rc = PTR_ERR(oplockThread); - cERROR(1,("error %d create oplock thread", rc)); + cERROR(1, ("error %d create oplock thread", rc)); goto out_unregister_filesystem; } dnotifyThread = kthread_run(cifs_dnotify_thread, NULL, "cifsdnotifyd"); if (IS_ERR(dnotifyThread)) { rc = PTR_ERR(dnotifyThread); - cERROR(1,("error %d create dnotify thread", rc)); + cERROR(1, ("error %d create dnotify thread", rc)); goto out_stop_oplock_thread; } @@ -1036,7 +1046,7 @@ init_cifs(void) static void __exit exit_cifs(void) { - cFYI(0, ("In unregister ie exit_cifs")); + cFYI(0, ("exit_cifs")); #ifdef CONFIG_PROC_FS cifs_proc_clean(); #endif @@ -1049,9 +1059,10 @@ exit_cifs(void) } MODULE_AUTHOR("Steve French <sfrench@us.ibm.com>"); -MODULE_LICENSE("GPL"); /* combination of LGPL + GPL source behaves as GPL */ +MODULE_LICENSE("GPL"); /* combination of LGPL + GPL source behaves as GPL */ MODULE_DESCRIPTION - ("VFS to access servers complying with the SNIA CIFS Specification e.g. Samba and Windows"); + ("VFS to access servers complying with the SNIA CIFS Specification " + "e.g. Samba and Windows"); MODULE_VERSION(CIFS_VERSION); module_init(init_cifs) module_exit(exit_cifs) diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index c235d32ad4a8..a20de77a3856 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -16,7 +16,7 @@ * * You should have received a copy of the GNU Lesser General Public License * along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _CIFSFS_H @@ -43,9 +43,9 @@ extern void cifs_read_inode(struct inode *); /* Functions related to inodes */ extern const struct inode_operations cifs_dir_inode_ops; -extern int cifs_create(struct inode *, struct dentry *, int, +extern int cifs_create(struct inode *, struct dentry *, int, struct nameidata *); -extern struct dentry * cifs_lookup(struct inode *, struct dentry *, +extern struct dentry *cifs_lookup(struct inode *, struct dentry *, struct nameidata *); extern int cifs_unlink(struct inode *, struct dentry *); extern int cifs_hardlink(struct dentry *, struct inode *, struct dentry *); @@ -63,16 +63,16 @@ extern const struct inode_operations cifs_symlink_inode_ops; /* Functions related to files and directories */ extern const struct file_operations cifs_file_ops; -extern const struct file_operations cifs_file_direct_ops; /* if directio mount */ +extern const struct file_operations cifs_file_direct_ops; /* if directio mnt */ extern const struct file_operations cifs_file_nobrl_ops; -extern const struct file_operations cifs_file_direct_nobrl_ops; /* if directio mount */ +extern const struct file_operations cifs_file_direct_nobrl_ops; /* no brlocks */ extern int cifs_open(struct inode *inode, struct file *file); extern int cifs_close(struct inode *inode, struct file *file); extern int cifs_closedir(struct inode *inode, struct file *file); extern ssize_t cifs_user_read(struct file *file, char __user *read_data, - size_t read_size, loff_t * poffset); + size_t read_size, loff_t *poffset); extern ssize_t cifs_user_write(struct file *file, const char __user *write_data, - size_t write_size, loff_t * poffset); + size_t write_size, loff_t *poffset); extern int cifs_lock(struct file *, int, struct file_lock *); extern int cifs_fsync(struct file *, struct dentry *, int); extern int cifs_flush(struct file *, fl_owner_t id); @@ -88,8 +88,9 @@ extern struct dentry_operations cifs_ci_dentry_ops; /* Functions related to symlinks */ extern void *cifs_follow_link(struct dentry *direntry, struct nameidata *nd); -extern void cifs_put_link(struct dentry *direntry, struct nameidata *nd, void *); -extern int cifs_readlink(struct dentry *direntry, char __user *buffer, +extern void cifs_put_link(struct dentry *direntry, + struct nameidata *nd, void *); +extern int cifs_readlink(struct dentry *direntry, char __user *buffer, int buflen); extern int cifs_symlink(struct inode *inode, struct dentry *direntry, const char *symname); @@ -98,7 +99,7 @@ extern int cifs_setxattr(struct dentry *, const char *, const void *, size_t, int); extern ssize_t cifs_getxattr(struct dentry *, const char *, void *, size_t); extern ssize_t cifs_listxattr(struct dentry *, char *, size_t); -extern int cifs_ioctl (struct inode * inode, struct file * filep, +extern int cifs_ioctl (struct inode *inode, struct file *filep, unsigned int command, unsigned long arg); -#define CIFS_VERSION "1.49" +#define CIFS_VERSION "1.50" #endif /* _CIFSFS_H */ diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 23655de2f4a4..b98742fc3b5a 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -1,7 +1,7 @@ /* * fs/cifs/cifsglob.h * - * Copyright (C) International Business Machines Corp., 2002,2006 + * Copyright (C) International Business Machines Corp., 2002,2007 * Author(s): Steve French (sfrench@us.ibm.com) * Jeremy Allison (jra@samba.org) * @@ -14,7 +14,7 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See * the GNU Lesser General Public License for more details. - * + * */ #include <linux/in.h> #include <linux/in6.h> @@ -28,7 +28,7 @@ #define MAX_TREE_SIZE 2 + MAX_SERVER_SIZE + 1 + MAX_SHARE_SIZE + 1 #define MAX_SERVER_SIZE 15 -#define MAX_SHARE_SIZE 64 /* used to be 20 - this should still be enough */ +#define MAX_SHARE_SIZE 64 /* used to be 20, this should still be enough */ #define MAX_USERNAME_SIZE 32 /* 32 is to allow for 15 char names + null termination then *2 for unicode versions */ #define MAX_PASSWORD_SIZE 16 @@ -38,13 +38,13 @@ /* * MAX_REQ is the maximum number of requests that WE will send * on one socket concurently. It also matches the most common - * value of max multiplex returned by servers. We may + * value of max multiplex returned by servers. We may * eventually want to use the negotiated value (in case * future servers can handle more) when we are more confident that * we will not have problems oveloading the socket with pending * write data. */ -#define CIFS_MAX_REQ 50 +#define CIFS_MAX_REQ 50 #define SERVER_NAME_LENGTH 15 #define SERVER_NAME_LEN_WITH_NULL (SERVER_NAME_LENGTH + 1) @@ -104,6 +104,17 @@ enum protocolEnum { /* Netbios frames protocol not supported at this time */ }; +struct mac_key { + unsigned int len; + union { + char ntlm[CIFS_SESS_KEY_SIZE + 16]; + struct { + char key[16]; + struct ntlmv2_resp resp; + } ntlmv2; + } data; +}; + /* ***************************************************************** * Except the CIFS PDUs themselves all the @@ -120,13 +131,13 @@ struct TCP_Server_Info { struct sockaddr_in sockAddr; struct sockaddr_in6 sockAddr6; } addr; - wait_queue_head_t response_q; + wait_queue_head_t response_q; wait_queue_head_t request_q; /* if more than maxmpx to srvr must block*/ struct list_head pending_mid_q; void *Server_NlsInfo; /* BB - placeholder for future NLS info */ unsigned short server_codepage; /* codepage for the server */ unsigned long ip_address; /* IP addr for the server if known */ - enum protocolEnum protocolType; + enum protocolEnum protocolType; char versionMajor; char versionMinor; unsigned svlocal:1; /* local server or remote */ @@ -159,14 +170,15 @@ struct TCP_Server_Info { /* 16th byte of RFC1001 workstation name is always null */ char workstation_RFC1001_name[SERVER_NAME_LEN_WITH_NULL]; __u32 sequence_number; /* needed for CIFS PDU signature */ - char mac_signing_key[CIFS_SESS_KEY_SIZE + 16]; + struct mac_key mac_signing_key; + char ntlmv2_hash[16]; unsigned long lstrp; /* when we got last response from this server */ }; /* * The following is our shortcut to user information. We surface the uid, * and name. We always get the password on the fly in case it - * has changed. We also hang a list of sessions owned by this user off here. + * has changed. We also hang a list of sessions owned by this user off here. */ struct cifsUidInfo { struct list_head userList; @@ -197,11 +209,11 @@ struct cifsSesInfo { int Suid; /* remote smb uid */ uid_t linux_uid; /* local Linux uid */ int capabilities; - char serverName[SERVER_NAME_LEN_WITH_NULL * 2]; /* BB make bigger for + char serverName[SERVER_NAME_LEN_WITH_NULL * 2]; /* BB make bigger for TCP names - will ipv6 and sctp addresses fit? */ char userName[MAX_USERNAME_SIZE + 1]; - char * domainName; - char * password; + char *domainName; + char *password; }; /* no more than one of the following three session flags may be set */ #define CIFS_SES_NT4 1 @@ -213,7 +225,7 @@ struct cifsSesInfo { #define CIFS_SES_LANMAN 8 /* * there is one of these for each connection to a resource on a particular - * session + * session */ struct cifsTconInfo { struct list_head cifsConnectionList; @@ -269,7 +281,9 @@ struct cifsTconInfo { FILE_SYSTEM_UNIX_INFO fsUnixInfo; unsigned retry:1; unsigned nocase:1; - /* BB add field for back pointer to sb struct? */ + unsigned unix_ext:1; /* if off disable Linux extensions to CIFS protocol + for this mount even if server would support */ + /* BB add field for back pointer to sb struct(s)? */ }; /* @@ -291,9 +305,9 @@ struct cifs_search_info { __u16 entries_in_buffer; __u16 info_level; __u32 resume_key; - char * ntwrk_buf_start; - char * srch_entries_start; - char * presume_name; + char *ntwrk_buf_start; + char *srch_entries_start; + char *presume_name; unsigned int resume_name_len; unsigned endOfSearch:1; unsigned emptyDir:1; @@ -309,15 +323,15 @@ struct cifsFileInfo { __u16 netfid; /* file id from remote */ /* BB add lock scope info here if needed */ ; /* lock scope id (0 if none) */ - struct file * pfile; /* needed for writepage */ - struct inode * pInode; /* needed for oplock break */ + struct file *pfile; /* needed for writepage */ + struct inode *pInode; /* needed for oplock break */ struct mutex lock_mutex; struct list_head llist; /* list of byte range locks we have. */ unsigned closePend:1; /* file is marked to close */ unsigned invalidHandle:1; /* file closed via session abend */ atomic_t wrtPending; /* handle in use - defer close */ struct semaphore fh_sem; /* prevents reopen race after dead ses*/ - char * search_resume_name; /* BB removeme BB */ + char *search_resume_name; /* BB removeme BB */ struct cifs_search_info srch_inf; }; @@ -327,7 +341,7 @@ struct cifsFileInfo { struct cifsInodeInfo { struct list_head lockList; - /* BB add in lists for dirty pages - i.e. write caching info for oplock */ + /* BB add in lists for dirty pages i.e. write caching info for oplock */ struct list_head openFileList; int write_behind_rc; __u32 cifsAttrs; /* e.g. DOS archive bit, sparse, compressed, system */ @@ -381,9 +395,9 @@ static inline void cifs_stats_bytes_read(struct cifsTconInfo *tcon, } #else -#define cifs_stats_inc(field) do {} while(0) -#define cifs_stats_bytes_written(tcon, bytes) do {} while(0) -#define cifs_stats_bytes_read(tcon, bytes) do {} while(0) +#define cifs_stats_inc(field) do {} while (0) +#define cifs_stats_bytes_written(tcon, bytes) do {} while (0) +#define cifs_stats_bytes_read(tcon, bytes) do {} while (0) #endif @@ -410,8 +424,8 @@ struct mid_q_entry { struct oplock_q_entry { struct list_head qhead; - struct inode * pinode; - struct cifsTconInfo * tcon; + struct inode *pinode; + struct cifsTconInfo *tcon; __u16 netfid; }; @@ -426,7 +440,7 @@ struct dir_notify_req { __u16 netfid; __u32 filter; /* CompletionFilter (for multishot) */ int multishot; - struct file * pfile; + struct file *pfile; }; #define MID_FREE 0 @@ -464,7 +478,7 @@ require use of the stronger protocol */ #define CIFSSEC_MUST_LANMAN 0x10010 #define CIFSSEC_MUST_PLNTXT 0x20020 #define CIFSSEC_MASK 0x37037 /* current flags supported if weak */ -#else +#else #define CIFSSEC_MASK 0x07007 /* flags supported if no weak config */ #endif /* WEAK_PW_HASH */ #define CIFSSEC_MUST_SEAL 0x40040 /* not supported yet */ @@ -502,7 +516,7 @@ require use of the stronger protocol */ * ---------- * sesSem operations on smb session * tconSem operations on tree connection - * fh_sem file handle reconnection operations + * fh_sem file handle reconnection operations * ****************************************************************************/ @@ -515,7 +529,7 @@ require use of the stronger protocol */ /* * The list of servers that did not respond with NT LM 0.12. * This list helps improve performance and eliminate the messages indicating - * that we had a communications error talking to the server in this list. + * that we had a communications error talking to the server in this list. */ /* Feature not supported */ /* GLOBAL_EXTERN struct servers_not_supported *NotSuppList; */ @@ -568,12 +582,12 @@ GLOBAL_EXTERN atomic_t midCount; /* Misc globals */ GLOBAL_EXTERN unsigned int multiuser_mount; /* if enabled allows new sessions to be established on existing mount if we - have the uid/password or Kerberos credential + have the uid/password or Kerberos credential or equivalent for current user */ GLOBAL_EXTERN unsigned int oplockEnabled; GLOBAL_EXTERN unsigned int experimEnabled; GLOBAL_EXTERN unsigned int lookupCacheEnabled; -GLOBAL_EXTERN unsigned int extended_security; /* if on, session setup sent +GLOBAL_EXTERN unsigned int extended_security; /* if on, session setup sent with more secure ntlmssp2 challenge/resp */ GLOBAL_EXTERN unsigned int sign_CIFS_PDUs; /* enable smb packet signing */ GLOBAL_EXTERN unsigned int linuxExtEnabled;/*enable Linux/Unix CIFS extensions*/ diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h index d619ca7d1416..6a2056e58ceb 100644 --- a/fs/cifs/cifspdu.h +++ b/fs/cifs/cifspdu.h @@ -144,7 +144,7 @@ #define SMBOPEN_OAPPEND 0x0001 /* - * SMB flag definitions + * SMB flag definitions */ #define SMBFLG_EXTD_LOCK 0x01 /* server supports lock-read write-unlock smb */ #define SMBFLG_RCV_POSTED 0x02 /* obsolete */ @@ -157,9 +157,9 @@ #define SMBFLG_RESPONSE 0x80 /* this PDU is a response from server */ /* - * SMB flag2 definitions + * SMB flag2 definitions */ -#define SMBFLG2_KNOWS_LONG_NAMES cpu_to_le16(1) /* can send long (non-8.3) +#define SMBFLG2_KNOWS_LONG_NAMES cpu_to_le16(1) /* can send long (non-8.3) path names in response */ #define SMBFLG2_KNOWS_EAS cpu_to_le16(2) #define SMBFLG2_SECURITY_SIGNATURE cpu_to_le16(4) @@ -260,7 +260,7 @@ #define ATTR_SPARSE 0x0200 #define ATTR_REPARSE 0x0400 #define ATTR_COMPRESSED 0x0800 -#define ATTR_OFFLINE 0x1000 /* ie file not immediately available - +#define ATTR_OFFLINE 0x1000 /* ie file not immediately available - on offline storage */ #define ATTR_NOT_CONTENT_INDEXED 0x2000 #define ATTR_ENCRYPTED 0x4000 @@ -300,7 +300,7 @@ #define CREATE_DELETE_ON_CLOSE 0x00001000 #define CREATE_OPEN_BY_ID 0x00002000 #define OPEN_REPARSE_POINT 0x00200000 -#define CREATE_OPTIONS_MASK 0x007FFFFF +#define CREATE_OPTIONS_MASK 0x007FFFFF #define CREATE_OPTION_SPECIAL 0x20000000 /* system. NB not sent over wire */ /* ImpersonationLevel flags */ @@ -366,17 +366,19 @@ struct smb_hdr { #define pByteArea(smb_var) ((unsigned char *)smb_var + sizeof(struct smb_hdr) + (2* smb_var->WordCount) + 2 ) /* - * Computer Name Length + * Computer Name Length (since Netbios name was length 16 with last byte 0x20) + * No longer as important, now that TCP names are more commonly used to + * resolve hosts. */ #define CNLEN 15 /* - * Share Name Length @S8A - * Note: This length is limited by the SMB used to get @S8A - * the Share info. NetShareEnum only returns 13 @S8A - * chars, including the null termination. @S8A + * Share Name Length (SNLEN) + * Note: This length was limited by the SMB used to get + * the Share info. NetShareEnum only returned 13 + * chars, including the null termination. + * This was removed because it no longer is limiting. */ -#define SNLEN 12 /*@S8A */ /* * Comment Length @@ -394,8 +396,8 @@ struct smb_hdr { * * The Naming convention is the lower case version of the * smb command code name for the struct and this is typedef to the - * uppercase version of the same name with the prefix SMB_ removed - * for brevity. Although typedefs are not commonly used for + * uppercase version of the same name with the prefix SMB_ removed + * for brevity. Although typedefs are not commonly used for * structure definitions in the Linux kernel, their use in the * CIFS standards document, which this code is based on, may * make this one of the cases where typedefs for structures make @@ -403,7 +405,7 @@ struct smb_hdr { * Typedefs can always be removed later if they are too distracting * and they are only used for the CIFSs PDUs themselves, not * internal cifs vfs structures - * + * */ typedef struct negotiate_req { @@ -511,7 +513,7 @@ typedef union smb_com_session_setup_andx { unsigned char SecurityBlob[1]; /* followed by */ /* STRING NativeOS */ /* STRING NativeLanMan */ - } __attribute__((packed)) req; /* NTLM request format (with + } __attribute__((packed)) req; /* NTLM request format (with extended security */ struct { /* request format */ @@ -549,7 +551,7 @@ typedef union smb_com_session_setup_andx { /* unsigned char * NativeOS; */ /* unsigned char * NativeLanMan; */ /* unsigned char * PrimaryDomain; */ - } __attribute__((packed)) resp; /* NTLM response + } __attribute__((packed)) resp; /* NTLM response (with or without extended sec) */ struct { /* request format */ @@ -618,7 +620,7 @@ struct ntlmv2_resp { #define CAP_NT_SMBS 0x00000010 #define CAP_STATUS32 0x00000040 #define CAP_LEVEL_II_OPLOCKS 0x00000080 -#define CAP_NT_FIND 0x00000200 /* reserved should be zero +#define CAP_NT_FIND 0x00000200 /* reserved should be zero (because NT_SMBs implies the same thing?) */ #define CAP_BULK_TRANSFER 0x20000000 #define CAP_EXTENDED_SECURITY 0x80000000 @@ -676,7 +678,7 @@ typedef struct smb_com_logoff_andx_rsp { __u16 ByteCount; } __attribute__((packed)) LOGOFF_ANDX_RSP; -typedef union smb_com_tree_disconnect { /* as an altetnative can use flag on +typedef union smb_com_tree_disconnect { /* as an altetnative can use flag on tree_connect PDU to effect disconnect */ /* tdis is probably simplest SMB PDU */ struct { @@ -712,6 +714,7 @@ typedef struct smb_com_findclose_req { #define REQ_OPLOCK 0x00000002 #define REQ_BATCHOPLOCK 0x00000004 #define REQ_OPENDIRONLY 0x00000008 +#define REQ_EXTENDED_INFO 0x00000010 typedef struct smb_com_open_req { /* also handles create */ struct smb_hdr hdr; /* wct = 24 */ @@ -799,27 +802,28 @@ typedef struct smb_com_openx_rsp { __u32 FileId; __u16 Reserved; __u16 ByteCount; -} __attribute__((packed)) OPENX_RSP; +} __attribute__((packed)) OPENX_RSP; /* For encoding of POSIX Open Request - see trans2 function 0x209 data struct */ /* Legacy write request for older servers */ typedef struct smb_com_writex_req { - struct smb_hdr hdr; /* wct = 12 */ - __u8 AndXCommand; - __u8 AndXReserved; - __le16 AndXOffset; - __u16 Fid; - __le32 OffsetLow; - __u32 Reserved; /* Timeout */ - __le16 WriteMode; /* 1 = write through */ - __le16 Remaining; - __le16 Reserved2; - __le16 DataLengthLow; - __le16 DataOffset; - __le16 ByteCount; - __u8 Pad; /* BB check for whether padded to DWORD boundary and optimum performance here */ - char Data[0]; + struct smb_hdr hdr; /* wct = 12 */ + __u8 AndXCommand; + __u8 AndXReserved; + __le16 AndXOffset; + __u16 Fid; + __le32 OffsetLow; + __u32 Reserved; /* Timeout */ + __le16 WriteMode; /* 1 = write through */ + __le16 Remaining; + __le16 Reserved2; + __le16 DataLengthLow; + __le16 DataOffset; + __le16 ByteCount; + __u8 Pad; /* BB check for whether padded to DWORD + boundary and optimum performance here */ + char Data[0]; } __attribute__((packed)) WRITEX_REQ; typedef struct smb_com_write_req { @@ -837,7 +841,8 @@ typedef struct smb_com_write_req { __le16 DataOffset; __le32 OffsetHigh; __le16 ByteCount; - __u8 Pad; /* BB check for whether padded to DWORD boundary and optimum performance here */ + __u8 Pad; /* BB check for whether padded to DWORD + boundary and optimum performance here */ char Data[0]; } __attribute__((packed)) WRITE_REQ; @@ -855,17 +860,17 @@ typedef struct smb_com_write_rsp { /* legacy read request for older servers */ typedef struct smb_com_readx_req { - struct smb_hdr hdr; /* wct = 10 */ - __u8 AndXCommand; - __u8 AndXReserved; - __le16 AndXOffset; - __u16 Fid; - __le32 OffsetLow; - __le16 MaxCount; - __le16 MinCount; /* obsolete */ - __le32 Reserved; - __le16 Remaining; - __le16 ByteCount; + struct smb_hdr hdr; /* wct = 10 */ + __u8 AndXCommand; + __u8 AndXReserved; + __le16 AndXOffset; + __u16 Fid; + __le32 OffsetLow; + __le16 MaxCount; + __le16 MinCount; /* obsolete */ + __le32 Reserved; + __le16 Remaining; + __le16 ByteCount; } __attribute__((packed)) READX_REQ; typedef struct smb_com_read_req { @@ -896,7 +901,8 @@ typedef struct smb_com_read_rsp { __le16 DataLengthHigh; __u64 Reserved2; __u16 ByteCount; - __u8 Pad; /* BB check for whether padded to DWORD boundary and optimum performance here */ + __u8 Pad; /* BB check for whether padded to DWORD + boundary and optimum performance here */ char Data[1]; } __attribute__((packed)) READ_RSP; @@ -967,7 +973,7 @@ typedef struct smb_com_rename_req { #define COPY_TARGET_MODE_ASCII 0x0004 /* if not set, binary */ #define COPY_SOURCE_MODE_ASCII 0x0008 /* if not set, binary */ #define COPY_VERIFY_WRITES 0x0010 -#define COPY_TREE 0x0020 +#define COPY_TREE 0x0020 typedef struct smb_com_copy_req { struct smb_hdr hdr; /* wct = 3 */ @@ -975,7 +981,7 @@ typedef struct smb_com_copy_req { __le16 OpenFunction; __le16 Flags; __le16 ByteCount; - __u8 BufferFormat; /* 4 = ASCII or Unicode */ + __u8 BufferFormat; /* 4 = ASCII or Unicode */ unsigned char OldFileName[1]; /* followed by __u8 BufferFormat2 */ /* followed by NewFileName string */ @@ -1083,28 +1089,28 @@ typedef struct smb_com_setattr_rsp { /*******************************************************/ /* NT Transact structure defintions follow */ -/* Currently only ioctl, acl (get security descriptor) */ +/* Currently only ioctl, acl (get security descriptor) */ /* and notify are implemented */ /*******************************************************/ typedef struct smb_com_ntransact_req { - struct smb_hdr hdr; /* wct >= 19 */ - __u8 MaxSetupCount; - __u16 Reserved; - __le32 TotalParameterCount; - __le32 TotalDataCount; - __le32 MaxParameterCount; - __le32 MaxDataCount; - __le32 ParameterCount; - __le32 ParameterOffset; - __le32 DataCount; - __le32 DataOffset; - __u8 SetupCount; /* four setup words follow subcommand */ - /* SNIA spec incorrectly included spurious pad here */ - __le16 SubCommand; /* 2 = IOCTL/FSCTL */ - /* SetupCount words follow then */ - __le16 ByteCount; - __u8 Pad[3]; - __u8 Parms[0]; + struct smb_hdr hdr; /* wct >= 19 */ + __u8 MaxSetupCount; + __u16 Reserved; + __le32 TotalParameterCount; + __le32 TotalDataCount; + __le32 MaxParameterCount; + __le32 MaxDataCount; + __le32 ParameterCount; + __le32 ParameterOffset; + __le32 DataCount; + __le32 DataOffset; + __u8 SetupCount; /* four setup words follow subcommand */ + /* SNIA spec incorrectly included spurious pad here */ + __le16 SubCommand; /* 2 = IOCTL/FSCTL */ + /* SetupCount words follow then */ + __le16 ByteCount; + __u8 Pad[3]; + __u8 Parms[0]; } __attribute__((packed)) NTRANSACT_REQ; typedef struct smb_com_ntransact_rsp { @@ -1120,7 +1126,7 @@ typedef struct smb_com_ntransact_rsp { __le32 DataDisplacement; __u8 SetupCount; /* 0 */ __u16 ByteCount; - /* __u8 Pad[3]; */ + /* __u8 Pad[3]; */ /* parms and data follow */ } __attribute__((packed)) NTRANSACT_RSP; @@ -1215,7 +1221,7 @@ typedef struct smb_com_transaction_change_notify_req { /* __u8 Data[1];*/ } __attribute__((packed)) TRANSACT_CHANGE_NOTIFY_REQ; -/* BB eventually change to use generic ntransact rsp struct +/* BB eventually change to use generic ntransact rsp struct and validation routine */ typedef struct smb_com_transaction_change_notify_rsp { struct smb_hdr hdr; /* wct = 18 */ @@ -1262,7 +1268,7 @@ struct file_notify_information { __le32 Action; __le32 FileNameLength; __u8 FileName[0]; -} __attribute__((packed)); +} __attribute__((packed)); struct reparse_data { __u32 ReparseTag; @@ -1331,7 +1337,7 @@ struct trans2_resp { __u8 Reserved1; /* SetupWords[SetupCount]; __u16 ByteCount; - __u16 Reserved2;*/ + __u16 Reserved2;*/ /* data area follows */ } __attribute__((packed)); @@ -1370,9 +1376,9 @@ struct smb_t2_rsp { #define SMB_QUERY_FILE_INTERNAL_INFO 0x3ee #define SMB_QUERY_FILE_ACCESS_INFO 0x3f0 #define SMB_QUERY_FILE_NAME_INFO2 0x3f1 /* 0x30 bytes */ -#define SMB_QUERY_FILE_POSITION_INFO 0x3f6 +#define SMB_QUERY_FILE_POSITION_INFO 0x3f6 #define SMB_QUERY_FILE_MODE_INFO 0x3f8 -#define SMB_QUERY_FILE_ALGN_INFO 0x3f9 +#define SMB_QUERY_FILE_ALGN_INFO 0x3f9 #define SMB_SET_FILE_BASIC_INFO 0x101 @@ -1506,35 +1512,35 @@ struct smb_com_transaction2_sfi_req { __u16 Pad1; __u16 Fid; __le16 InformationLevel; - __u16 Reserved4; + __u16 Reserved4; } __attribute__((packed)); struct smb_com_transaction2_sfi_rsp { struct smb_hdr hdr; /* wct = 10 + SetupCount */ struct trans2_resp t2; __u16 ByteCount; - __u16 Reserved2; /* parameter word reserved - + __u16 Reserved2; /* parameter word reserved - present for infolevels > 100 */ } __attribute__((packed)); struct smb_t2_qfi_req { - struct smb_hdr hdr; - struct trans2_req t2; + struct smb_hdr hdr; + struct trans2_req t2; __u8 Pad; __u16 Fid; __le16 InformationLevel; } __attribute__((packed)); struct smb_t2_qfi_rsp { - struct smb_hdr hdr; /* wct = 10 + SetupCount */ - struct trans2_resp t2; - __u16 ByteCount; - __u16 Reserved2; /* parameter word reserved - - present for infolevels > 100 */ + struct smb_hdr hdr; /* wct = 10 + SetupCount */ + struct trans2_resp t2; + __u16 ByteCount; + __u16 Reserved2; /* parameter word reserved - + present for infolevels > 100 */ } __attribute__((packed)); /* - * Flags on T2 FINDFIRST and FINDNEXT + * Flags on T2 FINDFIRST and FINDNEXT */ #define CIFS_SEARCH_CLOSE_ALWAYS 0x0001 #define CIFS_SEARCH_CLOSE_AT_END 0x0002 @@ -1743,7 +1749,9 @@ typedef struct smb_com_transaction2_get_dfs_refer_req { __u8 Reserved3; __le16 SubCommand; /* one setup word */ __le16 ByteCount; - __u8 Pad[3]; /* Win2K has sent 0x0F01 (max resp length perhaps?) followed by one byte pad - doesn't seem to matter though */ + __u8 Pad[3]; /* Win2K has sent 0x0F01 (max response length + perhaps?) followed by one byte pad - doesn't + seem to matter though */ __le16 MaxReferralLevel; char RequestFileName[1]; } __attribute__((packed)) TRANSACTION2_GET_DFS_REFER_REQ; @@ -1752,7 +1760,10 @@ typedef struct dfs_referral_level_3 { __le16 VersionNumber; __le16 ReferralSize; __le16 ServerType; /* 0x0001 = CIFS server */ - __le16 ReferralFlags; /* or proximity - not clear which since always set to zero - SNIA spec says 0x01 means strip off PathConsumed chars before submitting RequestFileName to remote node */ + __le16 ReferralFlags; /* or proximity - not clear which since it is + always set to zero - SNIA spec says 0x01 + means strip off PathConsumed chars before + submitting RequestFileName to remote node */ __le16 TimeToLive; __le16 Proximity; __le16 DfsPathOffset; @@ -1778,11 +1789,13 @@ typedef struct smb_com_transaction_get_dfs_refer_rsp { #define DFSREF_STORAGE_SERVER 0x0002 /* IOCTL information */ -/* List of ioctl function codes that look to be of interest to remote clients like this. */ -/* Need to do some experimentation to make sure they all work remotely. */ -/* Some of the following such as the encryption/compression ones would be */ -/* invoked from tools via a specialized hook into the VFS rather than via the */ -/* standard vfs entry points */ +/* + * List of ioctl function codes that look to be of interest to remote clients + * like this one. Need to do some experimentation to make sure they all work + * remotely. Some of the following, such as the encryption/compression ones + * would be invoked from tools via a specialized hook into the VFS rather + * than via the standard vfs entry points + */ #define FSCTL_REQUEST_OPLOCK_LEVEL_1 0x00090000 #define FSCTL_REQUEST_OPLOCK_LEVEL_2 0x00090004 #define FSCTL_REQUEST_BATCH_OPLOCK 0x00090008 @@ -1811,7 +1824,7 @@ typedef struct smb_com_transaction_get_dfs_refer_rsp { /* ************************************************************************ * All structs for everything above the SMB PDUs themselves - * (such as the T2 level specific data) go here + * (such as the T2 level specific data) go here ************************************************************************ */ @@ -1857,7 +1870,7 @@ typedef struct { __le64 FreeAllocationUnits; __le32 SectorsPerAllocationUnit; __le32 BytesPerSector; -} __attribute__((packed)) FILE_SYSTEM_INFO; /* size info, level 0x103 */ +} __attribute__((packed)) FILE_SYSTEM_INFO; /* size info, level 0x103 */ typedef struct { __le32 fsid; @@ -1871,7 +1884,7 @@ typedef struct { __le16 MajorVersionNumber; __le16 MinorVersionNumber; __le64 Capability; -} __attribute__((packed)) FILE_SYSTEM_UNIX_INFO; /* Unix extensions info, level 0x200 */ +} __attribute__((packed)) FILE_SYSTEM_UNIX_INFO; /* Unix extension level 0x200*/ /* Version numbers for CIFS UNIX major and minor. */ #define CIFS_UNIX_MAJOR_VERSION 1 @@ -1885,16 +1898,20 @@ typedef struct { #define CIFS_UNIX_POSIX_PATHNAMES_CAP 0x00000010 /* Allow POSIX path chars */ #define CIFS_UNIX_POSIX_PATH_OPS_CAP 0x00000020 /* Allow new POSIX path based calls including posix open - and posix unlink */ + and posix unlink */ +#define CIFS_UNIX_LARGE_READ_CAP 0x00000040 /* support reads >128K (up + to 0xFFFF00 */ +#define CIFS_UNIX_LARGE_WRITE_CAP 0x00000080 + #ifdef CONFIG_CIFS_POSIX /* Can not set pathnames cap yet until we send new posix create SMB since otherwise server can treat such handles opened with older ntcreatex (by a new client which knows how to send posix path ops) as non-posix handles (can affect write behavior with byte range locks. We can add back in POSIX_PATH_OPS cap when Posix Create/Mkdir finished */ -/* #define CIFS_UNIX_CAP_MASK 0x0000003b */ -#define CIFS_UNIX_CAP_MASK 0x0000001b -#else +/* #define CIFS_UNIX_CAP_MASK 0x000000fb */ +#define CIFS_UNIX_CAP_MASK 0x000000db +#else #define CIFS_UNIX_CAP_MASK 0x00000013 #endif /* CONFIG_CIFS_POSIX */ @@ -1904,10 +1921,10 @@ typedef struct { typedef struct { /* For undefined recommended transfer size return -1 in that field */ __le32 OptimalTransferSize; /* bsize on some os, iosize on other os */ - __le32 BlockSize; + __le32 BlockSize; /* The next three fields are in terms of the block size. (above). If block size is unknown, 4096 would be a - reasonable block size for a server to report. + reasonable block size for a server to report. Note that returning the blocks/blocksavail removes need to make a second call (to QFSInfo level 0x103 to get this info. UserBlockAvail is typically less than or equal to BlocksAvail, @@ -2062,9 +2079,9 @@ struct file_alt_name_info { struct file_stream_info { __le32 number_of_streams; /* BB check sizes and verify location */ - /* followed by info on streams themselves + /* followed by info on streams themselves u64 size; - u64 allocation_size + u64 allocation_size stream info */ }; /* level 0x109 */ @@ -2083,7 +2100,7 @@ struct cifs_posix_ace { /* access control entry (ACE) */ __u8 cifs_e_tag; __u8 cifs_e_perm; __le64 cifs_uid; /* or gid */ -} __attribute__((packed)); +} __attribute__((packed)); struct cifs_posix_acl { /* access conrol list (ACL) */ __le16 version; @@ -2138,6 +2155,12 @@ typedef struct { /* struct following varies based on requested level */ } __attribute__((packed)) OPEN_PSX_RSP; /* level 0x209 SetPathInfo data */ +#define SMB_POSIX_UNLINK_FILE_TARGET 0 +#define SMB_POSIX_UNLINK_DIRECTORY_TARGET 1 + +struct unlink_psx_rq { /* level 0x20a SetPathInfo */ + __le16 type; +} __attribute__((packed)); struct file_internal_info { __u64 UniqueId; /* inode number */ @@ -2154,7 +2177,7 @@ struct file_attrib_tag { /********************************************************/ -/* FindFirst/FindNext transact2 data buffer formats */ +/* FindFirst/FindNext transact2 data buffer formats */ /********************************************************/ typedef struct { @@ -2232,7 +2255,7 @@ typedef struct { __le64 EndOfFile; __le64 AllocationSize; __le32 ExtFileAttributes; - __le32 FileNameLength; + __le32 FileNameLength; __le32 EaSize; /* length of the xattrs */ __u8 ShortNameLength; __u8 Reserved; @@ -2259,7 +2282,7 @@ typedef struct { struct win_dev { unsigned char type[8]; /* IntxCHR or IntxBLK */ __le64 major; - __le64 minor; + __le64 minor; } __attribute__((packed)); struct gea { @@ -2291,36 +2314,36 @@ struct fealist { struct data_blob { __u8 *data; size_t length; - void (*free) (struct data_blob * data_blob); + void (*free) (struct data_blob *data_blob); } __attribute__((packed)); #ifdef CONFIG_CIFS_POSIX -/* +/* For better POSIX semantics from Linux client, (even better than the existing CIFS Unix Extensions) we need updated PDUs for: - + 1) PosixCreateX - to set and return the mode, inode#, device info and perhaps add a CreateDevice - to create Pipes and other special .inodes Also note POSIX open flags - 2) Close - to return the last write time to do cache across close + 2) Close - to return the last write time to do cache across close more safely - 3) FindFirst return unique inode number - what about resume key, two + 3) FindFirst return unique inode number - what about resume key, two forms short (matches readdir) and full (enough info to cache inodes) 4) Mkdir - set mode - - And under consideration: + + And under consideration: 5) FindClose2 (return nanosecond timestamp ??) - 6) Use nanosecond timestamps throughout all time fields if + 6) Use nanosecond timestamps throughout all time fields if corresponding attribute flag is set 7) sendfile - handle based copy 8) Direct i/o 9) Misc fcntls? - + what about fixing 64 bit alignment - + There are also various legacy SMB/CIFS requests used as is - + From existing Lanman and NTLM dialects: -------------------------------------- NEGOTIATE @@ -2341,48 +2364,48 @@ struct data_blob { (BB verify that never need to set allocation size) SMB_SET_FILE_BASIC_INFO2 (setting times - BB can it be done via Unix ext?) - + COPY (note support for copy across directories) - FUTURE, OPTIONAL setting/getting OS/2 EAs - FUTURE (BB can this handle setting Linux xattrs perfectly) - OPTIONAL dnotify - FUTURE, OPTIONAL quota - FUTURE, OPTIONAL - - Note that various requests implemented for NT interop such as + + Note that various requests implemented for NT interop such as NT_TRANSACT (IOCTL) QueryReparseInfo are unneeded to servers compliant with the CIFS POSIX extensions - + From CIFS Unix Extensions: ------------------------- T2 SET_PATH_INFO (SMB_SET_FILE_UNIX_LINK) for symlinks T2 SET_PATH_INFO (SMB_SET_FILE_BASIC_INFO2) T2 QUERY_PATH_INFO (SMB_QUERY_FILE_UNIX_LINK) - T2 QUERY_PATH_INFO (SMB_QUERY_FILE_UNIX_BASIC) - BB check for missing inode fields - Actually need QUERY_FILE_UNIX_INFO since has inode num - BB what about a) blksize/blkbits/blocks + T2 QUERY_PATH_INFO (SMB_QUERY_FILE_UNIX_BASIC) BB check for missing + inode fields + Actually a need QUERY_FILE_UNIX_INFO + since has inode num + BB what about a) blksize/blkbits/blocks b) i_version c) i_rdev d) notify mask? e) generation f) size_seqcount T2 FIND_FIRST/FIND_NEXT FIND_FILE_UNIX - TRANS2_GET_DFS_REFERRAL - OPTIONAL but recommended + TRANS2_GET_DFS_REFERRAL - OPTIONAL but recommended T2_QFS_INFO QueryDevice/AttributeInfo - OPTIONAL - - */ /* xsymlink is a symlink format (used by MacOS) that can be used - to save symlink info in a regular file when + to save symlink info in a regular file when mounted to operating systems that do not support the cifs Unix extensions or EAs (for xattr based symlinks). For such a file to be recognized - as containing symlink data: + as containing symlink data: - 1) file size must be 1067, + 1) file size must be 1067, 2) signature must begin file data, 3) length field must be set to ASCII representation - of a number which is less than or equal to 1024, + of a number which is less than or equal to 1024, 4) md5 must match that of the path data */ struct xsymlink { @@ -2393,10 +2416,10 @@ struct xsymlink { char length[4]; char cr1; /* \n */ /* md5 of valid subset of path ie path[0] through path[length-1] */ - __u8 md5[32]; + __u8 md5[32]; char cr2; /* \n */ /* if room left, then end with \n then 0x20s by convention but not required */ - char path[1024]; + char path[1024]; } __attribute__((packed)); typedef struct file_xattr_info { @@ -2405,7 +2428,8 @@ typedef struct file_xattr_info { __u32 xattr_value_len; char xattr_name[0]; /* followed by xattr_value[xattr_value_len], no pad */ -} __attribute__((packed)) FILE_XATTR_INFO; /* extended attribute, info level 0x205 */ +} __attribute__((packed)) FILE_XATTR_INFO; /* extended attribute info + level 0x205 */ /* flags for chattr command */ @@ -2431,8 +2455,9 @@ typedef struct file_xattr_info { typedef struct file_chattr_info { __le64 mask; /* list of all possible attribute bits */ __le64 mode; /* list of actual attribute bits on this inode */ -} __attribute__((packed)) FILE_CHATTR_INFO; /* ext attributes (chattr, chflags) level 0x206 */ +} __attribute__((packed)) FILE_CHATTR_INFO; /* ext attributes + (chattr, chflags) level 0x206 */ -#endif +#endif #endif /* _CIFSPDU_H */ diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 5d163e2b6143..04a69dafedba 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -16,7 +16,7 @@ * * You should have received a copy of the GNU Lesser General Public License * along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _CIFSPROTO_H #define _CIFSPROTO_H @@ -49,9 +49,9 @@ extern int SendReceive(const unsigned int /* xid */ , struct cifsSesInfo *, struct smb_hdr * /* out */ , int * /* bytes returned */ , const int long_op); extern int SendReceive2(const unsigned int /* xid */ , struct cifsSesInfo *, - struct kvec *, int /* nvec to send */, + struct kvec *, int /* nvec to send */, int * /* type of buf returned */ , const int long_op); -extern int SendReceiveBlockingLock(const unsigned int /* xid */ , +extern int SendReceiveBlockingLock(const unsigned int /* xid */ , struct cifsTconInfo *, struct smb_hdr * /* input */ , struct smb_hdr * /* out */ , @@ -64,19 +64,19 @@ extern unsigned int smbCalcSize(struct smb_hdr *ptr); extern unsigned int smbCalcSize_LE(struct smb_hdr *ptr); extern int decode_negTokenInit(unsigned char *security_blob, int length, enum securityEnum *secType); -extern int cifs_inet_pton(int, char * source, void *dst); +extern int cifs_inet_pton(int, char *source, void *dst); extern int map_smb_to_linux_error(struct smb_hdr *smb); extern void header_assemble(struct smb_hdr *, char /* command */ , const struct cifsTconInfo *, int /* length of fixed section (word count) in two byte units */); extern int small_smb_init_no_tc(const int smb_cmd, const int wct, struct cifsSesInfo *ses, - void ** request_buf); + void **request_buf); extern int CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, - const int stage, + const int stage, const struct nls_table *nls_cp); extern __u16 GetNextMid(struct TCP_Server_Info *server); -extern struct oplock_q_entry * AllocOplockQEntry(struct inode *, u16, +extern struct oplock_q_entry *AllocOplockQEntry(struct inode *, u16, struct cifsTconInfo *); extern void DeleteOplockQEntry(struct oplock_q_entry *); extern struct timespec cifs_NTtimeToUnix(u64 /* utc nanoseconds since 1601 */ ); @@ -85,12 +85,12 @@ extern __le64 cnvrtDosCifsTm(__u16 date, __u16 time); extern struct timespec cnvrtDosUnixTm(__u16 date, __u16 time); extern int cifs_get_inode_info(struct inode **pinode, - const unsigned char *search_path, + const unsigned char *search_path, FILE_ALL_INFO * pfile_info, struct super_block *sb, int xid); extern int cifs_get_inode_info_unix(struct inode **pinode, const unsigned char *search_path, - struct super_block *sb,int xid); + struct super_block *sb, int xid); extern int cifs_mount(struct super_block *, struct cifs_sb_info *, char *, const char *); @@ -98,8 +98,8 @@ extern int cifs_umount(struct super_block *, struct cifs_sb_info *); void cifs_proc_init(void); void cifs_proc_clean(void); -extern int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo, - struct nls_table * nls_info); +extern int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo, + struct nls_table *nls_info); extern int CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses); extern int CIFSTCon(unsigned int xid, struct cifsSesInfo *ses, @@ -108,11 +108,11 @@ extern int CIFSTCon(unsigned int xid, struct cifsSesInfo *ses, extern int CIFSFindFirst(const int xid, struct cifsTconInfo *tcon, const char *searchName, const struct nls_table *nls_codepage, - __u16 *searchHandle, struct cifs_search_info * psrch_inf, + __u16 *searchHandle, struct cifs_search_info *psrch_inf, int map, const char dirsep); extern int CIFSFindNext(const int xid, struct cifsTconInfo *tcon, - __u16 searchHandle, struct cifs_search_info * psrch_inf); + __u16 searchHandle, struct cifs_search_info *psrch_inf); extern int CIFSFindClose(const int, struct cifsTconInfo *tcon, const __u16 search_handle); @@ -123,9 +123,9 @@ extern int CIFSSMBQPathInfo(const int xid, struct cifsTconInfo *tcon, int legacy /* whether to use old info level */, const struct nls_table *nls_codepage, int remap); extern int SMBQueryInformation(const int xid, struct cifsTconInfo *tcon, - const unsigned char *searchName, - FILE_ALL_INFO * findData, - const struct nls_table *nls_codepage, int remap); + const unsigned char *searchName, + FILE_ALL_INFO *findData, + const struct nls_table *nls_codepage, int remap); extern int CIFSSMBUnixQPathInfo(const int xid, struct cifsTconInfo *tcon, @@ -143,13 +143,13 @@ extern int connect_to_dfs_path(int xid, struct cifsSesInfo *pSesInfo, const char *old_path, const struct nls_table *nls_codepage, int remap); extern int get_dfs_path(int xid, struct cifsSesInfo *pSesInfo, - const char *old_path, + const char *old_path, const struct nls_table *nls_codepage, - unsigned int *pnum_referrals, - unsigned char ** preferrals, + unsigned int *pnum_referrals, + unsigned char **preferrals, int remap); extern void reset_cifs_unix_caps(int xid, struct cifsTconInfo *tcon, - struct super_block * sb, struct smb_vol * vol); + struct super_block *sb, struct smb_vol *vol); extern int CIFSSMBQFSInfo(const int xid, struct cifsTconInfo *tcon, struct kstatfs *FSData); extern int SMBOldQFSInfo(const int xid, struct cifsTconInfo *tcon, @@ -181,11 +181,11 @@ extern int CIFSSMBSetEOF(const int xid, struct cifsTconInfo *tcon, const struct nls_table *nls_codepage, int remap_special_chars); extern int CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon, - __u64 size, __u16 fileHandle,__u32 opener_pid, + __u64 size, __u16 fileHandle, __u32 opener_pid, int AllocSizeFlag); extern int CIFSSMBUnixSetPerms(const int xid, struct cifsTconInfo *pTcon, char *full_path, __u64 mode, __u64 uid, - __u64 gid, dev_t dev, + __u64 gid, dev_t dev, const struct nls_table *nls_codepage, int remap_special_chars); @@ -196,7 +196,10 @@ extern int CIFSSMBMkDir(const int xid, struct cifsTconInfo *tcon, extern int CIFSSMBRmDir(const int xid, struct cifsTconInfo *tcon, const char *name, const struct nls_table *nls_codepage, int remap_special_chars); - +extern int CIFSPOSIXDelFile(const int xid, struct cifsTconInfo *tcon, + const char *name, __u16 type, + const struct nls_table *nls_codepage, + int remap_special_chars); extern int CIFSSMBDelFile(const int xid, struct cifsTconInfo *tcon, const char *name, const struct nls_table *nls_codepage, @@ -205,8 +208,8 @@ extern int CIFSSMBRename(const int xid, struct cifsTconInfo *tcon, const char *fromName, const char *toName, const struct nls_table *nls_codepage, int remap_special_chars); -extern int CIFSSMBRenameOpenFile(const int xid,struct cifsTconInfo *pTcon, - int netfid, char * target_name, +extern int CIFSSMBRenameOpenFile(const int xid, struct cifsTconInfo *pTcon, + int netfid, char *target_name, const struct nls_table *nls_codepage, int remap_special_chars); extern int CIFSCreateHardLink(const int xid, @@ -217,7 +220,7 @@ extern int CIFSCreateHardLink(const int xid, extern int CIFSUnixCreateHardLink(const int xid, struct cifsTconInfo *tcon, const char *fromName, const char *toName, - const struct nls_table *nls_codepage, + const struct nls_table *nls_codepage, int remap_special_chars); extern int CIFSUnixCreateSymLink(const int xid, struct cifsTconInfo *tcon, @@ -228,7 +231,7 @@ extern int CIFSSMBUnixQuerySymLink(const int xid, const unsigned char *searchName, char *syminfo, const int buflen, const struct nls_table *nls_codepage); -extern int CIFSSMBQueryReparseLinkInfo(const int xid, +extern int CIFSSMBQueryReparseLinkInfo(const int xid, struct cifsTconInfo *tcon, const unsigned char *searchName, char *symlinkinfo, const int buflen, __u16 fid, @@ -244,35 +247,35 @@ extern int SMBLegacyOpen(const int xid, struct cifsTconInfo *tcon, const int access_flags, const int omode, __u16 * netfid, int *pOplock, FILE_ALL_INFO *, const struct nls_table *nls_codepage, int remap); -extern int CIFSPOSIXCreate(const int xid, struct cifsTconInfo *tcon, +extern int CIFSPOSIXCreate(const int xid, struct cifsTconInfo *tcon, u32 posix_flags, __u64 mode, __u16 * netfid, FILE_UNIX_BASIC_INFO *pRetData, __u32 *pOplock, const char *name, - const struct nls_table *nls_codepage, int remap); + const struct nls_table *nls_codepage, int remap); extern int CIFSSMBClose(const int xid, struct cifsTconInfo *tcon, const int smb_file_id); extern int CIFSSMBRead(const int xid, struct cifsTconInfo *tcon, - const int netfid, unsigned int count, - const __u64 lseek, unsigned int *nbytes, char **buf, - int * return_buf_type); + const int netfid, unsigned int count, + const __u64 lseek, unsigned int *nbytes, char **buf, + int *return_buf_type); extern int CIFSSMBWrite(const int xid, struct cifsTconInfo *tcon, const int netfid, const unsigned int count, const __u64 lseek, unsigned int *nbytes, - const char *buf, const char __user *ubuf, + const char *buf, const char __user *ubuf, const int long_op); extern int CIFSSMBWrite2(const int xid, struct cifsTconInfo *tcon, const int netfid, const unsigned int count, - const __u64 offset, unsigned int *nbytes, + const __u64 offset, unsigned int *nbytes, struct kvec *iov, const int nvec, const int long_op); extern int CIFSGetSrvInodeNumber(const int xid, struct cifsTconInfo *tcon, const unsigned char *searchName, __u64 * inode_number, - const struct nls_table *nls_codepage, + const struct nls_table *nls_codepage, int remap_special_chars); extern int cifs_convertUCSpath(char *target, const __le16 *source, int maxlen, - const struct nls_table * codepage); -extern int cifsConvertToUCS(__le16 * target, const char *source, int maxlen, - const struct nls_table * cp, int mapChars); + const struct nls_table *codepage); +extern int cifsConvertToUCS(__le16 *target, const char *source, int maxlen, + const struct nls_table *cp, int mapChars); extern int CIFSSMBLock(const int xid, struct cifsTconInfo *tcon, const __u16 netfid, const __u64 len, @@ -281,7 +284,7 @@ extern int CIFSSMBLock(const int xid, struct cifsTconInfo *tcon, const int waitFlag); extern int CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon, const __u16 smb_file_id, const int get_flag, - const __u64 len, struct file_lock *, + const __u64 len, struct file_lock *, const __u16 lock_type, const int waitFlag); extern int CIFSSMBTDis(const int xid, struct cifsTconInfo *tcon); extern int CIFSSMBLogoff(const int xid, struct cifsSesInfo *ses); @@ -291,54 +294,56 @@ extern void sesInfoFree(struct cifsSesInfo *); extern struct cifsTconInfo *tconInfoAlloc(void); extern void tconInfoFree(struct cifsTconInfo *); -extern int cifs_sign_smb(struct smb_hdr *, struct TCP_Server_Info *,__u32 *); +extern int cifs_sign_smb(struct smb_hdr *, struct TCP_Server_Info *, __u32 *); extern int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *, __u32 *); -extern int cifs_verify_signature(struct smb_hdr *, const char * mac_key, - __u32 expected_sequence_number); -extern int cifs_calculate_mac_key(char * key,const char * rn,const char * pass); -extern int CalcNTLMv2_partial_mac_key(struct cifsSesInfo *, +extern int cifs_verify_signature(struct smb_hdr *, + const struct mac_key *mac_key, + __u32 expected_sequence_number); +extern int cifs_calculate_mac_key(struct mac_key *key, const char *rn, + const char *pass); +extern int CalcNTLMv2_partial_mac_key(struct cifsSesInfo *, const struct nls_table *); extern void CalcNTLMv2_response(const struct cifsSesInfo *, char * ); -extern void setup_ntlmv2_rsp(struct cifsSesInfo *, char *, +extern void setup_ntlmv2_rsp(struct cifsSesInfo *, char *, const struct nls_table *); #ifdef CONFIG_CIFS_WEAK_PW_HASH -extern void calc_lanman_hash(struct cifsSesInfo * ses, char * lnm_session_key); +extern void calc_lanman_hash(struct cifsSesInfo *ses, char *lnm_session_key); #endif /* CIFS_WEAK_PW_HASH */ extern int CIFSSMBCopy(int xid, struct cifsTconInfo *source_tcon, const char *fromName, const __u16 target_tid, const char *toName, const int flags, - const struct nls_table *nls_codepage, + const struct nls_table *nls_codepage, int remap_special_chars); -extern int CIFSSMBNotify(const int xid, struct cifsTconInfo *tcon, - const int notify_subdirs,const __u16 netfid, - __u32 filter, struct file * file, int multishot, +extern int CIFSSMBNotify(const int xid, struct cifsTconInfo *tcon, + const int notify_subdirs, const __u16 netfid, + __u32 filter, struct file *file, int multishot, const struct nls_table *nls_codepage); extern ssize_t CIFSSMBQAllEAs(const int xid, struct cifsTconInfo *tcon, - const unsigned char *searchName, char * EAData, + const unsigned char *searchName, char *EAData, size_t bufsize, const struct nls_table *nls_codepage, int remap_special_chars); -extern ssize_t CIFSSMBQueryEA(const int xid,struct cifsTconInfo * tcon, - const unsigned char * searchName,const unsigned char * ea_name, - unsigned char * ea_value, size_t buf_size, +extern ssize_t CIFSSMBQueryEA(const int xid, struct cifsTconInfo *tcon, + const unsigned char *searchName, const unsigned char *ea_name, + unsigned char *ea_value, size_t buf_size, const struct nls_table *nls_codepage, int remap_special_chars); -extern int CIFSSMBSetEA(const int xid, struct cifsTconInfo *tcon, - const char *fileName, const char * ea_name, - const void * ea_value, const __u16 ea_value_len, +extern int CIFSSMBSetEA(const int xid, struct cifsTconInfo *tcon, + const char *fileName, const char *ea_name, + const void *ea_value, const __u16 ea_value_len, const struct nls_table *nls_codepage, int remap_special_chars); extern int CIFSSMBGetCIFSACL(const int xid, struct cifsTconInfo *tcon, __u16 fid, char *acl_inf, const int buflen, const int acl_type /* ACCESS vs. DEFAULT */); extern int CIFSSMBGetPosixACL(const int xid, struct cifsTconInfo *tcon, const unsigned char *searchName, - char *acl_inf, const int buflen,const int acl_type, + char *acl_inf, const int buflen, const int acl_type, const struct nls_table *nls_codepage, int remap_special_chars); extern int CIFSSMBSetPosixACL(const int xid, struct cifsTconInfo *tcon, const unsigned char *fileName, const char *local_acl, const int buflen, const int acl_type, const struct nls_table *nls_codepage, int remap_special_chars); extern int CIFSGetExtAttr(const int xid, struct cifsTconInfo *tcon, - const int netfid, __u64 * pExtAttrBits, __u64 *pMask); + const int netfid, __u64 * pExtAttrBits, __u64 *pMask); #endif /* _CIFSPROTO_H */ diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 57419a176688..8eb102f940d4 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -48,7 +48,7 @@ static struct { {LANMAN_PROT, "\2LM1.2X002"}, {LANMAN2_PROT, "\2LANMAN2.1"}, #endif /* weak password hashing for legacy clients */ - {CIFS_PROT, "\2NT LM 0.12"}, + {CIFS_PROT, "\2NT LM 0.12"}, {POSIX_PROT, "\2POSIX 2"}, {BAD_PROT, "\2"} }; @@ -61,7 +61,7 @@ static struct { {LANMAN_PROT, "\2LM1.2X002"}, {LANMAN2_PROT, "\2LANMAN2.1"}, #endif /* weak password hashing for legacy clients */ - {CIFS_PROT, "\2NT LM 0.12"}, + {CIFS_PROT, "\2NT LM 0.12"}, {BAD_PROT, "\2"} }; #endif @@ -84,17 +84,17 @@ static struct { /* Mark as invalid, all open files on tree connections since they were closed when session to server was lost */ -static void mark_open_files_invalid(struct cifsTconInfo * pTcon) +static void mark_open_files_invalid(struct cifsTconInfo *pTcon) { struct cifsFileInfo *open_file = NULL; - struct list_head * tmp; - struct list_head * tmp1; + struct list_head *tmp; + struct list_head *tmp1; /* list all files open on tree connection and mark them invalid */ write_lock(&GlobalSMBSeslock); list_for_each_safe(tmp, tmp1, &pTcon->openFileList) { - open_file = list_entry(tmp,struct cifsFileInfo, tlist); - if(open_file) { + open_file = list_entry(tmp, struct cifsFileInfo, tlist); + if (open_file) { open_file->invalidHandle = TRUE; } } @@ -113,75 +113,78 @@ small_smb_init(int smb_command, int wct, struct cifsTconInfo *tcon, /* SMBs NegProt, SessSetup, uLogoff do not have tcon yet so check for tcp and smb session status done differently for those three - in the calling routine */ - if(tcon) { - if(tcon->tidStatus == CifsExiting) { + if (tcon) { + if (tcon->tidStatus == CifsExiting) { /* only tree disconnect, open, and write, (and ulogoff which does not have tcon) are allowed as we start force umount */ - if((smb_command != SMB_COM_WRITE_ANDX) && - (smb_command != SMB_COM_OPEN_ANDX) && + if ((smb_command != SMB_COM_WRITE_ANDX) && + (smb_command != SMB_COM_OPEN_ANDX) && (smb_command != SMB_COM_TREE_DISCONNECT)) { - cFYI(1,("can not send cmd %d while umounting", + cFYI(1, ("can not send cmd %d while umounting", smb_command)); return -ENODEV; } } - if((tcon->ses) && (tcon->ses->status != CifsExiting) && - (tcon->ses->server)){ + if ((tcon->ses) && (tcon->ses->status != CifsExiting) && + (tcon->ses->server)) { struct nls_table *nls_codepage; - /* Give Demultiplex thread up to 10 seconds to + /* Give Demultiplex thread up to 10 seconds to reconnect, should be greater than cifs socket timeout which is 7 seconds */ - while(tcon->ses->server->tcpStatus == CifsNeedReconnect) { + while (tcon->ses->server->tcpStatus == + CifsNeedReconnect) { wait_event_interruptible_timeout(tcon->ses->server->response_q, - (tcon->ses->server->tcpStatus == CifsGood), 10 * HZ); - if(tcon->ses->server->tcpStatus == CifsNeedReconnect) { + (tcon->ses->server->tcpStatus == + CifsGood), 10 * HZ); + if (tcon->ses->server->tcpStatus == + CifsNeedReconnect) { /* on "soft" mounts we wait once */ - if((tcon->retry == FALSE) || + if ((tcon->retry == FALSE) || (tcon->ses->status == CifsExiting)) { - cFYI(1,("gave up waiting on reconnect in smb_init")); + cFYI(1, ("gave up waiting on " + "reconnect in smb_init")); return -EHOSTDOWN; } /* else "hard" mount - keep retrying until process is killed or server comes back on-line */ } else /* TCP session is reestablished now */ break; - } - + nls_codepage = load_nls_default(); /* need to prevent multiple threads trying to simultaneously reconnect the same SMB session */ down(&tcon->ses->sesSem); - if(tcon->ses->status == CifsNeedReconnect) - rc = cifs_setup_session(0, tcon->ses, + if (tcon->ses->status == CifsNeedReconnect) + rc = cifs_setup_session(0, tcon->ses, nls_codepage); - if(!rc && (tcon->tidStatus == CifsNeedReconnect)) { + if (!rc && (tcon->tidStatus == CifsNeedReconnect)) { mark_open_files_invalid(tcon); - rc = CIFSTCon(0, tcon->ses, tcon->treeName, + rc = CIFSTCon(0, tcon->ses, tcon->treeName, tcon, nls_codepage); up(&tcon->ses->sesSem); /* tell server which Unix caps we support */ if (tcon->ses->capabilities & CAP_UNIX) reset_cifs_unix_caps(0 /* no xid */, - tcon, + tcon, NULL /* we do not know sb */, - NULL /* no vol info */); + NULL /* no vol info */); /* BB FIXME add code to check if wsize needs update due to negotiated smb buffer size shrinking */ - if(rc == 0) + if (rc == 0) atomic_inc(&tconInfoReconnectCount); cFYI(1, ("reconnect tcon rc = %d", rc)); - /* Removed call to reopen open files here - - it is safer (and faster) to reopen files + /* Removed call to reopen open files here. + It is safer (and faster) to reopen files one at a time as needed in read and write */ - /* Check if handle based operation so we + /* Check if handle based operation so we know whether we can continue or not without returning to caller to reset file handle */ - switch(smb_command) { + switch (smb_command) { case SMB_COM_READ_ANDX: case SMB_COM_WRITE_ANDX: case SMB_COM_CLOSE: @@ -200,7 +203,7 @@ small_smb_init(int smb_command, int wct, struct cifsTconInfo *tcon, return -EIO; } } - if(rc) + if (rc) return rc; *request_buf = cifs_small_buf_get(); @@ -209,23 +212,24 @@ small_smb_init(int smb_command, int wct, struct cifsTconInfo *tcon, return -ENOMEM; } - header_assemble((struct smb_hdr *) *request_buf, smb_command, tcon,wct); + header_assemble((struct smb_hdr *) *request_buf, smb_command, + tcon, wct); - if(tcon != NULL) - cifs_stats_inc(&tcon->num_smbs_sent); + if (tcon != NULL) + cifs_stats_inc(&tcon->num_smbs_sent); return rc; } int -small_smb_init_no_tc(const int smb_command, const int wct, +small_smb_init_no_tc(const int smb_command, const int wct, struct cifsSesInfo *ses, void **request_buf) { int rc; - struct smb_hdr * buffer; + struct smb_hdr *buffer; rc = small_smb_init(smb_command, wct, NULL, request_buf); - if(rc) + if (rc) return rc; buffer = (struct smb_hdr *)*request_buf; @@ -237,7 +241,7 @@ small_smb_init_no_tc(const int smb_command, const int wct, /* uid, tid can stay at zero as set in header assemble */ - /* BB add support for turning on the signing when + /* BB add support for turning on the signing when this function is used after 1st of session setup requests */ return rc; @@ -254,52 +258,53 @@ smb_init(int smb_command, int wct, struct cifsTconInfo *tcon, /* SMBs NegProt, SessSetup, uLogoff do not have tcon yet so check for tcp and smb session status done differently for those three - in the calling routine */ - if(tcon) { - if(tcon->tidStatus == CifsExiting) { + if (tcon) { + if (tcon->tidStatus == CifsExiting) { /* only tree disconnect, open, and write, (and ulogoff which does not have tcon) are allowed as we start force umount */ - if((smb_command != SMB_COM_WRITE_ANDX) && + if ((smb_command != SMB_COM_WRITE_ANDX) && (smb_command != SMB_COM_OPEN_ANDX) && (smb_command != SMB_COM_TREE_DISCONNECT)) { - cFYI(1,("can not send cmd %d while umounting", + cFYI(1, ("can not send cmd %d while umounting", smb_command)); return -ENODEV; } } - if((tcon->ses) && (tcon->ses->status != CifsExiting) && - (tcon->ses->server)){ + if ((tcon->ses) && (tcon->ses->status != CifsExiting) && + (tcon->ses->server)) { struct nls_table *nls_codepage; /* Give Demultiplex thread up to 10 seconds to reconnect, should be greater than cifs socket timeout which is 7 seconds */ - while(tcon->ses->server->tcpStatus == CifsNeedReconnect) { + while (tcon->ses->server->tcpStatus == + CifsNeedReconnect) { wait_event_interruptible_timeout(tcon->ses->server->response_q, - (tcon->ses->server->tcpStatus == CifsGood), 10 * HZ); - if(tcon->ses->server->tcpStatus == + (tcon->ses->server->tcpStatus == + CifsGood), 10 * HZ); + if (tcon->ses->server->tcpStatus == CifsNeedReconnect) { /* on "soft" mounts we wait once */ - if((tcon->retry == FALSE) || + if ((tcon->retry == FALSE) || (tcon->ses->status == CifsExiting)) { - cFYI(1,("gave up waiting on reconnect in smb_init")); + cFYI(1, ("gave up waiting on " + "reconnect in smb_init")); return -EHOSTDOWN; } /* else "hard" mount - keep retrying until process is killed or server comes on-line */ } else /* TCP session is reestablished now */ break; - } - nls_codepage = load_nls_default(); /* need to prevent multiple threads trying to simultaneously reconnect the same SMB session */ down(&tcon->ses->sesSem); - if(tcon->ses->status == CifsNeedReconnect) - rc = cifs_setup_session(0, tcon->ses, + if (tcon->ses->status == CifsNeedReconnect) + rc = cifs_setup_session(0, tcon->ses, nls_codepage); - if(!rc && (tcon->tidStatus == CifsNeedReconnect)) { + if (!rc && (tcon->tidStatus == CifsNeedReconnect)) { mark_open_files_invalid(tcon); rc = CIFSTCon(0, tcon->ses, tcon->treeName, tcon, nls_codepage); @@ -307,24 +312,24 @@ smb_init(int smb_command, int wct, struct cifsTconInfo *tcon, /* tell server which Unix caps we support */ if (tcon->ses->capabilities & CAP_UNIX) reset_cifs_unix_caps(0 /* no xid */, - tcon, + tcon, NULL /* do not know sb */, NULL /* no vol info */); /* BB FIXME add code to check if wsize needs update due to negotiated smb buffer size shrinking */ - if(rc == 0) + if (rc == 0) atomic_inc(&tconInfoReconnectCount); cFYI(1, ("reconnect tcon rc = %d", rc)); - /* Removed call to reopen open files here - - it is safer (and faster) to reopen files + /* Removed call to reopen open files here. + It is safer (and faster) to reopen files one at a time as needed in read and write */ - /* Check if handle based operation so we + /* Check if handle based operation so we know whether we can continue or not without returning to caller to reset file handle */ - switch(smb_command) { + switch (smb_command) { case SMB_COM_READ_ANDX: case SMB_COM_WRITE_ANDX: case SMB_COM_CLOSE: @@ -343,7 +348,7 @@ smb_init(int smb_command, int wct, struct cifsTconInfo *tcon, return -EIO; } } - if(rc) + if (rc) return rc; *request_buf = cifs_buf_get(); @@ -355,48 +360,48 @@ smb_init(int smb_command, int wct, struct cifsTconInfo *tcon, /* potential retries of smb operations it turns out we can determine */ /* from the mid flags when the request buffer can be resent without */ /* having to use a second distinct buffer for the response */ - if(response_buf) - *response_buf = *request_buf; + if (response_buf) + *response_buf = *request_buf; header_assemble((struct smb_hdr *) *request_buf, smb_command, tcon, wct /*wct */ ); - if(tcon != NULL) - cifs_stats_inc(&tcon->num_smbs_sent); + if (tcon != NULL) + cifs_stats_inc(&tcon->num_smbs_sent); return rc; } -static int validate_t2(struct smb_t2_rsp * pSMB) +static int validate_t2(struct smb_t2_rsp *pSMB) { int rc = -EINVAL; int total_size; - char * pBCC; + char *pBCC; /* check for plausible wct, bcc and t2 data and parm sizes */ /* check for parm and data offset going beyond end of smb */ - if(pSMB->hdr.WordCount >= 10) { - if((le16_to_cpu(pSMB->t2_rsp.ParameterOffset) <= 1024) && + if (pSMB->hdr.WordCount >= 10) { + if ((le16_to_cpu(pSMB->t2_rsp.ParameterOffset) <= 1024) && (le16_to_cpu(pSMB->t2_rsp.DataOffset) <= 1024)) { /* check that bcc is at least as big as parms + data */ /* check that bcc is less than negotiated smb buffer */ total_size = le16_to_cpu(pSMB->t2_rsp.ParameterCount); - if(total_size < 512) { - total_size+=le16_to_cpu(pSMB->t2_rsp.DataCount); + if (total_size < 512) { + total_size += + le16_to_cpu(pSMB->t2_rsp.DataCount); /* BCC le converted in SendReceive */ - pBCC = (pSMB->hdr.WordCount * 2) + + pBCC = (pSMB->hdr.WordCount * 2) + sizeof(struct smb_hdr) + (char *)pSMB; - if((total_size <= (*(u16 *)pBCC)) && - (total_size < + if ((total_size <= (*(u16 *)pBCC)) && + (total_size < CIFSMaxBufSize+MAX_CIFS_HDR_SIZE)) { return 0; } - } } } - cifs_dump_mem("Invalid transact2 SMB: ",(char *)pSMB, + cifs_dump_mem("Invalid transact2 SMB: ", (char *)pSMB, sizeof(struct smb_t2_rsp) + 16); return rc; } @@ -408,12 +413,12 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses) int rc = 0; int bytes_returned; int i; - struct TCP_Server_Info * server; + struct TCP_Server_Info *server; u16 count; unsigned int secFlags; u16 dialect; - if(ses->server) + if (ses->server) server = ses->server; else { rc = -EIO; @@ -425,20 +430,20 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses) return rc; /* if any of auth flags (ie not sign or seal) are overriden use them */ - if(ses->overrideSecFlg & (~(CIFSSEC_MUST_SIGN | CIFSSEC_MUST_SEAL))) - secFlags = ses->overrideSecFlg; + if (ses->overrideSecFlg & (~(CIFSSEC_MUST_SIGN | CIFSSEC_MUST_SEAL))) + secFlags = ses->overrideSecFlg; /* BB FIXME fix sign flags? */ else /* if override flags set only sign/seal OR them with global auth */ secFlags = extended_security | ses->overrideSecFlg; - cFYI(1,("secFlags 0x%x",secFlags)); + cFYI(1, ("secFlags 0x%x", secFlags)); pSMB->hdr.Mid = GetNextMid(server); pSMB->hdr.Flags2 |= (SMBFLG2_UNICODE | SMBFLG2_ERR_STATUS); if ((secFlags & CIFSSEC_MUST_KRB5) == CIFSSEC_MUST_KRB5) pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC; - + count = 0; - for(i=0;i<CIFS_NUM_PROT;i++) { + for (i = 0; i < CIFS_NUM_PROT; i++) { strncpy(pSMB->DialectsArray+count, protocols[i].name, 16); count += strlen(protocols[i].name) + 1; /* null at end of source and target buffers anyway */ @@ -448,26 +453,26 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses) rc = SendReceive(xid, ses, (struct smb_hdr *) pSMB, (struct smb_hdr *) pSMBr, &bytes_returned, 0); - if (rc != 0) + if (rc != 0) goto neg_err_exit; dialect = le16_to_cpu(pSMBr->DialectIndex); - cFYI(1,("Dialect: %d", dialect)); + cFYI(1, ("Dialect: %d", dialect)); /* Check wct = 1 error case */ - if((pSMBr->hdr.WordCount < 13) || (dialect == BAD_PROT)) { + if ((pSMBr->hdr.WordCount < 13) || (dialect == BAD_PROT)) { /* core returns wct = 1, but we do not ask for core - otherwise - small wct just comes when dialect index is -1 indicating we + small wct just comes when dialect index is -1 indicating we could not negotiate a common dialect */ rc = -EOPNOTSUPP; goto neg_err_exit; -#ifdef CONFIG_CIFS_WEAK_PW_HASH - } else if((pSMBr->hdr.WordCount == 13) +#ifdef CONFIG_CIFS_WEAK_PW_HASH + } else if ((pSMBr->hdr.WordCount == 13) && ((dialect == LANMAN_PROT) || (dialect == LANMAN2_PROT))) { __s16 tmp; - struct lanman_neg_rsp * rsp = (struct lanman_neg_rsp *)pSMBr; + struct lanman_neg_rsp *rsp = (struct lanman_neg_rsp *)pSMBr; - if((secFlags & CIFSSEC_MAY_LANMAN) || + if ((secFlags & CIFSSEC_MAY_LANMAN) || (secFlags & CIFSSEC_MAY_PLNTXT)) server->secType = LANMAN; else { @@ -475,7 +480,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses) " in /proc/fs/cifs/SecurityFlags")); rc = -EOPNOTSUPP; goto neg_err_exit; - } + } server->secMode = (__u8)le16_to_cpu(rsp->SecurityMode); server->maxReq = le16_to_cpu(rsp->MaxMpxCount); server->maxBuf = min((__u32)le16_to_cpu(rsp->MaxBufSize), @@ -483,7 +488,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses) GETU32(server->sessid) = le32_to_cpu(rsp->SessionKey); /* even though we do not use raw we might as well set this accurately, in case we ever find a need for it */ - if((le16_to_cpu(rsp->RawMode) & RAW_ENABLE) == RAW_ENABLE) { + if ((le16_to_cpu(rsp->RawMode) & RAW_ENABLE) == RAW_ENABLE) { server->maxRw = 0xFF00; server->capabilities = CAP_MPX_MODE | CAP_RAW_MODE; } else { @@ -504,29 +509,29 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses) utc = CURRENT_TIME; ts = cnvrtDosUnixTm(le16_to_cpu(rsp->SrvTime.Date), le16_to_cpu(rsp->SrvTime.Time)); - cFYI(1,("SrvTime: %d sec since 1970 (utc: %d) diff: %d", - (int)ts.tv_sec, (int)utc.tv_sec, + cFYI(1, ("SrvTime %d sec since 1970 (utc: %d) diff: %d", + (int)ts.tv_sec, (int)utc.tv_sec, (int)(utc.tv_sec - ts.tv_sec))); val = (int)(utc.tv_sec - ts.tv_sec); seconds = val < 0 ? -val : val; result = (seconds / MIN_TZ_ADJ) * MIN_TZ_ADJ; remain = seconds % MIN_TZ_ADJ; - if(remain >= (MIN_TZ_ADJ / 2)) + if (remain >= (MIN_TZ_ADJ / 2)) result += MIN_TZ_ADJ; - if(val < 0) + if (val < 0) result = - result; server->timeAdj = result; } else { server->timeAdj = (int)tmp; server->timeAdj *= 60; /* also in seconds */ } - cFYI(1,("server->timeAdj: %d seconds", server->timeAdj)); + cFYI(1, ("server->timeAdj: %d seconds", server->timeAdj)); /* BB get server time for time conversions and add - code to use it and timezone since this is not UTC */ + code to use it and timezone since this is not UTC */ - if (rsp->EncryptionKeyLength == + if (rsp->EncryptionKeyLength == cpu_to_le16(CIFS_CRYPTO_KEY_SIZE)) { memcpy(server->cryptKey, rsp->EncryptionKey, CIFS_CRYPTO_KEY_SIZE); @@ -535,39 +540,39 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses) goto neg_err_exit; } - cFYI(1,("LANMAN negotiated")); + cFYI(1, ("LANMAN negotiated")); /* we will not end up setting signing flags - as no signing was in LANMAN and server did not return the flags on */ goto signing_check; #else /* weak security disabled */ - } else if(pSMBr->hdr.WordCount == 13) { - cERROR(1,("mount failed, cifs module not built " + } else if (pSMBr->hdr.WordCount == 13) { + cERROR(1, ("mount failed, cifs module not built " "with CIFS_WEAK_PW_HASH support")); rc = -EOPNOTSUPP; #endif /* WEAK_PW_HASH */ goto neg_err_exit; - } else if(pSMBr->hdr.WordCount != 17) { + } else if (pSMBr->hdr.WordCount != 17) { /* unknown wct */ rc = -EOPNOTSUPP; goto neg_err_exit; } /* else wct == 17 NTLM */ server->secMode = pSMBr->SecurityMode; - if((server->secMode & SECMODE_USER) == 0) - cFYI(1,("share mode security")); + if ((server->secMode & SECMODE_USER) == 0) + cFYI(1, ("share mode security")); - if((server->secMode & SECMODE_PW_ENCRYPT) == 0) + if ((server->secMode & SECMODE_PW_ENCRYPT) == 0) #ifdef CONFIG_CIFS_WEAK_PW_HASH if ((secFlags & CIFSSEC_MAY_PLNTXT) == 0) #endif /* CIFS_WEAK_PW_HASH */ - cERROR(1,("Server requests plain text password" + cERROR(1, ("Server requests plain text password" " but client support disabled")); - if((secFlags & CIFSSEC_MUST_NTLMV2) == CIFSSEC_MUST_NTLMV2) + if ((secFlags & CIFSSEC_MUST_NTLMV2) == CIFSSEC_MUST_NTLMV2) server->secType = NTLMv2; - else if(secFlags & CIFSSEC_MAY_NTLM) + else if (secFlags & CIFSSEC_MAY_NTLM) server->secType = NTLM; - else if(secFlags & CIFSSEC_MAY_NTLMV2) + else if (secFlags & CIFSSEC_MAY_NTLMV2) server->secType = NTLMv2; /* else krb5 ... any others ... */ @@ -596,7 +601,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses) /* BB might be helpful to save off the domain of server here */ - if ((pSMBr->hdr.Flags2 & SMBFLG2_EXT_SEC) && + if ((pSMBr->hdr.Flags2 & SMBFLG2_EXT_SEC) && (server->capabilities & CAP_EXTENDED_SECURITY)) { count = pSMBr->ByteCount; if (count < 16) @@ -620,7 +625,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses) SecurityBlob, count - 16, &server->secType); - if(rc == 1) { + if (rc == 1) { /* BB Need to fill struct for sessetup here */ rc = -EOPNOTSUPP; } else { @@ -633,26 +638,37 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses) #ifdef CONFIG_CIFS_WEAK_PW_HASH signing_check: #endif - if(sign_CIFS_PDUs == FALSE) { - if(server->secMode & SECMODE_SIGN_REQUIRED) - cERROR(1,("Server requires " - "/proc/fs/cifs/PacketSigningEnabled to be on")); - server->secMode &= + if ((secFlags & CIFSSEC_MAY_SIGN) == 0) { + /* MUST_SIGN already includes the MAY_SIGN FLAG + so if this is zero it means that signing is disabled */ + cFYI(1, ("Signing disabled")); + if (server->secMode & SECMODE_SIGN_REQUIRED) + cERROR(1, ("Server requires " + "/proc/fs/cifs/PacketSigningEnabled " + "to be on")); + server->secMode &= ~(SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED); - } else if(sign_CIFS_PDUs == 1) { - if((server->secMode & SECMODE_SIGN_REQUIRED) == 0) - server->secMode &= - ~(SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED); - } else if(sign_CIFS_PDUs == 2) { - if((server->secMode & + } else if ((secFlags & CIFSSEC_MUST_SIGN) == CIFSSEC_MUST_SIGN) { + /* signing required */ + cFYI(1, ("Must sign - secFlags 0x%x", secFlags)); + if ((server->secMode & (SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED)) == 0) { - cERROR(1,("signing required but server lacks support")); - } + cERROR(1, + ("signing required but server lacks support")); + rc = -EOPNOTSUPP; + } else + server->secMode |= SECMODE_SIGN_REQUIRED; + } else { + /* signing optional ie CIFSSEC_MAY_SIGN */ + if ((server->secMode & SECMODE_SIGN_REQUIRED) == 0) + server->secMode &= + ~(SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED); } -neg_err_exit: + +neg_err_exit: cifs_buf_release(pSMB); - cFYI(1,("negprot rc %d",rc)); + cFYI(1, ("negprot rc %d", rc)); return rc; } @@ -669,7 +685,7 @@ CIFSSMBTDis(const int xid, struct cifsTconInfo *tcon) * If last user of the connection and * connection alive - disconnect it * If this is the last connection on the server session disconnect it - * (and inside session disconnect we should check if tcp socket needs + * (and inside session disconnect we should check if tcp socket needs * to be freed and kernel thread woken up). */ if (tcon) @@ -683,18 +699,18 @@ CIFSSMBTDis(const int xid, struct cifsTconInfo *tcon) return -EBUSY; } - /* No need to return error on this operation if tid invalidated and + /* No need to return error on this operation if tid invalidated and closed on server already e.g. due to tcp session crashing */ - if(tcon->tidStatus == CifsNeedReconnect) { + if (tcon->tidStatus == CifsNeedReconnect) { up(&tcon->tconSem); - return 0; + return 0; } - if((tcon->ses == NULL) || (tcon->ses->server == NULL)) { + if ((tcon->ses == NULL) || (tcon->ses->server == NULL)) { up(&tcon->tconSem); return -EIO; } - rc = small_smb_init(SMB_COM_TREE_DISCONNECT, 0, tcon, + rc = small_smb_init(SMB_COM_TREE_DISCONNECT, 0, tcon, (void **)&smb_buffer); if (rc) { up(&tcon->tconSem); @@ -711,7 +727,7 @@ CIFSSMBTDis(const int xid, struct cifsTconInfo *tcon) cifs_small_buf_release(smb_buffer); up(&tcon->tconSem); - /* No need to return error on this operation if tid invalidated and + /* No need to return error on this operation if tid invalidated and closed on server already e.g. due to tcp session crashing */ if (rc == -EAGAIN) rc = 0; @@ -745,11 +761,11 @@ CIFSSMBLogoff(const int xid, struct cifsSesInfo *ses) } smb_buffer_response = (struct smb_hdr *)pSMB; /* BB removeme BB */ - - if(ses->server) { + + if (ses->server) { pSMB->hdr.Mid = GetNextMid(ses->server); - if(ses->server->secMode & + if (ses->server->secMode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) pSMB->hdr.Flags2 |= SMBFLG2_SECURITY_SIGNATURE; } @@ -772,7 +788,7 @@ CIFSSMBLogoff(const int xid, struct cifsSesInfo *ses) cifs_small_buf_release(pSMB); /* if session dead then we do not need to do ulogoff, - since server closed smb session, no sense reporting + since server closed smb session, no sense reporting error */ if (rc == -EAGAIN) rc = 0; @@ -780,6 +796,82 @@ CIFSSMBLogoff(const int xid, struct cifsSesInfo *ses) } int +CIFSPOSIXDelFile(const int xid, struct cifsTconInfo *tcon, const char *fileName, + __u16 type, const struct nls_table *nls_codepage, int remap) +{ + TRANSACTION2_SPI_REQ *pSMB = NULL; + TRANSACTION2_SPI_RSP *pSMBr = NULL; + struct unlink_psx_rq *pRqD; + int name_len; + int rc = 0; + int bytes_returned = 0; + __u16 params, param_offset, offset, byte_count; + + cFYI(1, ("In POSIX delete")); +PsxDelete: + rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, + (void **) &pSMBr); + if (rc) + return rc; + + if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { + name_len = + cifsConvertToUCS((__le16 *) pSMB->FileName, fileName, + PATH_MAX, nls_codepage, remap); + name_len++; /* trailing null */ + name_len *= 2; + } else { /* BB add path length overrun check */ + name_len = strnlen(fileName, PATH_MAX); + name_len++; /* trailing null */ + strncpy(pSMB->FileName, fileName, name_len); + } + + params = 6 + name_len; + pSMB->MaxParameterCount = cpu_to_le16(2); + pSMB->MaxDataCount = 0; /* BB double check this with jra */ + pSMB->MaxSetupCount = 0; + pSMB->Reserved = 0; + pSMB->Flags = 0; + pSMB->Timeout = 0; + pSMB->Reserved2 = 0; + param_offset = offsetof(struct smb_com_transaction2_spi_req, + InformationLevel) - 4; + offset = param_offset + params; + + /* Setup pointer to Request Data (inode type) */ + pRqD = (struct unlink_psx_rq *)(((char *)&pSMB->hdr.Protocol) + offset); + pRqD->type = cpu_to_le16(type); + pSMB->ParameterOffset = cpu_to_le16(param_offset); + pSMB->DataOffset = cpu_to_le16(offset); + pSMB->SetupCount = 1; + pSMB->Reserved3 = 0; + pSMB->SubCommand = cpu_to_le16(TRANS2_SET_PATH_INFORMATION); + byte_count = 3 /* pad */ + params + sizeof(struct unlink_psx_rq); + + pSMB->DataCount = cpu_to_le16(sizeof(struct unlink_psx_rq)); + pSMB->TotalDataCount = cpu_to_le16(sizeof(struct unlink_psx_rq)); + pSMB->ParameterCount = cpu_to_le16(params); + pSMB->TotalParameterCount = pSMB->ParameterCount; + pSMB->InformationLevel = cpu_to_le16(SMB_POSIX_UNLINK); + pSMB->Reserved4 = 0; + pSMB->hdr.smb_buf_length += byte_count; + pSMB->ByteCount = cpu_to_le16(byte_count); + rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, + (struct smb_hdr *) pSMBr, &bytes_returned, 0); + if (rc) { + cFYI(1, ("Posix delete returned %d", rc)); + } + cifs_buf_release(pSMB); + + cifs_stats_inc(&tcon->num_deletes); + + if (rc == -EAGAIN) + goto PsxDelete; + + return rc; +} + +int CIFSSMBDelFile(const int xid, struct cifsTconInfo *tcon, const char *fileName, const struct nls_table *nls_codepage, int remap) { @@ -797,7 +889,7 @@ DelFileRetry: if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = - cifsConvertToUCS((__le16 *) pSMB->fileName, fileName, + cifsConvertToUCS((__le16 *) pSMB->fileName, fileName, PATH_MAX, nls_codepage, remap); name_len++; /* trailing null */ name_len *= 2; @@ -816,7 +908,7 @@ DelFileRetry: cifs_stats_inc(&tcon->num_deletes); if (rc) { cFYI(1, ("Error in RMFile = %d", rc)); - } + } cifs_buf_release(pSMB); if (rc == -EAGAIN) @@ -826,7 +918,7 @@ DelFileRetry: } int -CIFSSMBRmDir(const int xid, struct cifsTconInfo *tcon, const char *dirName, +CIFSSMBRmDir(const int xid, struct cifsTconInfo *tcon, const char *dirName, const struct nls_table *nls_codepage, int remap) { DELETE_DIRECTORY_REQ *pSMB = NULL; @@ -887,7 +979,7 @@ MkDirRetry: return rc; if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { - name_len = cifsConvertToUCS((__le16 *) pSMB->DirName, name, + name_len = cifsConvertToUCS((__le16 *) pSMB->DirName, name, PATH_MAX, nls_codepage, remap); name_len++; /* trailing null */ name_len *= 2; @@ -916,7 +1008,7 @@ MkDirRetry: int CIFSPOSIXCreate(const int xid, struct cifsTconInfo *tcon, __u32 posix_flags, __u64 mode, __u16 * netfid, FILE_UNIX_BASIC_INFO *pRetData, - __u32 *pOplock, const char *name, + __u32 *pOplock, const char *name, const struct nls_table *nls_codepage, int remap) { TRANSACTION2_SPI_REQ *pSMB = NULL; @@ -924,7 +1016,6 @@ CIFSPOSIXCreate(const int xid, struct cifsTconInfo *tcon, __u32 posix_flags, int name_len; int rc = 0; int bytes_returned = 0; - char *data_offset; __u16 params, param_offset, offset, byte_count, count; OPEN_PSX_REQ * pdata; OPEN_PSX_RSP * psx_rsp; @@ -958,13 +1049,12 @@ PsxCreat: pSMB->Timeout = 0; pSMB->Reserved2 = 0; param_offset = offsetof(struct smb_com_transaction2_spi_req, - InformationLevel) - 4; + InformationLevel) - 4; offset = param_offset + params; - data_offset = (char *) (&pSMB->hdr.Protocol) + offset; pdata = (OPEN_PSX_REQ *)(((char *)&pSMB->hdr.Protocol) + offset); pdata->Level = SMB_QUERY_FILE_UNIX_BASIC; pdata->Permissions = cpu_to_le64(mode); - pdata->PosixOpenFlags = cpu_to_le32(posix_flags); + pdata->PosixOpenFlags = cpu_to_le32(posix_flags); pdata->OpenFlags = cpu_to_le32(*pOplock); pSMB->ParameterOffset = cpu_to_le16(param_offset); pSMB->DataOffset = cpu_to_le16(offset); @@ -979,7 +1069,7 @@ PsxCreat: pSMB->TotalParameterCount = pSMB->ParameterCount; pSMB->InformationLevel = cpu_to_le16(SMB_POSIX_OPEN); pSMB->Reserved4 = 0; - pSMB->hdr.smb_buf_length += byte_count; + pSMB->hdr.smb_buf_length += byte_count; pSMB->ByteCount = cpu_to_le16(byte_count); rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, (struct smb_hdr *) pSMBr, &bytes_returned, 0); @@ -988,7 +1078,7 @@ PsxCreat: goto psx_create_err; } - cFYI(1,("copying inode info")); + cFYI(1, ("copying inode info")); rc = validate_t2((struct smb_t2_rsp *)pSMBr); if (rc || (pSMBr->ByteCount < sizeof(OPEN_PSX_RSP))) { @@ -997,34 +1087,33 @@ PsxCreat: } /* copy return information to pRetData */ - psx_rsp = (OPEN_PSX_RSP *)((char *) &pSMBr->hdr.Protocol + psx_rsp = (OPEN_PSX_RSP *)((char *) &pSMBr->hdr.Protocol + le16_to_cpu(pSMBr->t2.DataOffset)); - + *pOplock = le16_to_cpu(psx_rsp->OplockFlags); - if(netfid) + if (netfid) *netfid = psx_rsp->Fid; /* cifs fid stays in le */ /* Let caller know file was created so we can set the mode. */ /* Do we care about the CreateAction in any other cases? */ - if(cpu_to_le32(FILE_CREATE) == psx_rsp->CreateAction) + if (cpu_to_le32(FILE_CREATE) == psx_rsp->CreateAction) *pOplock |= CIFS_CREATE_ACTION; /* check to make sure response data is there */ - if(psx_rsp->ReturnedLevel != SMB_QUERY_FILE_UNIX_BASIC) { + if (psx_rsp->ReturnedLevel != SMB_QUERY_FILE_UNIX_BASIC) { pRetData->Type = -1; /* unknown */ #ifdef CONFIG_CIFS_DEBUG2 - cFYI(1,("unknown type")); + cFYI(1, ("unknown type")); #endif } else { - if(pSMBr->ByteCount < sizeof(OPEN_PSX_RSP) + if (pSMBr->ByteCount < sizeof(OPEN_PSX_RSP) + sizeof(FILE_UNIX_BASIC_INFO)) { - cERROR(1,("Open response data too small")); + cERROR(1, ("Open response data too small")); pRetData->Type = -1; goto psx_create_err; } - memcpy((char *) pRetData, + memcpy((char *) pRetData, (char *)psx_rsp + sizeof(OPEN_PSX_RSP), sizeof (FILE_UNIX_BASIC_INFO)); } - psx_create_err: cifs_buf_release(pSMB); @@ -1034,7 +1123,7 @@ psx_create_err: if (rc == -EAGAIN) goto PsxCreat; - return rc; + return rc; } static __u16 convert_disposition(int disposition) @@ -1061,7 +1150,7 @@ static __u16 convert_disposition(int disposition) ofun = SMBOPEN_OCREATE | SMBOPEN_OTRUNC; break; default: - cFYI(1,("unknown disposition %d",disposition)); + cFYI(1, ("unknown disposition %d", disposition)); ofun = SMBOPEN_OAPPEND; /* regular open */ } return ofun; @@ -1071,7 +1160,7 @@ int SMBLegacyOpen(const int xid, struct cifsTconInfo *tcon, const char *fileName, const int openDisposition, const int access_flags, const int create_options, __u16 * netfid, - int *pOplock, FILE_ALL_INFO * pfile_info, + int *pOplock, FILE_ALL_INFO * pfile_info, const struct nls_table *nls_codepage, int remap) { int rc = -EACCES; @@ -1113,16 +1202,16 @@ OldOpenRetry: 1 = write 2 = rw 3 = execute - */ + */ pSMB->Mode = cpu_to_le16(2); pSMB->Mode |= cpu_to_le16(0x40); /* deny none */ /* set file as system file if special file such as fifo and server expecting SFU style and no Unix extensions */ - if(create_options & CREATE_OPTION_SPECIAL) - pSMB->FileAttributes = cpu_to_le16(ATTR_SYSTEM); - else + if (create_options & CREATE_OPTION_SPECIAL) + pSMB->FileAttributes = cpu_to_le16(ATTR_SYSTEM); + else pSMB->FileAttributes = cpu_to_le16(0/*ATTR_NORMAL*/); /* BB FIXME */ /* if ((omode & S_IWUGO) == 0) @@ -1132,7 +1221,8 @@ OldOpenRetry: being created */ /* BB FIXME BB */ -/* pSMB->CreateOptions = cpu_to_le32(create_options & CREATE_OPTIONS_MASK); */ +/* pSMB->CreateOptions = cpu_to_le32(create_options & + CREATE_OPTIONS_MASK); */ /* BB FIXME END BB */ pSMB->Sattr = cpu_to_le16(ATTR_HIDDEN | ATTR_SYSTEM | ATTR_DIRECTORY); @@ -1143,7 +1233,7 @@ OldOpenRetry: pSMB->ByteCount = cpu_to_le16(count); /* long_op set to 1 to allow for oplock break timeouts */ rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, - (struct smb_hdr *) pSMBr, &bytes_returned, 1); + (struct smb_hdr *) pSMBr, &bytes_returned, 1); cifs_stats_inc(&tcon->num_opens); if (rc) { cFYI(1, ("Error in Open = %d", rc)); @@ -1156,17 +1246,17 @@ OldOpenRetry: /* Let caller know file was created so we can set the mode. */ /* Do we care about the CreateAction in any other cases? */ /* BB FIXME BB */ -/* if(cpu_to_le32(FILE_CREATE) == pSMBr->CreateAction) +/* if (cpu_to_le32(FILE_CREATE) == pSMBr->CreateAction) *pOplock |= CIFS_CREATE_ACTION; */ /* BB FIXME END */ - if(pfile_info) { + if (pfile_info) { pfile_info->CreationTime = 0; /* BB convert CreateTime*/ pfile_info->LastAccessTime = 0; /* BB fixme */ pfile_info->LastWriteTime = 0; /* BB fixme */ pfile_info->ChangeTime = 0; /* BB fixme */ pfile_info->Attributes = - cpu_to_le32(le16_to_cpu(pSMBr->FileAttributes)); + cpu_to_le32(le16_to_cpu(pSMBr->FileAttributes)); /* the file_info buf is endian converted by caller */ pfile_info->AllocationSize = cpu_to_le64(le32_to_cpu(pSMBr->EndOfFile)); @@ -1185,7 +1275,7 @@ int CIFSSMBOpen(const int xid, struct cifsTconInfo *tcon, const char *fileName, const int openDisposition, const int access_flags, const int create_options, __u16 * netfid, - int *pOplock, FILE_ALL_INFO * pfile_info, + int *pOplock, FILE_ALL_INFO * pfile_info, const struct nls_table *nls_codepage, int remap) { int rc = -EACCES; @@ -1228,7 +1318,7 @@ openRetry: /* set file as system file if special file such as fifo and server expecting SFU style and no Unix extensions */ - if(create_options & CREATE_OPTION_SPECIAL) + if (create_options & CREATE_OPTION_SPECIAL) pSMB->FileAttributes = cpu_to_le32(ATTR_SYSTEM); else pSMB->FileAttributes = cpu_to_le32(ATTR_NORMAL); @@ -1266,10 +1356,10 @@ openRetry: *netfid = pSMBr->Fid; /* cifs fid stays in le */ /* Let caller know file was created so we can set the mode. */ /* Do we care about the CreateAction in any other cases? */ - if(cpu_to_le32(FILE_CREATE) == pSMBr->CreateAction) - *pOplock |= CIFS_CREATE_ACTION; - if(pfile_info) { - memcpy((char *)pfile_info,(char *)&pSMBr->CreationTime, + if (cpu_to_le32(FILE_CREATE) == pSMBr->CreateAction) + *pOplock |= CIFS_CREATE_ACTION; + if (pfile_info) { + memcpy((char *)pfile_info, (char *)&pSMBr->CreationTime, 36 /* CreationTime to Attributes */); /* the file_info buf is endian converted by caller */ pfile_info->AllocationSize = pSMBr->AllocationSize; @@ -1285,10 +1375,9 @@ openRetry: } int -CIFSSMBRead(const int xid, struct cifsTconInfo *tcon, - const int netfid, const unsigned int count, - const __u64 lseek, unsigned int *nbytes, char **buf, - int * pbuf_type) +CIFSSMBRead(const int xid, struct cifsTconInfo *tcon, const int netfid, + const unsigned int count, const __u64 lseek, unsigned int *nbytes, + char **buf, int *pbuf_type) { int rc = -EACCES; READ_REQ *pSMB = NULL; @@ -1298,8 +1387,8 @@ CIFSSMBRead(const int xid, struct cifsTconInfo *tcon, int resp_buf_type = 0; struct kvec iov[1]; - cFYI(1,("Reading %d bytes on fid %d",count,netfid)); - if(tcon->ses->capabilities & CAP_LARGE_FILES) + cFYI(1, ("Reading %d bytes on fid %d", count, netfid)); + if (tcon->ses->capabilities & CAP_LARGE_FILES) wct = 12; else wct = 10; /* old style read */ @@ -1316,28 +1405,28 @@ CIFSSMBRead(const int xid, struct cifsTconInfo *tcon, pSMB->AndXCommand = 0xFF; /* none */ pSMB->Fid = netfid; pSMB->OffsetLow = cpu_to_le32(lseek & 0xFFFFFFFF); - if(wct == 12) + if (wct == 12) pSMB->OffsetHigh = cpu_to_le32(lseek >> 32); - else if((lseek >> 32) > 0) /* can not handle this big offset for old */ + else if ((lseek >> 32) > 0) /* can not handle this big offset for old */ return -EIO; pSMB->Remaining = 0; pSMB->MaxCount = cpu_to_le16(count & 0xFFFF); pSMB->MaxCountHigh = cpu_to_le32(count >> 16); - if(wct == 12) + if (wct == 12) pSMB->ByteCount = 0; /* no need to do le conversion since 0 */ else { /* old style read */ - struct smb_com_readx_req * pSMBW = + struct smb_com_readx_req *pSMBW = (struct smb_com_readx_req *)pSMB; pSMBW->ByteCount = 0; } iov[0].iov_base = (char *)pSMB; iov[0].iov_len = pSMB->hdr.smb_buf_length + 4; - rc = SendReceive2(xid, tcon->ses, iov, + rc = SendReceive2(xid, tcon->ses, iov, 1 /* num iovecs */, - &resp_buf_type, 0); + &resp_buf_type, 0); cifs_stats_inc(&tcon->num_reads); pSMBr = (READ_RSP *)iov[0].iov_base; if (rc) { @@ -1351,33 +1440,34 @@ CIFSSMBRead(const int xid, struct cifsTconInfo *tcon, /*check that DataLength would not go beyond end of SMB */ if ((data_length > CIFSMaxBufSize) || (data_length > count)) { - cFYI(1,("bad length %d for count %d",data_length,count)); + cFYI(1, ("bad length %d for count %d", + data_length, count)); rc = -EIO; *nbytes = 0; } else { pReadData = (char *) (&pSMBr->hdr.Protocol) + le16_to_cpu(pSMBr->DataOffset); -/* if(rc = copy_to_user(buf, pReadData, data_length)) { - cERROR(1,("Faulting on read rc = %d",rc)); - rc = -EFAULT; +/* if (rc = copy_to_user(buf, pReadData, data_length)) { + cERROR(1,("Faulting on read rc = %d",rc)); + rc = -EFAULT; }*/ /* can not use copy_to_user when using page cache*/ - if(*buf) - memcpy(*buf,pReadData,data_length); + if (*buf) + memcpy(*buf, pReadData, data_length); } } /* cifs_small_buf_release(pSMB); */ /* Freed earlier now in SendReceive2 */ - if(*buf) { - if(resp_buf_type == CIFS_SMALL_BUFFER) + if (*buf) { + if (resp_buf_type == CIFS_SMALL_BUFFER) cifs_small_buf_release(iov[0].iov_base); - else if(resp_buf_type == CIFS_LARGE_BUFFER) + else if (resp_buf_type == CIFS_LARGE_BUFFER) cifs_buf_release(iov[0].iov_base); - } else if(resp_buf_type != CIFS_NO_BUFFER) { - /* return buffer to caller to free */ - *buf = iov[0].iov_base; - if(resp_buf_type == CIFS_SMALL_BUFFER) + } else if (resp_buf_type != CIFS_NO_BUFFER) { + /* return buffer to caller to free */ + *buf = iov[0].iov_base; + if (resp_buf_type == CIFS_SMALL_BUFFER) *pbuf_type = CIFS_SMALL_BUFFER; - else if(resp_buf_type == CIFS_LARGE_BUFFER) + else if (resp_buf_type == CIFS_LARGE_BUFFER) *pbuf_type = CIFS_LARGE_BUFFER; } /* else no valid buffer on return - leave as null */ @@ -1391,7 +1481,7 @@ int CIFSSMBWrite(const int xid, struct cifsTconInfo *tcon, const int netfid, const unsigned int count, const __u64 offset, unsigned int *nbytes, const char *buf, - const char __user * ubuf, const int long_op) + const char __user *ubuf, const int long_op) { int rc = -EACCES; WRITE_REQ *pSMB = NULL; @@ -1401,10 +1491,10 @@ CIFSSMBWrite(const int xid, struct cifsTconInfo *tcon, __u16 byte_count; /* cFYI(1,("write at %lld %d bytes",offset,count));*/ - if(tcon->ses == NULL) + if (tcon->ses == NULL) return -ECONNABORTED; - if(tcon->ses->capabilities & CAP_LARGE_FILES) + if (tcon->ses->capabilities & CAP_LARGE_FILES) wct = 14; else wct = 12; @@ -1420,20 +1510,20 @@ CIFSSMBWrite(const int xid, struct cifsTconInfo *tcon, pSMB->AndXCommand = 0xFF; /* none */ pSMB->Fid = netfid; pSMB->OffsetLow = cpu_to_le32(offset & 0xFFFFFFFF); - if(wct == 14) + if (wct == 14) pSMB->OffsetHigh = cpu_to_le32(offset >> 32); - else if((offset >> 32) > 0) /* can not handle this big offset for old */ + else if ((offset >> 32) > 0) /* can not handle big offset for old srv */ return -EIO; - + pSMB->Reserved = 0xFFFFFFFF; pSMB->WriteMode = 0; pSMB->Remaining = 0; - /* Can increase buffer size if buffer is big enough in some cases - ie we + /* Can increase buffer size if buffer is big enough in some cases ie we can send more if LARGE_WRITE_X capability returned by the server and if our buffer is big enough or if we convert to iovecs on socket writes and eliminate the copy to the CIFS buffer */ - if(tcon->ses->capabilities & CAP_LARGE_WRITE_X) { + if (tcon->ses->capabilities & CAP_LARGE_WRITE_X) { bytes_sent = min_t(const unsigned int, CIFSMaxBufSize, count); } else { bytes_sent = (tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE) @@ -1443,11 +1533,11 @@ CIFSSMBWrite(const int xid, struct cifsTconInfo *tcon, if (bytes_sent > count) bytes_sent = count; pSMB->DataOffset = - cpu_to_le16(offsetof(struct smb_com_write_req,Data) - 4); - if(buf) - memcpy(pSMB->Data,buf,bytes_sent); - else if(ubuf) { - if(copy_from_user(pSMB->Data,ubuf,bytes_sent)) { + cpu_to_le16(offsetof(struct smb_com_write_req, Data) - 4); + if (buf) + memcpy(pSMB->Data, buf, bytes_sent); + else if (ubuf) { + if (copy_from_user(pSMB->Data, ubuf, bytes_sent)) { cifs_buf_release(pSMB); return -EFAULT; } @@ -1456,7 +1546,7 @@ CIFSSMBWrite(const int xid, struct cifsTconInfo *tcon, cifs_buf_release(pSMB); return -EINVAL; } /* else setting file size with write of zero bytes */ - if(wct == 14) + if (wct == 14) byte_count = bytes_sent + 1; /* pad */ else /* wct == 12 */ { byte_count = bytes_sent + 5; /* bigger pad, smaller smb hdr */ @@ -1465,10 +1555,11 @@ CIFSSMBWrite(const int xid, struct cifsTconInfo *tcon, pSMB->DataLengthHigh = cpu_to_le16(bytes_sent >> 16); pSMB->hdr.smb_buf_length += byte_count; - if(wct == 14) + if (wct == 14) pSMB->ByteCount = cpu_to_le16(byte_count); - else { /* old style write has byte count 4 bytes earlier so 4 bytes pad */ - struct smb_com_writex_req * pSMBW = + else { /* old style write has byte count 4 bytes earlier + so 4 bytes pad */ + struct smb_com_writex_req *pSMBW = (struct smb_com_writex_req *)pSMB; pSMBW->ByteCount = cpu_to_le16(byte_count); } @@ -1487,7 +1578,7 @@ CIFSSMBWrite(const int xid, struct cifsTconInfo *tcon, cifs_buf_release(pSMB); - /* Note: On -EAGAIN error only caller can retry on handle based calls + /* Note: On -EAGAIN error only caller can retry on handle based calls since file handle passed in no longer valid */ return rc; @@ -1505,9 +1596,9 @@ CIFSSMBWrite2(const int xid, struct cifsTconInfo *tcon, int smb_hdr_len; int resp_buf_type = 0; - cFYI(1,("write2 at %lld %d bytes", (long long)offset, count)); + cFYI(1, ("write2 at %lld %d bytes", (long long)offset, count)); - if(tcon->ses->capabilities & CAP_LARGE_FILES) + if (tcon->ses->capabilities & CAP_LARGE_FILES) wct = 14; else wct = 12; @@ -1521,37 +1612,37 @@ CIFSSMBWrite2(const int xid, struct cifsTconInfo *tcon, pSMB->AndXCommand = 0xFF; /* none */ pSMB->Fid = netfid; pSMB->OffsetLow = cpu_to_le32(offset & 0xFFFFFFFF); - if(wct == 14) + if (wct == 14) pSMB->OffsetHigh = cpu_to_le32(offset >> 32); - else if((offset >> 32) > 0) /* can not handle this big offset for old */ + else if ((offset >> 32) > 0) /* can not handle big offset for old srv */ return -EIO; pSMB->Reserved = 0xFFFFFFFF; pSMB->WriteMode = 0; pSMB->Remaining = 0; pSMB->DataOffset = - cpu_to_le16(offsetof(struct smb_com_write_req,Data) - 4); + cpu_to_le16(offsetof(struct smb_com_write_req, Data) - 4); pSMB->DataLengthLow = cpu_to_le16(count & 0xFFFF); pSMB->DataLengthHigh = cpu_to_le16(count >> 16); smb_hdr_len = pSMB->hdr.smb_buf_length + 1; /* hdr + 1 byte pad */ - if(wct == 14) + if (wct == 14) pSMB->hdr.smb_buf_length += count+1; else /* wct == 12 */ - pSMB->hdr.smb_buf_length += count+5; /* smb data starts later */ - if(wct == 14) + pSMB->hdr.smb_buf_length += count+5; /* smb data starts later */ + if (wct == 14) pSMB->ByteCount = cpu_to_le16(count + 1); else /* wct == 12 */ /* bigger pad, smaller smb hdr, keep offset ok */ { - struct smb_com_writex_req * pSMBW = + struct smb_com_writex_req *pSMBW = (struct smb_com_writex_req *)pSMB; pSMBW->ByteCount = cpu_to_le16(count + 5); } iov[0].iov_base = pSMB; - if(wct == 14) + if (wct == 14) iov[0].iov_len = smb_hdr_len + 4; else /* wct == 12 pad bigger by four bytes */ iov[0].iov_len = smb_hdr_len + 8; - + rc = SendReceive2(xid, tcon->ses, iov, n_vec + 1, &resp_buf_type, long_op); @@ -1559,7 +1650,7 @@ CIFSSMBWrite2(const int xid, struct cifsTconInfo *tcon, if (rc) { cFYI(1, ("Send error Write2 = %d", rc)); *nbytes = 0; - } else if(resp_buf_type == 0) { + } else if (resp_buf_type == 0) { /* presumably this can not happen, but best to be safe */ rc = -EIO; *nbytes = 0; @@ -1568,15 +1659,15 @@ CIFSSMBWrite2(const int xid, struct cifsTconInfo *tcon, *nbytes = le16_to_cpu(pSMBr->CountHigh); *nbytes = (*nbytes) << 16; *nbytes += le16_to_cpu(pSMBr->Count); - } + } /* cifs_small_buf_release(pSMB); */ /* Freed earlier now in SendReceive2 */ - if(resp_buf_type == CIFS_SMALL_BUFFER) + if (resp_buf_type == CIFS_SMALL_BUFFER) cifs_small_buf_release(iov[0].iov_base); - else if(resp_buf_type == CIFS_LARGE_BUFFER) + else if (resp_buf_type == CIFS_LARGE_BUFFER) cifs_buf_release(iov[0].iov_base); - /* Note: On -EAGAIN error only caller can retry on handle based calls + /* Note: On -EAGAIN error only caller can retry on handle based calls since file handle passed in no longer valid */ return rc; @@ -1596,7 +1687,7 @@ CIFSSMBLock(const int xid, struct cifsTconInfo *tcon, int timeout = 0; __u16 count; - cFYI(1, ("In CIFSSMBLock - timeout %d numLock %d",waitFlag,numLock)); + cFYI(1, ("In CIFSSMBLock - timeout %d numLock %d", waitFlag, numLock)); rc = small_smb_init(SMB_COM_LOCKING_ANDX, 8, tcon, (void **) &pSMB); if (rc) @@ -1604,7 +1695,7 @@ CIFSSMBLock(const int xid, struct cifsTconInfo *tcon, pSMBr = (LOCK_RSP *)pSMB; /* BB removeme BB */ - if(lockType == LOCKING_ANDX_OPLOCK_RELEASE) { + if (lockType == LOCKING_ANDX_OPLOCK_RELEASE) { timeout = -1; /* no response expected */ pSMB->Timeout = 0; } else if (waitFlag == TRUE) { @@ -1620,7 +1711,7 @@ CIFSSMBLock(const int xid, struct cifsTconInfo *tcon, pSMB->AndXCommand = 0xFF; /* none */ pSMB->Fid = smb_file_id; /* netfid stays le */ - if((numLock != 0) || (numUnlock != 0)) { + if ((numLock != 0) || (numUnlock != 0)) { pSMB->Locks[0].Pid = cpu_to_le16(current->tgid); /* BB where to store pid high? */ pSMB->Locks[0].LengthLow = cpu_to_le32((u32)len); @@ -1648,7 +1739,7 @@ CIFSSMBLock(const int xid, struct cifsTconInfo *tcon, } cifs_small_buf_release(pSMB); - /* Note: On -EAGAIN error only caller can retry on handle based calls + /* Note: On -EAGAIN error only caller can retry on handle based calls since file handle passed in no longer valid */ return rc; } @@ -1656,12 +1747,11 @@ CIFSSMBLock(const int xid, struct cifsTconInfo *tcon, int CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon, const __u16 smb_file_id, const int get_flag, const __u64 len, - struct file_lock *pLockData, const __u16 lock_type, + struct file_lock *pLockData, const __u16 lock_type, const int waitFlag) { struct smb_com_transaction2_sfi_req *pSMB = NULL; struct smb_com_transaction2_sfi_rsp *pSMBr = NULL; - char *data_offset; struct cifs_posix_lock *parm_data; int rc = 0; int timeout = 0; @@ -1670,7 +1760,7 @@ CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon, cFYI(1, ("Posix Lock")); - if(pLockData == NULL) + if (pLockData == NULL) return EINVAL; rc = small_smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB); @@ -1680,7 +1770,7 @@ CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon, pSMBr = (struct smb_com_transaction2_sfi_rsp *)pSMB; - params = 6; + params = 6; pSMB->MaxSetupCount = 0; pSMB->Reserved = 0; pSMB->Flags = 0; @@ -1688,14 +1778,12 @@ CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon, param_offset = offsetof(struct smb_com_transaction2_sfi_req, Fid) - 4; offset = param_offset + params; - data_offset = (char *) (&pSMB->hdr.Protocol) + offset; - count = sizeof(struct cifs_posix_lock); pSMB->MaxParameterCount = cpu_to_le16(2); pSMB->MaxDataCount = cpu_to_le16(1000); /* BB find max SMB PDU from sess */ pSMB->SetupCount = 1; pSMB->Reserved3 = 0; - if(get_flag) + if (get_flag) pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FILE_INFORMATION); else pSMB->SubCommand = cpu_to_le16(TRANS2_SET_FILE_INFORMATION); @@ -1705,11 +1793,11 @@ CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon, pSMB->TotalDataCount = pSMB->DataCount; pSMB->TotalParameterCount = pSMB->ParameterCount; pSMB->ParameterOffset = cpu_to_le16(param_offset); - parm_data = (struct cifs_posix_lock *) + parm_data = (struct cifs_posix_lock *) (((char *) &pSMB->hdr.Protocol) + offset); parm_data->lock_type = cpu_to_le16(lock_type); - if(waitFlag) { + if (waitFlag) { timeout = 3; /* blocking operation, no timeout */ parm_data->lock_flags = cpu_to_le16(1); pSMB->Timeout = cpu_to_le32(-1); @@ -1746,22 +1834,22 @@ CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon, rc = -EIO; /* bad smb */ goto plk_err_exit; } - if(pLockData == NULL) { + if (pLockData == NULL) { rc = -EINVAL; goto plk_err_exit; } data_offset = le16_to_cpu(pSMBr->t2.DataOffset); data_count = le16_to_cpu(pSMBr->t2.DataCount); - if(data_count < sizeof(struct cifs_posix_lock)) { + if (data_count < sizeof(struct cifs_posix_lock)) { rc = -EIO; goto plk_err_exit; } parm_data = (struct cifs_posix_lock *) ((char *)&pSMBr->hdr.Protocol + data_offset); - if(parm_data->lock_type == cpu_to_le16(CIFS_UNLCK)) + if (parm_data->lock_type == cpu_to_le16(CIFS_UNLCK)) pLockData->fl_type = F_UNLCK; } - + plk_err_exit: if (pSMB) cifs_small_buf_release(pSMB); @@ -1784,7 +1872,7 @@ CIFSSMBClose(const int xid, struct cifsTconInfo *tcon, int smb_file_id) /* do not retry on dead session on close */ rc = small_smb_init(SMB_COM_CLOSE, 3, tcon, (void **) &pSMB); - if(rc == -EAGAIN) + if (rc == -EAGAIN) return 0; if (rc) return rc; @@ -1798,7 +1886,7 @@ CIFSSMBClose(const int xid, struct cifsTconInfo *tcon, int smb_file_id) (struct smb_hdr *) pSMBr, &bytes_returned, 0); cifs_stats_inc(&tcon->num_closes); if (rc) { - if(rc!=-EINTR) { + if (rc != -EINTR) { /* EINTR is expected when user ctl-c to kill app */ cERROR(1, ("Send error in Close = %d", rc)); } @@ -1807,7 +1895,7 @@ CIFSSMBClose(const int xid, struct cifsTconInfo *tcon, int smb_file_id) cifs_small_buf_release(pSMB); /* Since session is dead, file will be closed on server already */ - if(rc == -EAGAIN) + if (rc == -EAGAIN) rc = 0; return rc; @@ -1839,7 +1927,7 @@ renameRetry: if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = - cifsConvertToUCS((__le16 *) pSMB->OldFileName, fromName, + cifsConvertToUCS((__le16 *) pSMB->OldFileName, fromName, PATH_MAX, nls_codepage, remap); name_len++; /* trailing null */ name_len *= 2; @@ -1851,7 +1939,7 @@ renameRetry: toName, PATH_MAX, nls_codepage, remap); name_len2 += 1 /* trailing null */ + 1 /* Signature word */ ; name_len2 *= 2; /* convert to bytes */ - } else { /* BB improve the check for buffer overruns BB */ + } else { /* BB improve the check for buffer overruns BB */ name_len = strnlen(fromName, PATH_MAX); name_len++; /* trailing null */ strncpy(pSMB->OldFileName, fromName, name_len); @@ -1872,7 +1960,7 @@ renameRetry: cifs_stats_inc(&tcon->num_renames); if (rc) { cFYI(1, ("Send error in rename = %d", rc)); - } + } cifs_buf_release(pSMB); @@ -1882,13 +1970,13 @@ renameRetry: return rc; } -int CIFSSMBRenameOpenFile(const int xid,struct cifsTconInfo *pTcon, - int netfid, char * target_name, - const struct nls_table * nls_codepage, int remap) +int CIFSSMBRenameOpenFile(const int xid, struct cifsTconInfo *pTcon, + int netfid, char *target_name, + const struct nls_table *nls_codepage, int remap) { struct smb_com_transaction2_sfi_req *pSMB = NULL; struct smb_com_transaction2_sfi_rsp *pSMBr = NULL; - struct set_file_rename * rename_info; + struct set_file_rename *rename_info; char *data_offset; char dummy_string[30]; int rc = 0; @@ -1927,13 +2015,14 @@ int CIFSSMBRenameOpenFile(const int xid,struct cifsTconInfo *pTcon, rename_info->overwrite = cpu_to_le32(1); rename_info->root_fid = 0; /* unicode only call */ - if(target_name == NULL) { - sprintf(dummy_string,"cifs%x",pSMB->hdr.Mid); - len_of_str = cifsConvertToUCS((__le16 *)rename_info->target_name, + if (target_name == NULL) { + sprintf(dummy_string, "cifs%x", pSMB->hdr.Mid); + len_of_str = cifsConvertToUCS((__le16 *)rename_info->target_name, dummy_string, 24, nls_codepage, remap); } else { len_of_str = cifsConvertToUCS((__le16 *)rename_info->target_name, - target_name, PATH_MAX, nls_codepage, remap); + target_name, PATH_MAX, nls_codepage, + remap); } rename_info->target_name_len = cpu_to_le32(2 * len_of_str); count = 12 /* sizeof(struct set_file_rename) */ + (2 * len_of_str) + 2; @@ -1947,10 +2036,10 @@ int CIFSSMBRenameOpenFile(const int xid,struct cifsTconInfo *pTcon, pSMB->hdr.smb_buf_length += byte_count; pSMB->ByteCount = cpu_to_le16(byte_count); rc = SendReceive(xid, pTcon->ses, (struct smb_hdr *) pSMB, - (struct smb_hdr *) pSMBr, &bytes_returned, 0); + (struct smb_hdr *) pSMBr, &bytes_returned, 0); cifs_stats_inc(&pTcon->num_t2renames); if (rc) { - cFYI(1,("Send error in Rename (by file handle) = %d", rc)); + cFYI(1, ("Send error in Rename (by file handle) = %d", rc)); } cifs_buf_release(pSMB); @@ -1962,9 +2051,9 @@ int CIFSSMBRenameOpenFile(const int xid,struct cifsTconInfo *pTcon, } int -CIFSSMBCopy(const int xid, struct cifsTconInfo *tcon, const char * fromName, - const __u16 target_tid, const char *toName, const int flags, - const struct nls_table *nls_codepage, int remap) +CIFSSMBCopy(const int xid, struct cifsTconInfo *tcon, const char *fromName, + const __u16 target_tid, const char *toName, const int flags, + const struct nls_table *nls_codepage, int remap) { int rc = 0; COPY_REQ *pSMB = NULL; @@ -1986,7 +2075,7 @@ copyRetry: pSMB->Flags = cpu_to_le16(flags & COPY_TREE); if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { - name_len = cifsConvertToUCS((__le16 *) pSMB->OldFileName, + name_len = cifsConvertToUCS((__le16 *) pSMB->OldFileName, fromName, PATH_MAX, nls_codepage, remap); name_len++; /* trailing null */ @@ -1994,11 +2083,12 @@ copyRetry: pSMB->OldFileName[name_len] = 0x04; /* pad */ /* protocol requires ASCII signature byte on Unicode string */ pSMB->OldFileName[name_len + 1] = 0x00; - name_len2 = cifsConvertToUCS((__le16 *)&pSMB->OldFileName[name_len + 2], + name_len2 = + cifsConvertToUCS((__le16 *)&pSMB->OldFileName[name_len + 2], toName, PATH_MAX, nls_codepage, remap); name_len2 += 1 /* trailing null */ + 1 /* Signature word */ ; name_len2 *= 2; /* convert to bytes */ - } else { /* BB improve the check for buffer overruns BB */ + } else { /* BB improve the check for buffer overruns BB */ name_len = strnlen(fromName, PATH_MAX); name_len++; /* trailing null */ strncpy(pSMB->OldFileName, fromName, name_len); @@ -2058,7 +2148,7 @@ createSymLinkRetry: name_len++; /* trailing null */ name_len *= 2; - } else { /* BB improve the check for buffer overruns BB */ + } else { /* BB improve the check for buffer overruns BB */ name_len = strnlen(fromName, PATH_MAX); name_len++; /* trailing null */ strncpy(pSMB->FileName, fromName, name_len); @@ -2070,7 +2160,7 @@ createSymLinkRetry: pSMB->Timeout = 0; pSMB->Reserved2 = 0; param_offset = offsetof(struct smb_com_transaction2_spi_req, - InformationLevel) - 4; + InformationLevel) - 4; offset = param_offset + params; data_offset = (char *) (&pSMB->hdr.Protocol) + offset; @@ -2081,7 +2171,7 @@ createSymLinkRetry: , nls_codepage); name_len_target++; /* trailing null */ name_len_target *= 2; - } else { /* BB improve the check for buffer overruns BB */ + } else { /* BB improve the check for buffer overruns BB */ name_len_target = strnlen(toName, PATH_MAX); name_len_target++; /* trailing null */ strncpy(data_offset, toName, name_len_target); @@ -2108,9 +2198,7 @@ createSymLinkRetry: (struct smb_hdr *) pSMBr, &bytes_returned, 0); cifs_stats_inc(&tcon->num_symlinks); if (rc) { - cFYI(1, - ("Send error in SetPathInfo (create symlink) = %d", - rc)); + cFYI(1, ("Send error in SetPathInfo create symlink = %d", rc)); } if (pSMB) @@ -2149,7 +2237,7 @@ createHardLinkRetry: name_len++; /* trailing null */ name_len *= 2; - } else { /* BB improve the check for buffer overruns BB */ + } else { /* BB improve the check for buffer overruns BB */ name_len = strnlen(toName, PATH_MAX); name_len++; /* trailing null */ strncpy(pSMB->FileName, toName, name_len); @@ -2161,7 +2249,7 @@ createHardLinkRetry: pSMB->Timeout = 0; pSMB->Reserved2 = 0; param_offset = offsetof(struct smb_com_transaction2_spi_req, - InformationLevel) - 4; + InformationLevel) - 4; offset = param_offset + params; data_offset = (char *) (&pSMB->hdr.Protocol) + offset; @@ -2171,7 +2259,7 @@ createHardLinkRetry: nls_codepage, remap); name_len_target++; /* trailing null */ name_len_target *= 2; - } else { /* BB improve the check for buffer overruns BB */ + } else { /* BB improve the check for buffer overruns BB */ name_len_target = strnlen(fromName, PATH_MAX); name_len_target++; /* trailing null */ strncpy(data_offset, fromName, name_len_target); @@ -2243,13 +2331,13 @@ winCreateHardLinkRetry: name_len++; /* trailing null */ name_len *= 2; pSMB->OldFileName[name_len] = 0; /* pad */ - pSMB->OldFileName[name_len + 1] = 0x04; + pSMB->OldFileName[name_len + 1] = 0x04; name_len2 = - cifsConvertToUCS((__le16 *)&pSMB->OldFileName[name_len + 2], + cifsConvertToUCS((__le16 *)&pSMB->OldFileName[name_len + 2], toName, PATH_MAX, nls_codepage, remap); name_len2 += 1 /* trailing null */ + 1 /* Signature word */ ; name_len2 *= 2; /* convert to bytes */ - } else { /* BB improve the check for buffer overruns BB */ + } else { /* BB improve the check for buffer overruns BB */ name_len = strnlen(fromName, PATH_MAX); name_len++; /* trailing null */ strncpy(pSMB->OldFileName, fromName, name_len); @@ -2302,12 +2390,11 @@ querySymLinkRetry: if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = - cifs_strtoUCS((__le16 *) pSMB->FileName, searchName, PATH_MAX - /* find define for this maxpathcomponent */ - , nls_codepage); + cifs_strtoUCS((__le16 *) pSMB->FileName, searchName, + PATH_MAX, nls_codepage); name_len++; /* trailing null */ name_len *= 2; - } else { /* BB improve the check for buffer overruns BB */ + } else { /* BB improve the check for buffer overruns BB */ name_len = strnlen(searchName, PATH_MAX); name_len++; /* trailing null */ strncpy(pSMB->FileName, searchName, name_len); @@ -2324,7 +2411,7 @@ querySymLinkRetry: pSMB->Timeout = 0; pSMB->Reserved2 = 0; pSMB->ParameterOffset = cpu_to_le16(offsetof( - struct smb_com_transaction2_qpi_req ,InformationLevel) - 4); + struct smb_com_transaction2_qpi_req, InformationLevel) - 4); pSMB->DataCount = 0; pSMB->DataOffset = 0; pSMB->SetupCount = 1; @@ -2355,16 +2442,16 @@ querySymLinkRetry: if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = UniStrnlen((wchar_t *) ((char *) - &pSMBr->hdr.Protocol +data_offset), - min_t(const int, buflen,count) / 2); + &pSMBr->hdr.Protocol + data_offset), + min_t(const int, buflen, count) / 2); /* BB FIXME investigate remapping reserved chars here */ cifs_strfromUCS_le(symlinkinfo, - (__le16 *) ((char *)&pSMBr->hdr.Protocol + - data_offset), + (__le16 *) ((char *)&pSMBr->hdr.Protocol + + data_offset), name_len, nls_codepage); } else { strncpy(symlinkinfo, - (char *) &pSMBr->hdr.Protocol + + (char *) &pSMBr->hdr.Protocol + data_offset, min_t(const int, buflen, count)); } @@ -2385,14 +2472,14 @@ querySymLinkRetry: Setup words themselves and ByteCount MaxSetupCount (size of returned setup area) and MaxParameterCount (returned parms size) must be set by caller */ -static int +static int smb_init_ntransact(const __u16 sub_command, const int setup_count, const int parm_len, struct cifsTconInfo *tcon, - void ** ret_buf) + void **ret_buf) { int rc; __u32 temp_offset; - struct smb_com_ntransact_req * pSMB; + struct smb_com_ntransact_req *pSMB; rc = small_smb_init(SMB_COM_NT_TRANSACT, 19 + setup_count, tcon, (void **)&pSMB); @@ -2416,47 +2503,47 @@ smb_init_ntransact(const __u16 sub_command, const int setup_count, } static int -validate_ntransact(char * buf, char ** ppparm, char ** ppdata, - int * pdatalen, int * pparmlen) +validate_ntransact(char *buf, char **ppparm, char **ppdata, + int *pdatalen, int *pparmlen) { - char * end_of_smb; + char *end_of_smb; __u32 data_count, data_offset, parm_count, parm_offset; - struct smb_com_ntransact_rsp * pSMBr; + struct smb_com_ntransact_rsp *pSMBr; - if(buf == NULL) + if (buf == NULL) return -EINVAL; pSMBr = (struct smb_com_ntransact_rsp *)buf; /* ByteCount was converted from little endian in SendReceive */ - end_of_smb = 2 /* sizeof byte count */ + pSMBr->ByteCount + + end_of_smb = 2 /* sizeof byte count */ + pSMBr->ByteCount + (char *)&pSMBr->ByteCount; - data_offset = le32_to_cpu(pSMBr->DataOffset); data_count = le32_to_cpu(pSMBr->DataCount); - parm_offset = le32_to_cpu(pSMBr->ParameterOffset); + parm_offset = le32_to_cpu(pSMBr->ParameterOffset); parm_count = le32_to_cpu(pSMBr->ParameterCount); *ppparm = (char *)&pSMBr->hdr.Protocol + parm_offset; *ppdata = (char *)&pSMBr->hdr.Protocol + data_offset; /* should we also check that parm and data areas do not overlap? */ - if(*ppparm > end_of_smb) { - cFYI(1,("parms start after end of smb")); + if (*ppparm > end_of_smb) { + cFYI(1, ("parms start after end of smb")); return -EINVAL; - } else if(parm_count + *ppparm > end_of_smb) { - cFYI(1,("parm end after end of smb")); + } else if (parm_count + *ppparm > end_of_smb) { + cFYI(1, ("parm end after end of smb")); return -EINVAL; - } else if(*ppdata > end_of_smb) { - cFYI(1,("data starts after end of smb")); + } else if (*ppdata > end_of_smb) { + cFYI(1, ("data starts after end of smb")); return -EINVAL; - } else if(data_count + *ppdata > end_of_smb) { + } else if (data_count + *ppdata > end_of_smb) { cFYI(1,("data %p + count %d (%p) ends after end of smb %p start %p", - *ppdata, data_count, (data_count + *ppdata), end_of_smb, pSMBr)); /* BB FIXME */ + *ppdata, data_count, (data_count + *ppdata), + end_of_smb, pSMBr)); return -EINVAL; - } else if(parm_count + data_count > pSMBr->ByteCount) { - cFYI(1,("parm count and data count larger than SMB")); + } else if (parm_count + data_count > pSMBr->ByteCount) { + cFYI(1, ("parm count and data count larger than SMB")); return -EINVAL; } return 0; @@ -2465,14 +2552,14 @@ validate_ntransact(char * buf, char ** ppparm, char ** ppdata, int CIFSSMBQueryReparseLinkInfo(const int xid, struct cifsTconInfo *tcon, const unsigned char *searchName, - char *symlinkinfo, const int buflen,__u16 fid, + char *symlinkinfo, const int buflen, __u16 fid, const struct nls_table *nls_codepage) { int rc = 0; int bytes_returned; int name_len; - struct smb_com_transaction_ioctl_req * pSMB; - struct smb_com_transaction_ioctl_rsp * pSMBr; + struct smb_com_transaction_ioctl_req *pSMB; + struct smb_com_transaction_ioctl_rsp *pSMBr; cFYI(1, ("In Windows reparse style QueryLink for path %s", searchName)); rc = smb_init(SMB_COM_NT_TRANSACT, 23, tcon, (void **) &pSMB, @@ -2511,47 +2598,53 @@ CIFSSMBQueryReparseLinkInfo(const int xid, struct cifsTconInfo *tcon, /* BB also check enough total bytes returned */ rc = -EIO; /* bad smb */ else { - if(data_count && (data_count < 2048)) { - char * end_of_smb = 2 /* sizeof byte count */ + + if (data_count && (data_count < 2048)) { + char *end_of_smb = 2 /* sizeof byte count */ + pSMBr->ByteCount + (char *)&pSMBr->ByteCount; - struct reparse_data * reparse_buf = (struct reparse_data *) - ((char *)&pSMBr->hdr.Protocol + data_offset); - if((char*)reparse_buf >= end_of_smb) { + struct reparse_data *reparse_buf = + (struct reparse_data *) + ((char *)&pSMBr->hdr.Protocol + + data_offset); + if ((char *)reparse_buf >= end_of_smb) { rc = -EIO; goto qreparse_out; } - if((reparse_buf->LinkNamesBuf + + if ((reparse_buf->LinkNamesBuf + reparse_buf->TargetNameOffset + reparse_buf->TargetNameLen) > end_of_smb) { - cFYI(1,("reparse buf extended beyond SMB")); + cFYI(1,("reparse buf goes beyond SMB")); rc = -EIO; goto qreparse_out; } - + if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = UniStrnlen((wchar_t *) - (reparse_buf->LinkNamesBuf + - reparse_buf->TargetNameOffset), - min(buflen/2, reparse_buf->TargetNameLen / 2)); + (reparse_buf->LinkNamesBuf + + reparse_buf->TargetNameOffset), + min(buflen/2, + reparse_buf->TargetNameLen / 2)); cifs_strfromUCS_le(symlinkinfo, - (__le16 *) (reparse_buf->LinkNamesBuf + + (__le16 *) (reparse_buf->LinkNamesBuf + reparse_buf->TargetNameOffset), name_len, nls_codepage); } else { /* ASCII names */ - strncpy(symlinkinfo,reparse_buf->LinkNamesBuf + - reparse_buf->TargetNameOffset, - min_t(const int, buflen, reparse_buf->TargetNameLen)); + strncpy(symlinkinfo, + reparse_buf->LinkNamesBuf + + reparse_buf->TargetNameOffset, + min_t(const int, buflen, + reparse_buf->TargetNameLen)); } } else { rc = -EIO; - cFYI(1,("Invalid return data count on get reparse info ioctl")); + cFYI(1, ("Invalid return data count on " + "get reparse info ioctl")); } symlinkinfo[buflen] = 0; /* just in case so the caller does not go off the end of the buffer */ - cFYI(1,("readlink result - %s",symlinkinfo)); + cFYI(1, ("readlink result - %s", symlinkinfo)); } } qreparse_out: @@ -2566,7 +2659,8 @@ qreparse_out: #ifdef CONFIG_CIFS_POSIX /*Convert an Access Control Entry from wire format to local POSIX xattr format*/ -static void cifs_convert_ace(posix_acl_xattr_entry * ace, struct cifs_posix_ace * cifs_ace) +static void cifs_convert_ace(posix_acl_xattr_entry *ace, + struct cifs_posix_ace *cifs_ace) { /* u8 cifs fields do not need le conversion */ ace->e_perm = cpu_to_le16(cifs_ace->cifs_e_perm); @@ -2578,30 +2672,31 @@ static void cifs_convert_ace(posix_acl_xattr_entry * ace, struct cifs_posix_ace } /* Convert ACL from CIFS POSIX wire format to local Linux POSIX ACL xattr */ -static int cifs_copy_posix_acl(char * trgt,char * src, const int buflen, - const int acl_type,const int size_of_data_area) +static int cifs_copy_posix_acl(char *trgt, char *src, const int buflen, + const int acl_type, const int size_of_data_area) { int size = 0; int i; __u16 count; - struct cifs_posix_ace * pACE; - struct cifs_posix_acl * cifs_acl = (struct cifs_posix_acl *)src; - posix_acl_xattr_header * local_acl = (posix_acl_xattr_header *)trgt; + struct cifs_posix_ace *pACE; + struct cifs_posix_acl *cifs_acl = (struct cifs_posix_acl *)src; + posix_acl_xattr_header *local_acl = (posix_acl_xattr_header *)trgt; if (le16_to_cpu(cifs_acl->version) != CIFS_ACL_VERSION) return -EOPNOTSUPP; - if(acl_type & ACL_TYPE_ACCESS) { + if (acl_type & ACL_TYPE_ACCESS) { count = le16_to_cpu(cifs_acl->access_entry_count); pACE = &cifs_acl->ace_array[0]; size = sizeof(struct cifs_posix_acl); size += sizeof(struct cifs_posix_ace) * count; /* check if we would go beyond end of SMB */ - if(size_of_data_area < size) { - cFYI(1,("bad CIFS POSIX ACL size %d vs. %d",size_of_data_area,size)); + if (size_of_data_area < size) { + cFYI(1, ("bad CIFS POSIX ACL size %d vs. %d", + size_of_data_area, size)); return -EINVAL; } - } else if(acl_type & ACL_TYPE_DEFAULT) { + } else if (acl_type & ACL_TYPE_DEFAULT) { count = le16_to_cpu(cifs_acl->access_entry_count); size = sizeof(struct cifs_posix_acl); size += sizeof(struct cifs_posix_ace) * count; @@ -2610,7 +2705,7 @@ static int cifs_copy_posix_acl(char * trgt,char * src, const int buflen, count = le16_to_cpu(cifs_acl->default_entry_count); size += sizeof(struct cifs_posix_ace) * count; /* check if we would go beyond end of SMB */ - if(size_of_data_area < size) + if (size_of_data_area < size) return -EINVAL; } else { /* illegal type */ @@ -2618,76 +2713,77 @@ static int cifs_copy_posix_acl(char * trgt,char * src, const int buflen, } size = posix_acl_xattr_size(count); - if((buflen == 0) || (local_acl == NULL)) { - /* used to query ACL EA size */ - } else if(size > buflen) { + if ((buflen == 0) || (local_acl == NULL)) { + /* used to query ACL EA size */ + } else if (size > buflen) { return -ERANGE; } else /* buffer big enough */ { local_acl->a_version = cpu_to_le32(POSIX_ACL_XATTR_VERSION); - for(i = 0;i < count ;i++) { - cifs_convert_ace(&local_acl->a_entries[i],pACE); - pACE ++; + for (i = 0; i < count ; i++) { + cifs_convert_ace(&local_acl->a_entries[i], pACE); + pACE++; } } return size; } -static __u16 convert_ace_to_cifs_ace(struct cifs_posix_ace * cifs_ace, - const posix_acl_xattr_entry * local_ace) +static __u16 convert_ace_to_cifs_ace(struct cifs_posix_ace *cifs_ace, + const posix_acl_xattr_entry *local_ace) { __u16 rc = 0; /* 0 = ACL converted ok */ cifs_ace->cifs_e_perm = le16_to_cpu(local_ace->e_perm); cifs_ace->cifs_e_tag = le16_to_cpu(local_ace->e_tag); /* BB is there a better way to handle the large uid? */ - if(local_ace->e_id == cpu_to_le32(-1)) { + if (local_ace->e_id == cpu_to_le32(-1)) { /* Probably no need to le convert -1 on any arch but can not hurt */ cifs_ace->cifs_uid = cpu_to_le64(-1); - } else + } else cifs_ace->cifs_uid = cpu_to_le64(le32_to_cpu(local_ace->e_id)); - /*cFYI(1,("perm %d tag %d id %d",ace->e_perm,ace->e_tag,ace->e_id));*/ + /*cFYI(1,("perm %d tag %d id %d",ace->e_perm,ace->e_tag,ace->e_id));*/ return rc; } /* Convert ACL from local Linux POSIX xattr to CIFS POSIX ACL wire format */ -static __u16 ACL_to_cifs_posix(char * parm_data,const char * pACL,const int buflen, - const int acl_type) +static __u16 ACL_to_cifs_posix(char *parm_data, const char *pACL, + const int buflen, const int acl_type) { __u16 rc = 0; - struct cifs_posix_acl * cifs_acl = (struct cifs_posix_acl *)parm_data; - posix_acl_xattr_header * local_acl = (posix_acl_xattr_header *)pACL; + struct cifs_posix_acl *cifs_acl = (struct cifs_posix_acl *)parm_data; + posix_acl_xattr_header *local_acl = (posix_acl_xattr_header *)pACL; int count; int i; - if((buflen == 0) || (pACL == NULL) || (cifs_acl == NULL)) + if ((buflen == 0) || (pACL == NULL) || (cifs_acl == NULL)) return 0; count = posix_acl_xattr_count((size_t)buflen); - cFYI(1,("setting acl with %d entries from buf of length %d and version of %d", + cFYI(1, ("setting acl with %d entries from buf of length %d and " + "version of %d", count, buflen, le32_to_cpu(local_acl->a_version))); - if(le32_to_cpu(local_acl->a_version) != 2) { - cFYI(1,("unknown POSIX ACL version %d", + if (le32_to_cpu(local_acl->a_version) != 2) { + cFYI(1, ("unknown POSIX ACL version %d", le32_to_cpu(local_acl->a_version))); return 0; } cifs_acl->version = cpu_to_le16(1); - if(acl_type == ACL_TYPE_ACCESS) + if (acl_type == ACL_TYPE_ACCESS) cifs_acl->access_entry_count = cpu_to_le16(count); - else if(acl_type == ACL_TYPE_DEFAULT) + else if (acl_type == ACL_TYPE_DEFAULT) cifs_acl->default_entry_count = cpu_to_le16(count); else { - cFYI(1,("unknown ACL type %d",acl_type)); + cFYI(1, ("unknown ACL type %d", acl_type)); return 0; } - for(i=0;i<count;i++) { + for (i = 0; i < count; i++) { rc = convert_ace_to_cifs_ace(&cifs_acl->ace_array[i], &local_acl->a_entries[i]); - if(rc != 0) { + if (rc != 0) { /* ACE not converted */ break; } } - if(rc == 0) { + if (rc == 0) { rc = (__u16)(count * sizeof(struct cifs_posix_ace)); rc += sizeof(struct cifs_posix_acl); /* BB add check to make sure ACL does not overflow SMB */ @@ -2697,9 +2793,9 @@ static __u16 ACL_to_cifs_posix(char * parm_data,const char * pACL,const int bufl int CIFSSMBGetPosixACL(const int xid, struct cifsTconInfo *tcon, - const unsigned char *searchName, - char *acl_inf, const int buflen, const int acl_type, - const struct nls_table *nls_codepage, int remap) + const unsigned char *searchName, + char *acl_inf, const int buflen, const int acl_type, + const struct nls_table *nls_codepage, int remap) { /* SMB_QUERY_POSIX_ACL */ TRANSACTION2_QPI_REQ *pSMB = NULL; @@ -2708,7 +2804,7 @@ CIFSSMBGetPosixACL(const int xid, struct cifsTconInfo *tcon, int bytes_returned; int name_len; __u16 params, byte_count; - + cFYI(1, ("In GetPosixACL (Unix) for path %s", searchName)); queryAclRetry: @@ -2716,16 +2812,16 @@ queryAclRetry: (void **) &pSMBr); if (rc) return rc; - + if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = - cifsConvertToUCS((__le16 *) pSMB->FileName, searchName, + cifsConvertToUCS((__le16 *) pSMB->FileName, searchName, PATH_MAX, nls_codepage, remap); name_len++; /* trailing null */ name_len *= 2; pSMB->FileName[name_len] = 0; pSMB->FileName[name_len+1] = 0; - } else { /* BB improve the check for buffer overruns BB */ + } else { /* BB improve the check for buffer overruns BB */ name_len = strnlen(searchName, PATH_MAX); name_len++; /* trailing null */ strncpy(pSMB->FileName, searchName, name_len); @@ -2734,7 +2830,7 @@ queryAclRetry: params = 2 /* level */ + 4 /* rsrvd */ + name_len /* incl null */ ; pSMB->TotalDataCount = 0; pSMB->MaxParameterCount = cpu_to_le16(2); - /* BB find exact max data count below from sess structure BB */ + /* BB find exact max data count below from sess structure BB */ pSMB->MaxDataCount = cpu_to_le16(4000); pSMB->MaxSetupCount = 0; pSMB->Reserved = 0; @@ -2742,7 +2838,8 @@ queryAclRetry: pSMB->Timeout = 0; pSMB->Reserved2 = 0; pSMB->ParameterOffset = cpu_to_le16( - offsetof(struct smb_com_transaction2_qpi_req ,InformationLevel) - 4); + offsetof(struct smb_com_transaction2_qpi_req, + InformationLevel) - 4); pSMB->DataCount = 0; pSMB->DataOffset = 0; pSMB->SetupCount = 1; @@ -2763,7 +2860,7 @@ queryAclRetry: cFYI(1, ("Send error in Query POSIX ACL = %d", rc)); } else { /* decode response */ - + rc = validate_t2((struct smb_t2_rsp *)pSMBr); if (rc || (pSMBr->ByteCount < 2)) /* BB also check enough total bytes returned */ @@ -2773,7 +2870,7 @@ queryAclRetry: __u16 count = le16_to_cpu(pSMBr->t2.DataCount); rc = cifs_copy_posix_acl(acl_inf, (char *)&pSMBr->hdr.Protocol+data_offset, - buflen,acl_type,count); + buflen, acl_type, count); } } cifs_buf_release(pSMB); @@ -2784,10 +2881,10 @@ queryAclRetry: int CIFSSMBSetPosixACL(const int xid, struct cifsTconInfo *tcon, - const unsigned char *fileName, - const char *local_acl, const int buflen, - const int acl_type, - const struct nls_table *nls_codepage, int remap) + const unsigned char *fileName, + const char *local_acl, const int buflen, + const int acl_type, + const struct nls_table *nls_codepage, int remap) { struct smb_com_transaction2_spi_req *pSMB = NULL; struct smb_com_transaction2_spi_rsp *pSMBr = NULL; @@ -2800,16 +2897,16 @@ CIFSSMBSetPosixACL(const int xid, struct cifsTconInfo *tcon, cFYI(1, ("In SetPosixACL (Unix) for path %s", fileName)); setAclRetry: rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, - (void **) &pSMBr); + (void **) &pSMBr); if (rc) return rc; if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = - cifsConvertToUCS((__le16 *) pSMB->FileName, fileName, + cifsConvertToUCS((__le16 *) pSMB->FileName, fileName, PATH_MAX, nls_codepage, remap); name_len++; /* trailing null */ name_len *= 2; - } else { /* BB improve the check for buffer overruns BB */ + } else { /* BB improve the check for buffer overruns BB */ name_len = strnlen(fileName, PATH_MAX); name_len++; /* trailing null */ strncpy(pSMB->FileName, fileName, name_len); @@ -2823,15 +2920,15 @@ setAclRetry: pSMB->Timeout = 0; pSMB->Reserved2 = 0; param_offset = offsetof(struct smb_com_transaction2_spi_req, - InformationLevel) - 4; + InformationLevel) - 4; offset = param_offset + params; parm_data = ((char *) &pSMB->hdr.Protocol) + offset; pSMB->ParameterOffset = cpu_to_le16(param_offset); /* convert to on the wire format for POSIX ACL */ - data_count = ACL_to_cifs_posix(parm_data,local_acl,buflen,acl_type); + data_count = ACL_to_cifs_posix(parm_data, local_acl, buflen, acl_type); - if(data_count == 0) { + if (data_count == 0) { rc = -EOPNOTSUPP; goto setACLerrorExit; } @@ -2849,7 +2946,7 @@ setAclRetry: pSMB->hdr.smb_buf_length += byte_count; pSMB->ByteCount = cpu_to_le16(byte_count); rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, - (struct smb_hdr *) pSMBr, &bytes_returned, 0); + (struct smb_hdr *) pSMBr, &bytes_returned, 0); if (rc) { cFYI(1, ("Set POSIX ACL returned %d", rc)); } @@ -2864,86 +2961,85 @@ setACLerrorExit: /* BB fix tabs in this function FIXME BB */ int CIFSGetExtAttr(const int xid, struct cifsTconInfo *tcon, - const int netfid, __u64 * pExtAttrBits, __u64 *pMask) + const int netfid, __u64 * pExtAttrBits, __u64 *pMask) { - int rc = 0; - struct smb_t2_qfi_req *pSMB = NULL; - struct smb_t2_qfi_rsp *pSMBr = NULL; - int bytes_returned; - __u16 params, byte_count; + int rc = 0; + struct smb_t2_qfi_req *pSMB = NULL; + struct smb_t2_qfi_rsp *pSMBr = NULL; + int bytes_returned; + __u16 params, byte_count; - cFYI(1,("In GetExtAttr")); - if(tcon == NULL) - return -ENODEV; + cFYI(1, ("In GetExtAttr")); + if (tcon == NULL) + return -ENODEV; GetExtAttrRetry: - rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, - (void **) &pSMBr); - if (rc) - return rc; - - params = 2 /* level */ +2 /* fid */; - pSMB->t2.TotalDataCount = 0; - pSMB->t2.MaxParameterCount = cpu_to_le16(4); - /* BB find exact max data count below from sess structure BB */ - pSMB->t2.MaxDataCount = cpu_to_le16(4000); - pSMB->t2.MaxSetupCount = 0; - pSMB->t2.Reserved = 0; - pSMB->t2.Flags = 0; - pSMB->t2.Timeout = 0; - pSMB->t2.Reserved2 = 0; - pSMB->t2.ParameterOffset = cpu_to_le16(offsetof(struct smb_t2_qfi_req, - Fid) - 4); - pSMB->t2.DataCount = 0; - pSMB->t2.DataOffset = 0; - pSMB->t2.SetupCount = 1; - pSMB->t2.Reserved3 = 0; - pSMB->t2.SubCommand = cpu_to_le16(TRANS2_QUERY_FILE_INFORMATION); - byte_count = params + 1 /* pad */ ; - pSMB->t2.TotalParameterCount = cpu_to_le16(params); - pSMB->t2.ParameterCount = pSMB->t2.TotalParameterCount; - pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_ATTR_FLAGS); - pSMB->Pad = 0; + rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, + (void **) &pSMBr); + if (rc) + return rc; + + params = 2 /* level */ +2 /* fid */; + pSMB->t2.TotalDataCount = 0; + pSMB->t2.MaxParameterCount = cpu_to_le16(4); + /* BB find exact max data count below from sess structure BB */ + pSMB->t2.MaxDataCount = cpu_to_le16(4000); + pSMB->t2.MaxSetupCount = 0; + pSMB->t2.Reserved = 0; + pSMB->t2.Flags = 0; + pSMB->t2.Timeout = 0; + pSMB->t2.Reserved2 = 0; + pSMB->t2.ParameterOffset = cpu_to_le16(offsetof(struct smb_t2_qfi_req, + Fid) - 4); + pSMB->t2.DataCount = 0; + pSMB->t2.DataOffset = 0; + pSMB->t2.SetupCount = 1; + pSMB->t2.Reserved3 = 0; + pSMB->t2.SubCommand = cpu_to_le16(TRANS2_QUERY_FILE_INFORMATION); + byte_count = params + 1 /* pad */ ; + pSMB->t2.TotalParameterCount = cpu_to_le16(params); + pSMB->t2.ParameterCount = pSMB->t2.TotalParameterCount; + pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_ATTR_FLAGS); + pSMB->Pad = 0; pSMB->Fid = netfid; - pSMB->hdr.smb_buf_length += byte_count; - pSMB->t2.ByteCount = cpu_to_le16(byte_count); - - rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, - (struct smb_hdr *) pSMBr, &bytes_returned, 0); - if (rc) { - cFYI(1, ("error %d in GetExtAttr", rc)); - } else { - /* decode response */ - rc = validate_t2((struct smb_t2_rsp *)pSMBr); - if (rc || (pSMBr->ByteCount < 2)) - /* BB also check enough total bytes returned */ - /* If rc should we check for EOPNOSUPP and - disable the srvino flag? or in caller? */ - rc = -EIO; /* bad smb */ - else { - __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); - __u16 count = le16_to_cpu(pSMBr->t2.DataCount); - struct file_chattr_info * pfinfo; - /* BB Do we need a cast or hash here ? */ - if(count != 16) { - cFYI(1, ("Illegal size ret in GetExtAttr")); - rc = -EIO; - goto GetExtAttrOut; - } - pfinfo = (struct file_chattr_info *) - (data_offset + (char *) &pSMBr->hdr.Protocol); - *pExtAttrBits = le64_to_cpu(pfinfo->mode); + pSMB->hdr.smb_buf_length += byte_count; + pSMB->t2.ByteCount = cpu_to_le16(byte_count); + + rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, + (struct smb_hdr *) pSMBr, &bytes_returned, 0); + if (rc) { + cFYI(1, ("error %d in GetExtAttr", rc)); + } else { + /* decode response */ + rc = validate_t2((struct smb_t2_rsp *)pSMBr); + if (rc || (pSMBr->ByteCount < 2)) + /* BB also check enough total bytes returned */ + /* If rc should we check for EOPNOSUPP and + disable the srvino flag? or in caller? */ + rc = -EIO; /* bad smb */ + else { + __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); + __u16 count = le16_to_cpu(pSMBr->t2.DataCount); + struct file_chattr_info *pfinfo; + /* BB Do we need a cast or hash here ? */ + if (count != 16) { + cFYI(1, ("Illegal size ret in GetExtAttr")); + rc = -EIO; + goto GetExtAttrOut; + } + pfinfo = (struct file_chattr_info *) + (data_offset + (char *) &pSMBr->hdr.Protocol); + *pExtAttrBits = le64_to_cpu(pfinfo->mode); *pMask = le64_to_cpu(pfinfo->mask); - } - } + } + } GetExtAttrOut: - cifs_buf_release(pSMB); - if (rc == -EAGAIN) - goto GetExtAttrRetry; - return rc; + cifs_buf_release(pSMB); + if (rc == -EAGAIN) + goto GetExtAttrRetry; + return rc; } - #endif /* CONFIG_POSIX */ @@ -2955,7 +3051,7 @@ static const struct cifs_sid sid_user = {1, 2 , {0, 0, 0, 0, 0, 5}, {32, 545, 0, 0}}; /* Convert CIFS ACL to POSIX form */ -static int parse_sec_desc(struct cifs_sid * psec_desc, int acl_len) +static int parse_sec_desc(struct cifs_sid *psec_desc, int acl_len) { return 0; } @@ -2963,7 +3059,7 @@ static int parse_sec_desc(struct cifs_sid * psec_desc, int acl_len) /* Get Security Descriptor (by handle) from remote server for a file or dir */ int CIFSSMBGetCIFSACL(const int xid, struct cifsTconInfo *tcon, __u16 fid, - /* BB fix up return info */ char *acl_inf, const int buflen, + /* BB fix up return info */ char *acl_inf, const int buflen, const int acl_type /* ACCESS/DEFAULT not sure implication */) { int rc = 0; @@ -2973,7 +3069,7 @@ CIFSSMBGetCIFSACL(const int xid, struct cifsTconInfo *tcon, __u16 fid, cFYI(1, ("GetCifsACL")); - rc = smb_init_ntransact(NT_TRANSACT_QUERY_SECURITY_DESC, 0, + rc = smb_init_ntransact(NT_TRANSACT_QUERY_SECURITY_DESC, 0, 8 /* parm len */, tcon, (void **) &pSMB); if (rc) return rc; @@ -2994,23 +3090,23 @@ CIFSSMBGetCIFSACL(const int xid, struct cifsTconInfo *tcon, __u16 fid, if (rc) { cFYI(1, ("Send error in QuerySecDesc = %d", rc)); } else { /* decode response */ - struct cifs_sid * psec_desc; + struct cifs_sid *psec_desc; __le32 * parm; int parm_len; int data_len; int acl_len; - struct smb_com_ntransact_rsp * pSMBr; + struct smb_com_ntransact_rsp *pSMBr; /* validate_nttransact */ - rc = validate_ntransact(iov[0].iov_base, (char **)&parm, + rc = validate_ntransact(iov[0].iov_base, (char **)&parm, (char **)&psec_desc, &parm_len, &data_len); - - if(rc) + if (rc) goto qsec_out; pSMBr = (struct smb_com_ntransact_rsp *)iov[0].iov_base; - cERROR(1,("smb %p parm %p data %p",pSMBr,parm,psec_desc)); /* BB removeme BB */ + cERROR(1, ("smb %p parm %p data %p", + pSMBr, parm, psec_desc)); /* BB removeme BB */ if (le32_to_cpu(pSMBr->ParameterCount) != 4) { rc = -EIO; /* bad smb */ @@ -3020,14 +3116,14 @@ CIFSSMBGetCIFSACL(const int xid, struct cifsTconInfo *tcon, __u16 fid, /* BB check that data area is minimum length and as big as acl_len */ acl_len = le32_to_cpu(*(__le32 *)parm); - /* BB check if(acl_len > bufsize) */ + /* BB check if (acl_len > bufsize) */ parse_sec_desc(psec_desc, acl_len); } qsec_out: - if(buf_type == CIFS_SMALL_BUFFER) + if (buf_type == CIFS_SMALL_BUFFER) cifs_small_buf_release(iov[0].iov_base); - else if(buf_type == CIFS_LARGE_BUFFER) + else if (buf_type == CIFS_LARGE_BUFFER) cifs_buf_release(iov[0].iov_base); /* cifs_small_buf_release(pSMB); */ /* Freed earlier now in SendReceive2 */ return rc; @@ -3036,9 +3132,9 @@ qsec_out: /* Legacy Query Path Information call for lookup to old servers such as Win9x/WinME */ int SMBQueryInformation(const int xid, struct cifsTconInfo *tcon, - const unsigned char *searchName, - FILE_ALL_INFO * pFinfo, - const struct nls_table *nls_codepage, int remap) + const unsigned char *searchName, + FILE_ALL_INFO *pFinfo, + const struct nls_table *nls_codepage, int remap) { QUERY_INFORMATION_REQ * pSMB; QUERY_INFORMATION_RSP * pSMBr; @@ -3046,31 +3142,31 @@ int SMBQueryInformation(const int xid, struct cifsTconInfo *tcon, int bytes_returned; int name_len; - cFYI(1, ("In SMBQPath path %s", searchName)); + cFYI(1, ("In SMBQPath path %s", searchName)); QInfRetry: rc = smb_init(SMB_COM_QUERY_INFORMATION, 0, tcon, (void **) &pSMB, - (void **) &pSMBr); + (void **) &pSMBr); if (rc) return rc; if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = - cifsConvertToUCS((__le16 *) pSMB->FileName, searchName, - PATH_MAX, nls_codepage, remap); + cifsConvertToUCS((__le16 *) pSMB->FileName, searchName, + PATH_MAX, nls_codepage, remap); name_len++; /* trailing null */ name_len *= 2; - } else { + } else { name_len = strnlen(searchName, PATH_MAX); name_len++; /* trailing null */ strncpy(pSMB->FileName, searchName, name_len); } pSMB->BufferFormat = 0x04; - name_len++; /* account for buffer type byte */ + name_len++; /* account for buffer type byte */ pSMB->hdr.smb_buf_length += (__u16) name_len; pSMB->ByteCount = cpu_to_le16(name_len); rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, - (struct smb_hdr *) pSMBr, &bytes_returned, 0); + (struct smb_hdr *) pSMBr, &bytes_returned, 0); if (rc) { cFYI(1, ("Send error in QueryInfo = %d", rc)); } else if (pFinfo) { /* decode response */ @@ -3127,17 +3223,17 @@ QPathInfoRetry: if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = - cifsConvertToUCS((__le16 *) pSMB->FileName, searchName, + cifsConvertToUCS((__le16 *) pSMB->FileName, searchName, PATH_MAX, nls_codepage, remap); name_len++; /* trailing null */ name_len *= 2; - } else { /* BB improve the check for buffer overruns BB */ + } else { /* BB improve the check for buffer overruns BB */ name_len = strnlen(searchName, PATH_MAX); name_len++; /* trailing null */ strncpy(pSMB->FileName, searchName, name_len); } - params = 2 /* level */ + 4 /* reserved */ + name_len /* includes NUL */ ; + params = 2 /* level */ + 4 /* reserved */ + name_len /* includes NUL */; pSMB->TotalDataCount = 0; pSMB->MaxParameterCount = cpu_to_le16(2); pSMB->MaxDataCount = cpu_to_le16(4000); /* BB find exact max SMB PDU from sess structure BB */ @@ -3147,7 +3243,7 @@ QPathInfoRetry: pSMB->Timeout = 0; pSMB->Reserved2 = 0; pSMB->ParameterOffset = cpu_to_le16(offsetof( - struct smb_com_transaction2_qpi_req ,InformationLevel) - 4); + struct smb_com_transaction2_qpi_req, InformationLevel) - 4); pSMB->DataCount = 0; pSMB->DataOffset = 0; pSMB->SetupCount = 1; @@ -3156,7 +3252,7 @@ QPathInfoRetry: byte_count = params + 1 /* pad */ ; pSMB->TotalParameterCount = cpu_to_le16(params); pSMB->ParameterCount = pSMB->TotalParameterCount; - if(legacy) + if (legacy) pSMB->InformationLevel = cpu_to_le16(SMB_INFO_STANDARD); else pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_ALL_INFO); @@ -3173,16 +3269,18 @@ QPathInfoRetry: if (rc) /* BB add auto retry on EOPNOTSUPP? */ rc = -EIO; - else if (!legacy && (pSMBr->ByteCount < 40)) + else if (!legacy && (pSMBr->ByteCount < 40)) rc = -EIO; /* bad smb */ - else if(legacy && (pSMBr->ByteCount < 24)) - rc = -EIO; /* 24 or 26 expected but we do not read last field */ - else if (pFindData){ + else if (legacy && (pSMBr->ByteCount < 24)) + rc = -EIO; /* 24 or 26 expected but we do not read + last field */ + else if (pFindData) { int size; __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); - if(legacy) /* we do not read the last field, EAsize, fortunately - since it varies by subdialect and on Set vs. Get, is - two bytes or 4 bytes depending but we don't care here */ + if (legacy) /* we do not read the last field, EAsize, + fortunately since it varies by subdialect + and on Set vs. Get, is two bytes or 4 + bytes depending but we don't care here */ size = sizeof(FILE_INFO_STANDARD); else size = sizeof(FILE_ALL_INFO); @@ -3226,24 +3324,24 @@ UnixQPathInfoRetry: PATH_MAX, nls_codepage, remap); name_len++; /* trailing null */ name_len *= 2; - } else { /* BB improve the check for buffer overruns BB */ + } else { /* BB improve the check for buffer overruns BB */ name_len = strnlen(searchName, PATH_MAX); name_len++; /* trailing null */ strncpy(pSMB->FileName, searchName, name_len); } - params = 2 /* level */ + 4 /* reserved */ + name_len /* includes NUL */ ; + params = 2 /* level */ + 4 /* reserved */ + name_len /* includes NUL */; pSMB->TotalDataCount = 0; pSMB->MaxParameterCount = cpu_to_le16(2); /* BB find exact max SMB PDU from sess structure BB */ - pSMB->MaxDataCount = cpu_to_le16(4000); + pSMB->MaxDataCount = cpu_to_le16(4000); pSMB->MaxSetupCount = 0; pSMB->Reserved = 0; pSMB->Flags = 0; pSMB->Timeout = 0; pSMB->Reserved2 = 0; pSMB->ParameterOffset = cpu_to_le16(offsetof( - struct smb_com_transaction2_qpi_req ,InformationLevel) - 4); + struct smb_com_transaction2_qpi_req, InformationLevel) - 4); pSMB->DataCount = 0; pSMB->DataOffset = 0; pSMB->SetupCount = 1; @@ -3303,12 +3401,11 @@ findUniqueRetry: if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = - cifsConvertToUCS((__le16 *) pSMB->FileName, searchName, PATH_MAX - /* find define for this maxpathcomponent */ - , nls_codepage); + cifsConvertToUCS((__le16 *) pSMB->FileName, searchName, + PATH_MAX, nls_codepage); name_len++; /* trailing null */ name_len *= 2; - } else { /* BB improve the check for buffer overruns BB */ + } else { /* BB improve the check for buffer overruns BB */ name_len = strnlen(searchName, PATH_MAX); name_len++; /* trailing null */ strncpy(pSMB->FileName, searchName, name_len); @@ -3324,7 +3421,7 @@ findUniqueRetry: pSMB->Timeout = 0; pSMB->Reserved2 = 0; pSMB->ParameterOffset = cpu_to_le16( - offsetof(struct smb_com_transaction2_ffirst_req,InformationLevel) - 4); + offsetof(struct smb_com_transaction2_ffirst_req, InformationLevel)-4); pSMB->DataCount = 0; pSMB->DataOffset = 0; pSMB->SetupCount = 1; /* one byte, no need to le convert */ @@ -3364,10 +3461,10 @@ findUniqueRetry: /* xid, tcon, searchName and codepage are input parms, rest are returned */ int CIFSFindFirst(const int xid, struct cifsTconInfo *tcon, - const char *searchName, + const char *searchName, const struct nls_table *nls_codepage, - __u16 * pnetfid, - struct cifs_search_info * psrch_inf, int remap, const char dirsep) + __u16 *pnetfid, + struct cifs_search_info *psrch_inf, int remap, const char dirsep) { /* level 257 SMB_ */ TRANSACTION2_FFIRST_REQ *pSMB = NULL; @@ -3378,7 +3475,7 @@ CIFSFindFirst(const int xid, struct cifsTconInfo *tcon, int name_len; __u16 params, byte_count; - cFYI(1, ("In FindFirst for %s",searchName)); + cFYI(1, ("In FindFirst for %s", searchName)); findFirstRetry: rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, @@ -3388,7 +3485,7 @@ findFirstRetry: if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = - cifsConvertToUCS((__le16 *) pSMB->FileName,searchName, + cifsConvertToUCS((__le16 *) pSMB->FileName, searchName, PATH_MAX, nls_codepage, remap); /* We can not add the asterik earlier in case it got remapped to 0xF03A as if it were part of the @@ -3405,7 +3502,7 @@ findFirstRetry: } else { /* BB add check for overrun of SMB buf BB */ name_len = strnlen(searchName, PATH_MAX); /* BB fix here and in unicode clause above ie - if(name_len > buffersize-header) + if (name_len > buffersize-header) free buffer exit; BB */ strncpy(pSMB->FileName, searchName, name_len); pSMB->FileName[name_len] = dirsep; @@ -3438,8 +3535,8 @@ findFirstRetry: pSMB->SearchAttributes = cpu_to_le16(ATTR_READONLY | ATTR_HIDDEN | ATTR_SYSTEM | ATTR_DIRECTORY); - pSMB->SearchCount= cpu_to_le16(CIFSMaxBufSize/sizeof(FILE_UNIX_INFO)); - pSMB->SearchFlags = cpu_to_le16(CIFS_SEARCH_CLOSE_AT_END | + pSMB->SearchCount = cpu_to_le16(CIFSMaxBufSize/sizeof(FILE_UNIX_INFO)); + pSMB->SearchFlags = cpu_to_le16(CIFS_SEARCH_CLOSE_AT_END | CIFS_SEARCH_RETURN_RESUME); pSMB->InformationLevel = cpu_to_le16(psrch_inf->info_level); @@ -3466,7 +3563,7 @@ findFirstRetry: } else { /* decode response */ /* BB remember to free buffer if error BB */ rc = validate_t2((struct smb_t2_rsp *)pSMBr); - if(rc == 0) { + if (rc == 0) { if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE) psrch_inf->unicode = TRUE; else @@ -3474,18 +3571,19 @@ findFirstRetry: psrch_inf->ntwrk_buf_start = (char *)pSMBr; psrch_inf->smallBuf = 0; - psrch_inf->srch_entries_start = - (char *) &pSMBr->hdr.Protocol + + psrch_inf->srch_entries_start = + (char *) &pSMBr->hdr.Protocol + le16_to_cpu(pSMBr->t2.DataOffset); parms = (T2_FFIRST_RSP_PARMS *)((char *) &pSMBr->hdr.Protocol + le16_to_cpu(pSMBr->t2.ParameterOffset)); - if(parms->EndofSearch) + if (parms->EndofSearch) psrch_inf->endOfSearch = TRUE; else psrch_inf->endOfSearch = FALSE; - psrch_inf->entries_in_buffer = le16_to_cpu(parms->SearchCount); + psrch_inf->entries_in_buffer = + le16_to_cpu(parms->SearchCount); psrch_inf->index_of_last_entry = 2 /* skip . and .. */ + psrch_inf->entries_in_buffer; *pnetfid = parms->SearchHandle; @@ -3498,7 +3596,7 @@ findFirstRetry: } int CIFSFindNext(const int xid, struct cifsTconInfo *tcon, - __u16 searchHandle, struct cifs_search_info * psrch_inf) + __u16 searchHandle, struct cifs_search_info *psrch_inf) { TRANSACTION2_FNEXT_REQ *pSMB = NULL; TRANSACTION2_FNEXT_RSP *pSMBr = NULL; @@ -3510,7 +3608,7 @@ int CIFSFindNext(const int xid, struct cifsTconInfo *tcon, cFYI(1, ("In FindNext")); - if(psrch_inf->endOfSearch == TRUE) + if (psrch_inf->endOfSearch == TRUE) return -ENOENT; rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, @@ -3518,12 +3616,13 @@ int CIFSFindNext(const int xid, struct cifsTconInfo *tcon, if (rc) return rc; - params = 14; /* includes 2 bytes of null string, converted to LE below */ + params = 14; /* includes 2 bytes of null string, converted to LE below*/ byte_count = 0; pSMB->TotalDataCount = 0; /* no EAs */ pSMB->MaxParameterCount = cpu_to_le16(8); pSMB->MaxDataCount = - cpu_to_le16((tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE) & 0xFFFFFF00); + cpu_to_le16((tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE) & + 0xFFFFFF00); pSMB->MaxSetupCount = 0; pSMB->Reserved = 0; pSMB->Flags = 0; @@ -3539,15 +3638,6 @@ int CIFSFindNext(const int xid, struct cifsTconInfo *tcon, pSMB->SearchHandle = searchHandle; /* always kept as le */ pSMB->SearchCount = cpu_to_le16(CIFSMaxBufSize / sizeof (FILE_UNIX_INFO)); - /* test for Unix extensions */ -/* if (tcon->ses->capabilities & CAP_UNIX) { - pSMB->InformationLevel = cpu_to_le16(SMB_FIND_FILE_UNIX); - psrch_inf->info_level = SMB_FIND_FILE_UNIX; - } else { - pSMB->InformationLevel = - cpu_to_le16(SMB_FIND_FILE_DIRECTORY_INFO); - psrch_inf->info_level = SMB_FIND_FILE_DIRECTORY_INFO; - } */ pSMB->InformationLevel = cpu_to_le16(psrch_inf->info_level); pSMB->ResumeKey = psrch_inf->resume_key; pSMB->SearchFlags = @@ -3555,7 +3645,7 @@ int CIFSFindNext(const int xid, struct cifsTconInfo *tcon, name_len = psrch_inf->resume_name_len; params += name_len; - if(name_len < PATH_MAX) { + if (name_len < PATH_MAX) { memcpy(pSMB->ResumeFileName, psrch_inf->presume_name, name_len); byte_count += name_len; /* 14 byte parm len above enough for 2 byte null terminator */ @@ -3570,20 +3660,20 @@ int CIFSFindNext(const int xid, struct cifsTconInfo *tcon, pSMB->ParameterCount = pSMB->TotalParameterCount; pSMB->hdr.smb_buf_length += byte_count; pSMB->ByteCount = cpu_to_le16(byte_count); - + rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, (struct smb_hdr *) pSMBr, &bytes_returned, 0); cifs_stats_inc(&tcon->num_fnext); if (rc) { if (rc == -EBADF) { psrch_inf->endOfSearch = TRUE; - rc = 0; /* search probably was closed at end of search above */ + rc = 0; /* search probably was closed at end of search*/ } else cFYI(1, ("FindNext returned = %d", rc)); } else { /* decode response */ rc = validate_t2((struct smb_t2_rsp *)pSMBr); - - if(rc == 0) { + + if (rc == 0) { /* BB fixme add lock for file (srch_info) struct here */ if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE) psrch_inf->unicode = TRUE; @@ -3594,7 +3684,7 @@ int CIFSFindNext(const int xid, struct cifsTconInfo *tcon, parms = (T2_FNEXT_RSP_PARMS *)response_data; response_data = (char *)&pSMBr->hdr.Protocol + le16_to_cpu(pSMBr->t2.DataOffset); - if(psrch_inf->smallBuf) + if (psrch_inf->smallBuf) cifs_small_buf_release( psrch_inf->ntwrk_buf_start); else @@ -3602,15 +3692,16 @@ int CIFSFindNext(const int xid, struct cifsTconInfo *tcon, psrch_inf->srch_entries_start = response_data; psrch_inf->ntwrk_buf_start = (char *)pSMB; psrch_inf->smallBuf = 0; - if(parms->EndofSearch) + if (parms->EndofSearch) psrch_inf->endOfSearch = TRUE; else psrch_inf->endOfSearch = FALSE; - - psrch_inf->entries_in_buffer = le16_to_cpu(parms->SearchCount); + psrch_inf->entries_in_buffer = + le16_to_cpu(parms->SearchCount); psrch_inf->index_of_last_entry += psrch_inf->entries_in_buffer; -/* cFYI(1,("fnxt2 entries in buf %d index_of_last %d",psrch_inf->entries_in_buffer,psrch_inf->index_of_last_entry)); */ +/* cFYI(1,("fnxt2 entries in buf %d index_of_last %d", + psrch_inf->entries_in_buffer, psrch_inf->index_of_last_entry)); */ /* BB fixme add unlock here */ } @@ -3625,12 +3716,12 @@ int CIFSFindNext(const int xid, struct cifsTconInfo *tcon, FNext2_err_exit: if (rc != 0) cifs_buf_release(pSMB); - return rc; } int -CIFSFindClose(const int xid, struct cifsTconInfo *tcon, const __u16 searchHandle) +CIFSFindClose(const int xid, struct cifsTconInfo *tcon, + const __u16 searchHandle) { int rc = 0; FINDCLOSE_REQ *pSMB = NULL; @@ -3642,7 +3733,7 @@ CIFSFindClose(const int xid, struct cifsTconInfo *tcon, const __u16 searchHandle /* no sense returning error if session restarted as file handle has been closed */ - if(rc == -EAGAIN) + if (rc == -EAGAIN) return 0; if (rc) return rc; @@ -3667,9 +3758,9 @@ CIFSFindClose(const int xid, struct cifsTconInfo *tcon, const __u16 searchHandle int CIFSGetSrvInodeNumber(const int xid, struct cifsTconInfo *tcon, - const unsigned char *searchName, - __u64 * inode_number, - const struct nls_table *nls_codepage, int remap) + const unsigned char *searchName, + __u64 * inode_number, + const struct nls_table *nls_codepage, int remap) { int rc = 0; TRANSACTION2_QPI_REQ *pSMB = NULL; @@ -3677,24 +3768,23 @@ CIFSGetSrvInodeNumber(const int xid, struct cifsTconInfo *tcon, int name_len, bytes_returned; __u16 params, byte_count; - cFYI(1,("In GetSrvInodeNum for %s",searchName)); - if(tcon == NULL) - return -ENODEV; + cFYI(1, ("In GetSrvInodeNum for %s", searchName)); + if (tcon == NULL) + return -ENODEV; GetInodeNumberRetry: rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, - (void **) &pSMBr); + (void **) &pSMBr); if (rc) return rc; - if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = cifsConvertToUCS((__le16 *) pSMB->FileName, searchName, - PATH_MAX,nls_codepage, remap); + PATH_MAX, nls_codepage, remap); name_len++; /* trailing null */ name_len *= 2; - } else { /* BB improve the check for buffer overruns BB */ + } else { /* BB improve the check for buffer overruns BB */ name_len = strnlen(searchName, PATH_MAX); name_len++; /* trailing null */ strncpy(pSMB->FileName, searchName, name_len); @@ -3711,7 +3801,7 @@ GetInodeNumberRetry: pSMB->Timeout = 0; pSMB->Reserved2 = 0; pSMB->ParameterOffset = cpu_to_le16(offsetof( - struct smb_com_transaction2_qpi_req ,InformationLevel) - 4); + struct smb_com_transaction2_qpi_req, InformationLevel) - 4); pSMB->DataCount = 0; pSMB->DataOffset = 0; pSMB->SetupCount = 1; @@ -3737,12 +3827,12 @@ GetInodeNumberRetry: /* If rc should we check for EOPNOSUPP and disable the srvino flag? or in caller? */ rc = -EIO; /* bad smb */ - else { + else { __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); __u16 count = le16_to_cpu(pSMBr->t2.DataCount); - struct file_internal_info * pfinfo; + struct file_internal_info *pfinfo; /* BB Do we need a cast or hash here ? */ - if(count < 8) { + if (count < 8) { cFYI(1, ("Illegal size ret in QryIntrnlInf")); rc = -EIO; goto GetInodeNumOut; @@ -3769,12 +3859,12 @@ CIFSGetDFSRefer(const int xid, struct cifsSesInfo *ses, /* TRANS2_GET_DFS_REFERRAL */ TRANSACTION2_GET_DFS_REFER_REQ *pSMB = NULL; TRANSACTION2_GET_DFS_REFER_RSP *pSMBr = NULL; - struct dfs_referral_level_3 * referrals = NULL; + struct dfs_referral_level_3 *referrals = NULL; int rc = 0; int bytes_returned; int name_len; unsigned int i; - char * temp; + char *temp; __u16 params, byte_count; *number_of_UNC_in_array = 0; *targetUNCs = NULL; @@ -3787,8 +3877,8 @@ getDFSRetry: (void **) &pSMBr); if (rc) return rc; - - /* server pointer checked in called function, + + /* server pointer checked in called function, but should never be null here anyway */ pSMB->hdr.Mid = GetNextMid(ses->server); pSMB->hdr.Tid = ses->ipc_tid; @@ -3807,19 +3897,19 @@ getDFSRetry: searchName, PATH_MAX, nls_codepage, remap); name_len++; /* trailing null */ name_len *= 2; - } else { /* BB improve the check for buffer overruns BB */ + } else { /* BB improve the check for buffer overruns BB */ name_len = strnlen(searchName, PATH_MAX); name_len++; /* trailing null */ strncpy(pSMB->RequestFileName, searchName, name_len); } - if(ses->server) { - if(ses->server->secMode & + if (ses->server) { + if (ses->server->secMode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) pSMB->hdr.Flags2 |= SMBFLG2_SECURITY_SIGNATURE; } - pSMB->hdr.Uid = ses->Suid; + pSMB->hdr.Uid = ses->Suid; params = 2 /* level */ + name_len /*includes null */ ; pSMB->TotalDataCount = 0; @@ -3833,7 +3923,7 @@ getDFSRetry: pSMB->Timeout = 0; pSMB->Reserved2 = 0; pSMB->ParameterOffset = cpu_to_le16(offsetof( - struct smb_com_transaction2_get_dfs_refer_req, MaxReferralLevel) - 4); + struct smb_com_transaction2_get_dfs_refer_req, MaxReferralLevel) - 4); pSMB->SetupCount = 1; pSMB->Reserved3 = 0; pSMB->SubCommand = cpu_to_le16(TRANS2_GET_DFS_REFERRAL); @@ -3852,74 +3942,87 @@ getDFSRetry: /* BB Add logic to parse referrals here */ rc = validate_t2((struct smb_t2_rsp *)pSMBr); - if (rc || (pSMBr->ByteCount < 17)) /* BB also check enough total bytes returned */ + /* BB Also check if enough total bytes returned? */ + if (rc || (pSMBr->ByteCount < 17)) rc = -EIO; /* bad smb */ else { - __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); + __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); __u16 data_count = le16_to_cpu(pSMBr->t2.DataCount); cFYI(1, - ("Decoding GetDFSRefer response. BCC: %d Offset %d", + ("Decoding GetDFSRefer response BCC: %d Offset %d", pSMBr->ByteCount, data_offset)); - referrals = - (struct dfs_referral_level_3 *) + referrals = + (struct dfs_referral_level_3 *) (8 /* sizeof start of data block */ + data_offset + - (char *) &pSMBr->hdr.Protocol); - cFYI(1,("num_referrals: %d dfs flags: 0x%x ... \nfor referral one refer size: 0x%x srv type: 0x%x refer flags: 0x%x ttl: 0x%x", - le16_to_cpu(pSMBr->NumberOfReferrals),le16_to_cpu(pSMBr->DFSFlags), le16_to_cpu(referrals->ReferralSize),le16_to_cpu(referrals->ServerType),le16_to_cpu(referrals->ReferralFlags),le16_to_cpu(referrals->TimeToLive))); + (char *) &pSMBr->hdr.Protocol); + cFYI(1, ("num_referrals: %d dfs flags: 0x%x ... \n" + "for referral one refer size: 0x%x srv " + "type: 0x%x refer flags: 0x%x ttl: 0x%x", + le16_to_cpu(pSMBr->NumberOfReferrals), + le16_to_cpu(pSMBr->DFSFlags), + le16_to_cpu(referrals->ReferralSize), + le16_to_cpu(referrals->ServerType), + le16_to_cpu(referrals->ReferralFlags), + le16_to_cpu(referrals->TimeToLive))); /* BB This field is actually two bytes in from start of data block so we could do safety check that DataBlock begins at address of pSMBr->NumberOfReferrals */ - *number_of_UNC_in_array = le16_to_cpu(pSMBr->NumberOfReferrals); + *number_of_UNC_in_array = + le16_to_cpu(pSMBr->NumberOfReferrals); /* BB Fix below so can return more than one referral */ - if(*number_of_UNC_in_array > 1) + if (*number_of_UNC_in_array > 1) *number_of_UNC_in_array = 1; /* get the length of the strings describing refs */ name_len = 0; - for(i=0;i<*number_of_UNC_in_array;i++) { + for (i = 0; i < *number_of_UNC_in_array; i++) { /* make sure that DfsPathOffset not past end */ - __u16 offset = le16_to_cpu(referrals->DfsPathOffset); + __u16 offset = + le16_to_cpu(referrals->DfsPathOffset); if (offset > data_count) { - /* if invalid referral, stop here and do + /* if invalid referral, stop here and do not try to copy any more */ *number_of_UNC_in_array = i; break; - } + } temp = ((char *)referrals) + offset; if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE) { - name_len += UniStrnlen((wchar_t *)temp,data_count); + name_len += UniStrnlen((wchar_t *)temp, + data_count); } else { - name_len += strnlen(temp,data_count); + name_len += strnlen(temp, data_count); } referrals++; - /* BB add check that referral pointer does not fall off end PDU */ - + /* BB add check that referral pointer does + not fall off end PDU */ } /* BB add check for name_len bigger than bcc */ - *targetUNCs = - kmalloc(name_len+1+ (*number_of_UNC_in_array),GFP_KERNEL); - if(*targetUNCs == NULL) { + *targetUNCs = + kmalloc(name_len+1+(*number_of_UNC_in_array), + GFP_KERNEL); + if (*targetUNCs == NULL) { rc = -ENOMEM; goto GetDFSRefExit; } /* copy the ref strings */ - referrals = - (struct dfs_referral_level_3 *) - (8 /* sizeof data hdr */ + - data_offset + + referrals = (struct dfs_referral_level_3 *) + (8 /* sizeof data hdr */ + data_offset + (char *) &pSMBr->hdr.Protocol); - for(i=0;i<*number_of_UNC_in_array;i++) { - temp = ((char *)referrals) + le16_to_cpu(referrals->DfsPathOffset); + for (i = 0; i < *number_of_UNC_in_array; i++) { + temp = ((char *)referrals) + + le16_to_cpu(referrals->DfsPathOffset); if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE) { cifs_strfromUCS_le(*targetUNCs, - (__le16 *) temp, name_len, nls_codepage); + (__le16 *) temp, + name_len, + nls_codepage); } else { - strncpy(*targetUNCs,temp,name_len); + strncpy(*targetUNCs, temp, name_len); } /* BB update target_uncs pointers */ referrals++; @@ -3996,18 +4099,17 @@ oldQFSInfoRetry: rc = -EIO; /* bad smb */ else { __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); - cFYI(1,("qfsinf resp BCC: %d Offset %d", + cFYI(1, ("qfsinf resp BCC: %d Offset %d", pSMBr->ByteCount, data_offset)); - response_data = - (FILE_SYSTEM_ALLOC_INFO *) + response_data = (FILE_SYSTEM_ALLOC_INFO *) (((char *) &pSMBr->hdr.Protocol) + data_offset); FSData->f_bsize = le16_to_cpu(response_data->BytesPerSector) * le32_to_cpu(response_data-> SectorsPerAllocationUnit); FSData->f_blocks = - le32_to_cpu(response_data->TotalAllocationUnits); + le32_to_cpu(response_data->TotalAllocationUnits); FSData->f_bfree = FSData->f_bavail = le32_to_cpu(response_data->FreeAllocationUnits); cFYI(1, @@ -4056,7 +4158,7 @@ QFSInfoRetry: pSMB->TotalParameterCount = cpu_to_le16(params); pSMB->ParameterCount = pSMB->TotalParameterCount; pSMB->ParameterOffset = cpu_to_le16(offsetof( - struct smb_com_transaction2_qfsi_req, InformationLevel) - 4); + struct smb_com_transaction2_qfsi_req, InformationLevel) - 4); pSMB->DataCount = 0; pSMB->DataOffset = 0; pSMB->SetupCount = 1; @@ -4071,7 +4173,7 @@ QFSInfoRetry: if (rc) { cFYI(1, ("Send error in QFSInfo = %d", rc)); } else { /* decode response */ - rc = validate_t2((struct smb_t2_rsp *)pSMBr); + rc = validate_t2((struct smb_t2_rsp *)pSMBr); if (rc || (pSMBr->ByteCount < 24)) rc = -EIO; /* bad smb */ @@ -4136,7 +4238,7 @@ QFSAttributeRetry: pSMB->TotalParameterCount = cpu_to_le16(params); pSMB->ParameterCount = pSMB->TotalParameterCount; pSMB->ParameterOffset = cpu_to_le16(offsetof( - struct smb_com_transaction2_qfsi_req, InformationLevel) - 4); + struct smb_com_transaction2_qfsi_req, InformationLevel) - 4); pSMB->DataCount = 0; pSMB->DataOffset = 0; pSMB->SetupCount = 1; @@ -4153,7 +4255,8 @@ QFSAttributeRetry: } else { /* decode response */ rc = validate_t2((struct smb_t2_rsp *)pSMBr); - if (rc || (pSMBr->ByteCount < 13)) { /* BB also check enough bytes returned */ + if (rc || (pSMBr->ByteCount < 13)) { + /* BB also check if enough bytes returned */ rc = -EIO; /* bad smb */ } else { __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); @@ -4204,7 +4307,7 @@ QFSDeviceRetry: pSMB->TotalParameterCount = cpu_to_le16(params); pSMB->ParameterCount = pSMB->TotalParameterCount; pSMB->ParameterOffset = cpu_to_le16(offsetof( - struct smb_com_transaction2_qfsi_req, InformationLevel) - 4); + struct smb_com_transaction2_qfsi_req, InformationLevel) - 4); pSMB->DataCount = 0; pSMB->DataOffset = 0; @@ -4274,8 +4377,8 @@ QFSUnixRetry: byte_count = params + 1 /* pad */ ; pSMB->ParameterCount = cpu_to_le16(params); pSMB->TotalParameterCount = pSMB->ParameterCount; - pSMB->ParameterOffset = cpu_to_le16(offsetof(struct - smb_com_transaction2_qfsi_req, InformationLevel) - 4); + pSMB->ParameterOffset = cpu_to_le16(offsetof(struct + smb_com_transaction2_qfsi_req, InformationLevel) - 4); pSMB->SetupCount = 1; pSMB->Reserved3 = 0; pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION); @@ -4335,7 +4438,8 @@ SETFSUnixRetry: pSMB->Flags = 0; pSMB->Timeout = 0; pSMB->Reserved2 = 0; - param_offset = offsetof(struct smb_com_transaction2_setfsi_req, FileNum) - 4; + param_offset = offsetof(struct smb_com_transaction2_setfsi_req, FileNum) + - 4; offset = param_offset + params; pSMB->MaxParameterCount = cpu_to_le16(4); @@ -4417,8 +4521,8 @@ QFSPosixRetry: byte_count = params + 1 /* pad */ ; pSMB->ParameterCount = cpu_to_le16(params); pSMB->TotalParameterCount = pSMB->ParameterCount; - pSMB->ParameterOffset = cpu_to_le16(offsetof(struct - smb_com_transaction2_qfsi_req, InformationLevel) - 4); + pSMB->ParameterOffset = cpu_to_le16(offsetof(struct + smb_com_transaction2_qfsi_req, InformationLevel) - 4); pSMB->SetupCount = 1; pSMB->Reserved3 = 0; pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION); @@ -4447,18 +4551,18 @@ QFSPosixRetry: le64_to_cpu(response_data->TotalBlocks); FSData->f_bfree = le64_to_cpu(response_data->BlocksAvail); - if(response_data->UserBlocksAvail == cpu_to_le64(-1)) { + if (response_data->UserBlocksAvail == cpu_to_le64(-1)) { FSData->f_bavail = FSData->f_bfree; } else { FSData->f_bavail = - le64_to_cpu(response_data->UserBlocksAvail); + le64_to_cpu(response_data->UserBlocksAvail); } - if(response_data->TotalFileNodes != cpu_to_le64(-1)) + if (response_data->TotalFileNodes != cpu_to_le64(-1)) FSData->f_files = - le64_to_cpu(response_data->TotalFileNodes); - if(response_data->FreeFileNodes != cpu_to_le64(-1)) + le64_to_cpu(response_data->TotalFileNodes); + if (response_data->FreeFileNodes != cpu_to_le64(-1)) FSData->f_ffree = - le64_to_cpu(response_data->FreeFileNodes); + le64_to_cpu(response_data->FreeFileNodes); } } cifs_buf_release(pSMB); @@ -4470,15 +4574,15 @@ QFSPosixRetry: } -/* We can not use write of zero bytes trick to - set file size due to need for large file support. Also note that - this SetPathInfo is preferred to SetFileInfo based method in next +/* We can not use write of zero bytes trick to + set file size due to need for large file support. Also note that + this SetPathInfo is preferred to SetFileInfo based method in next routine which is only needed to work around a sharing violation bug in Samba which this routine can run into */ int CIFSSMBSetEOF(const int xid, struct cifsTconInfo *tcon, const char *fileName, - __u64 size, int SetAllocation, + __u64 size, int SetAllocation, const struct nls_table *nls_codepage, int remap) { struct smb_com_transaction2_spi_req *pSMB = NULL; @@ -4517,22 +4621,22 @@ SetEOFRetry: pSMB->Timeout = 0; pSMB->Reserved2 = 0; param_offset = offsetof(struct smb_com_transaction2_spi_req, - InformationLevel) - 4; + InformationLevel) - 4; offset = param_offset + params; - if(SetAllocation) { - if (tcon->ses->capabilities & CAP_INFOLEVEL_PASSTHRU) - pSMB->InformationLevel = - cpu_to_le16(SMB_SET_FILE_ALLOCATION_INFO2); - else - pSMB->InformationLevel = - cpu_to_le16(SMB_SET_FILE_ALLOCATION_INFO); - } else /* Set File Size */ { + if (SetAllocation) { + if (tcon->ses->capabilities & CAP_INFOLEVEL_PASSTHRU) + pSMB->InformationLevel = + cpu_to_le16(SMB_SET_FILE_ALLOCATION_INFO2); + else + pSMB->InformationLevel = + cpu_to_le16(SMB_SET_FILE_ALLOCATION_INFO); + } else /* Set File Size */ { if (tcon->ses->capabilities & CAP_INFOLEVEL_PASSTHRU) pSMB->InformationLevel = - cpu_to_le16(SMB_SET_FILE_END_OF_FILE_INFO2); + cpu_to_le16(SMB_SET_FILE_END_OF_FILE_INFO2); else pSMB->InformationLevel = - cpu_to_le16(SMB_SET_FILE_END_OF_FILE_INFO); + cpu_to_le16(SMB_SET_FILE_END_OF_FILE_INFO); } parm_data = @@ -4567,8 +4671,8 @@ SetEOFRetry: } int -CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon, __u64 size, - __u16 fid, __u32 pid_of_opener, int SetAllocation) +CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon, __u64 size, + __u16 fid, __u32 pid_of_opener, int SetAllocation) { struct smb_com_transaction2_sfi_req *pSMB = NULL; struct smb_com_transaction2_sfi_rsp *pSMBr = NULL; @@ -4589,7 +4693,7 @@ CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon, __u64 size, pSMB->hdr.Pid = cpu_to_le16((__u16)pid_of_opener); pSMB->hdr.PidHigh = cpu_to_le16((__u16)(pid_of_opener >> 16)); - + params = 6; pSMB->MaxSetupCount = 0; pSMB->Reserved = 0; @@ -4599,7 +4703,7 @@ CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon, __u64 size, param_offset = offsetof(struct smb_com_transaction2_sfi_req, Fid) - 4; offset = param_offset + params; - data_offset = (char *) (&pSMB->hdr.Protocol) + offset; + data_offset = (char *) (&pSMB->hdr.Protocol) + offset; count = sizeof(struct file_end_of_file_info); pSMB->MaxParameterCount = cpu_to_le16(2); @@ -4614,25 +4718,25 @@ CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon, __u64 size, pSMB->TotalParameterCount = pSMB->ParameterCount; pSMB->ParameterOffset = cpu_to_le16(param_offset); parm_data = - (struct file_end_of_file_info *) (((char *) &pSMB->hdr.Protocol) + - offset); + (struct file_end_of_file_info *) (((char *) &pSMB->hdr.Protocol) + + offset); pSMB->DataOffset = cpu_to_le16(offset); parm_data->FileSize = cpu_to_le64(size); pSMB->Fid = fid; - if(SetAllocation) { + if (SetAllocation) { if (tcon->ses->capabilities & CAP_INFOLEVEL_PASSTHRU) pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_ALLOCATION_INFO2); else pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_ALLOCATION_INFO); - } else /* Set File Size */ { + } else /* Set File Size */ { if (tcon->ses->capabilities & CAP_INFOLEVEL_PASSTHRU) pSMB->InformationLevel = - cpu_to_le16(SMB_SET_FILE_END_OF_FILE_INFO2); + cpu_to_le16(SMB_SET_FILE_END_OF_FILE_INFO2); else pSMB->InformationLevel = - cpu_to_le16(SMB_SET_FILE_END_OF_FILE_INFO); + cpu_to_le16(SMB_SET_FILE_END_OF_FILE_INFO); } pSMB->Reserved4 = 0; pSMB->hdr.smb_buf_length += byte_count; @@ -4648,21 +4752,21 @@ CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon, __u64 size, if (pSMB) cifs_small_buf_release(pSMB); - /* Note: On -EAGAIN error only caller can retry on handle based calls + /* Note: On -EAGAIN error only caller can retry on handle based calls since file handle passed in no longer valid */ return rc; } -/* Some legacy servers such as NT4 require that the file times be set on +/* Some legacy servers such as NT4 require that the file times be set on an open handle, rather than by pathname - this is awkward due to potential access conflicts on the open, but it is unavoidable for these old servers since the only other choice is to go from 100 nanosecond DCE time and resort to the original setpathinfo level which takes the ancient DOS time format with 2 second granularity */ int -CIFSSMBSetFileTimes(const int xid, struct cifsTconInfo *tcon, const FILE_BASIC_INFO * data, - __u16 fid) +CIFSSMBSetFileTimes(const int xid, struct cifsTconInfo *tcon, + const FILE_BASIC_INFO *data, __u16 fid) { struct smb_com_transaction2_sfi_req *pSMB = NULL; struct smb_com_transaction2_sfi_rsp *pSMBr = NULL; @@ -4684,7 +4788,7 @@ CIFSSMBSetFileTimes(const int xid, struct cifsTconInfo *tcon, const FILE_BASIC_I use an existing handle (rather than opening one on the fly) */ /* pSMB->hdr.Pid = cpu_to_le16((__u16)pid_of_opener); pSMB->hdr.PidHigh = cpu_to_le16((__u16)(pid_of_opener >> 16));*/ - + params = 6; pSMB->MaxSetupCount = 0; pSMB->Reserved = 0; @@ -4694,7 +4798,7 @@ CIFSSMBSetFileTimes(const int xid, struct cifsTconInfo *tcon, const FILE_BASIC_I param_offset = offsetof(struct smb_com_transaction2_sfi_req, Fid) - 4; offset = param_offset + params; - data_offset = (char *) (&pSMB->hdr.Protocol) + offset; + data_offset = (char *) (&pSMB->hdr.Protocol) + offset; count = sizeof (FILE_BASIC_INFO); pSMB->MaxParameterCount = cpu_to_le16(2); @@ -4717,16 +4821,16 @@ CIFSSMBSetFileTimes(const int xid, struct cifsTconInfo *tcon, const FILE_BASIC_I pSMB->Reserved4 = 0; pSMB->hdr.smb_buf_length += byte_count; pSMB->ByteCount = cpu_to_le16(byte_count); - memcpy(data_offset,data,sizeof(FILE_BASIC_INFO)); + memcpy(data_offset, data, sizeof(FILE_BASIC_INFO)); rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, (struct smb_hdr *) pSMBr, &bytes_returned, 0); if (rc) { - cFYI(1,("Send error in Set Time (SetFileInfo) = %d",rc)); + cFYI(1, ("Send error in Set Time (SetFileInfo) = %d", rc)); } cifs_small_buf_release(pSMB); - /* Note: On -EAGAIN error only caller can retry on handle based calls + /* Note: On -EAGAIN error only caller can retry on handle based calls since file handle passed in no longer valid */ return rc; @@ -4735,7 +4839,7 @@ CIFSSMBSetFileTimes(const int xid, struct cifsTconInfo *tcon, const FILE_BASIC_I int CIFSSMBSetTimes(const int xid, struct cifsTconInfo *tcon, const char *fileName, - const FILE_BASIC_INFO * data, + const FILE_BASIC_INFO *data, const struct nls_table *nls_codepage, int remap) { TRANSACTION2_SPI_REQ *pSMB = NULL; @@ -4760,7 +4864,7 @@ SetTimesRetry: PATH_MAX, nls_codepage, remap); name_len++; /* trailing null */ name_len *= 2; - } else { /* BB improve the check for buffer overruns BB */ + } else { /* BB improve the check for buffer overruns BB */ name_len = strnlen(fileName, PATH_MAX); name_len++; /* trailing null */ strncpy(pSMB->FileName, fileName, name_len); @@ -4776,7 +4880,7 @@ SetTimesRetry: pSMB->Timeout = 0; pSMB->Reserved2 = 0; param_offset = offsetof(struct smb_com_transaction2_spi_req, - InformationLevel) - 4; + InformationLevel) - 4; offset = param_offset + params; data_offset = (char *) (&pSMB->hdr.Protocol) + offset; pSMB->ParameterOffset = cpu_to_le16(param_offset); @@ -4837,11 +4941,11 @@ SetAttrLgcyRetry: if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = - ConvertToUCS((__le16 *) pSMB->fileName, fileName, + ConvertToUCS((__le16 *) pSMB->fileName, fileName, PATH_MAX, nls_codepage); name_len++; /* trailing null */ name_len *= 2; - } else { /* BB improve the check for buffer overruns BB */ + } else { /* BB improve the check for buffer overruns BB */ name_len = strnlen(fileName, PATH_MAX); name_len++; /* trailing null */ strncpy(pSMB->fileName, fileName, name_len); @@ -4867,8 +4971,8 @@ SetAttrLgcyRetry: int CIFSSMBUnixSetPerms(const int xid, struct cifsTconInfo *tcon, - char *fileName, __u64 mode, __u64 uid, __u64 gid, - dev_t device, const struct nls_table *nls_codepage, + char *fileName, __u64 mode, __u64 uid, __u64 gid, + dev_t device, const struct nls_table *nls_codepage, int remap) { TRANSACTION2_SPI_REQ *pSMB = NULL; @@ -4888,7 +4992,7 @@ setPermsRetry: if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = - cifsConvertToUCS((__le16 *) pSMB->FileName, fileName, + cifsConvertToUCS((__le16 *) pSMB->FileName, fileName, PATH_MAX, nls_codepage, remap); name_len++; /* trailing null */ name_len *= 2; @@ -4908,7 +5012,7 @@ setPermsRetry: pSMB->Timeout = 0; pSMB->Reserved2 = 0; param_offset = offsetof(struct smb_com_transaction2_spi_req, - InformationLevel) - 4; + InformationLevel) - 4; offset = param_offset + params; data_offset = (FILE_UNIX_BASIC_INFO *) ((char *) &pSMB->hdr.Protocol + @@ -4931,7 +5035,7 @@ setPermsRetry: older clients, but we should be precise - we use SetFileSize to set file size and do not want to truncate file size to zero accidently as happened on one Samba server beta by putting - zero instead of -1 here */ + zero instead of -1 here */ data_offset->EndOfFile = NO_CHANGE_64; data_offset->NumOfBytes = NO_CHANGE_64; data_offset->LastStatusChange = NO_CHANGE_64; @@ -4943,20 +5047,20 @@ setPermsRetry: data_offset->DevMajor = cpu_to_le64(MAJOR(device)); data_offset->DevMinor = cpu_to_le64(MINOR(device)); data_offset->Permissions = cpu_to_le64(mode); - - if(S_ISREG(mode)) + + if (S_ISREG(mode)) data_offset->Type = cpu_to_le32(UNIX_FILE); - else if(S_ISDIR(mode)) + else if (S_ISDIR(mode)) data_offset->Type = cpu_to_le32(UNIX_DIR); - else if(S_ISLNK(mode)) + else if (S_ISLNK(mode)) data_offset->Type = cpu_to_le32(UNIX_SYMLINK); - else if(S_ISCHR(mode)) + else if (S_ISCHR(mode)) data_offset->Type = cpu_to_le32(UNIX_CHARDEV); - else if(S_ISBLK(mode)) + else if (S_ISBLK(mode)) data_offset->Type = cpu_to_le32(UNIX_BLOCKDEV); - else if(S_ISFIFO(mode)) + else if (S_ISFIFO(mode)) data_offset->Type = cpu_to_le32(UNIX_FIFO); - else if(S_ISSOCK(mode)) + else if (S_ISSOCK(mode)) data_offset->Type = cpu_to_le32(UNIX_SOCKET); @@ -4974,20 +5078,20 @@ setPermsRetry: return rc; } -int CIFSSMBNotify(const int xid, struct cifsTconInfo *tcon, +int CIFSSMBNotify(const int xid, struct cifsTconInfo *tcon, const int notify_subdirs, const __u16 netfid, - __u32 filter, struct file * pfile, int multishot, + __u32 filter, struct file *pfile, int multishot, const struct nls_table *nls_codepage) { int rc = 0; - struct smb_com_transaction_change_notify_req * pSMB = NULL; - struct smb_com_ntransaction_change_notify_rsp * pSMBr = NULL; + struct smb_com_transaction_change_notify_req *pSMB = NULL; + struct smb_com_ntransaction_change_notify_rsp *pSMBr = NULL; struct dir_notify_req *dnotify_req; int bytes_returned; - cFYI(1, ("In CIFSSMBNotify for file handle %d",(int)netfid)); + cFYI(1, ("In CIFSSMBNotify for file handle %d", (int)netfid)); rc = smb_init(SMB_COM_NT_TRANSACT, 23, tcon, (void **) &pSMB, - (void **) &pSMBr); + (void **) &pSMBr); if (rc) return rc; @@ -5008,7 +5112,7 @@ int CIFSSMBNotify(const int xid, struct cifsTconInfo *tcon, pSMB->SetupCount = 4; /* single byte does not need le conversion */ pSMB->SubCommand = cpu_to_le16(NT_TRANSACT_NOTIFY_CHANGE); pSMB->ParameterCount = pSMB->TotalParameterCount; - if(notify_subdirs) + if (notify_subdirs) pSMB->WatchTree = 1; /* one byte - no le conversion needed */ pSMB->Reserved2 = 0; pSMB->CompletionFilter = cpu_to_le32(filter); @@ -5021,11 +5125,11 @@ int CIFSSMBNotify(const int xid, struct cifsTconInfo *tcon, cFYI(1, ("Error in Notify = %d", rc)); } else { /* Add file to outstanding requests */ - /* BB change to kmem cache alloc */ + /* BB change to kmem cache alloc */ dnotify_req = kmalloc( sizeof(struct dir_notify_req), GFP_KERNEL); - if(dnotify_req) { + if (dnotify_req) { dnotify_req->Pid = pSMB->hdr.Pid; dnotify_req->PidHigh = pSMB->hdr.PidHigh; dnotify_req->Mid = pSMB->hdr.Mid; @@ -5036,20 +5140,20 @@ int CIFSSMBNotify(const int xid, struct cifsTconInfo *tcon, dnotify_req->filter = filter; dnotify_req->multishot = multishot; spin_lock(&GlobalMid_Lock); - list_add_tail(&dnotify_req->lhead, + list_add_tail(&dnotify_req->lhead, &GlobalDnotifyReqList); spin_unlock(&GlobalMid_Lock); - } else + } else rc = -ENOMEM; } cifs_buf_release(pSMB); - return rc; + return rc; } #ifdef CONFIG_CIFS_XATTR ssize_t CIFSSMBQAllEAs(const int xid, struct cifsTconInfo *tcon, const unsigned char *searchName, - char * EAData, size_t buf_size, + char *EAData, size_t buf_size, const struct nls_table *nls_codepage, int remap) { /* BB assumes one setup word */ @@ -5058,8 +5162,8 @@ CIFSSMBQAllEAs(const int xid, struct cifsTconInfo *tcon, int rc = 0; int bytes_returned; int name_len; - struct fea * temp_fea; - char * temp_ptr; + struct fea *temp_fea; + char *temp_ptr; __u16 params, byte_count; cFYI(1, ("In Query All EAs path %s", searchName)); @@ -5071,7 +5175,7 @@ QAllEAsRetry: if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = - cifsConvertToUCS((__le16 *) pSMB->FileName, searchName, + cifsConvertToUCS((__le16 *) pSMB->FileName, searchName, PATH_MAX, nls_codepage, remap); name_len++; /* trailing null */ name_len *= 2; @@ -5081,7 +5185,7 @@ QAllEAsRetry: strncpy(pSMB->FileName, searchName, name_len); } - params = 2 /* level */ + 4 /* reserved */ + name_len /* includes NUL */ ; + params = 2 /* level */ + 4 /* reserved */ + name_len /* includes NUL */; pSMB->TotalDataCount = 0; pSMB->MaxParameterCount = cpu_to_le16(2); pSMB->MaxDataCount = cpu_to_le16(4000); /* BB find exact max SMB PDU from sess structure BB */ @@ -5091,7 +5195,7 @@ QAllEAsRetry: pSMB->Timeout = 0; pSMB->Reserved2 = 0; pSMB->ParameterOffset = cpu_to_le16(offsetof( - struct smb_com_transaction2_qpi_req ,InformationLevel) - 4); + struct smb_com_transaction2_qpi_req, InformationLevel) - 4); pSMB->DataCount = 0; pSMB->DataOffset = 0; pSMB->SetupCount = 1; @@ -5115,7 +5219,7 @@ QAllEAsRetry: /* BB also check enough total bytes returned */ /* BB we need to improve the validity checking of these trans2 responses */ - if (rc || (pSMBr->ByteCount < 4)) + if (rc || (pSMBr->ByteCount < 4)) rc = -EIO; /* bad smb */ /* else if (pFindData){ memcpy((char *) pFindData, @@ -5128,39 +5232,40 @@ QAllEAsRetry: /* check that each element of each entry does not go beyond end of list */ __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); - struct fealist * ea_response_data; + struct fealist *ea_response_data; rc = 0; /* validate_trans2_offsets() */ - /* BB to check if(start of smb + data_offset > &bcc+ bcc)*/ + /* BB check if start of smb + data_offset > &bcc+ bcc */ ea_response_data = (struct fealist *) (((char *) &pSMBr->hdr.Protocol) + data_offset); name_len = le32_to_cpu(ea_response_data->list_len); - cFYI(1,("ea length %d", name_len)); - if(name_len <= 8) { + cFYI(1, ("ea length %d", name_len)); + if (name_len <= 8) { /* returned EA size zeroed at top of function */ - cFYI(1,("empty EA list returned from server")); + cFYI(1, ("empty EA list returned from server")); } else { /* account for ea list len */ name_len -= 4; temp_fea = ea_response_data->list; temp_ptr = (char *)temp_fea; - while(name_len > 0) { + while (name_len > 0) { __u16 value_len; name_len -= 4; temp_ptr += 4; rc += temp_fea->name_len; /* account for prefix user. and trailing null */ - rc = rc + 5 + 1; - if(rc<(int)buf_size) { - memcpy(EAData,"user.",5); - EAData+=5; - memcpy(EAData,temp_ptr,temp_fea->name_len); - EAData+=temp_fea->name_len; + rc = rc + 5 + 1; + if (rc < (int)buf_size) { + memcpy(EAData, "user.", 5); + EAData += 5; + memcpy(EAData, temp_ptr, + temp_fea->name_len); + EAData += temp_fea->name_len; /* null terminate name */ *EAData = 0; EAData = EAData + 1; - } else if(buf_size == 0) { + } else if (buf_size == 0) { /* skip copy - calc size only */ } else { /* stop before overrun buffer */ @@ -5172,11 +5277,15 @@ QAllEAsRetry: /* account for trailing null */ name_len--; temp_ptr++; - value_len = le16_to_cpu(temp_fea->value_len); + value_len = + le16_to_cpu(temp_fea->value_len); name_len -= value_len; temp_ptr += value_len; - /* BB check that temp_ptr is still within smb BB*/ - /* no trailing null to account for in value len */ + /* BB check that temp_ptr is still + within the SMB BB*/ + + /* no trailing null to account for + in value len */ /* go on to next EA */ temp_fea = (struct fea *)temp_ptr; } @@ -5191,9 +5300,9 @@ QAllEAsRetry: return (ssize_t)rc; } -ssize_t CIFSSMBQueryEA(const int xid,struct cifsTconInfo * tcon, - const unsigned char * searchName,const unsigned char * ea_name, - unsigned char * ea_value, size_t buf_size, +ssize_t CIFSSMBQueryEA(const int xid, struct cifsTconInfo *tcon, + const unsigned char *searchName, const unsigned char *ea_name, + unsigned char *ea_value, size_t buf_size, const struct nls_table *nls_codepage, int remap) { TRANSACTION2_QPI_REQ *pSMB = NULL; @@ -5201,8 +5310,8 @@ ssize_t CIFSSMBQueryEA(const int xid,struct cifsTconInfo * tcon, int rc = 0; int bytes_returned; int name_len; - struct fea * temp_fea; - char * temp_ptr; + struct fea *temp_fea; + char *temp_ptr; __u16 params, byte_count; cFYI(1, ("In Query EA path %s", searchName)); @@ -5214,7 +5323,7 @@ QEARetry: if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = - cifsConvertToUCS((__le16 *) pSMB->FileName, searchName, + cifsConvertToUCS((__le16 *) pSMB->FileName, searchName, PATH_MAX, nls_codepage, remap); name_len++; /* trailing null */ name_len *= 2; @@ -5224,7 +5333,7 @@ QEARetry: strncpy(pSMB->FileName, searchName, name_len); } - params = 2 /* level */ + 4 /* reserved */ + name_len /* includes NUL */ ; + params = 2 /* level */ + 4 /* reserved */ + name_len /* includes NUL */; pSMB->TotalDataCount = 0; pSMB->MaxParameterCount = cpu_to_le16(2); pSMB->MaxDataCount = cpu_to_le16(4000); /* BB find exact max SMB PDU from sess structure BB */ @@ -5234,7 +5343,7 @@ QEARetry: pSMB->Timeout = 0; pSMB->Reserved2 = 0; pSMB->ParameterOffset = cpu_to_le16(offsetof( - struct smb_com_transaction2_qpi_req ,InformationLevel) - 4); + struct smb_com_transaction2_qpi_req, InformationLevel) - 4); pSMB->DataCount = 0; pSMB->DataOffset = 0; pSMB->SetupCount = 1; @@ -5258,7 +5367,7 @@ QEARetry: /* BB also check enough total bytes returned */ /* BB we need to improve the validity checking of these trans2 responses */ - if (rc || (pSMBr->ByteCount < 4)) + if (rc || (pSMBr->ByteCount < 4)) rc = -EIO; /* bad smb */ /* else if (pFindData){ memcpy((char *) pFindData, @@ -5271,18 +5380,18 @@ QEARetry: /* check that each element of each entry does not go beyond end of list */ __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); - struct fealist * ea_response_data; + struct fealist *ea_response_data; rc = -ENODATA; /* validate_trans2_offsets() */ - /* BB to check if(start of smb + data_offset > &bcc+ bcc)*/ + /* BB check if start of smb + data_offset > &bcc+ bcc*/ ea_response_data = (struct fealist *) (((char *) &pSMBr->hdr.Protocol) + data_offset); name_len = le32_to_cpu(ea_response_data->list_len); - cFYI(1,("ea length %d", name_len)); - if(name_len <= 8) { + cFYI(1, ("ea length %d", name_len)); + if (name_len <= 8) { /* returned EA size zeroed at top of function */ - cFYI(1,("empty EA list returned from server")); + cFYI(1, ("empty EA list returned from server")); } else { /* account for ea list len */ name_len -= 4; @@ -5290,28 +5399,30 @@ QEARetry: temp_ptr = (char *)temp_fea; /* loop through checking if we have a matching name and then return the associated value */ - while(name_len > 0) { + while (name_len > 0) { __u16 value_len; name_len -= 4; temp_ptr += 4; - value_len = le16_to_cpu(temp_fea->value_len); - /* BB validate that value_len falls within SMB, - even though maximum for name_len is 255 */ - if(memcmp(temp_fea->name,ea_name, + value_len = + le16_to_cpu(temp_fea->value_len); + /* BB validate that value_len falls within SMB, + even though maximum for name_len is 255 */ + if (memcmp(temp_fea->name, ea_name, temp_fea->name_len) == 0) { /* found a match */ rc = value_len; /* account for prefix user. and trailing null */ - if(rc<=(int)buf_size) { + if (rc <= (int)buf_size) { memcpy(ea_value, temp_fea->name+temp_fea->name_len+1, rc); - /* ea values, unlike ea names, - are not null terminated */ - } else if(buf_size == 0) { + /* ea values, unlike ea + names, are not null + terminated */ + } else if (buf_size == 0) { /* skip copy - calc size only */ } else { - /* stop before overrun buffer */ + /* stop before overrun buffer */ rc = -ERANGE; } break; @@ -5323,11 +5434,11 @@ QEARetry: temp_ptr++; name_len -= value_len; temp_ptr += value_len; - /* no trailing null to account for in value len */ - /* go on to next EA */ + /* No trailing null to account for in + value_len. Go on to next EA */ temp_fea = (struct fea *)temp_ptr; } - } + } } } if (pSMB) @@ -5340,9 +5451,9 @@ QEARetry: int CIFSSMBSetEA(const int xid, struct cifsTconInfo *tcon, const char *fileName, - const char * ea_name, const void * ea_value, - const __u16 ea_value_len, const struct nls_table *nls_codepage, - int remap) + const char *ea_name, const void *ea_value, + const __u16 ea_value_len, const struct nls_table *nls_codepage, + int remap) { struct smb_com_transaction2_spi_req *pSMB = NULL; struct smb_com_transaction2_spi_rsp *pSMBr = NULL; @@ -5361,11 +5472,11 @@ SetEARetry: if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = - cifsConvertToUCS((__le16 *) pSMB->FileName, fileName, + cifsConvertToUCS((__le16 *) pSMB->FileName, fileName, PATH_MAX, nls_codepage, remap); name_len++; /* trailing null */ name_len *= 2; - } else { /* BB improve the check for buffer overruns BB */ + } else { /* BB improve the check for buffer overruns BB */ name_len = strnlen(fileName, PATH_MAX); name_len++; /* trailing null */ strncpy(pSMB->FileName, fileName, name_len); @@ -5376,10 +5487,10 @@ SetEARetry: /* done calculating parms using name_len of file name, now use name_len to calculate length of ea name we are going to create in the inode xattrs */ - if(ea_name == NULL) + if (ea_name == NULL) name_len = 0; else - name_len = strnlen(ea_name,255); + name_len = strnlen(ea_name, 255); count = sizeof(*parm_data) + ea_value_len + name_len + 1; pSMB->MaxParameterCount = cpu_to_le16(2); @@ -5390,7 +5501,7 @@ SetEARetry: pSMB->Timeout = 0; pSMB->Reserved2 = 0; param_offset = offsetof(struct smb_com_transaction2_spi_req, - InformationLevel) - 4; + InformationLevel) - 4; offset = param_offset + params; pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_EA); @@ -5410,17 +5521,19 @@ SetEARetry: /* we checked above that name len is less than 255 */ parm_data->list[0].name_len = (__u8)name_len; /* EA names are always ASCII */ - if(ea_name) - strncpy(parm_data->list[0].name,ea_name,name_len); + if (ea_name) + strncpy(parm_data->list[0].name, ea_name, name_len); parm_data->list[0].name[name_len] = 0; parm_data->list[0].value_len = cpu_to_le16(ea_value_len); /* caller ensures that ea_value_len is less than 64K but we need to ensure that it fits within the smb */ - /*BB add length check that it would fit in negotiated SMB buffer size BB */ - /* if(ea_value_len > buffer_size - 512 (enough for header)) */ - if(ea_value_len) - memcpy(parm_data->list[0].name+name_len+1,ea_value,ea_value_len); + /*BB add length check to see if it would fit in + negotiated SMB buffer size BB */ + /* if (ea_value_len > buffer_size - 512 (enough for header)) */ + if (ea_value_len) + memcpy(parm_data->list[0].name+name_len+1, + ea_value, ea_value_len); pSMB->TotalDataCount = pSMB->DataCount; pSMB->ParameterCount = cpu_to_le16(params); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 0a1b8bd1dfcb..4af3588c1a96 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1,7 +1,7 @@ /* * fs/cifs/connect.c * - * Copyright (C) International Business Machines Corp., 2002,2006 + * Copyright (C) International Business Machines Corp., 2002,2007 * Author(s): Steve French (sfrench@us.ibm.com) * * This library is free software; you can redistribute it and/or modify @@ -16,7 +16,7 @@ * * You should have received a copy of the GNU Lesser General Public License * along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include <linux/fs.h> #include <linux/net.h> @@ -85,6 +85,7 @@ struct smb_vol { unsigned direct_io:1; unsigned remap:1; /* set to remap seven reserved chars in filenames */ unsigned posix_paths:1; /* unset to not ask for posix pathnames. */ + unsigned no_linux_ext:1; unsigned sfu_emul:1; unsigned nullauth:1; /* attempt to authenticate with null user */ unsigned nocase; /* request case insensitive filenames */ @@ -93,20 +94,20 @@ struct smb_vol { unsigned int wsize; unsigned int sockopt; unsigned short int port; - char * prepath; + char *prepath; }; -static int ipv4_connect(struct sockaddr_in *psin_server, +static int ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket, - char * netb_name, - char * server_netb_name); -static int ipv6_connect(struct sockaddr_in6 *psin_server, + char *netb_name, + char *server_netb_name); +static int ipv6_connect(struct sockaddr_in6 *psin_server, struct socket **csocket); - /* + /* * cifs tcp session reconnection - * + * * mark tcp session as reconnecting so temporarily locked * mark all smb sessions as reconnecting for tcp session * reconnect tcp session @@ -120,11 +121,11 @@ cifs_reconnect(struct TCP_Server_Info *server) struct list_head *tmp; struct cifsSesInfo *ses; struct cifsTconInfo *tcon; - struct mid_q_entry * mid_entry; - + struct mid_q_entry *mid_entry; + spin_lock(&GlobalMid_Lock); - if( kthread_should_stop() ) { - /* the demux thread will exit normally + if ( kthread_should_stop() ) { + /* the demux thread will exit normally next time through the loop */ spin_unlock(&GlobalMid_Lock); return rc; @@ -150,18 +151,19 @@ cifs_reconnect(struct TCP_Server_Info *server) } list_for_each(tmp, &GlobalTreeConnectionList) { tcon = list_entry(tmp, struct cifsTconInfo, cifsConnectionList); - if((tcon) && (tcon->ses) && (tcon->ses->server == server)) { + if ((tcon) && (tcon->ses) && (tcon->ses->server == server)) { tcon->tidStatus = CifsNeedReconnect; } } read_unlock(&GlobalSMBSeslock); /* do not want to be sending data on a socket we are freeing */ - down(&server->tcpSem); - if(server->ssocket) { - cFYI(1,("State: 0x%x Flags: 0x%lx", server->ssocket->state, + down(&server->tcpSem); + if (server->ssocket) { + cFYI(1, ("State: 0x%x Flags: 0x%lx", server->ssocket->state, server->ssocket->flags)); - server->ssocket->ops->shutdown(server->ssocket,SEND_SHUTDOWN); - cFYI(1,("Post shutdown state: 0x%x Flags: 0x%lx", server->ssocket->state, + server->ssocket->ops->shutdown(server->ssocket, SEND_SHUTDOWN); + cFYI(1, ("Post shutdown state: 0x%x Flags: 0x%lx", + server->ssocket->state, server->ssocket->flags)); sock_release(server->ssocket); server->ssocket = NULL; @@ -172,8 +174,8 @@ cifs_reconnect(struct TCP_Server_Info *server) mid_entry = list_entry(tmp, struct mid_q_entry, qhead); - if(mid_entry) { - if(mid_entry->midState == MID_REQUEST_SUBMITTED) { + if (mid_entry) { + if (mid_entry->midState == MID_REQUEST_SUBMITTED) { /* Mark other intransit requests as needing retry so we do not immediately mark the session bad again (ie after we reconnect @@ -183,29 +185,29 @@ cifs_reconnect(struct TCP_Server_Info *server) } } spin_unlock(&GlobalMid_Lock); - up(&server->tcpSem); + up(&server->tcpSem); - while ( (!kthread_should_stop()) && (server->tcpStatus != CifsGood)) - { + while ( (!kthread_should_stop()) && (server->tcpStatus != CifsGood)) { try_to_freeze(); - if(server->protocolType == IPV6) { - rc = ipv6_connect(&server->addr.sockAddr6,&server->ssocket); + if (server->protocolType == IPV6) { + rc = ipv6_connect(&server->addr.sockAddr6, + &server->ssocket); } else { - rc = ipv4_connect(&server->addr.sockAddr, + rc = ipv4_connect(&server->addr.sockAddr, &server->ssocket, server->workstation_RFC1001_name, server->server_RFC1001_name); } - if(rc) { - cFYI(1,("reconnect error %d",rc)); + if (rc) { + cFYI(1, ("reconnect error %d", rc)); msleep(3000); } else { atomic_inc(&tcpSesReconnectCount); spin_lock(&GlobalMid_Lock); - if( !kthread_should_stop() ) + if ( !kthread_should_stop() ) server->tcpStatus = CifsGood; server->sequence_number = 0; - spin_unlock(&GlobalMid_Lock); + spin_unlock(&GlobalMid_Lock); /* atomic_set(&server->inFlight,0);*/ wake_up(&server->response_q); } @@ -213,27 +215,27 @@ cifs_reconnect(struct TCP_Server_Info *server) return rc; } -/* +/* return codes: 0 not a transact2, or all data present >0 transact2 with that much data missing -EINVAL = invalid transact2 */ -static int check2ndT2(struct smb_hdr * pSMB, unsigned int maxBufSize) +static int check2ndT2(struct smb_hdr *pSMB, unsigned int maxBufSize) { - struct smb_t2_rsp * pSMBt; - int total_data_size; + struct smb_t2_rsp *pSMBt; + int total_data_size; int data_in_this_rsp; int remaining; - if(pSMB->Command != SMB_COM_TRANSACTION2) + if (pSMB->Command != SMB_COM_TRANSACTION2) return 0; - /* check for plausible wct, bcc and t2 data and parm sizes */ - /* check for parm and data offset going beyond end of smb */ - if(pSMB->WordCount != 10) { /* coalesce_t2 depends on this */ - cFYI(1,("invalid transact2 word count")); + /* check for plausible wct, bcc and t2 data and parm sizes */ + /* check for parm and data offset going beyond end of smb */ + if (pSMB->WordCount != 10) { /* coalesce_t2 depends on this */ + cFYI(1, ("invalid transact2 word count")); return -EINVAL; } @@ -244,25 +246,25 @@ static int check2ndT2(struct smb_hdr * pSMB, unsigned int maxBufSize) remaining = total_data_size - data_in_this_rsp; - if(remaining == 0) + if (remaining == 0) return 0; - else if(remaining < 0) { - cFYI(1,("total data %d smaller than data in frame %d", + else if (remaining < 0) { + cFYI(1, ("total data %d smaller than data in frame %d", total_data_size, data_in_this_rsp)); return -EINVAL; } else { - cFYI(1,("missing %d bytes from transact2, check next response", + cFYI(1, ("missing %d bytes from transact2, check next response", remaining)); - if(total_data_size > maxBufSize) { - cERROR(1,("TotalDataSize %d is over maximum buffer %d", - total_data_size,maxBufSize)); - return -EINVAL; + if (total_data_size > maxBufSize) { + cERROR(1, ("TotalDataSize %d is over maximum buffer %d", + total_data_size, maxBufSize)); + return -EINVAL; } return remaining; } } -static int coalesce_t2(struct smb_hdr * psecond, struct smb_hdr *pTargetSMB) +static int coalesce_t2(struct smb_hdr *psecond, struct smb_hdr *pTargetSMB) { struct smb_t2_rsp *pSMB2 = (struct smb_t2_rsp *)psecond; struct smb_t2_rsp *pSMBt = (struct smb_t2_rsp *)pTargetSMB; @@ -270,43 +272,43 @@ static int coalesce_t2(struct smb_hdr * psecond, struct smb_hdr *pTargetSMB) int total_in_buf; int remaining; int total_in_buf2; - char * data_area_of_target; - char * data_area_of_buf2; + char *data_area_of_target; + char *data_area_of_buf2; __u16 byte_count; total_data_size = le16_to_cpu(pSMBt->t2_rsp.TotalDataCount); - if(total_data_size != le16_to_cpu(pSMB2->t2_rsp.TotalDataCount)) { - cFYI(1,("total data sizes of primary and secondary t2 differ")); + if (total_data_size != le16_to_cpu(pSMB2->t2_rsp.TotalDataCount)) { + cFYI(1, ("total data size of primary and secondary t2 differ")); } total_in_buf = le16_to_cpu(pSMBt->t2_rsp.DataCount); remaining = total_data_size - total_in_buf; - - if(remaining < 0) + + if (remaining < 0) return -EINVAL; - if(remaining == 0) /* nothing to do, ignore */ + if (remaining == 0) /* nothing to do, ignore */ return 0; - + total_in_buf2 = le16_to_cpu(pSMB2->t2_rsp.DataCount); - if(remaining < total_in_buf2) { - cFYI(1,("transact2 2nd response contains too much data")); + if (remaining < total_in_buf2) { + cFYI(1, ("transact2 2nd response contains too much data")); } /* find end of first SMB data area */ - data_area_of_target = (char *)&pSMBt->hdr.Protocol + + data_area_of_target = (char *)&pSMBt->hdr.Protocol + le16_to_cpu(pSMBt->t2_rsp.DataOffset); /* validate target area */ data_area_of_buf2 = (char *) &pSMB2->hdr.Protocol + - le16_to_cpu(pSMB2->t2_rsp.DataOffset); + le16_to_cpu(pSMB2->t2_rsp.DataOffset); data_area_of_target += total_in_buf; /* copy second buffer into end of first buffer */ - memcpy(data_area_of_target,data_area_of_buf2,total_in_buf2); + memcpy(data_area_of_target, data_area_of_buf2, total_in_buf2); total_in_buf += total_in_buf2; pSMBt->t2_rsp.DataCount = cpu_to_le16(total_in_buf); byte_count = le16_to_cpu(BCC_LE(pTargetSMB)); @@ -317,11 +319,11 @@ static int coalesce_t2(struct smb_hdr * psecond, struct smb_hdr *pTargetSMB) byte_count += total_in_buf2; /* BB also add check that we are not beyond maximum buffer size */ - + pTargetSMB->smb_buf_length = byte_count; - if(remaining == total_in_buf2) { - cFYI(1,("found the last secondary response")); + if (remaining == total_in_buf2) { + cFYI(1, ("found the last secondary response")); return 0; /* we are done */ } else /* more responses to go */ return 1; @@ -348,16 +350,15 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server) int isMultiRsp; int reconnect; - allow_signal(SIGKILL); current->flags |= PF_MEMALLOC; server->tsk = current; /* save process info to wake at shutdown */ cFYI(1, ("Demultiplex PID: %d", current->pid)); - write_lock(&GlobalSMBSeslock); + write_lock(&GlobalSMBSeslock); atomic_inc(&tcpSesAllocCount); length = tcpSesAllocCount.counter; write_unlock(&GlobalSMBSeslock); complete(&cifsd_complete); - if(length > 1) { + if (length > 1) { mempool_resize(cifs_req_poolp, length + cifs_min_rcv, GFP_KERNEL); @@ -426,10 +427,10 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server) break; } if (!try_to_freeze() && (length == -EINTR)) { - cFYI(1,("cifsd thread killed")); + cFYI(1, ("cifsd thread killed")); break; } - cFYI(1,("Reconnect after unexpected peek error %d", + cFYI(1, ("Reconnect after unexpected peek error %d", length)); cifs_reconnect(server); csocket = server->ssocket; @@ -453,26 +454,26 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server) with the most common, zero, as regular data */ temp = *((char *) smb_buffer); - /* Note that FC 1001 length is big endian on the wire, + /* Note that FC 1001 length is big endian on the wire, but we convert it here so it is always manipulated as host byte order */ pdu_length = ntohl(smb_buffer->smb_buf_length); smb_buffer->smb_buf_length = pdu_length; - cFYI(1,("rfc1002 length 0x%x)", pdu_length+4)); + cFYI(1, ("rfc1002 length 0x%x", pdu_length+4)); if (temp == (char) RFC1002_SESSION_KEEP_ALIVE) { - continue; + continue; } else if (temp == (char)RFC1002_POSITIVE_SESSION_RESPONSE) { - cFYI(1,("Good RFC 1002 session rsp")); + cFYI(1, ("Good RFC 1002 session rsp")); continue; } else if (temp == (char)RFC1002_NEGATIVE_SESSION_RESPONSE) { - /* we get this from Windows 98 instead of + /* we get this from Windows 98 instead of an error on SMB negprot response */ - cFYI(1,("Negative RFC1002 Session Response Error 0x%x)", + cFYI(1, ("Negative RFC1002 Session Response Error 0x%x)", pdu_length)); - if(server->tcpStatus == CifsNew) { - /* if nack on negprot (rather than + if (server->tcpStatus == CifsNew) { + /* if nack on negprot (rather than ret of smb negprot error) reconnecting not going to help, ret error to mount */ break; @@ -482,10 +483,10 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server) msleep(1000); /* always try 445 first on reconnect since we get NACK on some if we ever - connected to port 139 (the NACK is + connected to port 139 (the NACK is since we do not begin with RFC1001 session initialize frame) */ - server->addr.sockAddr.sin_port = + server->addr.sockAddr.sin_port = htons(CIFS_PORT); cifs_reconnect(server); csocket = server->ssocket; @@ -493,7 +494,7 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server) continue; } } else if (temp != (char) 0) { - cERROR(1,("Unknown RFC 1002 frame")); + cERROR(1, ("Unknown RFC 1002 frame")); cifs_dump_mem(" Received Data: ", (char *)smb_buffer, length); cifs_reconnect(server); @@ -502,7 +503,7 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server) } /* else we have an SMB response */ - if((pdu_length > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) || + if ((pdu_length > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) || (pdu_length < sizeof (struct smb_hdr) - 1 - 4)) { cERROR(1, ("Invalid size SMB length %d pdu_length %d", length, pdu_length+4)); @@ -510,12 +511,12 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server) csocket = server->ssocket; wake_up(&server->response_q); continue; - } + } /* else length ok */ reconnect = 0; - if(pdu_length > MAX_CIFS_SMALL_BUFFER_SIZE - 4) { + if (pdu_length > MAX_CIFS_SMALL_BUFFER_SIZE - 4) { isLargeBuf = TRUE; memcpy(bigbuf, smallbuf, 4); smb_buffer = bigbuf; @@ -523,11 +524,11 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server) length = 0; iov.iov_base = 4 + (char *)smb_buffer; iov.iov_len = pdu_length; - for (total_read = 0; total_read < pdu_length; + for (total_read = 0; total_read < pdu_length; total_read += length) { length = kernel_recvmsg(csocket, &smb_msg, &iov, 1, pdu_length - total_read, 0); - if( kthread_should_stop() || + if ( kthread_should_stop() || (length == -EINTR)) { /* then will exit */ reconnect = 2; @@ -535,19 +536,19 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server) } else if (server->tcpStatus == CifsNeedReconnect) { cifs_reconnect(server); csocket = server->ssocket; - /* Reconnect wakes up rspns q */ + /* Reconnect wakes up rspns q */ /* Now we will reread sock */ reconnect = 1; break; - } else if ((length == -ERESTARTSYS) || + } else if ((length == -ERESTARTSYS) || (length == -EAGAIN)) { msleep(1); /* minimum sleep to prevent looping, - allowing socket to clear and app + allowing socket to clear and app threads to set tcpStatus CifsNeedReconnect if server hung*/ continue; } else if (length <= 0) { - cERROR(1,("Received no data, expecting %d", + cERROR(1, ("Received no data, expecting %d", pdu_length - total_read)); cifs_reconnect(server); csocket = server->ssocket; @@ -555,13 +556,13 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server) break; } } - if(reconnect == 2) + if (reconnect == 2) break; - else if(reconnect == 1) + else if (reconnect == 1) continue; length += 4; /* account for rfc1002 hdr */ - + dump_smb(smb_buffer, length); if (checkSMB(smb_buffer, smb_buffer->Mid, total_read+4)) { @@ -575,28 +576,28 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server) list_for_each(tmp, &server->pending_mid_q) { mid_entry = list_entry(tmp, struct mid_q_entry, qhead); - if ((mid_entry->mid == smb_buffer->Mid) && + if ((mid_entry->mid == smb_buffer->Mid) && (mid_entry->midState == MID_REQUEST_SUBMITTED) && (mid_entry->command == smb_buffer->Command)) { - if(check2ndT2(smb_buffer,server->maxBuf) > 0) { + if (check2ndT2(smb_buffer,server->maxBuf) > 0) { /* We have a multipart transact2 resp */ isMultiRsp = TRUE; - if(mid_entry->resp_buf) { + if (mid_entry->resp_buf) { /* merge response - fix up 1st*/ - if(coalesce_t2(smb_buffer, + if (coalesce_t2(smb_buffer, mid_entry->resp_buf)) { mid_entry->multiRsp = 1; break; } else { /* all parts received */ mid_entry->multiEnd = 1; - goto multi_t2_fnd; + goto multi_t2_fnd; } } else { - if(!isLargeBuf) { + if (!isLargeBuf) { cERROR(1,("1st trans2 resp needs bigbuf")); /* BB maybe we can fix this up, switch - to already allocated large buffer? */ + to already allocated large buffer? */ } else { /* Have first buffer */ mid_entry->resp_buf = @@ -606,9 +607,9 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server) } } break; - } + } mid_entry->resp_buf = smb_buffer; - if(isLargeBuf) + if (isLargeBuf) mid_entry->largeBuf = 1; else mid_entry->largeBuf = 0; @@ -628,24 +629,25 @@ multi_t2_fnd: spin_unlock(&GlobalMid_Lock); if (task_to_wake) { /* Was previous buf put in mpx struct for multi-rsp? */ - if(!isMultiRsp) { + if (!isMultiRsp) { /* smb buffer will be freed by user thread */ - if(isLargeBuf) { + if (isLargeBuf) { bigbuf = NULL; } else smallbuf = NULL; } wake_up_process(task_to_wake); } else if ((is_valid_oplock_break(smb_buffer, server) == FALSE) - && (isMultiRsp == FALSE)) { - cERROR(1, ("No task to wake, unknown frame rcvd! NumMids %d", midCount.counter)); - cifs_dump_mem("Received Data is: ",(char *)smb_buffer, + && (isMultiRsp == FALSE)) { + cERROR(1, ("No task to wake, unknown frame received! " + "NumMids %d", midCount.counter)); + cifs_dump_mem("Received Data is: ", (char *)smb_buffer, sizeof(struct smb_hdr)); #ifdef CONFIG_CIFS_DEBUG2 cifs_dump_detail(smb_buffer); cifs_dump_mids(server); #endif /* CIFS_DEBUG2 */ - + } } /* end while !EXITING */ @@ -655,12 +657,12 @@ multi_t2_fnd: /* check if we have blocked requests that need to free */ /* Note that cifs_max_pending is normally 50, but can be set at module install time to as little as two */ - if(atomic_read(&server->inFlight) >= cifs_max_pending) + if (atomic_read(&server->inFlight) >= cifs_max_pending) atomic_set(&server->inFlight, cifs_max_pending - 1); /* We do not want to set the max_pending too low or we could end up with the counter going negative */ spin_unlock(&GlobalMid_Lock); - /* Although there should not be any requests blocked on + /* Although there should not be any requests blocked on this queue it can not hurt to be paranoid and try to wake up requests that may haven been blocked when more than 50 at time were on the wire to the same server - they now will see the session is in exit state @@ -668,8 +670,8 @@ multi_t2_fnd: wake_up_all(&server->request_q); /* give those requests time to exit */ msleep(125); - - if(server->ssocket) { + + if (server->ssocket) { sock_release(csocket); server->ssocket = NULL; } @@ -709,10 +711,10 @@ multi_t2_fnd: list_for_each(tmp, &server->pending_mid_q) { mid_entry = list_entry(tmp, struct mid_q_entry, qhead); if (mid_entry->midState == MID_REQUEST_SUBMITTED) { - cFYI(1, - ("Clearing Mid 0x%x - waking up ",mid_entry->mid)); + cFYI(1, ("Clearing Mid 0x%x - waking up ", + mid_entry->mid)); task_to_wake = mid_entry->tsk; - if(task_to_wake) { + if (task_to_wake) { wake_up_process(task_to_wake); } } @@ -724,7 +726,7 @@ multi_t2_fnd: } if (!list_empty(&server->pending_mid_q)) { - /* mpx threads have not exited yet give them + /* mpx threads have not exited yet give them at least the smb send timeout time for long ops */ /* due to delays on oplock break requests, we need to wait at least 45 seconds before giving up @@ -742,7 +744,7 @@ multi_t2_fnd: /* last chance to mark ses pointers invalid if there are any pointing to this (e.g - if a crazy root user tried to kill cifsd + if a crazy root user tried to kill cifsd kernel thread explicitly this might happen) */ list_for_each(tmp, &GlobalSMBSessionList) { ses = list_entry(tmp, struct cifsSesInfo, @@ -754,17 +756,18 @@ multi_t2_fnd: write_unlock(&GlobalSMBSeslock); kfree(server); - if(length > 0) { + if (length > 0) { mempool_resize(cifs_req_poolp, length + cifs_min_rcv, GFP_KERNEL); } - + return 0; } static int -cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol) +cifs_parse_mount_options(char *options, const char *devname, + struct smb_vol *vol) { char *value; char *data; @@ -772,15 +775,15 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol) char separator[2]; separator[0] = ','; - separator[1] = 0; + separator[1] = 0; if (Local_System_Name[0] != 0) - memcpy(vol->source_rfc1001_name, Local_System_Name,15); + memcpy(vol->source_rfc1001_name, Local_System_Name, 15); else { char *nodename = utsname()->nodename; - int n = strnlen(nodename,15); - memset(vol->source_rfc1001_name,0x20,15); - for(i=0 ; i < n ; i++) { + int n = strnlen(nodename, 15); + memset(vol->source_rfc1001_name, 0x20, 15); + for (i = 0; i < n; i++) { /* does not have to be perfect mapping since field is informational, only used for servers that do not support port 445 and it can be overridden at mount time */ @@ -805,31 +808,32 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol) if (!options) return 1; - if(strncmp(options,"sep=",4) == 0) { - if(options[4] != 0) { + if (strncmp(options, "sep=", 4) == 0) { + if (options[4] != 0) { separator[0] = options[4]; options += 5; } else { - cFYI(1,("Null separator not allowed")); + cFYI(1, ("Null separator not allowed")); } } - + while ((data = strsep(&options, separator)) != NULL) { if (!*data) continue; if ((value = strchr(data, '=')) != NULL) *value++ = '\0'; - if (strnicmp(data, "user_xattr",10) == 0) {/*parse before user*/ + /* Have to parse this before we parse for "user" */ + if (strnicmp(data, "user_xattr", 10) == 0) { vol->no_xattr = 0; - } else if (strnicmp(data, "nouser_xattr",12) == 0) { + } else if (strnicmp(data, "nouser_xattr", 12) == 0) { vol->no_xattr = 1; } else if (strnicmp(data, "user", 4) == 0) { if (!value) { printk(KERN_WARNING "CIFS: invalid or missing username\n"); return 1; /* needs_arg; */ - } else if(!*value) { + } else if (!*value) { /* null user, ie anonymous, authentication */ vol->nullauth = 1; } @@ -843,12 +847,12 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol) if (!value) { vol->password = NULL; continue; - } else if(value[0] == 0) { + } else if (value[0] == 0) { /* check if string begins with double comma since that would mean the password really does start with a comma, and would not indicate an empty string */ - if(value[1] != separator[0]) { + if (value[1] != separator[0]) { vol->password = NULL; continue; } @@ -857,7 +861,7 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol) /* removed password length check, NTLM passwords can be arbitrarily long */ - /* if comma in password, the string will be + /* if comma in password, the string will be prematurely null terminated. Commas in password are specified across the cifs mount interface by a double comma ie ,, and a comma used as in other cases ie ',' @@ -867,18 +871,18 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol) /* NB: password legally can have multiple commas and the only illegal character in a password is null */ - if ((value[temp_len] == 0) && + if ((value[temp_len] == 0) && (value[temp_len+1] == separator[0])) { /* reinsert comma */ value[temp_len] = separator[0]; - temp_len+=2; /* move after the second comma */ - while(value[temp_len] != 0) { + temp_len += 2; /* move after second comma */ + while (value[temp_len] != 0) { if (value[temp_len] == separator[0]) { - if (value[temp_len+1] == + if (value[temp_len+1] == separator[0]) { /* skip second comma */ temp_len++; - } else { + } else { /* single comma indicating start of next parm */ break; @@ -886,24 +890,25 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol) } temp_len++; } - if(value[temp_len] == 0) { + if (value[temp_len] == 0) { options = NULL; } else { value[temp_len] = 0; /* point option to start of next parm */ options = value + temp_len + 1; } - /* go from value to value + temp_len condensing + /* go from value to value + temp_len condensing double commas to singles. Note that this ends up allocating a few bytes too many, which is ok */ vol->password = kzalloc(temp_len, GFP_KERNEL); - if(vol->password == NULL) { - printk("CIFS: no memory for pass\n"); + if (vol->password == NULL) { + printk(KERN_WARNING "CIFS: no memory " + "for password\n"); return 1; } - for(i=0,j=0;i<temp_len;i++,j++) { + for (i = 0, j = 0; i < temp_len; i++, j++) { vol->password[j] = value[i]; - if(value[i] == separator[0] + if (value[i] == separator[0] && value[i+1] == separator[0]) { /* skip second comma */ i++; @@ -912,8 +917,9 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol) vol->password[j] = 0; } else { vol->password = kzalloc(temp_len+1, GFP_KERNEL); - if(vol->password == NULL) { - printk("CIFS: no memory for pass\n"); + if (vol->password == NULL) { + printk(KERN_WARNING "CIFS: no memory " + "for password\n"); return 1; } strcpy(vol->password, value); @@ -924,20 +930,21 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol) } else if (strnlen(value, 35) < 35) { vol->UNCip = value; } else { - printk(KERN_WARNING "CIFS: ip address too long\n"); + printk(KERN_WARNING "CIFS: ip address " + "too long\n"); return 1; } - } else if (strnicmp(data, "sec", 3) == 0) { - if (!value || !*value) { - cERROR(1,("no security value specified")); - continue; - } else if (strnicmp(value, "krb5i", 5) == 0) { - vol->secFlg |= CIFSSEC_MAY_KRB5 | + } else if (strnicmp(data, "sec", 3) == 0) { + if (!value || !*value) { + cERROR(1, ("no security value specified")); + continue; + } else if (strnicmp(value, "krb5i", 5) == 0) { + vol->secFlg |= CIFSSEC_MAY_KRB5 | CIFSSEC_MUST_SIGN; } else if (strnicmp(value, "krb5p", 5) == 0) { - /* vol->secFlg |= CIFSSEC_MUST_SEAL | - CIFSSEC_MAY_KRB5; */ - cERROR(1,("Krb5 cifs privacy not supported")); + /* vol->secFlg |= CIFSSEC_MUST_SEAL | + CIFSSEC_MAY_KRB5; */ + cERROR(1, ("Krb5 cifs privacy not supported")); return 1; } else if (strnicmp(value, "krb5", 4) == 0) { vol->secFlg |= CIFSSEC_MAY_KRB5; @@ -957,33 +964,34 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol) vol->secFlg |= CIFSSEC_MAY_NTLMV2; #ifdef CONFIG_CIFS_WEAK_PW_HASH } else if (strnicmp(value, "lanman", 6) == 0) { - vol->secFlg |= CIFSSEC_MAY_LANMAN; + vol->secFlg |= CIFSSEC_MAY_LANMAN; #endif } else if (strnicmp(value, "none", 4) == 0) { vol->nullauth = 1; - } else { - cERROR(1,("bad security option: %s", value)); - return 1; - } + } else { + cERROR(1, ("bad security option: %s", value)); + return 1; + } } else if ((strnicmp(data, "unc", 3) == 0) || (strnicmp(data, "target", 6) == 0) || (strnicmp(data, "path", 4) == 0)) { if (!value || !*value) { - printk(KERN_WARNING - "CIFS: invalid path to network resource\n"); + printk(KERN_WARNING "CIFS: invalid path to " + "network resource\n"); return 1; /* needs_arg; */ } if ((temp_len = strnlen(value, 300)) < 300) { - vol->UNC = kmalloc(temp_len+1,GFP_KERNEL); + vol->UNC = kmalloc(temp_len+1, GFP_KERNEL); if (vol->UNC == NULL) return 1; - strcpy(vol->UNC,value); + strcpy(vol->UNC, value); if (strncmp(vol->UNC, "//", 2) == 0) { vol->UNC[0] = '\\'; vol->UNC[1] = '\\'; - } else if (strncmp(vol->UNC, "\\\\", 2) != 0) { + } else if (strncmp(vol->UNC, "\\\\", 2) != 0) { printk(KERN_WARNING - "CIFS: UNC Path does not begin with // or \\\\ \n"); + "CIFS: UNC Path does not begin " + "with // or \\\\ \n"); return 1; } } else { @@ -1002,43 +1010,47 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol) vol->domainname = value; cFYI(1, ("Domain name set")); } else { - printk(KERN_WARNING "CIFS: domain name too long\n"); + printk(KERN_WARNING "CIFS: domain name too " + "long\n"); return 1; } - } else if (strnicmp(data, "prefixpath", 10) == 0) { - if (!value || !*value) { - printk(KERN_WARNING - "CIFS: invalid path prefix\n"); - return 1; /* needs_arg; */ - } - if ((temp_len = strnlen(value, 1024)) < 1024) { + } else if (strnicmp(data, "prefixpath", 10) == 0) { + if (!value || !*value) { + printk(KERN_WARNING + "CIFS: invalid path prefix\n"); + return 1; /* needs_argument */ + } + if ((temp_len = strnlen(value, 1024)) < 1024) { if (value[0] != '/') temp_len++; /* missing leading slash */ - vol->prepath = kmalloc(temp_len+1,GFP_KERNEL); - if (vol->prepath == NULL) - return 1; + vol->prepath = kmalloc(temp_len+1, GFP_KERNEL); + if (vol->prepath == NULL) + return 1; if (value[0] != '/') { vol->prepath[0] = '/'; - strcpy(vol->prepath+1,value); + strcpy(vol->prepath+1, value); } else - strcpy(vol->prepath,value); - cFYI(1,("prefix path %s",vol->prepath)); - } else { - printk(KERN_WARNING "CIFS: prefix too long\n"); - return 1; - } + strcpy(vol->prepath, value); + cFYI(1, ("prefix path %s", vol->prepath)); + } else { + printk(KERN_WARNING "CIFS: prefix too long\n"); + return 1; + } } else if (strnicmp(data, "iocharset", 9) == 0) { if (!value || !*value) { - printk(KERN_WARNING "CIFS: invalid iocharset specified\n"); + printk(KERN_WARNING "CIFS: invalid iocharset " + "specified\n"); return 1; /* needs_arg; */ } if (strnlen(value, 65) < 65) { - if (strnicmp(value,"default",7)) + if (strnicmp(value, "default", 7)) vol->iocharset = value; - /* if iocharset not set load_nls_default used by caller */ - cFYI(1, ("iocharset set to %s",value)); + /* if iocharset not set then load_nls_default + is used by caller */ + cFYI(1, ("iocharset set to %s", value)); } else { - printk(KERN_WARNING "CIFS: iocharset name too long.\n"); + printk(KERN_WARNING "CIFS: iocharset name " + "too long.\n"); return 1; } } else if (strnicmp(data, "uid", 3) == 0) { @@ -1090,54 +1102,59 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol) } } else if (strnicmp(data, "netbiosname", 4) == 0) { if (!value || !*value || (*value == ' ')) { - cFYI(1,("invalid (empty) netbiosname specified")); + cFYI(1, ("invalid (empty) netbiosname")); } else { - memset(vol->source_rfc1001_name,0x20,15); - for(i=0;i<15;i++) { - /* BB are there cases in which a comma can be + memset(vol->source_rfc1001_name, 0x20, 15); + for (i = 0; i < 15; i++) { + /* BB are there cases in which a comma can be valid in this workstation netbios name (and need special handling)? */ /* We do not uppercase netbiosname for user */ - if (value[i]==0) + if (value[i] == 0) break; - else - vol->source_rfc1001_name[i] = value[i]; + else + vol->source_rfc1001_name[i] = + value[i]; } /* The string has 16th byte zero still from set at top of the function */ - if ((i==15) && (value[i] != 0)) - printk(KERN_WARNING "CIFS: netbiosname longer than 15 truncated.\n"); + if ((i == 15) && (value[i] != 0)) + printk(KERN_WARNING "CIFS: netbiosname" + " longer than 15 truncated.\n"); } } else if (strnicmp(data, "servern", 7) == 0) { /* servernetbiosname specified override *SMBSERVER */ if (!value || !*value || (*value == ' ')) { - cFYI(1,("empty server netbiosname specified")); + cFYI(1, ("empty server netbiosname specified")); } else { /* last byte, type, is 0x20 for servr type */ - memset(vol->target_rfc1001_name,0x20,16); + memset(vol->target_rfc1001_name, 0x20, 16); - for(i=0;i<15;i++) { + for (i = 0; i < 15; i++) { /* BB are there cases in which a comma can be - valid in this workstation netbios name (and need - special handling)? */ + valid in this workstation netbios name + (and need special handling)? */ - /* user or mount helper must uppercase netbiosname */ - if (value[i]==0) + /* user or mount helper must uppercase + the netbiosname */ + if (value[i] == 0) break; else - vol->target_rfc1001_name[i] = value[i]; + vol->target_rfc1001_name[i] = + value[i]; } /* The string has 16th byte zero still from set at top of the function */ - if ((i==15) && (value[i] != 0)) - printk(KERN_WARNING "CIFS: server netbiosname longer than 15 truncated.\n"); + if ((i == 15) && (value[i] != 0)) + printk(KERN_WARNING "CIFS: server net" + "biosname longer than 15 truncated.\n"); } } else if (strnicmp(data, "credentials", 4) == 0) { /* ignore */ } else if (strnicmp(data, "version", 3) == 0) { /* ignore */ - } else if (strnicmp(data, "guest",5) == 0) { + } else if (strnicmp(data, "guest", 5) == 0) { /* ignore */ } else if (strnicmp(data, "rw", 2) == 0) { vol->rw = TRUE; @@ -1149,11 +1166,11 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol) (strnicmp(data, "noauto", 6) == 0) || (strnicmp(data, "dev", 3) == 0)) { /* The mount tool or mount.cifs helper (if present) - uses these opts to set flags, and the flags are read - by the kernel vfs layer before we get here (ie - before read super) so there is no point trying to - parse these options again and set anything and it - is ok to just ignore them */ + uses these opts to set flags, and the flags are read + by the kernel vfs layer before we get here (ie + before read super) so there is no point trying to + parse these options again and set anything and it + is ok to just ignore them */ continue; } else if (strnicmp(data, "ro", 2) == 0) { vol->rw = FALSE; @@ -1169,26 +1186,31 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol) vol->remap = 1; } else if (strnicmp(data, "nomapchars", 10) == 0) { vol->remap = 0; - } else if (strnicmp(data, "sfu", 3) == 0) { - vol->sfu_emul = 1; - } else if (strnicmp(data, "nosfu", 5) == 0) { - vol->sfu_emul = 0; + } else if (strnicmp(data, "sfu", 3) == 0) { + vol->sfu_emul = 1; + } else if (strnicmp(data, "nosfu", 5) == 0) { + vol->sfu_emul = 0; } else if (strnicmp(data, "posixpaths", 10) == 0) { vol->posix_paths = 1; } else if (strnicmp(data, "noposixpaths", 12) == 0) { vol->posix_paths = 0; - } else if ((strnicmp(data, "nocase", 6) == 0) || + } else if (strnicmp(data, "nounix", 6) == 0) { + vol->no_linux_ext = 1; + } else if (strnicmp(data, "nolinux", 7) == 0) { + vol->no_linux_ext = 1; + } else if ((strnicmp(data, "nocase", 6) == 0) || (strnicmp(data, "ignorecase", 10) == 0)) { - vol->nocase = 1; + vol->nocase = 1; } else if (strnicmp(data, "brl", 3) == 0) { vol->nobrl = 0; - } else if ((strnicmp(data, "nobrl", 5) == 0) || + } else if ((strnicmp(data, "nobrl", 5) == 0) || (strnicmp(data, "nolock", 6) == 0)) { vol->nobrl = 1; /* turn off mandatory locking in mode if remote locking is turned off since the local vfs will do advisory */ - if(vol->file_mode == (S_IALLUGO & ~(S_ISUID | S_IXGRP))) + if (vol->file_mode == + (S_IALLUGO & ~(S_ISUID | S_IXGRP))) vol->file_mode = S_IALLUGO; } else if (strnicmp(data, "setuids", 7) == 0) { vol->setuids = 1; @@ -1202,55 +1224,61 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol) vol->intr = 0; } else if (strnicmp(data, "intr", 4) == 0) { vol->intr = 1; - } else if (strnicmp(data, "serverino",7) == 0) { + } else if (strnicmp(data, "serverino", 7) == 0) { vol->server_ino = 1; - } else if (strnicmp(data, "noserverino",9) == 0) { + } else if (strnicmp(data, "noserverino", 9) == 0) { vol->server_ino = 0; - } else if (strnicmp(data, "cifsacl",7) == 0) { + } else if (strnicmp(data, "cifsacl", 7) == 0) { vol->cifs_acl = 1; } else if (strnicmp(data, "nocifsacl", 9) == 0) { vol->cifs_acl = 0; - } else if (strnicmp(data, "acl",3) == 0) { + } else if (strnicmp(data, "acl", 3) == 0) { vol->no_psx_acl = 0; - } else if (strnicmp(data, "noacl",5) == 0) { + } else if (strnicmp(data, "noacl", 5) == 0) { vol->no_psx_acl = 1; - } else if (strnicmp(data, "sign",4) == 0) { + } else if (strnicmp(data, "sign", 4) == 0) { vol->secFlg |= CIFSSEC_MUST_SIGN; /* } else if (strnicmp(data, "seal",4) == 0) { vol->secFlg |= CIFSSEC_MUST_SEAL; */ - } else if (strnicmp(data, "direct",6) == 0) { + } else if (strnicmp(data, "direct", 6) == 0) { vol->direct_io = 1; - } else if (strnicmp(data, "forcedirectio",13) == 0) { + } else if (strnicmp(data, "forcedirectio", 13) == 0) { vol->direct_io = 1; - } else if (strnicmp(data, "in6_addr",8) == 0) { + } else if (strnicmp(data, "in6_addr", 8) == 0) { if (!value || !*value) { vol->in6_addr = NULL; } else if (strnlen(value, 49) == 48) { vol->in6_addr = value; } else { - printk(KERN_WARNING "CIFS: ip v6 address not 48 characters long\n"); + printk(KERN_WARNING "CIFS: ip v6 address not " + "48 characters long\n"); return 1; } } else if (strnicmp(data, "noac", 4) == 0) { - printk(KERN_WARNING "CIFS: Mount option noac not supported. Instead set /proc/fs/cifs/LookupCacheEnabled to 0\n"); + printk(KERN_WARNING "CIFS: Mount option noac not " + "supported. Instead set " + "/proc/fs/cifs/LookupCacheEnabled to 0\n"); } else - printk(KERN_WARNING "CIFS: Unknown mount option %s\n",data); + printk(KERN_WARNING "CIFS: Unknown mount option %s\n", + data); } if (vol->UNC == NULL) { if (devname == NULL) { - printk(KERN_WARNING "CIFS: Missing UNC name for mount target\n"); + printk(KERN_WARNING "CIFS: Missing UNC name for mount " + "target\n"); return 1; } if ((temp_len = strnlen(devname, 300)) < 300) { - vol->UNC = kmalloc(temp_len+1,GFP_KERNEL); + vol->UNC = kmalloc(temp_len+1, GFP_KERNEL); if (vol->UNC == NULL) return 1; - strcpy(vol->UNC,devname); + strcpy(vol->UNC, devname); if (strncmp(vol->UNC, "//", 2) == 0) { vol->UNC[0] = '\\'; vol->UNC[1] = '\\'; } else if (strncmp(vol->UNC, "\\\\", 2) != 0) { - printk(KERN_WARNING "CIFS: UNC Path does not begin with // or \\\\ \n"); + printk(KERN_WARNING "CIFS: UNC Path does not " + "begin with // or \\\\ \n"); return 1; } } else { @@ -1258,14 +1286,14 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol) return 1; } } - if(vol->UNCip == NULL) + if (vol->UNCip == NULL) vol->UNCip = &vol->UNC[2]; return 0; } static struct cifsSesInfo * -cifs_find_tcp_session(struct in_addr * target_ip_addr, +cifs_find_tcp_session(struct in_addr *target_ip_addr, struct in6_addr *target_ip6_addr, char *userName, struct TCP_Server_Info **psrvTcp) { @@ -1277,19 +1305,25 @@ cifs_find_tcp_session(struct in_addr * target_ip_addr, list_for_each(tmp, &GlobalSMBSessionList) { ses = list_entry(tmp, struct cifsSesInfo, cifsSessionList); if (ses->server) { - if((target_ip_addr && + if ((target_ip_addr && (ses->server->addr.sockAddr.sin_addr.s_addr == target_ip_addr->s_addr)) || (target_ip6_addr && memcmp(&ses->server->addr.sockAddr6.sin6_addr, - target_ip6_addr,sizeof(*target_ip6_addr)))){ - /* BB lock server and tcp session and increment use count here?? */ - *psrvTcp = ses->server; /* found a match on the TCP session */ + target_ip6_addr, sizeof(*target_ip6_addr)))) { + /* BB lock server and tcp session and increment + use count here?? */ + + /* found a match on the TCP session */ + *psrvTcp = ses->server; + /* BB check if reconnection needed */ if (strncmp (ses->userName, userName, MAX_USERNAME_SIZE) == 0){ read_unlock(&GlobalSMBSeslock); - return ses; /* found exact match on both tcp and SMB sessions */ + /* Found exact match on both TCP and + SMB sessions */ + return ses; } } } @@ -1320,7 +1354,8 @@ find_unc(__be32 new_target_ip_addr, char *uncName, char *userName) /* BB lock tcon, server and tcp session and increment use count here? */ /* found a match on the TCP session */ /* BB check if reconnection needed */ - cFYI(1,("IP match, old UNC: %s new: %s", + cFYI(1, + ("IP match, old UNC: %s new: %s", tcon->treeName, uncName)); if (strncmp (tcon->treeName, uncName, @@ -1355,11 +1390,11 @@ connect_to_dfs_path(int xid, struct cifsSesInfo *pSesInfo, unsigned int num_referrals; int rc = 0; - rc = get_dfs_path(xid, pSesInfo,old_path, nls_codepage, + rc = get_dfs_path(xid, pSesInfo, old_path, nls_codepage, &num_referrals, &referrals, remap); /* BB Add in code to: if valid refrl, if not ip address contact - the helper that resolves tcp names, mount to it, try to + the helper that resolves tcp names, mount to it, try to tcon to it unmount it if fail */ kfree(referrals); @@ -1368,10 +1403,9 @@ connect_to_dfs_path(int xid, struct cifsSesInfo *pSesInfo, } int -get_dfs_path(int xid, struct cifsSesInfo *pSesInfo, - const char *old_path, const struct nls_table *nls_codepage, - unsigned int *pnum_referrals, - unsigned char ** preferrals, int remap) +get_dfs_path(int xid, struct cifsSesInfo *pSesInfo, const char *old_path, + const struct nls_table *nls_codepage, unsigned int *pnum_referrals, + unsigned char **preferrals, int remap) { char *temp_unc; int rc = 0; @@ -1380,7 +1414,8 @@ get_dfs_path(int xid, struct cifsSesInfo *pSesInfo, if (pSesInfo->ipc_tid == 0) { temp_unc = kmalloc(2 /* for slashes */ + - strnlen(pSesInfo->serverName,SERVER_NAME_LEN_WITH_NULL * 2) + strnlen(pSesInfo->serverName, + SERVER_NAME_LEN_WITH_NULL * 2) + 1 + 4 /* slash IPC$ */ + 2, GFP_KERNEL); if (temp_unc == NULL) @@ -1391,7 +1426,7 @@ get_dfs_path(int xid, struct cifsSesInfo *pSesInfo, strcpy(temp_unc + 2 + strlen(pSesInfo->serverName), "\\IPC$"); rc = CIFSTCon(xid, pSesInfo, temp_unc, NULL, nls_codepage); cFYI(1, - ("CIFS Tcon rc = %d ipc_tid = %d", rc,pSesInfo->ipc_tid)); + ("CIFS Tcon rc = %d ipc_tid = %d", rc, pSesInfo->ipc_tid)); kfree(temp_unc); } if (rc == 0) @@ -1402,62 +1437,63 @@ get_dfs_path(int xid, struct cifsSesInfo *pSesInfo, } /* See RFC1001 section 14 on representation of Netbios names */ -static void rfc1002mangle(char * target,char * source, unsigned int length) +static void rfc1002mangle(char *target, char *source, unsigned int length) { - unsigned int i,j; + unsigned int i, j; - for(i=0,j=0;i<(length);i++) { + for (i = 0, j = 0; i < (length); i++) { /* mask a nibble at a time and encode */ target[j] = 'A' + (0x0F & (source[i] >> 4)); target[j+1] = 'A' + (0x0F & source[i]); - j+=2; + j += 2; } } static int -ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket, - char * netbios_name, char * target_name) +ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket, + char *netbios_name, char *target_name) { int rc = 0; int connected = 0; __be16 orig_port = 0; - if(*csocket == NULL) { - rc = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, csocket); + if (*csocket == NULL) { + rc = sock_create_kern(PF_INET, SOCK_STREAM, + IPPROTO_TCP, csocket); if (rc < 0) { - cERROR(1, ("Error %d creating socket",rc)); + cERROR(1, ("Error %d creating socket", rc)); *csocket = NULL; return rc; } else { /* BB other socket options to set KEEPALIVE, NODELAY? */ - cFYI(1,("Socket created")); - (*csocket)->sk->sk_allocation = GFP_NOFS; + cFYI(1, ("Socket created")); + (*csocket)->sk->sk_allocation = GFP_NOFS; } } psin_server->sin_family = AF_INET; - if(psin_server->sin_port) { /* user overrode default port */ + if (psin_server->sin_port) { /* user overrode default port */ rc = (*csocket)->ops->connect(*csocket, (struct sockaddr *) psin_server, - sizeof (struct sockaddr_in),0); + sizeof (struct sockaddr_in), 0); if (rc >= 0) connected = 1; - } + } - if(!connected) { - /* save original port so we can retry user specified port + if (!connected) { + /* save original port so we can retry user specified port later if fall back ports fail this time */ orig_port = psin_server->sin_port; /* do not retry on the same port we just failed on */ - if(psin_server->sin_port != htons(CIFS_PORT)) { + if (psin_server->sin_port != htons(CIFS_PORT)) { psin_server->sin_port = htons(CIFS_PORT); rc = (*csocket)->ops->connect(*csocket, (struct sockaddr *) psin_server, - sizeof (struct sockaddr_in),0); + sizeof (struct sockaddr_in), 0); if (rc >= 0) connected = 1; } @@ -1465,60 +1501,63 @@ ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket, if (!connected) { psin_server->sin_port = htons(RFC1001_PORT); rc = (*csocket)->ops->connect(*csocket, (struct sockaddr *) - psin_server, sizeof (struct sockaddr_in),0); - if (rc >= 0) + psin_server, + sizeof (struct sockaddr_in), 0); + if (rc >= 0) connected = 1; } /* give up here - unless we want to retry on different protocol families some day */ if (!connected) { - if(orig_port) + if (orig_port) psin_server->sin_port = orig_port; - cFYI(1,("Error %d connecting to server via ipv4",rc)); + cFYI(1, ("Error %d connecting to server via ipv4", rc)); sock_release(*csocket); *csocket = NULL; return rc; } - /* Eventually check for other socket options to change from - the default. sock_setsockopt not used because it expects + /* Eventually check for other socket options to change from + the default. sock_setsockopt not used because it expects user space buffer */ - cFYI(1,("sndbuf %d rcvbuf %d rcvtimeo 0x%lx",(*csocket)->sk->sk_sndbuf, + cFYI(1, ("sndbuf %d rcvbuf %d rcvtimeo 0x%lx", + (*csocket)->sk->sk_sndbuf, (*csocket)->sk->sk_rcvbuf, (*csocket)->sk->sk_rcvtimeo)); (*csocket)->sk->sk_rcvtimeo = 7 * HZ; /* make the bufsizes depend on wsize/rsize and max requests */ - if((*csocket)->sk->sk_sndbuf < (200 * 1024)) + if ((*csocket)->sk->sk_sndbuf < (200 * 1024)) (*csocket)->sk->sk_sndbuf = 200 * 1024; - if((*csocket)->sk->sk_rcvbuf < (140 * 1024)) + if ((*csocket)->sk->sk_rcvbuf < (140 * 1024)) (*csocket)->sk->sk_rcvbuf = 140 * 1024; /* send RFC1001 sessinit */ - if(psin_server->sin_port == htons(RFC1001_PORT)) { + if (psin_server->sin_port == htons(RFC1001_PORT)) { /* some servers require RFC1001 sessinit before sending - negprot - BB check reconnection in case where second + negprot - BB check reconnection in case where second sessinit is sent but no second negprot */ - struct rfc1002_session_packet * ses_init_buf; - struct smb_hdr * smb_buf; - ses_init_buf = kzalloc(sizeof(struct rfc1002_session_packet), GFP_KERNEL); - if(ses_init_buf) { + struct rfc1002_session_packet *ses_init_buf; + struct smb_hdr *smb_buf; + ses_init_buf = kzalloc(sizeof(struct rfc1002_session_packet), + GFP_KERNEL); + if (ses_init_buf) { ses_init_buf->trailer.session_req.called_len = 32; - if(target_name && (target_name[0] != 0)) { + if (target_name && (target_name[0] != 0)) { rfc1002mangle(ses_init_buf->trailer.session_req.called_name, target_name, 16); } else { rfc1002mangle(ses_init_buf->trailer.session_req.called_name, - DEFAULT_CIFS_CALLED_NAME,16); + DEFAULT_CIFS_CALLED_NAME, 16); } ses_init_buf->trailer.session_req.calling_len = 32; /* calling name ends in null (byte 16) from old smb convention. */ - if(netbios_name && (netbios_name[0] !=0)) { + if (netbios_name && (netbios_name[0] != 0)) { rfc1002mangle(ses_init_buf->trailer.session_req.calling_name, - netbios_name,16); + netbios_name, 16); } else { rfc1002mangle(ses_init_buf->trailer.session_req.calling_name, - "LINUX_CIFS_CLNT",16); + "LINUX_CIFS_CLNT", 16); } ses_init_buf->trailer.session_req.scope1 = 0; ses_init_buf->trailer.session_req.scope2 = 0; @@ -1528,20 +1567,20 @@ ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket, rc = smb_send(*csocket, smb_buf, 0x44, (struct sockaddr *)psin_server); kfree(ses_init_buf); - msleep(1); /* RFC1001 layer in at least one server + msleep(1); /* RFC1001 layer in at least one server requires very short break before negprot presumably because not expecting negprot to follow so fast. This is a simple - solution that works without + solution that works without complicating the code and causes no significant slowing down on mount for everyone else */ } - /* else the negprot may still work without this + /* else the negprot may still work without this even though malloc failed */ - + } - + return rc; } @@ -1552,41 +1591,42 @@ ipv6_connect(struct sockaddr_in6 *psin_server, struct socket **csocket) int connected = 0; __be16 orig_port = 0; - if(*csocket == NULL) { - rc = sock_create_kern(PF_INET6, SOCK_STREAM, IPPROTO_TCP, csocket); + if (*csocket == NULL) { + rc = sock_create_kern(PF_INET6, SOCK_STREAM, + IPPROTO_TCP, csocket); if (rc < 0) { - cERROR(1, ("Error %d creating ipv6 socket",rc)); + cERROR(1, ("Error %d creating ipv6 socket", rc)); *csocket = NULL; return rc; } else { /* BB other socket options to set KEEPALIVE, NODELAY? */ - cFYI(1,("ipv6 Socket created")); + cFYI(1, ("ipv6 Socket created")); (*csocket)->sk->sk_allocation = GFP_NOFS; } } psin_server->sin6_family = AF_INET6; - if(psin_server->sin6_port) { /* user overrode default port */ + if (psin_server->sin6_port) { /* user overrode default port */ rc = (*csocket)->ops->connect(*csocket, (struct sockaddr *) psin_server, - sizeof (struct sockaddr_in6),0); + sizeof (struct sockaddr_in6), 0); if (rc >= 0) connected = 1; - } + } - if(!connected) { - /* save original port so we can retry user specified port + if (!connected) { + /* save original port so we can retry user specified port later if fall back ports fail this time */ orig_port = psin_server->sin6_port; /* do not retry on the same port we just failed on */ - if(psin_server->sin6_port != htons(CIFS_PORT)) { + if (psin_server->sin6_port != htons(CIFS_PORT)) { psin_server->sin6_port = htons(CIFS_PORT); rc = (*csocket)->ops->connect(*csocket, (struct sockaddr *) psin_server, - sizeof (struct sockaddr_in6),0); + sizeof (struct sockaddr_in6), 0); if (rc >= 0) connected = 1; } @@ -1594,31 +1634,31 @@ ipv6_connect(struct sockaddr_in6 *psin_server, struct socket **csocket) if (!connected) { psin_server->sin6_port = htons(RFC1001_PORT); rc = (*csocket)->ops->connect(*csocket, (struct sockaddr *) - psin_server, sizeof (struct sockaddr_in6),0); - if (rc >= 0) + psin_server, sizeof (struct sockaddr_in6), 0); + if (rc >= 0) connected = 1; } /* give up here - unless we want to retry on different protocol families some day */ if (!connected) { - if(orig_port) + if (orig_port) psin_server->sin6_port = orig_port; - cFYI(1,("Error %d connecting to server via ipv6",rc)); + cFYI(1, ("Error %d connecting to server via ipv6", rc)); sock_release(*csocket); *csocket = NULL; return rc; } - /* Eventually check for other socket options to change from - the default. sock_setsockopt not used because it expects + /* Eventually check for other socket options to change from + the default. sock_setsockopt not used because it expects user space buffer */ (*csocket)->sk->sk_rcvtimeo = 7 * HZ; - + return rc; } -void reset_cifs_unix_caps(int xid, struct cifsTconInfo * tcon, - struct super_block * sb, struct smb_vol * vol_info) +void reset_cifs_unix_caps(int xid, struct cifsTconInfo *tcon, + struct super_block *sb, struct smb_vol *vol_info) { /* if we are reconnecting then should we check to see if * any requested capabilities changed locally e.g. via @@ -1630,65 +1670,87 @@ void reset_cifs_unix_caps(int xid, struct cifsTconInfo * tcon, * What if we wanted to mount the server share twice once with * and once without posixacls or posix paths? */ __u64 saved_cap = le64_to_cpu(tcon->fsUnixInfo.Capability); - - - if(!CIFSSMBQFSUnixInfo(xid, tcon)) { + + if (vol_info && vol_info->no_linux_ext) { + tcon->fsUnixInfo.Capability = 0; + tcon->unix_ext = 0; /* Unix Extensions disabled */ + cFYI(1, ("Linux protocol extensions disabled")); + return; + } else if (vol_info) + tcon->unix_ext = 1; /* Unix Extensions supported */ + + if (tcon->unix_ext == 0) { + cFYI(1, ("Unix extensions disabled so not set on reconnect")); + return; + } + + if (!CIFSSMBQFSUnixInfo(xid, tcon)) { __u64 cap = le64_to_cpu(tcon->fsUnixInfo.Capability); - + /* check for reconnect case in which we do not want to change the mount behavior if we can avoid it */ - if(vol_info == NULL) { - /* turn off POSIX ACL and PATHNAMES if not set + if (vol_info == NULL) { + /* turn off POSIX ACL and PATHNAMES if not set originally at mount time */ if ((saved_cap & CIFS_UNIX_POSIX_ACL_CAP) == 0) cap &= ~CIFS_UNIX_POSIX_ACL_CAP; if ((saved_cap & CIFS_UNIX_POSIX_PATHNAMES_CAP) == 0) cap &= ~CIFS_UNIX_POSIX_PATHNAMES_CAP; - - - - } - + cap &= CIFS_UNIX_CAP_MASK; - if(vol_info && vol_info->no_psx_acl) + if (vol_info && vol_info->no_psx_acl) cap &= ~CIFS_UNIX_POSIX_ACL_CAP; - else if(CIFS_UNIX_POSIX_ACL_CAP & cap) { - cFYI(1,("negotiated posix acl support")); - if(sb) + else if (CIFS_UNIX_POSIX_ACL_CAP & cap) { + cFYI(1, ("negotiated posix acl support")); + if (sb) sb->s_flags |= MS_POSIXACL; } - if(vol_info && vol_info->posix_paths == 0) + if (vol_info && vol_info->posix_paths == 0) cap &= ~CIFS_UNIX_POSIX_PATHNAMES_CAP; - else if(cap & CIFS_UNIX_POSIX_PATHNAMES_CAP) { - cFYI(1,("negotiate posix pathnames")); - if(sb) - CIFS_SB(sb)->mnt_cifs_flags |= + else if (cap & CIFS_UNIX_POSIX_PATHNAMES_CAP) { + cFYI(1, ("negotiate posix pathnames")); + if (sb) + CIFS_SB(sb)->mnt_cifs_flags |= CIFS_MOUNT_POSIX_PATHS; } - + /* We might be setting the path sep back to a different form if we are reconnecting and the server switched its - posix path capability for this share */ - if(sb && (CIFS_SB(sb)->prepathlen > 0)) + posix path capability for this share */ + if (sb && (CIFS_SB(sb)->prepathlen > 0)) CIFS_SB(sb)->prepath[0] = CIFS_DIR_SEP(CIFS_SB(sb)); - - cFYI(1,("Negotiate caps 0x%x",(int)cap)); + + if (sb && (CIFS_SB(sb)->rsize > 127 * 1024)) { + if ((cap & CIFS_UNIX_LARGE_READ_CAP) == 0) { + CIFS_SB(sb)->rsize = 127 * 1024; +#ifdef CONFIG_CIFS_DEBUG2 + cFYI(1, ("larger reads not supported by srv")); +#endif + } + } + + + cFYI(1, ("Negotiate caps 0x%x", (int)cap)); #ifdef CONFIG_CIFS_DEBUG2 - if(cap & CIFS_UNIX_FCNTL_CAP) - cFYI(1,("FCNTL cap")); - if(cap & CIFS_UNIX_EXTATTR_CAP) - cFYI(1,("EXTATTR cap")); - if(cap & CIFS_UNIX_POSIX_PATHNAMES_CAP) - cFYI(1,("POSIX path cap")); - if(cap & CIFS_UNIX_XATTR_CAP) - cFYI(1,("XATTR cap")); - if(cap & CIFS_UNIX_POSIX_ACL_CAP) - cFYI(1,("POSIX ACL cap")); + if (cap & CIFS_UNIX_FCNTL_CAP) + cFYI(1, ("FCNTL cap")); + if (cap & CIFS_UNIX_EXTATTR_CAP) + cFYI(1, ("EXTATTR cap")); + if (cap & CIFS_UNIX_POSIX_PATHNAMES_CAP) + cFYI(1, ("POSIX path cap")); + if (cap & CIFS_UNIX_XATTR_CAP) + cFYI(1, ("XATTR cap")); + if (cap & CIFS_UNIX_POSIX_ACL_CAP) + cFYI(1, ("POSIX ACL cap")); + if (cap & CIFS_UNIX_LARGE_READ_CAP) + cFYI(1, ("very large read cap")); + if (cap & CIFS_UNIX_LARGE_WRITE_CAP) + cFYI(1, ("very large write cap")); #endif /* CIFS_DEBUG2 */ if (CIFSSMBSetFSUnixInfo(xid, tcon, cap)) { - cFYI(1,("setting capabilities failed")); + cFYI(1, ("setting capabilities failed")); } } } @@ -1712,8 +1774,8 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, xid = GetXid(); /* cFYI(1, ("Entering cifs_mount. Xid: %d with: %s", xid, mount_data)); */ - - memset(&volume_info,0,sizeof(struct smb_vol)); + + memset(&volume_info, 0, sizeof(struct smb_vol)); if (cifs_parse_mount_options(mount_data, devname, &volume_info)) { kfree(volume_info.UNC); kfree(volume_info.password); @@ -1723,15 +1785,15 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, } if (volume_info.nullauth) { - cFYI(1,("null user")); + cFYI(1, ("null user")); volume_info.username = NULL; } else if (volume_info.username) { /* BB fixme parse for domain name here */ - cFYI(1, ("Username: %s ", volume_info.username)); + cFYI(1, ("Username: %s", volume_info.username)); } else { cifserror("No username specified"); - /* In userspace mount helper we can get user name from alternate - locations such as env variables and files on disk */ + /* In userspace mount helper we can get user name from alternate + locations such as env variables and files on disk */ kfree(volume_info.UNC); kfree(volume_info.password); kfree(volume_info.prepath); @@ -1740,18 +1802,20 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, } if (volume_info.UNCip && volume_info.UNC) { - rc = cifs_inet_pton(AF_INET, volume_info.UNCip,&sin_server.sin_addr.s_addr); + rc = cifs_inet_pton(AF_INET, volume_info.UNCip, + &sin_server.sin_addr.s_addr); - if(rc <= 0) { + if (rc <= 0) { /* not ipv4 address, try ipv6 */ - rc = cifs_inet_pton(AF_INET6,volume_info.UNCip,&sin_server6.sin6_addr.in6_u); - if(rc > 0) + rc = cifs_inet_pton(AF_INET6, volume_info.UNCip, + &sin_server6.sin6_addr.in6_u); + if (rc > 0) address_type = AF_INET6; } else { address_type = AF_INET; } - - if(rc <= 0) { + + if (rc <= 0) { /* we failed translating address */ kfree(volume_info.UNC); kfree(volume_info.password); @@ -1763,9 +1827,10 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, cFYI(1, ("UNC: %s ip: %s", volume_info.UNC, volume_info.UNCip)); /* success */ rc = 0; - } else if (volume_info.UNCip){ - /* BB using ip addr as server name connect to the DFS root below */ - cERROR(1,("Connecting to DFS root not implemented yet")); + } else if (volume_info.UNCip) { + /* BB using ip addr as server name to connect to the + DFS root below */ + cERROR(1, ("Connecting to DFS root not implemented yet")); kfree(volume_info.UNC); kfree(volume_info.password); kfree(volume_info.prepath); @@ -1773,7 +1838,8 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, return -EINVAL; } else /* which servers DFS root would we conect to */ { cERROR(1, - ("CIFS mount error: No UNC path (e.g. -o unc=//192.168.1.100/public) specified")); + ("CIFS mount error: No UNC path (e.g. -o " + "unc=//192.168.1.100/public) specified")); kfree(volume_info.UNC); kfree(volume_info.password); kfree(volume_info.prepath); @@ -1782,13 +1848,14 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, } /* this is needed for ASCII cp to Unicode converts */ - if(volume_info.iocharset == NULL) { + if (volume_info.iocharset == NULL) { cifs_sb->local_nls = load_nls_default(); /* load_nls_default can not return null */ } else { cifs_sb->local_nls = load_nls(volume_info.iocharset); - if(cifs_sb->local_nls == NULL) { - cERROR(1,("CIFS mount error: iocharset %s not found",volume_info.iocharset)); + if (cifs_sb->local_nls == NULL) { + cERROR(1, ("CIFS mount error: iocharset %s not found", + volume_info.iocharset)); kfree(volume_info.UNC); kfree(volume_info.password); kfree(volume_info.prepath); @@ -1797,12 +1864,12 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, } } - if(address_type == AF_INET) + if (address_type == AF_INET) existingCifsSes = cifs_find_tcp_session(&sin_server.sin_addr, NULL /* no ipv6 addr */, volume_info.username, &srvTcp); - else if(address_type == AF_INET6) { - cFYI(1,("looking for ipv6 address")); + else if (address_type == AF_INET6) { + cFYI(1, ("looking for ipv6 address")); existingCifsSes = cifs_find_tcp_session(NULL /* no ipv4 addr */, &sin_server6.sin6_addr, volume_info.username, &srvTcp); @@ -1814,26 +1881,25 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, return -EINVAL; } - if (srvTcp) { - cFYI(1, ("Existing tcp session with server found")); + cFYI(1, ("Existing tcp session with server found")); } else { /* create socket */ if (volume_info.port) sin_server.sin_port = htons(volume_info.port); else sin_server.sin_port = 0; if (address_type == AF_INET6) { - cFYI(1,("attempting ipv6 connect")); + cFYI(1, ("attempting ipv6 connect")); /* BB should we allow ipv6 on port 139? */ /* other OS never observed in Wild doing 139 with v6 */ - rc = ipv6_connect(&sin_server6,&csocket); - } else - rc = ipv4_connect(&sin_server,&csocket, + rc = ipv6_connect(&sin_server6, &csocket); + } else + rc = ipv4_connect(&sin_server, &csocket, volume_info.source_rfc1001_name, volume_info.target_rfc1001_name); if (rc < 0) { - cERROR(1, - ("Error connecting to IPv4 socket. Aborting operation")); + cERROR(1, ("Error connecting to IPv4 socket. " + "Aborting operation")); if (csocket != NULL) sock_release(csocket); kfree(volume_info.UNC); @@ -1854,8 +1920,9 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, return rc; } else { memset(srvTcp, 0, sizeof (struct TCP_Server_Info)); - memcpy(&srvTcp->addr.sockAddr, &sin_server, sizeof (struct sockaddr_in)); - atomic_set(&srvTcp->inFlight,0); + memcpy(&srvTcp->addr.sockAddr, &sin_server, + sizeof (struct sockaddr_in)); + atomic_set(&srvTcp->inFlight, 0); /* BB Add code for ipv6 case too */ srvTcp->ssocket = csocket; srvTcp->protocolType = IPV4; @@ -1870,7 +1937,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, srvTcp->tsk = kthread_run((void *)(void *)cifs_demultiplex_thread, srvTcp, "cifsd"); if ( IS_ERR(srvTcp->tsk) ) { rc = PTR_ERR(srvTcp->tsk); - cERROR(1,("error %d create cifsd thread", rc)); + cERROR(1, ("error %d create cifsd thread", rc)); srvTcp->tsk = NULL; sock_release(csocket); kfree(volume_info.UNC); @@ -1881,8 +1948,10 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, } wait_for_completion(&cifsd_complete); rc = 0; - memcpy(srvTcp->workstation_RFC1001_name, volume_info.source_rfc1001_name,16); - memcpy(srvTcp->server_RFC1001_name, volume_info.target_rfc1001_name,16); + memcpy(srvTcp->workstation_RFC1001_name, + volume_info.source_rfc1001_name, 16); + memcpy(srvTcp->server_RFC1001_name, + volume_info.target_rfc1001_name, 16); srvTcp->sequence_number = 0; } } @@ -1903,16 +1972,17 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, NIPQUAD(sin_server.sin_addr.s_addr)); } - if (!rc){ - /* volume_info.password freed at unmount */ + if (!rc) { + /* volume_info.password freed at unmount */ if (volume_info.password) pSesInfo->password = volume_info.password; if (volume_info.username) strncpy(pSesInfo->userName, - volume_info.username,MAX_USERNAME_SIZE); + volume_info.username, + MAX_USERNAME_SIZE); if (volume_info.domainname) { int len = strlen(volume_info.domainname); - pSesInfo->domainName = + pSesInfo->domainName = kmalloc(len + 1, GFP_KERNEL); if (pSesInfo->domainName) strcpy(pSesInfo->domainName, @@ -1922,46 +1992,48 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, pSesInfo->overrideSecFlg = volume_info.secFlg; down(&pSesInfo->sesSem); /* BB FIXME need to pass vol->secFlgs BB */ - rc = cifs_setup_session(xid,pSesInfo, cifs_sb->local_nls); + rc = cifs_setup_session(xid, pSesInfo, + cifs_sb->local_nls); up(&pSesInfo->sesSem); if (!rc) atomic_inc(&srvTcp->socketUseCount); } else kfree(volume_info.password); } - + /* search for existing tcon to this server share */ if (!rc) { if (volume_info.rsize > CIFSMaxBufSize) { - cERROR(1,("rsize %d too large, using MaxBufSize", + cERROR(1, ("rsize %d too large, using MaxBufSize", volume_info.rsize)); cifs_sb->rsize = CIFSMaxBufSize; - } else if((volume_info.rsize) && (volume_info.rsize <= CIFSMaxBufSize)) + } else if ((volume_info.rsize) && + (volume_info.rsize <= CIFSMaxBufSize)) cifs_sb->rsize = volume_info.rsize; else /* default */ cifs_sb->rsize = CIFSMaxBufSize; if (volume_info.wsize > PAGEVEC_SIZE * PAGE_CACHE_SIZE) { - cERROR(1,("wsize %d too large using 4096 instead", + cERROR(1, ("wsize %d too large, using 4096 instead", volume_info.wsize)); cifs_sb->wsize = 4096; } else if (volume_info.wsize) cifs_sb->wsize = volume_info.wsize; else - cifs_sb->wsize = + cifs_sb->wsize = min_t(const int, PAGEVEC_SIZE * PAGE_CACHE_SIZE, 127*1024); /* old default of CIFSMaxBufSize was too small now - that SMB Write2 can send multiple pages in kvec. + that SMB Write2 can send multiple pages in kvec. RFC1001 does not describe what happens when frame bigger than 128K is sent so use that as max in conjunction with 52K kvec constraint on arch with 4K page size */ if (cifs_sb->rsize < 2048) { - cifs_sb->rsize = 2048; + cifs_sb->rsize = 2048; /* Windows ME may prefer this */ - cFYI(1,("readsize set to minimum 2048")); + cFYI(1, ("readsize set to minimum: 2048")); } /* calculate prepath */ cifs_sb->prepath = volume_info.prepath; @@ -1969,14 +2041,14 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, cifs_sb->prepathlen = strlen(cifs_sb->prepath); cifs_sb->prepath[0] = CIFS_DIR_SEP(cifs_sb); volume_info.prepath = NULL; - } else + } else cifs_sb->prepathlen = 0; cifs_sb->mnt_uid = volume_info.linux_uid; cifs_sb->mnt_gid = volume_info.linux_gid; cifs_sb->mnt_file_mode = volume_info.file_mode; cifs_sb->mnt_dir_mode = volume_info.dir_mode; - cFYI(1,("file mode: 0x%x dir mode: 0x%x", - cifs_sb->mnt_file_mode,cifs_sb->mnt_dir_mode)); + cFYI(1, ("file mode: 0x%x dir mode: 0x%x", + cifs_sb->mnt_file_mode, cifs_sb->mnt_dir_mode)); if (volume_info.noperm) cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_PERM; @@ -1999,7 +2071,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, if (volume_info.override_gid) cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_OVERR_GID; if (volume_info.direct_io) { - cFYI(1,("mounting share using direct i/o")); + cFYI(1, ("mounting share using direct i/o")); cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DIRECT_IO; } @@ -2010,7 +2082,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, cFYI(1, ("Found match on UNC path")); /* we can have only one retry value for a connection to a share so for resources mounted more than once - to the same server share the last value passed in + to the same server share the last value passed in for the retry flag is used */ tcon->retry = volume_info.retry; tcon->nocase = volume_info.nocase; @@ -2019,17 +2091,17 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, if (tcon == NULL) rc = -ENOMEM; else { - /* check for null share name ie connecting to + /* check for null share name ie connecting to * dfs root */ - /* BB check if this works for exactly length + /* BB check if this works for exactly length * three strings */ if ((strchr(volume_info.UNC + 3, '\\') == NULL) && (strchr(volume_info.UNC + 3, '/') == NULL)) { rc = connect_to_dfs_path(xid, pSesInfo, "", cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & + cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); kfree(volume_info.UNC); FreeXid(xid); @@ -2038,7 +2110,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, /* BB Do we need to wrap sesSem around * this TCon call and Unix SetFS as * we do on SessSetup and reconnect? */ - rc = CIFSTCon(xid, pSesInfo, + rc = CIFSTCon(xid, pSesInfo, volume_info.UNC, tcon, cifs_sb->local_nls); cFYI(1, ("CIFS Tcon rc = %d", rc)); @@ -2075,9 +2147,9 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, always wake up processes blocked in tcp in recv_mesg then we could remove the send_sig call */ - send_sig(SIGKILL,srvTcp->tsk,1); + force_sig(SIGKILL, srvTcp->tsk); tsk = srvTcp->tsk; - if(tsk) + if (tsk) kthread_stop(tsk); } } @@ -2086,15 +2158,17 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, tconInfoFree(tcon); if (existingCifsSes == NULL) { if (pSesInfo) { - if ((pSesInfo->server) && + if ((pSesInfo->server) && (pSesInfo->status == CifsGood)) { int temp_rc; temp_rc = CIFSSMBLogoff(xid, pSesInfo); /* if the socketUseCount is now zero */ if ((temp_rc == -ESHUTDOWN) && - (pSesInfo->server) && (pSesInfo->server->tsk)) { + (pSesInfo->server) && + (pSesInfo->server->tsk)) { struct task_struct *tsk; - send_sig(SIGKILL,pSesInfo->server->tsk,1); + force_sig(SIGKILL, + pSesInfo->server->tsk); tsk = pSesInfo->server->tsk; if (tsk) kthread_stop(tsk); @@ -2113,19 +2187,29 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, /* do not care if following two calls succeed - informational */ CIFSSMBQFSDeviceInfo(xid, tcon); CIFSSMBQFSAttributeInfo(xid, tcon); - + /* tell server which Unix caps we support */ if (tcon->ses->capabilities & CAP_UNIX) + /* reset of caps checks mount to see if unix extensions + disabled for just this mount */ reset_cifs_unix_caps(xid, tcon, sb, &volume_info); - + else + tcon->unix_ext = 0; /* server does not support them */ + + if ((tcon->unix_ext == 0) && (cifs_sb->rsize > (1024 * 127))) { + cifs_sb->rsize = 1024 * 127; +#ifdef CONFIG_CIFS_DEBUG2 + cFYI(1, ("no very large read support, rsize now 127K")); +#endif + } if (!(tcon->ses->capabilities & CAP_LARGE_WRITE_X)) cifs_sb->wsize = min(cifs_sb->wsize, (tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE)); if (!(tcon->ses->capabilities & CAP_LARGE_READ_X)) - cifs_sb->rsize = min(cifs_sb->rsize, - (tcon->ses->server->maxBuf - - MAX_CIFS_HDR_SIZE)); + cifs_sb->rsize = min(cifs_sb->rsize, + (tcon->ses->server->maxBuf - + MAX_CIFS_HDR_SIZE)); } /* volume_info.password is freed above when existing session found @@ -2178,7 +2262,8 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses, pSMB->req_no_secext.MaxBufferSize = cpu_to_le16(ses->server->maxBuf); pSMB->req_no_secext.MaxMpxCount = cpu_to_le16(ses->server->maxReq); - if(ses->server->secMode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) + if (ses->server->secMode & + (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) smb_buffer->Flags2 |= SMBFLG2_SECURITY_SIGNATURE; capabilities = CAP_LARGE_FILES | CAP_NT_SMBS | CAP_LEVEL_II_OPLOCKS | @@ -2197,7 +2282,7 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses, } pSMB->req_no_secext.Capabilities = cpu_to_le32(capabilities); - pSMB->req_no_secext.CaseInsensitivePasswordLength = + pSMB->req_no_secext.CaseInsensitivePasswordLength = cpu_to_le16(CIFS_SESS_KEY_SIZE); pSMB->req_no_secext.CaseSensitivePasswordLength = @@ -2215,9 +2300,9 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses, } if (user == NULL) bytes_returned = 0; /* skip null user */ - else + else bytes_returned = - cifs_strtoUCS((__le16 *) bcc_ptr, user, 100, + cifs_strtoUCS((__le16 *) bcc_ptr, user, 100, nls_codepage); /* convert number of 16 bit words to bytes */ bcc_ptr += 2 * bytes_returned; @@ -2247,7 +2332,7 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses, bcc_ptr += 2 * bytes_returned; bcc_ptr += 2; } else { - if (user != NULL) { + if (user != NULL) { strncpy(bcc_ptr, user, 200); bcc_ptr += strnlen(user, 200); } @@ -2282,11 +2367,12 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses, __u16 action = le16_to_cpu(pSMBr->resp.Action); __u16 blob_len = le16_to_cpu(pSMBr->resp.SecurityBlobLength); if (action & GUEST_LOGIN) - cFYI(1, (" Guest login")); /* do we want to mark SesInfo struct ? */ - ses->Suid = smb_buffer_response->Uid; /* UID left in wire format (le) */ + cFYI(1, (" Guest login")); /* BB mark SesInfo struct? */ + ses->Suid = smb_buffer_response->Uid; /* UID left in wire format + (little endian) */ cFYI(1, ("UID = %d ", ses->Suid)); - /* response can have either 3 or 4 word count - Samba sends 3 */ - bcc_ptr = pByteArea(smb_buffer_response); + /* response can have either 3 or 4 word count - Samba sends 3 */ + bcc_ptr = pByteArea(smb_buffer_response); if ((pSMBr->resp.hdr.WordCount == 3) || ((pSMBr->resp.hdr.WordCount == 4) && (blob_len < pSMBr->resp.ByteCount))) { @@ -2296,8 +2382,10 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses, if (smb_buffer->Flags2 & SMBFLG2_UNICODE) { if ((long) (bcc_ptr) % 2) { remaining_words = - (BCC(smb_buffer_response) - 1) /2; - bcc_ptr++; /* Unicode strings must be word aligned */ + (BCC(smb_buffer_response) - 1) / 2; + /* Unicode strings must be word + aligned */ + bcc_ptr++; } else { remaining_words = BCC(smb_buffer_response) / 2; @@ -2308,13 +2396,15 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses, /* We look for obvious messed up bcc or strings in response so we do not go off the end since (at least) WIN2K and Windows XP have a major bug in not null terminating last Unicode string in response */ - if(ses->serverOS) + if (ses->serverOS) kfree(ses->serverOS); - ses->serverOS = kzalloc(2 * (len + 1), GFP_KERNEL); - if(ses->serverOS == NULL) + ses->serverOS = kzalloc(2 * (len + 1), + GFP_KERNEL); + if (ses->serverOS == NULL) goto sesssetup_nomem; cifs_strfromUCS_le(ses->serverOS, - (__le16 *)bcc_ptr, len,nls_codepage); + (__le16 *)bcc_ptr, + len, nls_codepage); bcc_ptr += 2 * (len + 1); remaining_words -= len + 1; ses->serverOS[2 * len] = 0; @@ -2323,42 +2413,49 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses, len = UniStrnlen((wchar_t *)bcc_ptr, remaining_words-1); kfree(ses->serverNOS); - ses->serverNOS = kzalloc(2 * (len + 1),GFP_KERNEL); - if(ses->serverNOS == NULL) + ses->serverNOS = kzalloc(2 * (len + 1), + GFP_KERNEL); + if (ses->serverNOS == NULL) goto sesssetup_nomem; cifs_strfromUCS_le(ses->serverNOS, - (__le16 *)bcc_ptr,len,nls_codepage); + (__le16 *)bcc_ptr, + len, nls_codepage); bcc_ptr += 2 * (len + 1); ses->serverNOS[2 * len] = 0; ses->serverNOS[1 + (2 * len)] = 0; - if(strncmp(ses->serverNOS, - "NT LAN Manager 4",16) == 0) { - cFYI(1,("NT4 server")); + if (strncmp(ses->serverNOS, + "NT LAN Manager 4", 16) == 0) { + cFYI(1, ("NT4 server")); ses->flags |= CIFS_SES_NT4; } remaining_words -= len + 1; if (remaining_words > 0) { len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words); - /* last string is not always null terminated (for e.g. for Windows XP & 2000) */ - if(ses->serverDomain) + /* last string is not always null terminated + (for e.g. for Windows XP & 2000) */ + if (ses->serverDomain) kfree(ses->serverDomain); ses->serverDomain = - kzalloc(2*(len+1),GFP_KERNEL); - if(ses->serverDomain == NULL) + kzalloc(2*(len+1), + GFP_KERNEL); + if (ses->serverDomain == NULL) goto sesssetup_nomem; cifs_strfromUCS_le(ses->serverDomain, - (__le16 *)bcc_ptr,len,nls_codepage); + (__le16 *)bcc_ptr, + len, nls_codepage); bcc_ptr += 2 * (len + 1); ses->serverDomain[2*len] = 0; ses->serverDomain[1+(2*len)] = 0; - } /* else no more room so create dummy domain string */ - else { - if(ses->serverDomain) + } else { /* else no more room so create + dummy domain string */ + if (ses->serverDomain) kfree(ses->serverDomain); - ses->serverDomain = + ses->serverDomain = kzalloc(2, GFP_KERNEL); } - } else { /* no room so create dummy domain and NOS string */ + } else { /* no room so create dummy domain + and NOS string */ + /* if these kcallocs fail not much we can do, but better to not fail the sesssetup itself */ @@ -2375,19 +2472,22 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses, pByteArea(smb_buffer_response) <= BCC(smb_buffer_response)) { kfree(ses->serverOS); - ses->serverOS = kzalloc(len + 1,GFP_KERNEL); - if(ses->serverOS == NULL) + ses->serverOS = kzalloc(len + 1, + GFP_KERNEL); + if (ses->serverOS == NULL) goto sesssetup_nomem; - strncpy(ses->serverOS,bcc_ptr, len); + strncpy(ses->serverOS, bcc_ptr, len); bcc_ptr += len; - bcc_ptr[0] = 0; /* null terminate the string */ + /* null terminate the string */ + bcc_ptr[0] = 0; bcc_ptr++; len = strnlen(bcc_ptr, 1024); kfree(ses->serverNOS); - ses->serverNOS = kzalloc(len + 1,GFP_KERNEL); - if(ses->serverNOS == NULL) + ses->serverNOS = kzalloc(len + 1, + GFP_KERNEL); + if (ses->serverNOS == NULL) goto sesssetup_nomem; strncpy(ses->serverNOS, bcc_ptr, len); bcc_ptr += len; @@ -2395,23 +2495,27 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses, bcc_ptr++; len = strnlen(bcc_ptr, 1024); - if(ses->serverDomain) + if (ses->serverDomain) kfree(ses->serverDomain); - ses->serverDomain = kzalloc(len + 1,GFP_KERNEL); - if(ses->serverDomain == NULL) + ses->serverDomain = kzalloc(len + 1, + GFP_KERNEL); + if (ses->serverDomain == NULL) goto sesssetup_nomem; - strncpy(ses->serverDomain, bcc_ptr, len); + strncpy(ses->serverDomain, bcc_ptr, + len); bcc_ptr += len; bcc_ptr[0] = 0; bcc_ptr++; } else cFYI(1, - ("Variable field of length %d extends beyond end of smb ", + ("Variable field of length %d " + "extends beyond end of smb ", len)); } } else { cERROR(1, - (" Security Blob Length extends beyond end of SMB")); + (" Security Blob Length extends beyond " + "end of SMB")); } } else { cERROR(1, @@ -2430,7 +2534,7 @@ sesssetup_nomem: /* do not return an error on nomem for the info strings, static int CIFSNTLMSSPNegotiateSessSetup(unsigned int xid, - struct cifsSesInfo *ses, int * pNTLMv2_flag, + struct cifsSesInfo *ses, int *pNTLMv2_flag, const struct nls_table *nls_codepage) { struct smb_hdr *smb_buffer; @@ -2450,7 +2554,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid, __u16 count; cFYI(1, ("In NTLMSSP sesssetup (negotiate)")); - if(ses == NULL) + if (ses == NULL) return -EINVAL; domain = ses->domainName; *pNTLMv2_flag = FALSE; @@ -2474,7 +2578,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid, pSMB->req.MaxBufferSize = cpu_to_le16(ses->server->maxBuf); pSMB->req.MaxMpxCount = cpu_to_le16(ses->server->maxReq); - if(ses->server->secMode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) + if (ses->server->secMode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) smb_buffer->Flags2 |= SMBFLG2_SECURITY_SIGNATURE; capabilities = CAP_LARGE_FILES | CAP_NT_SMBS | CAP_LEVEL_II_OPLOCKS | @@ -2502,9 +2606,9 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid, NTLMSSP_REQUEST_TARGET | NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_56 | /* NTLMSSP_NEGOTIATE_ALWAYS_SIGN | */ NTLMSSP_NEGOTIATE_128; - if(sign_CIFS_PDUs) + if (sign_CIFS_PDUs) negotiate_flags |= NTLMSSP_NEGOTIATE_SIGN; -/* if(ntlmv2_support) +/* if (ntlmv2_support) negotiate_flags |= NTLMSSP_NEGOTIATE_NTLMV2;*/ /* setup pointers to domain name and workstation name */ bcc_ptr += SecurityBlobLength; @@ -2574,11 +2678,11 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid, __u16 blob_len = le16_to_cpu(pSMBr->resp.SecurityBlobLength); if (action & GUEST_LOGIN) - cFYI(1, (" Guest login")); - /* Do we want to set anything in SesInfo struct when guest login? */ + cFYI(1, (" Guest login")); + /* Do we want to set anything in SesInfo struct when guest login? */ - bcc_ptr = pByteArea(smb_buffer_response); - /* response can have either 3 or 4 word count - Samba sends 3 */ + bcc_ptr = pByteArea(smb_buffer_response); + /* response can have either 3 or 4 word count - Samba sends 3 */ SecurityBlob2 = (PCHALLENGE_MESSAGE) bcc_ptr; if (SecurityBlob2->MessageType != NtLmChallenge) { @@ -2586,7 +2690,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid, ("Unexpected NTLMSSP message type received %d", SecurityBlob2->MessageType)); } else if (ses) { - ses->Suid = smb_buffer_response->Uid; /* UID left in le format */ + ses->Suid = smb_buffer_response->Uid; /* UID left in le format */ cFYI(1, ("UID = %d", ses->Suid)); if ((pSMBr->resp.hdr.WordCount == 3) || ((pSMBr->resp.hdr.WordCount == 4) @@ -2604,18 +2708,18 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid, memcpy(ses->server->cryptKey, SecurityBlob2->Challenge, CIFS_CRYPTO_KEY_SIZE); - if(SecurityBlob2->NegotiateFlags & + if (SecurityBlob2->NegotiateFlags & cpu_to_le32(NTLMSSP_NEGOTIATE_NTLMV2)) *pNTLMv2_flag = TRUE; - if((SecurityBlob2->NegotiateFlags & - cpu_to_le32(NTLMSSP_NEGOTIATE_ALWAYS_SIGN)) + if ((SecurityBlob2->NegotiateFlags & + cpu_to_le32(NTLMSSP_NEGOTIATE_ALWAYS_SIGN)) || (sign_CIFS_PDUs > 1)) - ses->server->secMode |= - SECMODE_SIGN_REQUIRED; - if ((SecurityBlob2->NegotiateFlags & + ses->server->secMode |= + SECMODE_SIGN_REQUIRED; + if ((SecurityBlob2->NegotiateFlags & cpu_to_le32(NTLMSSP_NEGOTIATE_SIGN)) && (sign_CIFS_PDUs)) - ses->server->secMode |= + ses->server->secMode |= SECMODE_SIGN_ENABLED; if (smb_buffer->Flags2 & SMBFLG2_UNICODE) { @@ -2623,7 +2727,8 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid, remaining_words = (BCC(smb_buffer_response) - 1) / 2; - bcc_ptr++; /* Unicode strings must be word aligned */ + /* Must word align unicode strings */ + bcc_ptr++; } else { remaining_words = BCC @@ -2635,7 +2740,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid, /* We look for obvious messed up bcc or strings in response so we do not go off the end since (at least) WIN2K and Windows XP have a major bug in not null terminating last Unicode string in response */ - if(ses->serverOS) + if (ses->serverOS) kfree(ses->serverOS); ses->serverOS = kzalloc(2 * (len + 1), GFP_KERNEL); @@ -2668,8 +2773,9 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid, (2 * len)] = 0; remaining_words -= len + 1; if (remaining_words > 0) { - len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words); - /* last string is not always null terminated (for e.g. for Windows XP & 2000) */ + len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words); + /* last string not always null terminated + (for e.g. for Windows XP & 2000) */ kfree(ses->serverDomain); ses->serverDomain = kzalloc(2 * @@ -2707,7 +2813,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid, if (((long) bcc_ptr + len) - (long) pByteArea(smb_buffer_response) <= BCC(smb_buffer_response)) { - if(ses->serverOS) + if (ses->serverOS) kfree(ses->serverOS); ses->serverOS = kzalloc(len + 1, @@ -2734,18 +2840,20 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid, ses->serverDomain = kzalloc(len + 1, GFP_KERNEL); - strncpy(ses->serverDomain, bcc_ptr, len); + strncpy(ses->serverDomain, + bcc_ptr, len); bcc_ptr += len; bcc_ptr[0] = 0; bcc_ptr++; } else cFYI(1, - ("Variable field of length %d extends beyond end of smb", + ("field of length %d " + "extends beyond end of smb", len)); } } else { - cERROR(1, - (" Security Blob Length extends beyond end of SMB")); + cERROR(1, ("Security Blob Length extends beyond" + " end of SMB")); } } else { cERROR(1, ("No session structure passed in.")); @@ -2784,7 +2892,7 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses, __u16 count; cFYI(1, ("In NTLMSSPSessSetup (Authenticate)")); - if(ses == NULL) + if (ses == NULL) return -EINVAL; user = ses->userName; domain = ses->domainName; @@ -2809,7 +2917,7 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses, pSMB->req.hdr.Uid = ses->Suid; - if(ses->server->secMode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) + if (ses->server->secMode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) smb_buffer->Flags2 |= SMBFLG2_SECURITY_SIGNATURE; capabilities = CAP_LARGE_FILES | CAP_NT_SMBS | CAP_LEVEL_II_OPLOCKS | @@ -2833,13 +2941,13 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses, strncpy(SecurityBlob->Signature, NTLMSSP_SIGNATURE, 8); SecurityBlob->MessageType = NtLmAuthenticate; bcc_ptr += SecurityBlobLength; - negotiate_flags = + negotiate_flags = NTLMSSP_NEGOTIATE_UNICODE | NTLMSSP_REQUEST_TARGET | NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_TARGET_INFO | 0x80000000 | NTLMSSP_NEGOTIATE_128; - if(sign_CIFS_PDUs) + if (sign_CIFS_PDUs) negotiate_flags |= /* NTLMSSP_NEGOTIATE_ALWAYS_SIGN |*/ NTLMSSP_NEGOTIATE_SIGN; - if(ntlmv2_flag) + if (ntlmv2_flag) negotiate_flags |= NTLMSSP_NEGOTIATE_NTLMV2; /* setup pointers to domain name and workstation name */ @@ -2903,13 +3011,17 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses, cpu_to_le16(len); } - /* SecurityBlob->WorkstationName.Length = cifs_strtoUCS((__le16 *) bcc_ptr, "AMACHINE",64, nls_codepage); + /* SecurityBlob->WorkstationName.Length = + cifs_strtoUCS((__le16 *) bcc_ptr, "AMACHINE",64, nls_codepage); SecurityBlob->WorkstationName.Length *= 2; - SecurityBlob->WorkstationName.MaximumLength = cpu_to_le16(SecurityBlob->WorkstationName.Length); - SecurityBlob->WorkstationName.Buffer = cpu_to_le32(SecurityBlobLength); + SecurityBlob->WorkstationName.MaximumLength = + cpu_to_le16(SecurityBlob->WorkstationName.Length); + SecurityBlob->WorkstationName.Buffer = + cpu_to_le32(SecurityBlobLength); bcc_ptr += SecurityBlob->WorkstationName.Length; SecurityBlobLength += SecurityBlob->WorkstationName.Length; - SecurityBlob->WorkstationName.Length = cpu_to_le16(SecurityBlob->WorkstationName.Length); */ + SecurityBlob->WorkstationName.Length = + cpu_to_le16(SecurityBlob->WorkstationName.Length); */ if ((long) bcc_ptr % 2) { *bcc_ptr = 0; @@ -2995,17 +3107,20 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses, __u16 blob_len = le16_to_cpu(pSMBr->resp.SecurityBlobLength); if (action & GUEST_LOGIN) - cFYI(1, (" Guest login")); /* BB do we want to set anything in SesInfo struct ? */ -/* if(SecurityBlob2->MessageType != NtLm??){ - cFYI("Unexpected message type on auth response is %d ")); - } */ + cFYI(1, (" Guest login")); /* BB Should we set anything + in SesInfo struct ? */ +/* if (SecurityBlob2->MessageType != NtLm??) { + cFYI("Unexpected message type on auth response is %d")); + } */ + if (ses) { cFYI(1, - ("Does UID on challenge %d match auth response UID %d ", + ("Check challenge UID %d vs auth response UID %d", ses->Suid, smb_buffer_response->Uid)); - ses->Suid = smb_buffer_response->Uid; /* UID left in wire format */ - bcc_ptr = pByteArea(smb_buffer_response); - /* response can have either 3 or 4 word count - Samba sends 3 */ + /* UID left in wire format */ + ses->Suid = smb_buffer_response->Uid; + bcc_ptr = pByteArea(smb_buffer_response); + /* response can have either 3 or 4 word count - Samba sends 3 */ if ((pSMBr->resp.hdr.WordCount == 3) || ((pSMBr->resp.hdr.WordCount == 4) && (blob_len < @@ -3035,7 +3150,7 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses, /* We look for obvious messed up bcc or strings in response so we do not go off the end since (at least) WIN2K and Windows XP have a major bug in not null terminating last Unicode string in response */ - if(ses->serverOS) + if (ses->serverOS) kfree(ses->serverOS); ses->serverOS = kzalloc(2 * (len + 1), GFP_KERNEL); @@ -3067,9 +3182,9 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses, ses->serverNOS[1+(2*len)] = 0; remaining_words -= len + 1; if (remaining_words > 0) { - len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words); + len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words); /* last string not always null terminated (e.g. for Windows XP & 2000) */ - if(ses->serverDomain) + if (ses->serverDomain) kfree(ses->serverDomain); ses->serverDomain = kzalloc(2 * @@ -3097,12 +3212,12 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses, = 0; } /* else no more room so create dummy domain string */ else { - if(ses->serverDomain) + if (ses->serverDomain) kfree(ses->serverDomain); ses->serverDomain = kzalloc(2,GFP_KERNEL); } } else { /* no room so create dummy domain and NOS string */ - if(ses->serverDomain) + if (ses->serverDomain) kfree(ses->serverDomain); ses->serverDomain = kzalloc(2, GFP_KERNEL); kfree(ses->serverNOS); @@ -3110,10 +3225,10 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses, } } else { /* ASCII */ len = strnlen(bcc_ptr, 1024); - if (((long) bcc_ptr + len) - - (long) pByteArea(smb_buffer_response) - <= BCC(smb_buffer_response)) { - if(ses->serverOS) + if (((long) bcc_ptr + len) - + (long) pByteArea(smb_buffer_response) + <= BCC(smb_buffer_response)) { + if (ses->serverOS) kfree(ses->serverOS); ses->serverOS = kzalloc(len + 1,GFP_KERNEL); strncpy(ses->serverOS,bcc_ptr, len); @@ -3124,28 +3239,35 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses, len = strnlen(bcc_ptr, 1024); kfree(ses->serverNOS); - ses->serverNOS = kzalloc(len+1,GFP_KERNEL); - strncpy(ses->serverNOS, bcc_ptr, len); + ses->serverNOS = kzalloc(len+1, + GFP_KERNEL); + strncpy(ses->serverNOS, + bcc_ptr, len); bcc_ptr += len; bcc_ptr[0] = 0; bcc_ptr++; len = strnlen(bcc_ptr, 1024); - if(ses->serverDomain) + if (ses->serverDomain) kfree(ses->serverDomain); - ses->serverDomain = kzalloc(len+1,GFP_KERNEL); - strncpy(ses->serverDomain, bcc_ptr, len); + ses->serverDomain = + kzalloc(len+1, + GFP_KERNEL); + strncpy(ses->serverDomain, + bcc_ptr, len); bcc_ptr += len; bcc_ptr[0] = 0; bcc_ptr++; } else cFYI(1, - ("Variable field of length %d extends beyond end of smb ", + ("field of length %d " + "extends beyond end of smb ", len)); } } else { cERROR(1, - (" Security Blob Length extends beyond end of SMB")); + (" Security Blob extends beyond end " + "of SMB")); } } else { cERROR(1, ("No session structure passed in.")); @@ -3197,7 +3319,7 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses, pSMB->AndXCommand = 0xFF; pSMB->Flags = cpu_to_le16(TCON_EXTENDED_SECINFO); bcc_ptr = &pSMB->Password[0]; - if((ses->server->secMode) & SECMODE_USER) { + if ((ses->server->secMode) & SECMODE_USER) { pSMB->PasswordLength = cpu_to_le16(1); /* minimum */ *bcc_ptr = 0; /* password is null byte */ bcc_ptr++; /* skip password */ @@ -3211,7 +3333,7 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses, by Samba (not sure whether other servers allow NTLMv2 password here) */ #ifdef CONFIG_CIFS_WEAK_PW_HASH - if((extended_security & CIFSSEC_MAY_LANMAN) && + if ((extended_security & CIFSSEC_MAY_LANMAN) && (ses->server->secType == LANMAN)) calc_lanman_hash(ses, bcc_ptr); else @@ -3221,14 +3343,14 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses, bcc_ptr); bcc_ptr += CIFS_SESS_KEY_SIZE; - if(ses->capabilities & CAP_UNICODE) { + if (ses->capabilities & CAP_UNICODE) { /* must align unicode strings */ *bcc_ptr = 0; /* null byte password */ bcc_ptr++; } } - if(ses->server->secMode & + if (ses->server->secMode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) smb_buffer->Flags2 |= SMBFLG2_SECURITY_SIGNATURE; @@ -3241,8 +3363,8 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses, if (ses->capabilities & CAP_UNICODE) { smb_buffer->Flags2 |= SMBFLG2_UNICODE; length = - cifs_strtoUCS((__le16 *) bcc_ptr, tree, - 6 /* max utf8 char length in bytes */ * + cifs_strtoUCS((__le16 *) bcc_ptr, tree, + 6 /* max utf8 char length in bytes */ * (/* server len*/ + 256 /* share len */), nls_codepage); bcc_ptr += 2 * length; /* convert num 16 bit words to bytes */ bcc_ptr += 2; /* skip trailing null */ @@ -3266,8 +3388,8 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses, tcon->tid = smb_buffer_response->Tid; bcc_ptr = pByteArea(smb_buffer_response); length = strnlen(bcc_ptr, BCC(smb_buffer_response) - 2); - /* skip service field (NB: this field is always ASCII) */ - bcc_ptr += length + 1; + /* skip service field (NB: this field is always ASCII) */ + bcc_ptr += length + 1; strncpy(tcon->treeName, tree, MAX_TREE_SIZE); if (smb_buffer->Flags2 & SMBFLG2_UNICODE) { length = UniStrnlen((wchar_t *) bcc_ptr, 512); @@ -3285,7 +3407,7 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses, bcc_ptr[1] = 0; bcc_ptr += 2; } - /* else do not bother copying these informational fields */ + /* else do not bother copying these information fields*/ } else { length = strnlen(bcc_ptr, 1024); if ((bcc_ptr + length) - @@ -3297,9 +3419,9 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses, strncpy(tcon->nativeFileSystem, bcc_ptr, length); } - /* else do not bother copying these informational fields */ + /* else do not bother copying these information fields*/ } - if((smb_buffer_response->WordCount == 3) || + if ((smb_buffer_response->WordCount == 3) || (smb_buffer_response->WordCount == 7)) /* field is in same location */ tcon->Flags = le16_to_cpu(pSMBr->OptionalSupport); @@ -3307,7 +3429,7 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses, tcon->Flags = 0; cFYI(1, ("Tcon flags: 0x%x ", tcon->Flags)); } else if ((rc == 0) && tcon == NULL) { - /* all we need to save for IPC$ connection */ + /* all we need to save for IPC$ connection */ ses->ipc_tid = smb_buffer_response->Tid; } @@ -3323,7 +3445,7 @@ cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb) int xid; struct cifsSesInfo *ses = NULL; struct task_struct *cifsd_task; - char * tmp; + char *tmp; xid = GetXid(); @@ -3344,9 +3466,9 @@ cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb) FreeXid(xid); return 0; } else if (rc == -ESHUTDOWN) { - cFYI(1,("Waking up socket by sending it signal")); + cFYI(1, ("Waking up socket by sending signal")); if (cifsd_task) { - send_sig(SIGKILL,cifsd_task,1); + force_sig(SIGKILL, cifsd_task); kthread_stop(cifsd_task); } rc = 0; @@ -3355,7 +3477,7 @@ cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb) } else cFYI(1, ("No session or bad tcon")); } - + cifs_sb->tcon = NULL; tmp = cifs_sb->prepath; cifs_sb->prepathlen = 0; @@ -3367,11 +3489,11 @@ cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb) sesInfoFree(ses); FreeXid(xid); - return rc; /* BB check if we should always return zero here */ -} + return rc; /* BB check if we should always return zero here */ +} int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo, - struct nls_table * nls_info) + struct nls_table *nls_info) { int rc = 0; char ntlm_session_key[CIFS_SESS_KEY_SIZE]; @@ -3379,16 +3501,16 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo, int first_time = 0; /* what if server changes its buffer size after dropping the session? */ - if(pSesInfo->server->maxBuf == 0) /* no need to send on reconnect */ { + if (pSesInfo->server->maxBuf == 0) /* no need to send on reconnect */ { rc = CIFSSMBNegotiate(xid, pSesInfo); - if(rc == -EAGAIN) /* retry only once on 1st time connection */ { + if (rc == -EAGAIN) /* retry only once on 1st time connection */ { rc = CIFSSMBNegotiate(xid, pSesInfo); - if(rc == -EAGAIN) + if (rc == -EAGAIN) rc = -EHOSTDOWN; } - if(rc == 0) { + if (rc == 0) { spin_lock(&GlobalMid_Lock); - if(pSesInfo->server->tcpStatus != CifsExiting) + if (pSesInfo->server->tcpStatus != CifsExiting) pSesInfo->server->tcpStatus = CifsGood; else rc = -EHOSTDOWN; @@ -3400,18 +3522,19 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo, if (!rc) { pSesInfo->flags = 0; pSesInfo->capabilities = pSesInfo->server->capabilities; - if(linuxExtEnabled == 0) + if (linuxExtEnabled == 0) pSesInfo->capabilities &= (~CAP_UNIX); /* pSesInfo->sequence_number = 0;*/ - cFYI(1,("Security Mode: 0x%x Capabilities: 0x%x TimeAdjust: %d", + cFYI(1, + ("Security Mode: 0x%x Capabilities: 0x%x TimeAdjust: %d", pSesInfo->server->secMode, pSesInfo->server->capabilities, pSesInfo->server->timeAdj)); - if(experimEnabled < 2) + if (experimEnabled < 2) rc = CIFS_SessSetup(xid, pSesInfo, first_time, nls_info); else if (extended_security - && (pSesInfo->capabilities + && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY) && (pSesInfo->server->secType == NTLMSSP)) { rc = -EOPNOTSUPP; @@ -3424,21 +3547,22 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo, &ntlmv2_flag, nls_info); if (!rc) { - if(ntlmv2_flag) { - char * v2_response; - cFYI(1,("more secure NTLM ver2 hash")); - if(CalcNTLMv2_partial_mac_key(pSesInfo, + if (ntlmv2_flag) { + char *v2_response; + cFYI(1, ("more secure NTLM ver2 hash")); + if (CalcNTLMv2_partial_mac_key(pSesInfo, nls_info)) { rc = -ENOMEM; goto ss_err_exit; } else v2_response = kmalloc(16 + 64 /* blob */, GFP_KERNEL); - if(v2_response) { - CalcNTLMv2_response(pSesInfo,v2_response); - /* if(first_time) - cifs_calculate_ntlmv2_mac_key( - pSesInfo->server->mac_signing_key, - response, ntlm_session_key, */ + if (v2_response) { + CalcNTLMv2_response(pSesInfo, + v2_response); + /* if (first_time) + cifs_calculate_ntlmv2_mac_key( + pSesInfo->server->mac_signing_key, + response, ntlm_session_key,*/ kfree(v2_response); /* BB Put dummy sig in SessSetup PDU? */ } else { @@ -3451,9 +3575,9 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo, pSesInfo->server->cryptKey, ntlm_session_key); - if(first_time) + if (first_time) cifs_calculate_mac_key( - pSesInfo->server->mac_signing_key, + &pSesInfo->server->mac_signing_key, ntlm_session_key, pSesInfo->password); } @@ -3471,18 +3595,18 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo, pSesInfo->server->cryptKey, ntlm_session_key); - if(first_time) + if (first_time) cifs_calculate_mac_key( - pSesInfo->server->mac_signing_key, + &pSesInfo->server->mac_signing_key, ntlm_session_key, pSesInfo->password); rc = CIFSSessSetup(xid, pSesInfo, ntlm_session_key, nls_info); } if (rc) { - cERROR(1,("Send error in SessSetup = %d",rc)); + cERROR(1, ("Send error in SessSetup = %d", rc)); } else { - cFYI(1,("CIFS Session Established successfully")); + cFYI(1, ("CIFS Session Established successfully")); pSesInfo->status = CifsGood; } } diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 8e86aaceb68a..4830acc86d74 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -135,10 +135,10 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, struct cifs_sb_info *cifs_sb; struct cifsTconInfo *pTcon; char *full_path = NULL; - FILE_ALL_INFO * buf = NULL; + FILE_ALL_INFO *buf = NULL; struct inode *newinode = NULL; - struct cifsFileInfo * pCifsFile = NULL; - struct cifsInodeInfo * pCifsInode; + struct cifsFileInfo *pCifsFile = NULL; + struct cifsInodeInfo *pCifsInode; int disposition = FILE_OVERWRITE_IF; int write_only = FALSE; @@ -207,8 +207,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, } else { /* If Open reported that we actually created a file then we now have to set the mode if possible */ - if ((cifs_sb->tcon->ses->capabilities & CAP_UNIX) && - (oplock & CIFS_CREATE_ACTION)) { + if ((pTcon->unix_ext) && (oplock & CIFS_CREATE_ACTION)) { mode &= ~current->fs->umask; if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { CIFSSMBUnixSetPerms(xid, pTcon, full_path, mode, @@ -235,8 +234,8 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, /* Could set r/o dos attribute if mode & 0222 == 0 */ } - /* BB server might mask mode so we have to query for Unix case*/ - if (pTcon->ses->capabilities & CAP_UNIX) + /* server might mask mode so we have to query for it */ + if (pTcon->unix_ext) rc = cifs_get_inode_info_unix(&newinode, full_path, inode->i_sb, xid); else { @@ -264,7 +263,8 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, direntry->d_op = &cifs_dentry_ops; d_instantiate(direntry, newinode); } - if ((nd->flags & LOOKUP_OPEN) == FALSE) { + if ((nd == NULL /* nfsd case - nfs srv does not set nd */) || + ((nd->flags & LOOKUP_OPEN) == FALSE)) { /* mknod case - do not leave file open */ CIFSSMBClose(xid, pTcon, fileHandle); } else if (newinode) { @@ -323,7 +323,7 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode, struct cifs_sb_info *cifs_sb; struct cifsTconInfo *pTcon; char *full_path = NULL; - struct inode * newinode = NULL; + struct inode *newinode = NULL; if (!old_valid_dev(device_number)) return -EINVAL; @@ -336,7 +336,7 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode, full_path = build_path_from_dentry(direntry); if (full_path == NULL) rc = -ENOMEM; - else if (pTcon->ses->capabilities & CAP_UNIX) { + else if (pTcon->unix_ext) { mode &= ~current->fs->umask; if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { rc = CIFSSMBUnixSetPerms(xid, pTcon, full_path, @@ -490,7 +490,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, cFYI(1, (" Full path: %s inode = 0x%p", full_path, direntry->d_inode)); - if (pTcon->ses->capabilities & CAP_UNIX) + if (pTcon->unix_ext) rc = cifs_get_inode_info_unix(&newInode, full_path, parent_dir_inode->i_sb, xid); else diff --git a/fs/cifs/export.c b/fs/cifs/export.c index 96df1d51fdc3..893fd0aebff8 100644 --- a/fs/cifs/export.c +++ b/fs/cifs/export.c @@ -5,7 +5,7 @@ * Author(s): Steve French (sfrench@us.ibm.com) * * Common Internet FileSystem (CIFS) client - * + * * Operations related to support for exporting files via NFSD * * This library is free software; you can redistribute it and/or modify @@ -22,32 +22,45 @@ * along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ - - /* + + /* * See Documentation/filesystems/Exporting * and examples in fs/exportfs + * + * Since cifs is a network file system, an "fsid" must be included for + * any nfs exports file entries which refer to cifs paths. In addition + * the cifs mount must be mounted with the "serverino" option (ie use stable + * server inode numbers instead of locally generated temporary ones). + * Although cifs inodes do not use generation numbers (have generation number + * of zero) - the inode number alone should be good enough for simple cases + * in which users want to export cifs shares with NFS. The decode and encode + * could be improved by using a new routine which expects 64 bit inode numbers + * instead of the default 32 bit routines in fs/exportfs + * */ #include <linux/fs.h> #include <linux/exportfs.h> - +#include "cifsglob.h" +#include "cifs_debug.h" + #ifdef CONFIG_CIFS_EXPERIMENTAL - static struct dentry *cifs_get_parent(struct dentry *dentry) { - /* BB need to add code here eventually to enable export via NFSD */ - return ERR_PTR(-EACCES); + /* BB need to add code here eventually to enable export via NFSD */ + cFYI(1, ("get parent for %p", dentry)); + return ERR_PTR(-EACCES); } - + struct export_operations cifs_export_ops = { - .get_parent = cifs_get_parent, -/* Following five export operations are unneeded so far and can default */ -/* .get_dentry = - .get_name = - .find_exported_dentry = - .decode_fh = - .encode_fs = */ - }; - + .get_parent = cifs_get_parent, +/* Following five export operations are unneeded so far and can default: + .get_dentry = + .get_name = + .find_exported_dentry = + .decode_fh = + .encode_fs = */ +}; + #endif /* EXPERIMENTAL */ - + diff --git a/fs/cifs/fcntl.c b/fs/cifs/fcntl.c index 8e375bb4b379..995474c90885 100644 --- a/fs/cifs/fcntl.c +++ b/fs/cifs/fcntl.c @@ -66,7 +66,7 @@ static __u32 convert_to_cifs_notify_flags(unsigned long fcntl_notify_flags) return cifs_ntfy_flags; } -int cifs_dir_notify(struct file * file, unsigned long arg) +int cifs_dir_notify(struct file *file, unsigned long arg) { int xid; int rc = -EINVAL; diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 94d5b49049df..e13592afca9c 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -2,8 +2,8 @@ * fs/cifs/file.c * * vfs operations that deal with files - * - * Copyright (C) International Business Machines Corp., 2002,2003 + * + * Copyright (C) International Business Machines Corp., 2002,2007 * Author(s): Steve French (sfrench@us.ibm.com) * Jeremy Allison (jra@samba.org) * @@ -45,7 +45,7 @@ static inline struct cifsFileInfo *cifs_init_private( { memset(private_data, 0, sizeof(struct cifsFileInfo)); private_data->netfid = netfid; - private_data->pid = current->tgid; + private_data->pid = current->tgid; init_MUTEX(&private_data->fh_sem); mutex_init(&private_data->lock_mutex); INIT_LIST_HEAD(&private_data->llist); @@ -57,7 +57,7 @@ static inline struct cifsFileInfo *cifs_init_private( does not tell us which handle the write is for so there can be a close (overlapping with write) of the filehandle that cifs_writepages chose to use */ - atomic_set(&private_data->wrtPending,0); + atomic_set(&private_data->wrtPending, 0); return private_data; } @@ -105,7 +105,7 @@ static inline int cifs_open_inode_helper(struct inode *inode, struct file *file, in the list so we do not have to walk the list to search for one in prepare_write */ if ((file->f_flags & O_ACCMODE) == O_WRONLY) { - list_add_tail(&pCifsFile->flist, + list_add_tail(&pCifsFile->flist, &pCifsInode->openFileList); } else { list_add(&pCifsFile->flist, @@ -138,7 +138,7 @@ static inline int cifs_open_inode_helper(struct inode *inode, struct file *file, } client_can_cache: - if (pTcon->ses->capabilities & CAP_UNIX) + if (pTcon->unix_ext) rc = cifs_get_inode_info_unix(&file->f_path.dentry->d_inode, full_path, inode->i_sb, xid); else @@ -189,7 +189,7 @@ int cifs_open(struct inode *inode, struct file *file) /* needed for writepage */ pCifsFile->pfile = file; - + file->private_data = pCifsFile; break; } @@ -212,15 +212,15 @@ int cifs_open(struct inode *inode, struct file *file) return -ENOMEM; } - cFYI(1, (" inode = 0x%p file flags are 0x%x for %s", + cFYI(1, ("inode = 0x%p file flags are 0x%x for %s", inode, file->f_flags, full_path)); desiredAccess = cifs_convert_flags(file->f_flags); /********************************************************************* * open flag mapping table: - * + * * POSIX Flag CIFS Disposition - * ---------- ---------------- + * ---------- ---------------- * O_CREAT FILE_OPEN_IF * O_CREAT | O_EXCL FILE_CREATE * O_CREAT | O_TRUNC FILE_OVERWRITE_IF @@ -228,12 +228,12 @@ int cifs_open(struct inode *inode, struct file *file) * none of the above FILE_OPEN * * Note that there is not a direct match between disposition - * FILE_SUPERSEDE (ie create whether or not file exists although + * FILE_SUPERSEDE (ie create whether or not file exists although * O_CREAT | O_TRUNC is similar but truncates the existing * file rather than creating a new file as FILE_SUPERSEDE does * (which uses the attributes / metadata passed in on open call) *? - *? O_SYNC is a reasonable match to CIFS writethrough flag + *? O_SYNC is a reasonable match to CIFS writethrough flag *? and the read write flags match reasonably. O_LARGEFILE *? is irrelevant because largefile support is always used *? by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY, @@ -253,8 +253,8 @@ int cifs_open(struct inode *inode, struct file *file) and calling get_inode_info with returned buf (at least helps non-Unix server case) */ - /* BB we can not do this if this is the second open of a file - and the first handle has writebehind data, we might be + /* BB we can not do this if this is the second open of a file + and the first handle has writebehind data, we might be able to simply do a filemap_fdatawrite/filemap_fdatawait first */ buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL); if (!buf) { @@ -263,7 +263,7 @@ int cifs_open(struct inode *inode, struct file *file) } if (cifs_sb->tcon->ses->capabilities & CAP_NT_SMBS) - rc = CIFSSMBOpen(xid, pTcon, full_path, disposition, + rc = CIFSSMBOpen(xid, pTcon, full_path, disposition, desiredAccess, CREATE_NOT_DIR, &netfid, &oplock, buf, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); @@ -300,15 +300,15 @@ int cifs_open(struct inode *inode, struct file *file) write_unlock(&GlobalSMBSeslock); } - if (oplock & CIFS_CREATE_ACTION) { + if (oplock & CIFS_CREATE_ACTION) { /* time to set mode which we can not set earlier due to problems creating new read-only files */ - if (cifs_sb->tcon->ses->capabilities & CAP_UNIX) { + if (pTcon->unix_ext) { CIFSSMBUnixSetPerms(xid, pTcon, full_path, inode->i_mode, (__u64)-1, (__u64)-1, 0 /* dev */, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & + cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); } else { /* BB implement via Windows security descriptors eg @@ -345,7 +345,7 @@ static int cifs_reopen_file(struct file *file, int can_flush) struct cifsTconInfo *pTcon; struct cifsFileInfo *pCifsFile; struct cifsInodeInfo *pCifsInode; - struct inode * inode; + struct inode *inode; char *full_path = NULL; int desiredAccess; int disposition = FILE_OPEN; @@ -372,13 +372,13 @@ static int cifs_reopen_file(struct file *file, int can_flush) } inode = file->f_path.dentry->d_inode; - if(inode == NULL) { + if (inode == NULL) { cERROR(1, ("inode not valid")); dump_stack(); rc = -EBADF; goto reopen_error_exit; } - + cifs_sb = CIFS_SB(inode->i_sb); pTcon = cifs_sb->tcon; @@ -396,7 +396,7 @@ reopen_error_exit: } cFYI(1, ("inode = 0x%p file flags 0x%x for %s", - inode, file->f_flags,full_path)); + inode, file->f_flags, full_path)); desiredAccess = cifs_convert_flags(file->f_flags); if (oplockEnabled) @@ -405,14 +405,14 @@ reopen_error_exit: oplock = FALSE; /* Can not refresh inode by passing in file_info buf to be returned - by SMBOpen and then calling get_inode_info with returned buf - since file might have write behind data that needs to be flushed + by SMBOpen and then calling get_inode_info with returned buf + since file might have write behind data that needs to be flushed and server version of file size can be stale. If we knew for sure that inode was not dirty locally we could do this */ rc = CIFSSMBOpen(xid, pTcon, full_path, disposition, desiredAccess, CREATE_NOT_DIR, &netfid, &oplock, NULL, - cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & + cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); if (rc) { up(&pCifsFile->fh_sem); @@ -430,7 +430,7 @@ reopen_error_exit: go to server to get inode info */ pCifsInode->clientCanCacheAll = FALSE; pCifsInode->clientCanCacheRead = FALSE; - if (pTcon->ses->capabilities & CAP_UNIX) + if (pTcon->unix_ext) rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb, xid); else @@ -486,23 +486,24 @@ int cifs_close(struct inode *inode, struct file *file) already closed */ if (pTcon->tidStatus != CifsNeedReconnect) { int timeout = 2; - while((atomic_read(&pSMBFile->wrtPending) != 0) + while ((atomic_read(&pSMBFile->wrtPending) != 0) && (timeout < 1000) ) { /* Give write a better chance to get to server ahead of the close. We do not want to add a wait_q here as it would increase the memory utilization as the struct would be in each open file, - but this should give enough time to + but this should give enough time to clear the socket */ #ifdef CONFIG_CIFS_DEBUG2 - cFYI(1,("close delay, write pending")); + cFYI(1, ("close delay, write pending")); #endif /* DEBUG2 */ msleep(timeout); timeout *= 4; } - if(atomic_read(&pSMBFile->wrtPending)) - cERROR(1,("close with pending writes")); + if (atomic_read(&pSMBFile->wrtPending)) + cERROR(1, + ("close with pending writes")); rc = CIFSSMBClose(xid, pTcon, pSMBFile->netfid); } @@ -534,7 +535,7 @@ int cifs_close(struct inode *inode, struct file *file) CIFS_I(inode)->clientCanCacheRead = FALSE; CIFS_I(inode)->clientCanCacheAll = FALSE; } - if ((rc ==0) && CIFS_I(inode)->write_behind_rc) + if ((rc == 0) && CIFS_I(inode)->write_behind_rc) rc = CIFS_I(inode)->write_behind_rc; FreeXid(xid); return rc; @@ -554,7 +555,8 @@ int cifs_closedir(struct inode *inode, struct file *file) if (pCFileStruct) { struct cifsTconInfo *pTcon; - struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); + struct cifs_sb_info *cifs_sb = + CIFS_SB(file->f_path.dentry->d_sb); pTcon = cifs_sb->tcon; @@ -572,7 +574,7 @@ int cifs_closedir(struct inode *inode, struct file *file) if (ptmp) { cFYI(1, ("closedir free smb buf in srch struct")); pCFileStruct->srch_inf.ntwrk_buf_start = NULL; - if(pCFileStruct->srch_inf.smallBuf) + if (pCFileStruct->srch_inf.smallBuf) cifs_small_buf_release(ptmp); else cifs_buf_release(ptmp); @@ -594,7 +596,8 @@ int cifs_closedir(struct inode *inode, struct file *file) static int store_file_lock(struct cifsFileInfo *fid, __u64 len, __u64 offset, __u8 lockType) { - struct cifsLockInfo *li = kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL); + struct cifsLockInfo *li = + kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL); if (li == NULL) return -ENOMEM; li->offset = offset; @@ -625,8 +628,8 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock) cFYI(1, ("Lock parm: 0x%x flockflags: " "0x%x flocktype: 0x%x start: %lld end: %lld", - cmd, pfLock->fl_flags, pfLock->fl_type, pfLock->fl_start, - pfLock->fl_end)); + cmd, pfLock->fl_flags, pfLock->fl_type, pfLock->fl_start, + pfLock->fl_end)); if (pfLock->fl_flags & FL_POSIX) cFYI(1, ("Posix")); @@ -641,7 +644,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock) "not implemented yet")); if (pfLock->fl_flags & FL_LEASE) cFYI(1, ("Lease on file - not implemented yet")); - if (pfLock->fl_flags & + if (pfLock->fl_flags & (~(FL_POSIX | FL_FLOCK | FL_SLEEP | FL_ACCESS | FL_LEASE))) cFYI(1, ("Unknown lock flags 0x%x", pfLock->fl_flags)); @@ -683,9 +686,9 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock) account for negative length which we can not accept over the wire */ if (IS_GETLK(cmd)) { - if(posix_locking) { + if (posix_locking) { int posix_lock_type; - if(lockType & LOCKING_ANDX_SHARED_LOCK) + if (lockType & LOCKING_ANDX_SHARED_LOCK) posix_lock_type = CIFS_RDLCK; else posix_lock_type = CIFS_WRLCK; @@ -700,7 +703,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock) rc = CIFSSMBLock(xid, pTcon, netfid, length, pfLock->fl_start, 0, 1, lockType, 0 /* wait flag */ ); if (rc == 0) { - rc = CIFSSMBLock(xid, pTcon, netfid, length, + rc = CIFSSMBLock(xid, pTcon, netfid, length, pfLock->fl_start, 1 /* numUnlock */ , 0 /* numLock */ , lockType, 0 /* wait flag */ ); @@ -729,22 +732,24 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock) if (posix_locking) { int posix_lock_type; - if(lockType & LOCKING_ANDX_SHARED_LOCK) + if (lockType & LOCKING_ANDX_SHARED_LOCK) posix_lock_type = CIFS_RDLCK; else posix_lock_type = CIFS_WRLCK; - - if(numUnlock == 1) + + if (numUnlock == 1) posix_lock_type = CIFS_UNLCK; rc = CIFSSMBPosixLock(xid, pTcon, netfid, 0 /* set */, length, pfLock, posix_lock_type, wait_flag); } else { - struct cifsFileInfo *fid = (struct cifsFileInfo *)file->private_data; + struct cifsFileInfo *fid = + (struct cifsFileInfo *)file->private_data; if (numLock) { - rc = CIFSSMBLock(xid, pTcon, netfid, length, pfLock->fl_start, + rc = CIFSSMBLock(xid, pTcon, netfid, length, + pfLock->fl_start, 0, numLock, lockType, wait_flag); if (rc == 0) { @@ -763,7 +768,8 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock) list_for_each_entry_safe(li, tmp, &fid->llist, llist) { if (pfLock->fl_start <= li->offset && length >= li->length) { - stored_rc = CIFSSMBLock(xid, pTcon, netfid, + stored_rc = CIFSSMBLock(xid, pTcon, + netfid, li->length, li->offset, 1, 0, li->type, FALSE); if (stored_rc) @@ -805,7 +811,7 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data, if (file->private_data == NULL) return -EBADF; open_file = (struct cifsFileInfo *) file->private_data; - + xid = GetXid(); if (*poffset > file->f_path.dentry->d_inode->i_size) @@ -824,7 +830,7 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data, and blocked, and the file has been freed on us while we blocked so return what we managed to write */ return total_written; - } + } if (open_file->closePend) { FreeXid(xid); if (total_written) @@ -867,8 +873,8 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data, /* since the write may have blocked check these pointers again */ if ((file->f_path.dentry) && (file->f_path.dentry->d_inode)) { struct inode *inode = file->f_path.dentry->d_inode; -/* Do not update local mtime - server will set its actual value on write - * inode->i_ctime = inode->i_mtime = +/* Do not update local mtime - server will set its actual value on write + * inode->i_ctime = inode->i_mtime = * current_fs_time(inode->i_sb);*/ if (total_written > 0) { spin_lock(&inode->i_lock); @@ -877,7 +883,7 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data, *poffset); spin_unlock(&inode->i_lock); } - mark_inode_dirty_sync(file->f_path.dentry->d_inode); + mark_inode_dirty_sync(file->f_path.dentry->d_inode); } FreeXid(xid); return total_written; @@ -898,13 +904,13 @@ static ssize_t cifs_write(struct file *file, const char *write_data, pTcon = cifs_sb->tcon; - cFYI(1,("write %zd bytes to offset %lld of %s", write_size, + cFYI(1, ("write %zd bytes to offset %lld of %s", write_size, *poffset, file->f_path.dentry->d_name.name)); if (file->private_data == NULL) return -EBADF; open_file = (struct cifsFileInfo *)file->private_data; - + xid = GetXid(); if (*poffset > file->f_path.dentry->d_inode->i_size) @@ -921,10 +927,10 @@ static ssize_t cifs_write(struct file *file, const char *write_data, FreeXid(xid); /* if we have gotten here we have written some data and blocked, and the file has been freed on us - while we blocked so return what we managed to + while we blocked so return what we managed to write */ return total_written; - } + } if (open_file->closePend) { FreeXid(xid); if (total_written) @@ -935,14 +941,14 @@ static ssize_t cifs_write(struct file *file, const char *write_data, if (open_file->invalidHandle) { /* we could deadlock if we called filemap_fdatawait from here so tell - reopen_file not to flush data to + reopen_file not to flush data to server now */ rc = cifs_reopen_file(file, FALSE); if (rc != 0) break; } - if(experimEnabled || (pTcon->ses->server && - ((pTcon->ses->server->secMode & + if (experimEnabled || (pTcon->ses->server && + ((pTcon->ses->server->secMode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) == 0))) { struct kvec iov[2]; @@ -976,7 +982,7 @@ static ssize_t cifs_write(struct file *file, const char *write_data, } } else *poffset += bytes_written; - long_op = FALSE; /* subsequent writes fast - + long_op = FALSE; /* subsequent writes fast - 15 seconds is plenty */ } @@ -1009,8 +1015,8 @@ struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode) the VFS or MM) should not happen but we had reports of on oops (due to it being zero) during stress testcases so we need to check for it */ - if(cifs_inode == NULL) { - cERROR(1,("Null inode passed to cifs_writeable_file")); + if (cifs_inode == NULL) { + cERROR(1, ("Null inode passed to cifs_writeable_file")); dump_stack(); return NULL; } @@ -1024,13 +1030,14 @@ struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode) (open_file->pfile->f_flags & O_WRONLY))) { atomic_inc(&open_file->wrtPending); read_unlock(&GlobalSMBSeslock); - if((open_file->invalidHandle) && + if ((open_file->invalidHandle) && (!open_file->closePend) /* BB fixme -since the second clause can not be true remove it BB */) { rc = cifs_reopen_file(open_file->pfile, FALSE); /* if it fails, try another handle - might be */ /* dangerous to hold up writepages with retry */ - if(rc) { - cFYI(1,("failed on reopen file in wp")); + if (rc) { + cFYI(1, + ("failed on reopen file in wp")); read_lock(&GlobalSMBSeslock); /* can not use this handle, no write pending on this one after all */ @@ -1082,7 +1089,7 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to) /* check to make sure that we are not extending the file */ if (mapping->host->i_size - offset < (loff_t)to) - to = (unsigned)(mapping->host->i_size - offset); + to = (unsigned)(mapping->host->i_size - offset); open_file = find_writable_file(CIFS_I(mapping->host)); if (open_file) { @@ -1116,8 +1123,8 @@ static int cifs_writepages(struct address_space *mapping, int done = 0; pgoff_t end; pgoff_t index; - int range_whole = 0; - struct kvec * iov; + int range_whole = 0; + struct kvec *iov; int len; int n_iov = 0; pgoff_t next; @@ -1131,7 +1138,7 @@ static int cifs_writepages(struct address_space *mapping, int xid; cifs_sb = CIFS_SB(mapping->host->i_sb); - + /* * If wsize is smaller that the page cache size, default to writing * one page at a time via cifs_writepage @@ -1139,14 +1146,14 @@ static int cifs_writepages(struct address_space *mapping, if (cifs_sb->wsize < PAGE_CACHE_SIZE) return generic_writepages(mapping, wbc); - if((cifs_sb->tcon->ses) && (cifs_sb->tcon->ses->server)) - if(cifs_sb->tcon->ses->server->secMode & - (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) - if(!experimEnabled) + if ((cifs_sb->tcon->ses) && (cifs_sb->tcon->ses->server)) + if (cifs_sb->tcon->ses->server->secMode & + (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) + if (!experimEnabled) return generic_writepages(mapping, wbc); iov = kmalloc(32 * sizeof(struct kvec), GFP_KERNEL); - if(iov == NULL) + if (iov == NULL) return generic_writepages(mapping, wbc); @@ -1279,7 +1286,7 @@ retry: 1); atomic_dec(&open_file->wrtPending); if (rc || bytes_written < bytes_to_write) { - cERROR(1,("Write2 ret %d, written = %d", + cERROR(1, ("Write2 ret %d, wrote %d", rc, bytes_written)); /* BB what if continued retry is requested via mount flags? */ @@ -1295,8 +1302,8 @@ retry: success rc but too little data written? */ /* BB investigate retry logic on temporary server crash cases and how recovery works - when page marked as error */ - if(rc) + when page marked as error */ + if (rc) SetPageError(page); kunmap(page); unlock_page(page); @@ -1326,7 +1333,7 @@ retry: return rc; } -static int cifs_writepage(struct page* page, struct writeback_control *wbc) +static int cifs_writepage(struct page *page, struct writeback_control *wbc) { int rc = -EFAULT; int xid; @@ -1334,7 +1341,7 @@ static int cifs_writepage(struct page* page, struct writeback_control *wbc) xid = GetXid(); /* BB add check for wbc flags */ page_cache_get(page); - if (!PageUptodate(page)) { + if (!PageUptodate(page)) { cFYI(1, ("ppw - page not up to date")); } @@ -1348,7 +1355,7 @@ static int cifs_writepage(struct page* page, struct writeback_control *wbc) * Just unlocking the page will cause the radix tree tag-bits * to fail to update with the state of the page correctly. */ - set_page_writeback(page); + set_page_writeback(page); rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE); SetPageUptodate(page); /* BB add check for error and Clearuptodate? */ unlock_page(page); @@ -1368,7 +1375,7 @@ static int cifs_commit_write(struct file *file, struct page *page, char *page_data; xid = GetXid(); - cFYI(1, ("commit write for page %p up to position %lld for %d", + cFYI(1, ("commit write for page %p up to position %lld for %d", page, position, to)); spin_lock(&inode->i_lock); if (position > inode->i_size) { @@ -1396,7 +1403,7 @@ static int cifs_commit_write(struct file *file, struct page *page, rc = 0; /* else if (rc < 0) should we set writebehind rc? */ kunmap(page); - } else { + } else { set_page_dirty(page); } @@ -1412,9 +1419,9 @@ int cifs_fsync(struct file *file, struct dentry *dentry, int datasync) xid = GetXid(); - cFYI(1, ("Sync file - name: %s datasync: 0x%x", + cFYI(1, ("Sync file - name: %s datasync: 0x%x", dentry->d_name.name, datasync)); - + rc = filemap_fdatawrite(inode->i_mapping); if (rc == 0) CIFS_I(inode)->write_behind_rc = 0; @@ -1438,7 +1445,7 @@ int cifs_fsync(struct file *file, struct dentry *dentry, int datasync) if (!inode) return; */ -/* fill in rpages then +/* fill in rpages then result = cifs_pagein_inode(inode, index, rpages); */ /* BB finish */ /* cFYI(1, ("rpages is %d for sync page of Index %ld", rpages, index)); @@ -1456,7 +1463,7 @@ int cifs_fsync(struct file *file, struct dentry *dentry, int datasync) */ int cifs_flush(struct file *file, fl_owner_t id) { - struct inode * inode = file->f_path.dentry->d_inode; + struct inode *inode = file->f_path.dentry->d_inode; int rc = 0; /* Rather than do the steps manually: @@ -1471,8 +1478,8 @@ int cifs_flush(struct file *file, fl_owner_t id) rc = filemap_fdatawrite(inode->i_mapping); if (!rc) /* reset wb rc if we were able to write out dirty pages */ CIFS_I(inode)->write_behind_rc = 0; - - cFYI(1, ("Flush inode %p file %p rc %d",inode,file,rc)); + + cFYI(1, ("Flush inode %p file %p rc %d", inode, file, rc)); return rc; } @@ -1508,13 +1515,13 @@ ssize_t cifs_user_read(struct file *file, char __user *read_data, for (total_read = 0, current_offset = read_data; read_size > total_read; total_read += bytes_read, current_offset += bytes_read) { - current_read_size = min_t(const int, read_size - total_read, + current_read_size = min_t(const int, read_size - total_read, cifs_sb->rsize); rc = -EAGAIN; smb_read_data = NULL; while (rc == -EAGAIN) { int buf_type = CIFS_NO_BUFFER; - if ((open_file->invalidHandle) && + if ((open_file->invalidHandle) && (!open_file->closePend)) { rc = cifs_reopen_file(file, TRUE); if (rc != 0) @@ -1535,9 +1542,9 @@ ssize_t cifs_user_read(struct file *file, char __user *read_data, rc = -EFAULT; } - if(buf_type == CIFS_SMALL_BUFFER) + if (buf_type == CIFS_SMALL_BUFFER) cifs_small_buf_release(smb_read_data); - else if(buf_type == CIFS_LARGE_BUFFER) + else if (buf_type == CIFS_LARGE_BUFFER) cifs_buf_release(smb_read_data); smb_read_data = NULL; } @@ -1586,21 +1593,21 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size, if ((file->f_flags & O_ACCMODE) == O_WRONLY) cFYI(1, ("attempting read on write only file instance")); - for (total_read = 0, current_offset = read_data; + for (total_read = 0, current_offset = read_data; read_size > total_read; total_read += bytes_read, current_offset += bytes_read) { current_read_size = min_t(const int, read_size - total_read, cifs_sb->rsize); /* For windows me and 9x we do not want to request more than it negotiated since it will refuse the read then */ - if((pTcon->ses) && + if ((pTcon->ses) && !(pTcon->ses->capabilities & CAP_LARGE_FILES)) { current_read_size = min_t(const int, current_read_size, pTcon->ses->server->maxBuf - 128); } rc = -EAGAIN; while (rc == -EAGAIN) { - if ((open_file->invalidHandle) && + if ((open_file->invalidHandle) && (!open_file->closePend)) { rc = cifs_reopen_file(file, TRUE); if (rc != 0) @@ -1646,7 +1653,7 @@ int cifs_file_mmap(struct file *file, struct vm_area_struct *vma) } -static void cifs_copy_cache_pages(struct address_space *mapping, +static void cifs_copy_cache_pages(struct address_space *mapping, struct list_head *pages, int bytes_read, char *data, struct pagevec *plru_pvec) { @@ -1669,12 +1676,12 @@ static void cifs_copy_cache_pages(struct address_space *mapping, continue; } - target = kmap_atomic(page,KM_USER0); + target = kmap_atomic(page, KM_USER0); if (PAGE_CACHE_SIZE > bytes_read) { memcpy(target, data, bytes_read); /* zero the tail end of this partial page */ - memset(target + bytes_read, 0, + memset(target + bytes_read, 0, PAGE_CACHE_SIZE - bytes_read); bytes_read = 0; } else { @@ -1703,7 +1710,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, struct cifs_sb_info *cifs_sb; struct cifsTconInfo *pTcon; int bytes_read = 0; - unsigned int read_size,i; + unsigned int read_size, i; char *smb_read_data = NULL; struct smb_com_read_rsp *pSMBr; struct pagevec lru_pvec; @@ -1720,7 +1727,9 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, pTcon = cifs_sb->tcon; pagevec_init(&lru_pvec, 0); - +#ifdef CONFIG_CIFS_DEBUG2 + cFYI(1, ("rpages: num pages %d", num_pages)); +#endif for (i = 0; i < num_pages; ) { unsigned contig_pages; struct page *tmp_page; @@ -1734,14 +1743,14 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, /* count adjacent pages that we will read into */ contig_pages = 0; - expected_index = + expected_index = list_entry(page_list->prev, struct page, lru)->index; - list_for_each_entry_reverse(tmp_page,page_list,lru) { + list_for_each_entry_reverse(tmp_page, page_list, lru) { if (tmp_page->index == expected_index) { contig_pages++; expected_index++; } else - break; + break; } if (contig_pages + i > num_pages) contig_pages = num_pages - i; @@ -1753,10 +1762,13 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, /* Read size needs to be in multiples of one page */ read_size = min_t(const unsigned int, read_size, cifs_sb->rsize & PAGE_CACHE_MASK); - +#ifdef CONFIG_CIFS_DEBUG2 + cFYI(1, ("rpages: read size 0x%x contiguous pages %d", + read_size, contig_pages)); +#endif rc = -EAGAIN; while (rc == -EAGAIN) { - if ((open_file->invalidHandle) && + if ((open_file->invalidHandle) && (!open_file->closePend)) { rc = cifs_reopen_file(file, TRUE); if (rc != 0) @@ -1769,11 +1781,11 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, &bytes_read, &smb_read_data, &buf_type); /* BB more RC checks ? */ - if (rc== -EAGAIN) { + if (rc == -EAGAIN) { if (smb_read_data) { - if(buf_type == CIFS_SMALL_BUFFER) + if (buf_type == CIFS_SMALL_BUFFER) cifs_small_buf_release(smb_read_data); - else if(buf_type == CIFS_LARGE_BUFFER) + else if (buf_type == CIFS_LARGE_BUFFER) cifs_buf_release(smb_read_data); smb_read_data = NULL; } @@ -1794,10 +1806,10 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, if ((int)(bytes_read & PAGE_CACHE_MASK) != bytes_read) { i++; /* account for partial page */ - /* server copy of file can have smaller size + /* server copy of file can have smaller size than client */ - /* BB do we need to verify this common case ? - this case is ok - if we are at server EOF + /* BB do we need to verify this common case ? + this case is ok - if we are at server EOF we will hit it on next read */ /* break; */ @@ -1806,14 +1818,14 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, cFYI(1, ("No bytes read (%d) at offset %lld . " "Cleaning remaining pages from readahead list", bytes_read, offset)); - /* BB turn off caching and do new lookup on + /* BB turn off caching and do new lookup on file size at server? */ break; } if (smb_read_data) { - if(buf_type == CIFS_SMALL_BUFFER) + if (buf_type == CIFS_SMALL_BUFFER) cifs_small_buf_release(smb_read_data); - else if(buf_type == CIFS_LARGE_BUFFER) + else if (buf_type == CIFS_LARGE_BUFFER) cifs_buf_release(smb_read_data); smb_read_data = NULL; } @@ -1824,12 +1836,12 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, /* need to free smb_read_data buf before exit */ if (smb_read_data) { - if(buf_type == CIFS_SMALL_BUFFER) + if (buf_type == CIFS_SMALL_BUFFER) cifs_small_buf_release(smb_read_data); - else if(buf_type == CIFS_LARGE_BUFFER) + else if (buf_type == CIFS_LARGE_BUFFER) cifs_buf_release(smb_read_data); smb_read_data = NULL; - } + } FreeXid(xid); return rc; @@ -1844,26 +1856,26 @@ static int cifs_readpage_worker(struct file *file, struct page *page, page_cache_get(page); read_data = kmap(page); /* for reads over a certain size could initiate async read ahead */ - + rc = cifs_read(file, read_data, PAGE_CACHE_SIZE, poffset); - + if (rc < 0) goto io_error; else - cFYI(1, ("Bytes read %d",rc)); - + cFYI(1, ("Bytes read %d", rc)); + file->f_path.dentry->d_inode->i_atime = current_fs_time(file->f_path.dentry->d_inode->i_sb); - + if (PAGE_CACHE_SIZE > rc) memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc); flush_dcache_page(page); SetPageUptodate(page); rc = 0; - + io_error: - kunmap(page); + kunmap(page); page_cache_release(page); return rc; } @@ -1881,7 +1893,7 @@ static int cifs_readpage(struct file *file, struct page *page) return -EBADF; } - cFYI(1, ("readpage %p at offset %d 0x%x\n", + cFYI(1, ("readpage %p at offset %d 0x%x\n", page, (int)offset, (int)offset)); rc = cifs_readpage_worker(file, page, &offset); @@ -1895,7 +1907,7 @@ static int cifs_readpage(struct file *file, struct page *page) /* We do not want to update the file size from server for inodes open for write - to avoid races with writepage extending the file - in the future we could consider allowing - refreshing the inode only on increases in the file size + refreshing the inode only on increases in the file size but this is tricky to do without racing with writebehind page caching in the current Linux kernel design */ int is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file) @@ -1904,8 +1916,8 @@ int is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file) if (cifsInode) open_file = find_writable_file(cifsInode); - - if(open_file) { + + if (open_file) { struct cifs_sb_info *cifs_sb; /* there is not actually a write pending so let @@ -1915,12 +1927,12 @@ int is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file) cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb); if ( cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO ) { - /* since no page cache to corrupt on directio + /* since no page cache to corrupt on directio we can change size safely */ return 1; } - if(i_size_read(&cifsInode->vfs_inode) < end_of_file) + if (i_size_read(&cifsInode->vfs_inode) < end_of_file) return 1; return 0; @@ -1935,7 +1947,7 @@ static int cifs_prepare_write(struct file *file, struct page *page, loff_t i_size; loff_t offset; - cFYI(1, ("prepare write for page %p from %d to %d",page,from,to)); + cFYI(1, ("prepare write for page %p from %d to %d", page, from, to)); if (PageUptodate(page)) return 0; @@ -1955,14 +1967,7 @@ static int cifs_prepare_write(struct file *file, struct page *page, * We don't need to read data beyond the end of the file. * zero it, and set the page uptodate */ - void *kaddr = kmap_atomic(page, KM_USER0); - - if (from) - memset(kaddr, 0, from); - if (to < PAGE_CACHE_SIZE) - memset(kaddr + to, 0, PAGE_CACHE_SIZE - to); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); + simple_prepare_write(file, page, from, to); SetPageUptodate(page); } else if ((file->f_flags & O_ACCMODE) != O_WRONLY) { /* might as well read a page, it is fast enough */ @@ -1974,8 +1979,8 @@ static int cifs_prepare_write(struct file *file, struct page *page, this will be written out by commit_write so is fine */ } - /* we do not need to pass errors back - e.g. if we do not have read access to the file + /* we do not need to pass errors back + e.g. if we do not have read access to the file because cifs_commit_write will do the right thing. -- shaggy */ return 0; diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index f0ff12b3f398..dd4167762a8e 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -57,14 +57,14 @@ int cifs_get_inode_info_unix(struct inode **pinode, if (tmp_path == NULL) { return -ENOMEM; } - /* have to skip first of the double backslash of + /* have to skip first of the double backslash of UNC name */ strncpy(tmp_path, pTcon->treeName, MAX_TREE_SIZE); strncat(tmp_path, search_path, MAX_PATHCONF); rc = connect_to_dfs_path(xid, pTcon->ses, /* treename + */ tmp_path, - cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & + cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); kfree(tmp_path); @@ -81,7 +81,7 @@ int cifs_get_inode_info_unix(struct inode **pinode, /* get new inode */ if (*pinode == NULL) { *pinode = new_inode(sb); - if (*pinode == NULL) + if (*pinode == NULL) return -ENOMEM; /* Is an i_ino of zero legal? */ /* Are there sanity checks we can use to ensure that @@ -92,7 +92,7 @@ int cifs_get_inode_info_unix(struct inode **pinode, } /* note ino incremented to unique num in new_inode */ if (sb->s_flags & MS_NOATIME) (*pinode)->i_flags |= S_NOATIME | S_NOCMTIME; - + insert_inode_hash(*pinode); } @@ -103,7 +103,7 @@ int cifs_get_inode_info_unix(struct inode **pinode, cifsInfo->time = jiffies; cFYI(1, ("New time %ld", cifsInfo->time)); /* this is ok to set on every inode revalidate */ - atomic_set(&cifsInfo->inUse,1); + atomic_set(&cifsInfo->inUse, 1); inode->i_atime = cifs_NTtimeToUnix(le64_to_cpu(findData.LastAccessTime)); @@ -114,8 +114,8 @@ int cifs_get_inode_info_unix(struct inode **pinode, cifs_NTtimeToUnix(le64_to_cpu(findData.LastStatusChange)); inode->i_mode = le64_to_cpu(findData.Permissions); /* since we set the inode type below we need to mask off - to avoid strange results if bits set above */ - inode->i_mode &= ~S_IFMT; + to avoid strange results if bits set above */ + inode->i_mode &= ~S_IFMT; if (type == UNIX_FILE) { inode->i_mode |= S_IFREG; } else if (type == UNIX_SYMLINK) { @@ -137,9 +137,9 @@ int cifs_get_inode_info_unix(struct inode **pinode, } else { /* safest to call it a file if we do not know */ inode->i_mode |= S_IFREG; - cFYI(1,("unknown type %d",type)); + cFYI(1, ("unknown type %d", type)); } - + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID) inode->i_uid = cifs_sb->mnt_uid; else @@ -149,7 +149,7 @@ int cifs_get_inode_info_unix(struct inode **pinode, inode->i_gid = cifs_sb->mnt_gid; else inode->i_gid = le64_to_cpu(findData.Gid); - + inode->i_nlink = le64_to_cpu(findData.Nlinks); spin_lock(&inode->i_lock); @@ -183,17 +183,17 @@ int cifs_get_inode_info_unix(struct inode **pinode, inode->i_op = &cifs_file_inode_ops; if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) { if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL) - inode->i_fop = + inode->i_fop = &cifs_file_direct_nobrl_ops; else inode->i_fop = &cifs_file_direct_ops; } else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL) inode->i_fop = &cifs_file_nobrl_ops; - else /* not direct, send byte range locks */ + else /* not direct, send byte range locks */ inode->i_fop = &cifs_file_ops; /* check if server can support readpages */ - if (pTcon->ses->server->maxBuf < + if (pTcon->ses->server->maxBuf < PAGE_CACHE_SIZE + MAX_CIFS_HDR_SIZE) inode->i_data.a_ops = &cifs_addr_ops_smallbuf; else @@ -215,7 +215,7 @@ int cifs_get_inode_info_unix(struct inode **pinode, return rc; } -static int decode_sfu_inode(struct inode * inode, __u64 size, +static int decode_sfu_inode(struct inode *inode, __u64 size, const unsigned char *path, struct cifs_sb_info *cifs_sb, int xid) { @@ -225,7 +225,7 @@ static int decode_sfu_inode(struct inode * inode, __u64 size, struct cifsTconInfo *pTcon = cifs_sb->tcon; char buf[24]; unsigned int bytes_read; - char * pbuf; + char *pbuf; pbuf = buf; @@ -235,22 +235,22 @@ static int decode_sfu_inode(struct inode * inode, __u64 size, } else if (size < 8) { return -EINVAL; /* EOPNOTSUPP? */ } - + rc = CIFSSMBOpen(xid, pTcon, path, FILE_OPEN, GENERIC_READ, CREATE_NOT_DIR, &netfid, &oplock, NULL, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); - if (rc==0) { + if (rc == 0) { int buf_type = CIFS_NO_BUFFER; /* Read header */ rc = CIFSSMBRead(xid, pTcon, - netfid, + netfid, 24 /* length */, 0 /* offset */, &bytes_read, &pbuf, &buf_type); if ((rc == 0) && (bytes_read >= 8)) { if (memcmp("IntxBLK", pbuf, 8) == 0) { - cFYI(1,("Block device")); + cFYI(1, ("Block device")); inode->i_mode |= S_IFBLK; if (bytes_read == 24) { /* we have enough to decode dev num */ @@ -261,7 +261,7 @@ static int decode_sfu_inode(struct inode * inode, __u64 size, inode->i_rdev = MKDEV(mjr, mnr); } } else if (memcmp("IntxCHR", pbuf, 8) == 0) { - cFYI(1,("Char device")); + cFYI(1, ("Char device")); inode->i_mode |= S_IFCHR; if (bytes_read == 24) { /* we have enough to decode dev num */ @@ -270,27 +270,26 @@ static int decode_sfu_inode(struct inode * inode, __u64 size, mjr = le64_to_cpu(*(__le64 *)(pbuf+8)); mnr = le64_to_cpu(*(__le64 *)(pbuf+16)); inode->i_rdev = MKDEV(mjr, mnr); - } + } } else if (memcmp("IntxLNK", pbuf, 7) == 0) { - cFYI(1,("Symlink")); + cFYI(1, ("Symlink")); inode->i_mode |= S_IFLNK; } else { inode->i_mode |= S_IFREG; /* file? */ - rc = -EOPNOTSUPP; + rc = -EOPNOTSUPP; } } else { inode->i_mode |= S_IFREG; /* then it is a file */ - rc = -EOPNOTSUPP; /* or some unknown SFU type */ - } + rc = -EOPNOTSUPP; /* or some unknown SFU type */ + } CIFSSMBClose(xid, pTcon, netfid); } return rc; - } #define SFBITS_MASK (S_ISVTX | S_ISGID | S_ISUID) /* SETFILEBITS valid bits */ -static int get_sfu_uid_mode(struct inode * inode, +static int get_sfu_uid_mode(struct inode *inode, const unsigned char *path, struct cifs_sb_info *cifs_sb, int xid) { @@ -301,15 +300,15 @@ static int get_sfu_uid_mode(struct inode * inode, rc = CIFSSMBQueryEA(xid, cifs_sb->tcon, path, "SETFILEBITS", ea_value, 4 /* size of buf */, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); + cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); if (rc < 0) return (int)rc; else if (rc > 3) { mode = le32_to_cpu(*((__le32 *)ea_value)); - inode->i_mode &= ~SFBITS_MASK; - cFYI(1,("special bits 0%o org mode 0%o", mode, inode->i_mode)); + inode->i_mode &= ~SFBITS_MASK; + cFYI(1, ("special bits 0%o org mode 0%o", mode, inode->i_mode)); inode->i_mode = (mode & SFBITS_MASK) | inode->i_mode; - cFYI(1,("special mode bits 0%o", mode)); + cFYI(1, ("special mode bits 0%o", mode)); return 0; } else { return 0; @@ -317,8 +316,6 @@ static int get_sfu_uid_mode(struct inode * inode, #else return -EOPNOTSUPP; #endif - - } int cifs_get_inode_info(struct inode **pinode, @@ -334,11 +331,11 @@ int cifs_get_inode_info(struct inode **pinode, int adjustTZ = FALSE; pTcon = cifs_sb->tcon; - cFYI(1,("Getting info on %s", search_path)); + cFYI(1, ("Getting info on %s", search_path)); if ((pfindData == NULL) && (*pinode != NULL)) { if (CIFS_I(*pinode)->clientCanCacheRead) { - cFYI(1,("No need to revalidate cached inode sizes")); + cFYI(1, ("No need to revalidate cached inode sizes")); return rc; } } @@ -359,12 +356,11 @@ int cifs_get_inode_info(struct inode **pinode, failed at least once - set flag in tcon or mount */ if ((rc == -EOPNOTSUPP) || (rc == -EINVAL)) { rc = SMBQueryInformation(xid, pTcon, search_path, - pfindData, cifs_sb->local_nls, + pfindData, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); adjustTZ = TRUE; } - } /* dump_mem("\nQPathInfo return data",&findData, sizeof(findData)); */ if (rc) { @@ -384,8 +380,8 @@ int cifs_get_inode_info(struct inode **pinode, strncat(tmp_path, search_path, MAX_PATHCONF); rc = connect_to_dfs_path(xid, pTcon->ses, /* treename + */ tmp_path, - cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & + cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); kfree(tmp_path); /* BB fix up inode etc. */ @@ -419,17 +415,17 @@ int cifs_get_inode_info(struct inode **pinode, there Windows server or network appliances for which IndexNumber field is not guaranteed unique? */ - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM){ + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) { int rc1 = 0; __u64 inode_num; - rc1 = CIFSGetSrvInodeNumber(xid, pTcon, - search_path, &inode_num, + rc1 = CIFSGetSrvInodeNumber(xid, pTcon, + search_path, &inode_num, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); if (rc1) { - cFYI(1,("GetSrvInodeNum rc %d", rc1)); + cFYI(1, ("GetSrvInodeNum rc %d", rc1)); /* BB EOPNOSUPP disable SERVER_INUM? */ } else /* do we need cast or hash to ino? */ (*pinode)->i_ino = inode_num; @@ -463,7 +459,7 @@ int cifs_get_inode_info(struct inode **pinode, cFYI(0, ("Attributes came in as 0x%x", attr)); if (adjustTZ && (pTcon->ses) && (pTcon->ses->server)) { inode->i_ctime.tv_sec += pTcon->ses->server->timeAdj; - inode->i_mtime.tv_sec += pTcon->ses->server->timeAdj; + inode->i_mtime.tv_sec += pTcon->ses->server->timeAdj; } /* set default mode. will override for dirs below */ @@ -471,8 +467,9 @@ int cifs_get_inode_info(struct inode **pinode, /* new inode, can safely set these fields */ inode->i_mode = cifs_sb->mnt_file_mode; else /* since we set the inode type below we need to mask off - to avoid strange results if type changes and both get orred in */ - inode->i_mode &= ~S_IFMT; + to avoid strange results if type changes and both + get orred in */ + inode->i_mode &= ~S_IFMT; /* if (attr & ATTR_REPARSE) */ /* We no longer handle these as symlinks because we could not follow them due to the absolute path with drive letter */ @@ -490,13 +487,13 @@ int cifs_get_inode_info(struct inode **pinode, /* BB Finish for SFU style symlinks and devices */ } else if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) && (cifsInfo->cifsAttrs & ATTR_SYSTEM)) { - if (decode_sfu_inode(inode, + if (decode_sfu_inode(inode, le64_to_cpu(pfindData->EndOfFile), search_path, cifs_sb, xid)) { - cFYI(1,("Unrecognized sfu inode type")); + cFYI(1, ("Unrecognized sfu inode type")); } - cFYI(1,("sfu mode 0%o",inode->i_mode)); + cFYI(1, ("sfu mode 0%o", inode->i_mode)); } else { inode->i_mode |= S_IFREG; /* treat the dos attribute of read-only as read-only @@ -512,12 +509,12 @@ int cifs_get_inode_info(struct inode **pinode, /* BB add code here - validate if device or weird share or device type? */ } - + spin_lock(&inode->i_lock); if (is_size_safe_to_change(cifsInfo, le64_to_cpu(pfindData->EndOfFile))) { /* can not safely shrink the file size here if the client is writing to it due to potential races */ - i_size_write(inode,le64_to_cpu(pfindData->EndOfFile)); + i_size_write(inode, le64_to_cpu(pfindData->EndOfFile)); /* 512 bytes (2**9) is the fake blocksize that must be used for this calculation */ @@ -528,7 +525,7 @@ int cifs_get_inode_info(struct inode **pinode, inode->i_nlink = le32_to_cpu(pfindData->NumberOfLinks); - /* BB fill in uid and gid here? with help from winbind? + /* BB fill in uid and gid here? with help from winbind? or retrieve from NTFS stream extended attribute */ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) { /* fill in uid, gid, mode from server ACL */ @@ -540,7 +537,7 @@ int cifs_get_inode_info(struct inode **pinode, inode->i_gid = cifs_sb->mnt_gid; /* set so we do not keep refreshing these fields with bad data after user has changed them in memory */ - atomic_set(&cifsInfo->inUse,1); + atomic_set(&cifsInfo->inUse, 1); } if (S_ISREG(inode->i_mode)) { @@ -557,7 +554,7 @@ int cifs_get_inode_info(struct inode **pinode, else /* not direct, send byte range locks */ inode->i_fop = &cifs_file_ops; - if (pTcon->ses->server->maxBuf < + if (pTcon->ses->server->maxBuf < PAGE_CACHE_SIZE + MAX_CIFS_HDR_SIZE) inode->i_data.a_ops = &cifs_addr_ops_smallbuf; else @@ -586,10 +583,11 @@ void cifs_read_inode(struct inode *inode) cifs_sb = CIFS_SB(inode->i_sb); xid = GetXid(); - if (cifs_sb->tcon->ses->capabilities & CAP_UNIX) - cifs_get_inode_info_unix(&inode, "", inode->i_sb,xid); + + if (cifs_sb->tcon->unix_ext) + cifs_get_inode_info_unix(&inode, "", inode->i_sb, xid); else - cifs_get_inode_info(&inode, "", NULL, inode->i_sb,xid); + cifs_get_inode_info(&inode, "", NULL, inode->i_sb, xid); /* can not call macro FreeXid here since in a void func */ _FreeXid(xid); } @@ -623,9 +621,21 @@ int cifs_unlink(struct inode *inode, struct dentry *direntry) FreeXid(xid); return -ENOMEM; } - rc = CIFSSMBDelFile(xid, pTcon, full_path, cifs_sb->local_nls, + + if ((pTcon->ses->capabilities & CAP_UNIX) && + (CIFS_UNIX_POSIX_PATH_OPS_CAP & + le64_to_cpu(pTcon->fsUnixInfo.Capability))) { + rc = CIFSPOSIXDelFile(xid, pTcon, full_path, + SMB_POSIX_UNLINK_FILE_TARGET, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); + cFYI(1, ("posix del rc %d", rc)); + if ((rc == 0) || (rc == -ENOENT)) + goto psx_del_no_retry; + } + rc = CIFSSMBDelFile(xid, pTcon, full_path, cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); +psx_del_no_retry: if (!rc) { if (direntry->d_inode) drop_nlink(direntry->d_inode); @@ -638,12 +648,12 @@ int cifs_unlink(struct inode *inode, struct dentry *direntry) rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN, DELETE, CREATE_NOT_DIR | CREATE_DELETE_ON_CLOSE, &netfid, &oplock, NULL, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & + cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); - if (rc==0) { + if (rc == 0) { CIFSSMBRenameOpenFile(xid, pTcon, netfid, NULL, - cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & + cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); CIFSSMBClose(xid, pTcon, netfid); if (direntry->d_inode) @@ -659,7 +669,7 @@ int cifs_unlink(struct inode *inode, struct dentry *direntry) rc = CIFSSMBSetTimes(xid, pTcon, full_path, pinfo_buf, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & + cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); else rc = -EOPNOTSUPP; @@ -670,7 +680,7 @@ int cifs_unlink(struct inode *inode, struct dentry *direntry) /* rc = CIFSSMBSetAttrLegacy(xid, pTcon, full_path, (__u16)ATTR_NORMAL, - cifs_sb->local_nls); + cifs_sb->local_nls); For some strange reason it seems that NT4 eats the old setattr call without actually setting the attributes so on to the third attempted workaround @@ -683,9 +693,9 @@ int cifs_unlink(struct inode *inode, struct dentry *direntry) FILE_WRITE_ATTRIBUTES, 0, &netfid, &oplock, NULL, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & + cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); - if (rc==0) { + if (rc == 0) { rc = CIFSSMBSetFileTimes(xid, pTcon, pinfo_buf, netfid); @@ -694,10 +704,10 @@ int cifs_unlink(struct inode *inode, struct dentry *direntry) } kfree(pinfo_buf); } - if (rc==0) { - rc = CIFSSMBDelFile(xid, pTcon, full_path, - cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & + if (rc == 0) { + rc = CIFSSMBDelFile(xid, pTcon, full_path, + cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); if (!rc) { if (direntry->d_inode) @@ -711,10 +721,10 @@ int cifs_unlink(struct inode *inode, struct dentry *direntry) CREATE_NOT_DIR | CREATE_DELETE_ON_CLOSE, &netfid, &oplock, NULL, - cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & + cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); - if (rc==0) { + if (rc == 0) { CIFSSMBRenameOpenFile(xid, pTcon, netfid, NULL, cifs_sb->local_nls, @@ -773,8 +783,8 @@ static void posix_fill_in_inode(struct inode *tmp_inode, tmp_inode->i_mode = le64_to_cpu(pData->Permissions); /* since we set the inode type below we need to mask off type - to avoid strange results if bits above were corrupt */ - tmp_inode->i_mode &= ~S_IFMT; + to avoid strange results if bits above were corrupt */ + tmp_inode->i_mode &= ~S_IFMT; if (type == UNIX_FILE) { *pobject_type = DT_REG; tmp_inode->i_mode |= S_IFREG; @@ -804,11 +814,11 @@ static void posix_fill_in_inode(struct inode *tmp_inode, /* safest to just call it a file */ *pobject_type = DT_REG; tmp_inode->i_mode |= S_IFREG; - cFYI(1,("unknown inode type %d",type)); + cFYI(1, ("unknown inode type %d", type)); } #ifdef CONFIG_CIFS_DEBUG2 - cFYI(1,("object type: %d", type)); + cFYI(1, ("object type: %d", type)); #endif tmp_inode->i_uid = le64_to_cpu(pData->Uid); tmp_inode->i_gid = le64_to_cpu(pData->Gid); @@ -816,7 +826,7 @@ static void posix_fill_in_inode(struct inode *tmp_inode, spin_lock(&tmp_inode->i_lock); if (is_size_safe_to_change(cifsInfo, end_of_file)) { - /* can not safely change the file size here if the + /* can not safely change the file size here if the client is writing to it due to potential races */ i_size_write(tmp_inode, end_of_file); @@ -830,27 +840,28 @@ static void posix_fill_in_inode(struct inode *tmp_inode, cFYI(1, ("File inode")); tmp_inode->i_op = &cifs_file_inode_ops; - if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) { - if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL) + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) { + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL) tmp_inode->i_fop = &cifs_file_direct_nobrl_ops; else tmp_inode->i_fop = &cifs_file_direct_ops; - - } else if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL) + + } else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL) tmp_inode->i_fop = &cifs_file_nobrl_ops; else tmp_inode->i_fop = &cifs_file_ops; - if((cifs_sb->tcon) && (cifs_sb->tcon->ses) && - (cifs_sb->tcon->ses->server->maxBuf < + if ((cifs_sb->tcon) && (cifs_sb->tcon->ses) && + (cifs_sb->tcon->ses->server->maxBuf < PAGE_CACHE_SIZE + MAX_CIFS_HDR_SIZE)) tmp_inode->i_data.a_ops = &cifs_addr_ops_smallbuf; else tmp_inode->i_data.a_ops = &cifs_addr_ops; - if(isNewInode) - return; /* No sense invalidating pages for new inode since we - have not started caching readahead file data yet */ + if (isNewInode) + return; /* No sense invalidating pages for new inode + since we we have not started caching + readahead file data yet */ if (timespec_equal(&tmp_inode->i_mtime, &local_mtime) && (local_size == tmp_inode->i_size)) { @@ -869,10 +880,10 @@ static void posix_fill_in_inode(struct inode *tmp_inode, tmp_inode->i_op = &cifs_symlink_inode_ops; /* tmp_inode->i_fop = *//* do not need to set to anything */ } else { - cFYI(1, ("Special inode")); + cFYI(1, ("Special inode")); init_special_inode(tmp_inode, tmp_inode->i_mode, tmp_inode->i_rdev); - } + } } int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode) @@ -896,22 +907,22 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode) FreeXid(xid); return -ENOMEM; } - - if((pTcon->ses->capabilities & CAP_UNIX) && - (CIFS_UNIX_POSIX_PATH_OPS_CAP & + + if ((pTcon->ses->capabilities & CAP_UNIX) && + (CIFS_UNIX_POSIX_PATH_OPS_CAP & le64_to_cpu(pTcon->fsUnixInfo.Capability))) { u32 oplock = 0; - FILE_UNIX_BASIC_INFO * pInfo = + FILE_UNIX_BASIC_INFO * pInfo = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL); - if(pInfo == NULL) { + if (pInfo == NULL) { rc = -ENOMEM; goto mkdir_out; } - + rc = CIFSPOSIXCreate(xid, pTcon, SMB_O_DIRECTORY | SMB_O_CREAT, mode, NULL /* netfid */, pInfo, &oplock, - full_path, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & + full_path, cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); if (rc) { cFYI(1, ("posix mkdir returned 0x%x", rc)); @@ -919,8 +930,9 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode) } else { int obj_type; if (pInfo->Type == -1) /* no return info - go query */ - goto mkdir_get_info; -/*BB check (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID ) to see if need to set uid/gid */ + goto mkdir_get_info; +/*BB check (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID ) to see if need + to set uid/gid */ inc_nlink(inode); if (pTcon->nocase) direntry->d_op = &cifs_ci_dentry_ops; @@ -937,7 +949,7 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode) newinode->i_ino = (unsigned long)pInfo->UniqueId; } /* note ino incremented to unique num in new_inode */ - if(inode->i_sb->s_flags & MS_NOATIME) + if (inode->i_sb->s_flags & MS_NOATIME) newinode->i_flags |= S_NOATIME | S_NOCMTIME; newinode->i_nlink = 2; @@ -949,18 +961,18 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode) posix_fill_in_inode(direntry->d_inode, pInfo, &obj_type, 1 /* NewInode */); #ifdef CONFIG_CIFS_DEBUG2 - cFYI(1,("instantiated dentry %p %s to inode %p", + cFYI(1, ("instantiated dentry %p %s to inode %p", direntry, direntry->d_name.name, newinode)); - if(newinode->i_nlink != 2) - cFYI(1,("unexpected number of links %d", + if (newinode->i_nlink != 2) + cFYI(1, ("unexpected number of links %d", newinode->i_nlink)); #endif } kfree(pInfo); goto mkdir_out; - } - + } + /* BB add setting the equivalent of mode via CreateX w/ACLs */ rc = CIFSSMBMkDir(xid, pTcon, full_path, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); @@ -968,14 +980,14 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode) cFYI(1, ("cifs_mkdir returned 0x%x", rc)); d_drop(direntry); } else { -mkdir_get_info: +mkdir_get_info: inc_nlink(inode); - if (pTcon->ses->capabilities & CAP_UNIX) + if (pTcon->unix_ext) rc = cifs_get_inode_info_unix(&newinode, full_path, - inode->i_sb,xid); + inode->i_sb, xid); else rc = cifs_get_inode_info(&newinode, full_path, NULL, - inode->i_sb,xid); + inode->i_sb, xid); if (pTcon->nocase) direntry->d_op = &cifs_ci_dentry_ops; @@ -983,10 +995,10 @@ mkdir_get_info: direntry->d_op = &cifs_dentry_ops; d_instantiate(direntry, newinode); /* setting nlink not necessary except in cases where we - * failed to get it from the server or was set bogus */ + * failed to get it from the server or was set bogus */ if ((direntry->d_inode) && (direntry->d_inode->i_nlink < 2)) - direntry->d_inode->i_nlink = 2; - if (cifs_sb->tcon->ses->capabilities & CAP_UNIX) { + direntry->d_inode->i_nlink = 2; + if (pTcon->unix_ext) { mode &= ~current->fs->umask; if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { CIFSSMBUnixSetPerms(xid, pTcon, full_path, @@ -1002,27 +1014,27 @@ mkdir_get_info: mode, (__u64)-1, (__u64)-1, 0 /* dev_t */, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & + cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); } } else { /* BB to be implemented via Windows secrty descriptors eg CIFSSMBWinSetPerms(xid, pTcon, full_path, mode, -1, -1, local_nls); */ - if(direntry->d_inode) { + if (direntry->d_inode) { direntry->d_inode->i_mode = mode; direntry->d_inode->i_mode |= S_IFDIR; - if(cifs_sb->mnt_cifs_flags & + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { - direntry->d_inode->i_uid = + direntry->d_inode->i_uid = current->fsuid; - direntry->d_inode->i_gid = + direntry->d_inode->i_gid = current->fsgid; } } } } -mkdir_out: +mkdir_out: kfree(full_path); FreeXid(xid); return rc; @@ -1056,7 +1068,7 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry) if (!rc) { drop_nlink(inode); spin_lock(&direntry->d_inode->i_lock); - i_size_write(direntry->d_inode,0); + i_size_write(direntry->d_inode, 0); clear_nlink(direntry->d_inode); spin_unlock(&direntry->d_inode->i_lock); } @@ -1119,9 +1131,9 @@ int cifs_rename(struct inode *source_inode, struct dentry *source_direntry, kmalloc(2 * sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL); if (info_buf_source != NULL) { info_buf_target = info_buf_source + 1; - if (pTcon->ses->capabilities & CAP_UNIX) + if (pTcon->unix_ext) rc = CIFSSMBUnixQPathInfo(xid, pTcon, fromName, - info_buf_source, + info_buf_source, cifs_sb_source->local_nls, cifs_sb_source->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); @@ -1171,12 +1183,12 @@ int cifs_rename(struct inode *source_inode, struct dentry *source_direntry, might not right be right access to request */ rc = CIFSSMBOpen(xid, pTcon, fromName, FILE_OPEN, GENERIC_READ, CREATE_NOT_DIR, &netfid, &oplock, NULL, - cifs_sb_source->local_nls, - cifs_sb_source->mnt_cifs_flags & + cifs_sb_source->local_nls, + cifs_sb_source->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); - if (rc==0) { + if (rc == 0) { rc = CIFSSMBRenameOpenFile(xid, pTcon, netfid, toName, - cifs_sb_source->local_nls, + cifs_sb_source->local_nls, cifs_sb_source->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); CIFSSMBClose(xid, pTcon, netfid); @@ -1247,9 +1259,9 @@ int cifs_revalidate(struct dentry *direntry) local_mtime = direntry->d_inode->i_mtime; local_size = direntry->d_inode->i_size; - if (cifs_sb->tcon->ses->capabilities & CAP_UNIX) { + if (cifs_sb->tcon->unix_ext) { rc = cifs_get_inode_info_unix(&direntry->d_inode, full_path, - direntry->d_sb,xid); + direntry->d_sb, xid); if (rc) { cFYI(1, ("error on getting revalidate info %d", rc)); /* if (rc != -ENOENT) @@ -1258,7 +1270,7 @@ int cifs_revalidate(struct dentry *direntry) } } else { rc = cifs_get_inode_info(&direntry->d_inode, full_path, NULL, - direntry->d_sb,xid); + direntry->d_sb, xid); if (rc) { cFYI(1, ("error on getting revalidate info %d", rc)); /* if (rc != -ENOENT) @@ -1271,7 +1283,7 @@ int cifs_revalidate(struct dentry *direntry) /* if not oplocked, we invalidate inode pages if mtime or file size had changed on server */ - if (timespec_equal(&local_mtime,&direntry->d_inode->i_mtime) && + if (timespec_equal(&local_mtime, &direntry->d_inode->i_mtime) && (local_size == direntry->d_inode->i_size)) { cFYI(1, ("cifs_revalidate - inode unchanged")); } else { @@ -1298,7 +1310,7 @@ int cifs_revalidate(struct dentry *direntry) if (invalidate_inode) { /* shrink_dcache not necessary now that cifs dentry ops are exported for negative dentries */ -/* if(S_ISDIR(direntry->d_inode->i_mode)) +/* if (S_ISDIR(direntry->d_inode->i_mode)) shrink_dcache_parent(direntry); */ if (S_ISREG(direntry->d_inode->i_mode)) { if (direntry->d_inode->i_mapping) @@ -1313,7 +1325,7 @@ int cifs_revalidate(struct dentry *direntry) } } /* mutex_unlock(&direntry->d_inode->i_mutex); */ - + kfree(full_path); FreeXid(xid); return rc; @@ -1335,23 +1347,19 @@ static int cifs_truncate_page(struct address_space *mapping, loff_t from) pgoff_t index = from >> PAGE_CACHE_SHIFT; unsigned offset = from & (PAGE_CACHE_SIZE - 1); struct page *page; - char *kaddr; int rc = 0; page = grab_cache_page(mapping, index); if (!page) return -ENOMEM; - kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); + zero_user_page(page, offset, PAGE_CACHE_SIZE - offset, KM_USER0); unlock_page(page); page_cache_release(page); return rc; } -static int cifs_vmtruncate(struct inode * inode, loff_t offset) +static int cifs_vmtruncate(struct inode *inode, loff_t offset) { struct address_space *mapping = inode->i_mapping; unsigned long limit; @@ -1424,13 +1432,13 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) == 0) { /* check if we have permission to change attrs */ rc = inode_change_ok(direntry->d_inode, attrs); - if(rc < 0) { + if (rc < 0) { FreeXid(xid); return rc; } else rc = 0; } - + full_path = build_path_from_dentry(direntry); if (full_path == NULL) { FreeXid(xid); @@ -1459,16 +1467,16 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) rc = CIFSSMBSetFileSize(xid, pTcon, attrs->ia_size, nfid, npid, FALSE); atomic_dec(&open_file->wrtPending); - cFYI(1,("SetFSize for attrs rc = %d", rc)); - if((rc == -EINVAL) || (rc == -EOPNOTSUPP)) { + cFYI(1, ("SetFSize for attrs rc = %d", rc)); + if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) { int bytes_written; rc = CIFSSMBWrite(xid, pTcon, nfid, 0, attrs->ia_size, &bytes_written, NULL, NULL, 1 /* 45 seconds */); - cFYI(1,("Wrt seteof rc %d", rc)); + cFYI(1, ("Wrt seteof rc %d", rc)); } - } else + } else rc = -EINVAL; if (rc != 0) { @@ -1478,11 +1486,11 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) it by handle */ rc = CIFSSMBSetEOF(xid, pTcon, full_path, attrs->ia_size, FALSE, - cifs_sb->local_nls, + cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); cFYI(1, ("SetEOF by path (setattrs) rc = %d", rc)); - if((rc == -EINVAL) || (rc == -EOPNOTSUPP)) { + if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) { __u16 netfid; int oplock = FALSE; @@ -1493,14 +1501,14 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) NULL, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); - if (rc==0) { + if (rc == 0) { int bytes_written; rc = CIFSSMBWrite(xid, pTcon, netfid, 0, attrs->ia_size, &bytes_written, NULL, NULL, 1 /* 45 sec */); - cFYI(1,("wrt seteof rc %d",rc)); + cFYI(1, ("wrt seteof rc %d", rc)); CIFSSMBClose(xid, pTcon, netfid); } @@ -1517,7 +1525,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) rc = cifs_vmtruncate(direntry->d_inode, attrs->ia_size); cifs_truncate_page(direntry->d_inode->i_mapping, direntry->d_inode->i_size); - } else + } else goto cifs_setattr_exit; } if (attrs->ia_valid & ATTR_UID) { @@ -1535,11 +1543,11 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) mode = attrs->ia_mode; } - if ((cifs_sb->tcon->ses->capabilities & CAP_UNIX) + if ((pTcon->unix_ext) && (attrs->ia_valid & (ATTR_MODE | ATTR_GID | ATTR_UID))) rc = CIFSSMBUnixSetPerms(xid, pTcon, full_path, mode, uid, gid, 0 /* dev_t */, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & + cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); else if (attrs->ia_valid & ATTR_MODE) { rc = 0; @@ -1559,7 +1567,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) time_buf.Attributes = cpu_to_le32(cifsInode->cifsAttrs & (~ATTR_READONLY)); /* Windows ignores set to zero */ - if(time_buf.Attributes == 0) + if (time_buf.Attributes == 0) time_buf.Attributes |= cpu_to_le32(ATTR_NORMAL); } /* BB to be implemented - @@ -1585,7 +1593,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) stamps are changed explicitly (i.e. by utime() since we would then have a mix of client and server times */ - + if (set_time && (attrs->ia_valid & ATTR_CTIME)) { set_time = TRUE; /* Although Samba throws this field away @@ -1624,7 +1632,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) NULL, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); - if (rc==0) { + if (rc == 0) { rc = CIFSSMBSetFileTimes(xid, pTcon, &time_buf, netfid); CIFSSMBClose(xid, pTcon, netfid); @@ -1634,7 +1642,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) granularity */ /* rc = CIFSSMBSetTimesLegacy(xid, pTcon, full_path, - &time_buf, cifs_sb->local_nls); */ + &time_buf, cifs_sb->local_nls); */ } } /* Even if error on time set, no sense failing the call if @@ -1642,7 +1650,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) and this check ensures that we are not being called from sys_utimes in which case we ought to fail the call back to the user when the server rejects the call */ - if((rc) && (attrs->ia_valid & + if ((rc) && (attrs->ia_valid & (ATTR_MODE | ATTR_GID | ATTR_UID | ATTR_SIZE))) rc = 0; } diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c index a414f1775ae0..d24fe6880a04 100644 --- a/fs/cifs/ioctl.c +++ b/fs/cifs/ioctl.c @@ -3,7 +3,7 @@ * * vfs operations that deal with io control * - * Copyright (C) International Business Machines Corp., 2005 + * Copyright (C) International Business Machines Corp., 2005,2007 * Author(s): Steve French (sfrench@us.ibm.com) * * This library is free software; you can redistribute it and/or modify @@ -30,7 +30,7 @@ #define CIFS_IOC_CHECKUMOUNT _IO(0xCF, 2) -int cifs_ioctl (struct inode * inode, struct file * filep, +int cifs_ioctl (struct inode *inode, struct file *filep, unsigned int command, unsigned long arg) { int rc = -ENOTTY; /* strange error - but the precedent */ diff --git a/fs/cifs/link.c b/fs/cifs/link.c index 6baea85d726e..6a85ef7b8797 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -50,32 +50,33 @@ cifs_hardlink(struct dentry *old_file, struct inode *inode, fromName = build_path_from_dentry(old_file); toName = build_path_from_dentry(direntry); - if((fromName == NULL) || (toName == NULL)) { + if ((fromName == NULL) || (toName == NULL)) { rc = -ENOMEM; goto cifs_hl_exit; } - if (cifs_sb_target->tcon->ses->capabilities & CAP_UNIX) +/* if (cifs_sb_target->tcon->ses->capabilities & CAP_UNIX)*/ + if (pTcon->unix_ext) rc = CIFSUnixCreateHardLink(xid, pTcon, fromName, toName, - cifs_sb_target->local_nls, + cifs_sb_target->local_nls, cifs_sb_target->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); else { rc = CIFSCreateHardLink(xid, pTcon, fromName, toName, - cifs_sb_target->local_nls, + cifs_sb_target->local_nls, cifs_sb_target->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); - if((rc == -EIO) || (rc == -EINVAL)) - rc = -EOPNOTSUPP; + if ((rc == -EIO) || (rc == -EINVAL)) + rc = -EOPNOTSUPP; } d_drop(direntry); /* force new lookup from server of target */ /* if source file is cached (oplocked) revalidate will not go to server until the file is closed or oplock broken so update nlinks locally */ - if(old_file->d_inode) { + if (old_file->d_inode) { cifsInode = CIFS_I(old_file->d_inode); - if(rc == 0) { + if (rc == 0) { old_file->d_inode->i_nlink++; /* BB should we make this contingent on superblock flag NOATIME? */ /* old_file->d_inode->i_ctime = CURRENT_TIME;*/ @@ -84,14 +85,14 @@ cifs_hardlink(struct dentry *old_file, struct inode *inode, to set the parent dir cifs inode time to zero to force revalidate (faster) for it too? */ } - /* if not oplocked will force revalidate to get info + /* if not oplocked will force revalidate to get info on source file from srv */ cifsInode->time = 0; - /* Will update parent dir timestamps from srv within a second. + /* Will update parent dir timestamps from srv within a second. Would it really be worth it to set the parent dir (cifs inode) time field to zero to force revalidate on parent - directory faster ie + directory faster ie CIFS_I(inode)->time = 0; */ } @@ -109,7 +110,7 @@ cifs_follow_link(struct dentry *direntry, struct nameidata *nd) int rc = -EACCES; int xid; char *full_path = NULL; - char * target_path = ERR_PTR(-ENOMEM); + char *target_path = ERR_PTR(-ENOMEM); struct cifs_sb_info *cifs_sb; struct cifsTconInfo *pTcon; @@ -129,13 +130,19 @@ cifs_follow_link(struct dentry *direntry, struct nameidata *nd) goto out; } -/* BB add read reparse point symlink code and Unix extensions symlink code here BB */ + /* We could change this to: + if (pTcon->unix_ext) + but there does not seem any point in refusing to + get symlink info if we can, even if unix extensions + turned off for this mount */ + if (pTcon->ses->capabilities & CAP_UNIX) rc = CIFSSMBUnixQuerySymLink(xid, pTcon, full_path, target_path, PATH_MAX-1, cifs_sb->local_nls); else { + /* BB add read reparse point symlink code here */ /* rc = CIFSSMBQueryReparseLinkInfo */ /* BB Add code to Query ReparsePoint info */ /* BB Add MAC style xsymlink check here if enabled */ @@ -176,7 +183,7 @@ cifs_symlink(struct inode *inode, struct dentry *direntry, const char *symname) full_path = build_path_from_dentry(direntry); - if(full_path == NULL) { + if (full_path == NULL) { FreeXid(xid); return -ENOMEM; } @@ -185,19 +192,20 @@ cifs_symlink(struct inode *inode, struct dentry *direntry, const char *symname) cFYI(1, ("symname is %s", symname)); /* BB what if DFS and this volume is on different share? BB */ - if (cifs_sb->tcon->ses->capabilities & CAP_UNIX) + if (pTcon->unix_ext) rc = CIFSUnixCreateSymLink(xid, pTcon, full_path, symname, cifs_sb->local_nls); /* else - rc = CIFSCreateReparseSymLink(xid, pTcon, fromName, toName,cifs_sb_target->local_nls); */ + rc = CIFSCreateReparseSymLink(xid, pTcon, fromName, toName, + cifs_sb_target->local_nls); */ if (rc == 0) { - if (pTcon->ses->capabilities & CAP_UNIX) + if (pTcon->unix_ext) rc = cifs_get_inode_info_unix(&newinode, full_path, - inode->i_sb,xid); + inode->i_sb, xid); else rc = cifs_get_inode_info(&newinode, full_path, NULL, - inode->i_sb,xid); + inode->i_sb, xid); if (rc != 0) { cFYI(1, ("Create symlink ok, getinodeinfo fail rc = %d", @@ -226,9 +234,9 @@ cifs_readlink(struct dentry *direntry, char __user *pBuffer, int buflen) struct cifs_sb_info *cifs_sb; struct cifsTconInfo *pTcon; char *full_path = NULL; - char *tmp_path = NULL; - char * tmpbuffer; - unsigned char * referrals = NULL; + char *tmp_path = NULL; + char *tmpbuffer; + unsigned char *referrals = NULL; int num_referrals = 0; int len; __u16 fid; @@ -237,13 +245,13 @@ cifs_readlink(struct dentry *direntry, char __user *pBuffer, int buflen) cifs_sb = CIFS_SB(inode->i_sb); pTcon = cifs_sb->tcon; -/* BB would it be safe against deadlock to grab this sem +/* BB would it be safe against deadlock to grab this sem even though rename itself grabs the sem and calls lookup? */ /* mutex_lock(&inode->i_sb->s_vfs_rename_mutex);*/ full_path = build_path_from_dentry(direntry); /* mutex_unlock(&inode->i_sb->s_vfs_rename_mutex);*/ - if(full_path == NULL) { + if (full_path == NULL) { FreeXid(xid); return -ENOMEM; } @@ -251,70 +259,80 @@ cifs_readlink(struct dentry *direntry, char __user *pBuffer, int buflen) cFYI(1, ("Full path: %s inode = 0x%p pBuffer = 0x%p buflen = %d", full_path, inode, pBuffer, buflen)); - if(buflen > PATH_MAX) + if (buflen > PATH_MAX) len = PATH_MAX; else len = buflen; - tmpbuffer = kmalloc(len,GFP_KERNEL); - if(tmpbuffer == NULL) { + tmpbuffer = kmalloc(len, GFP_KERNEL); + if (tmpbuffer == NULL) { kfree(full_path); FreeXid(xid); return -ENOMEM; } -/* BB add read reparse point symlink code and Unix extensions symlink code here BB */ +/* BB add read reparse point symlink code and + Unix extensions symlink code here BB */ +/* We could disable this based on pTcon->unix_ext flag instead ... but why? */ if (cifs_sb->tcon->ses->capabilities & CAP_UNIX) rc = CIFSSMBUnixQuerySymLink(xid, pTcon, full_path, tmpbuffer, len - 1, cifs_sb->local_nls); else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) { - cERROR(1,("SFU style symlinks not implemented yet")); + cERROR(1, ("SFU style symlinks not implemented yet")); /* add open and read as in fs/cifs/inode.c */ - } else { rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN, GENERIC_READ, - OPEN_REPARSE_POINT,&fid, &oplock, NULL, - cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & + OPEN_REPARSE_POINT, &fid, &oplock, NULL, + cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); - if(!rc) { + if (!rc) { rc = CIFSSMBQueryReparseLinkInfo(xid, pTcon, full_path, tmpbuffer, - len - 1, + len - 1, fid, cifs_sb->local_nls); - if(CIFSSMBClose(xid, pTcon, fid)) { - cFYI(1,("Error closing junction point (open for ioctl)")); + if (CIFSSMBClose(xid, pTcon, fid)) { + cFYI(1, ("Error closing junction point " + "(open for ioctl)")); } - if(rc == -EIO) { + if (rc == -EIO) { /* Query if DFS Junction */ tmp_path = kmalloc(MAX_TREE_SIZE + MAX_PATHCONF + 1, GFP_KERNEL); if (tmp_path) { - strncpy(tmp_path, pTcon->treeName, MAX_TREE_SIZE); - strncat(tmp_path, full_path, MAX_PATHCONF); - rc = get_dfs_path(xid, pTcon->ses, tmp_path, + strncpy(tmp_path, pTcon->treeName, + MAX_TREE_SIZE); + strncat(tmp_path, full_path, + MAX_PATHCONF); + rc = get_dfs_path(xid, pTcon->ses, + tmp_path, cifs_sb->local_nls, &num_referrals, &referrals, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); - cFYI(1,("Get DFS for %s rc = %d ",tmp_path, rc)); - if((num_referrals == 0) && (rc == 0)) + cFYI(1, ("Get DFS for %s rc = %d ", + tmp_path, rc)); + if ((num_referrals == 0) && (rc == 0)) rc = -EACCES; else { - cFYI(1,("num referral: %d",num_referrals)); - if(referrals) { - cFYI(1,("referral string: %s",referrals)); - strncpy(tmpbuffer, referrals, len-1); + cFYI(1, ("num referral: %d", + num_referrals)); + if (referrals) { + cFYI(1,("referral string: %s", referrals)); + strncpy(tmpbuffer, + referrals, + len-1); } } kfree(referrals); kfree(tmp_path); } - /* BB add code like else decode referrals then memcpy to - tmpbuffer and free referrals string array BB */ + /* BB add code like else decode referrals + then memcpy to tmpbuffer and free referrals + string array BB */ } } } diff --git a/fs/cifs/md4.c b/fs/cifs/md4.c index 46d62c9dda0f..a2415c1a14db 100644 --- a/fs/cifs/md4.c +++ b/fs/cifs/md4.c @@ -1,20 +1,20 @@ -/* +/* Unix SMB/Netbios implementation. Version 1.9. a implementation of MD4 designed for use in the SMB authentication protocol Copyright (C) Andrew Tridgell 1997-1998. Modified by Steve French (sfrench@us.ibm.com) 2002-2003 - + This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. - + This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - + You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. @@ -170,7 +170,7 @@ mdfour(unsigned char *out, unsigned char *in, int n) while (n > 64) { copy64(M, in); - mdfour64(M,&A,&B, &C, &D); + mdfour64(M, &A, &B, &C, &D); in += 64; n -= 64; } diff --git a/fs/cifs/md5.c b/fs/cifs/md5.c index ccebf9b7eb86..e5c3e1212697 100644 --- a/fs/cifs/md5.c +++ b/fs/cifs/md5.c @@ -15,9 +15,9 @@ * will fill a supplied 16-byte array with the digest. */ -/* This code slightly modified to fit into Samba by - abartlet@samba.org Jun 2001 - and to fit the cifs vfs by +/* This code slightly modified to fit into Samba by + abartlet@samba.org Jun 2001 + and to fit the cifs vfs by Steve French sfrench@us.ibm.com */ #include <linux/string.h> @@ -106,7 +106,7 @@ MD5Update(struct MD5Context *ctx, unsigned char const *buf, unsigned len) } /* - * Final wrapup - pad to 64-byte boundary with the bit pattern + * Final wrapup - pad to 64-byte boundary with the bit pattern * 1 0* (64-bit count of bits processed, MSB-first) */ void diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 19cc294c7c70..0bcec0844bee 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -1,7 +1,7 @@ /* * fs/cifs/misc.c * - * Copyright (C) International Business Machines Corp., 2002,2005 + * Copyright (C) International Business Machines Corp., 2002,2007 * Author(s): Steve French (sfrench@us.ibm.com) * * This library is free software; you can redistribute it and/or modify @@ -16,7 +16,7 @@ * * You should have received a copy of the GNU Lesser General Public License * along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include <linux/slab.h> @@ -32,12 +32,12 @@ extern mempool_t *cifs_sm_req_poolp; extern mempool_t *cifs_req_poolp; -extern struct task_struct * oplockThread; +extern struct task_struct *oplockThread; -/* The xid serves as a useful identifier for each incoming vfs request, - in a similar way to the mid which is useful to track each sent smb, - and CurrentXid can also provide a running counter (although it - will eventually wrap past zero) of the total vfs operations handled +/* The xid serves as a useful identifier for each incoming vfs request, + in a similar way to the mid which is useful to track each sent smb, + and CurrentXid can also provide a running counter (although it + will eventually wrap past zero) of the total vfs operations handled since the cifs fs was mounted */ unsigned int @@ -47,10 +47,12 @@ _GetXid(void) spin_lock(&GlobalMid_Lock); GlobalTotalActiveXid++; + + /* keep high water mark for number of simultaneous ops in filesystem */ if (GlobalTotalActiveXid > GlobalMaxActiveXid) - GlobalMaxActiveXid = GlobalTotalActiveXid; /* keep high water mark for number of simultaneous vfs ops in our filesystem */ - if(GlobalTotalActiveXid > 65000) - cFYI(1,("warning: more than 65000 requests active")); + GlobalMaxActiveXid = GlobalTotalActiveXid; + if (GlobalTotalActiveXid > 65000) + cFYI(1, ("warning: more than 65000 requests active")); xid = GlobalCurrentXid++; spin_unlock(&GlobalMid_Lock); return xid; @@ -60,7 +62,7 @@ void _FreeXid(unsigned int xid) { spin_lock(&GlobalMid_Lock); - /* if(GlobalTotalActiveXid == 0) + /* if (GlobalTotalActiveXid == 0) BUG(); */ GlobalTotalActiveXid--; spin_unlock(&GlobalMid_Lock); @@ -144,12 +146,12 @@ cifs_buf_get(void) { struct smb_hdr *ret_buf = NULL; -/* We could use negotiated size instead of max_msgsize - - but it may be more efficient to always alloc same size - albeit slightly larger than necessary and maxbuffersize +/* We could use negotiated size instead of max_msgsize - + but it may be more efficient to always alloc same size + albeit slightly larger than necessary and maxbuffersize defaults to this and can not be bigger */ - ret_buf = - (struct smb_hdr *) mempool_alloc(cifs_req_poolp, GFP_KERNEL | GFP_NOFS); + ret_buf = (struct smb_hdr *) mempool_alloc(cifs_req_poolp, + GFP_KERNEL | GFP_NOFS); /* clear the first few header bytes */ /* for most paths, more is cleared in header_assemble */ @@ -172,7 +174,7 @@ cifs_buf_release(void *buf_to_free) /* cFYI(1, ("Null buffer passed to cifs_buf_release"));*/ return; } - mempool_free(buf_to_free,cifs_req_poolp); + mempool_free(buf_to_free, cifs_req_poolp); atomic_dec(&bufAllocCount); return; @@ -183,12 +185,12 @@ cifs_small_buf_get(void) { struct smb_hdr *ret_buf = NULL; -/* We could use negotiated size instead of max_msgsize - - but it may be more efficient to always alloc same size - albeit slightly larger than necessary and maxbuffersize +/* We could use negotiated size instead of max_msgsize - + but it may be more efficient to always alloc same size + albeit slightly larger than necessary and maxbuffersize defaults to this and can not be bigger */ - ret_buf = - (struct smb_hdr *) mempool_alloc(cifs_sm_req_poolp, GFP_KERNEL | GFP_NOFS); + ret_buf = (struct smb_hdr *) mempool_alloc(cifs_sm_req_poolp, + GFP_KERNEL | GFP_NOFS); if (ret_buf) { /* No need to clear memory here, cleared in header assemble */ /* memset(ret_buf, 0, sizeof(struct smb_hdr) + 27);*/ @@ -209,30 +211,30 @@ cifs_small_buf_release(void *buf_to_free) cFYI(1, ("Null buffer passed to cifs_small_buf_release")); return; } - mempool_free(buf_to_free,cifs_sm_req_poolp); + mempool_free(buf_to_free, cifs_sm_req_poolp); atomic_dec(&smBufAllocCount); return; } -/* +/* Find a free multiplex id (SMB mid). Otherwise there could be mid collisions which might cause problems, demultiplexing the wrong response to this request. Multiplex ids could collide if one of a series requests takes much longer than the others, or if a very large number of long lived requests (byte range locks or FindNotify requests) are pending. No more than - 64K-1 requests can be outstanding at one time. If no + 64K-1 requests can be outstanding at one time. If no mids are available, return zero. A future optimization could make the combination of mids and uid the key we use - to demultiplex on (rather than mid alone). + to demultiplex on (rather than mid alone). In addition to the above check, the cifs demultiplex code already used the command code as a secondary check of the frame and if signing is negotiated the response would be discarded if the mid were the same but the signature was wrong. Since the mid is not put in the pending queue until later (when it is about to be dispatched) - we do have to limit the number of outstanding requests + we do have to limit the number of outstanding requests to somewhat less than 64K-1 although it is hard to imagine so many threads being in the vfs at one time. */ @@ -240,27 +242,27 @@ __u16 GetNextMid(struct TCP_Server_Info *server) { __u16 mid = 0; __u16 last_mid; - int collision; + int collision; - if(server == NULL) + if (server == NULL) return mid; spin_lock(&GlobalMid_Lock); last_mid = server->CurrentMid; /* we do not want to loop forever */ server->CurrentMid++; /* This nested loop looks more expensive than it is. - In practice the list of pending requests is short, + In practice the list of pending requests is short, fewer than 50, and the mids are likely to be unique on the first pass through the loop unless some request takes longer than the 64 thousand requests before it (and it would also have to have been a request that did not time out) */ - while(server->CurrentMid != last_mid) { + while (server->CurrentMid != last_mid) { struct list_head *tmp; struct mid_q_entry *mid_entry; collision = 0; - if(server->CurrentMid == 0) + if (server->CurrentMid == 0) server->CurrentMid++; list_for_each(tmp, &server->pending_mid_q) { @@ -273,7 +275,7 @@ __u16 GetNextMid(struct TCP_Server_Info *server) break; } } - if(collision == 0) { + if (collision == 0) { mid = server->CurrentMid; break; } @@ -290,11 +292,11 @@ header_assemble(struct smb_hdr *buffer, char smb_command /* command */ , const struct cifsTconInfo *treeCon, int word_count /* length of fixed section (word count) in two byte units */) { - struct list_head* temp_item; - struct cifsSesInfo * ses; + struct list_head *temp_item; + struct cifsSesInfo *ses; char *temp = (char *) buffer; - memset(temp,0,256); /* bigger than MAX_CIFS_HDR_SIZE */ + memset(temp, 0, 256); /* bigger than MAX_CIFS_HDR_SIZE */ buffer->smb_buf_length = (2 * word_count) + sizeof (struct smb_hdr) - @@ -325,7 +327,7 @@ header_assemble(struct smb_hdr *buffer, char smb_command /* command */ , /* Uid is not converted */ buffer->Uid = treeCon->ses->Suid; buffer->Mid = GetNextMid(treeCon->ses->server); - if(multiuser_mount != 0) { + if (multiuser_mount != 0) { /* For the multiuser case, there are few obvious technically */ /* possible mechanisms to match the local linux user (uid) */ /* to a valid remote smb user (smb_uid): */ @@ -348,21 +350,22 @@ header_assemble(struct smb_hdr *buffer, char smb_command /* command */ , /* flag were disabled. */ /* BB Add support for establishing new tCon and SMB Session */ - /* with userid/password pairs found on the smb session */ + /* with userid/password pairs found on the smb session */ /* for other target tcp/ip addresses BB */ - if(current->fsuid != treeCon->ses->linux_uid) { - cFYI(1,("Multiuser mode and UID did not match tcon uid")); + if (current->fsuid != treeCon->ses->linux_uid) { + cFYI(1, ("Multiuser mode and UID " + "did not match tcon uid")); read_lock(&GlobalSMBSeslock); list_for_each(temp_item, &GlobalSMBSessionList) { ses = list_entry(temp_item, struct cifsSesInfo, cifsSessionList); - if(ses->linux_uid == current->fsuid) { - if(ses->server == treeCon->ses->server) { - cFYI(1,("found matching uid substitute right smb_uid")); + if (ses->linux_uid == current->fsuid) { + if (ses->server == treeCon->ses->server) { + cFYI(1, ("found matching uid substitute right smb_uid")); buffer->Uid = ses->Suid; break; } else { - /* BB eventually call cifs_setup_session here */ - cFYI(1,("local UID found but smb sess with this server does not exist")); + /* BB eventually call cifs_setup_session here */ + cFYI(1, ("local UID found but no smb sess with this server exists")); } } } @@ -374,8 +377,8 @@ header_assemble(struct smb_hdr *buffer, char smb_command /* command */ , buffer->Flags2 |= SMBFLG2_DFS; if (treeCon->nocase) buffer->Flags |= SMBFLG_CASELESS; - if((treeCon->ses) && (treeCon->ses->server)) - if(treeCon->ses->server->secMode & + if ((treeCon->ses) && (treeCon->ses->server)) + if (treeCon->ses->server->secMode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) buffer->Flags2 |= SMBFLG2_SECURITY_SIGNATURE; } @@ -388,18 +391,18 @@ header_assemble(struct smb_hdr *buffer, char smb_command /* command */ , static int checkSMBhdr(struct smb_hdr *smb, __u16 mid) { - /* Make sure that this really is an SMB, that it is a response, + /* Make sure that this really is an SMB, that it is a response, and that the message ids match */ - if ((*(__le32 *) smb->Protocol == cpu_to_le32(0x424d53ff)) && - (mid == smb->Mid)) { - if(smb->Flags & SMBFLG_RESPONSE) - return 0; - else { + if ((*(__le32 *) smb->Protocol == cpu_to_le32(0x424d53ff)) && + (mid == smb->Mid)) { + if (smb->Flags & SMBFLG_RESPONSE) + return 0; + else { /* only one valid case where server sends us request */ - if(smb->Command == SMB_COM_LOCKING_ANDX) + if (smb->Command == SMB_COM_LOCKING_ANDX) return 0; else - cERROR(1, ("Rcvd Request not response")); + cERROR(1, ("Received Request not response")); } } else { /* bad signature or mid */ if (*(__le32 *) smb->Protocol != cpu_to_le32(0x424d53ff)) @@ -426,9 +429,9 @@ checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length) smb->WordCount = 0; /* some error cases do not return wct and bcc */ return 0; - } else if ((length == sizeof(struct smb_hdr) + 1) && + } else if ((length == sizeof(struct smb_hdr) + 1) && (smb->WordCount == 0)) { - char * tmp = (char *)smb; + char *tmp = (char *)smb; /* Need to work around a bug in two servers here */ /* First, check if the part of bcc they sent was zero */ if (tmp[sizeof(struct smb_hdr)] == 0) { @@ -442,7 +445,7 @@ checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length) tmp[sizeof(struct smb_hdr)+1] = 0; return 0; } - cERROR(1,("rcvd invalid byte count (bcc)")); + cERROR(1, ("rcvd invalid byte count (bcc)")); } else { cERROR(1, ("Length less than smb header size")); } @@ -458,32 +461,33 @@ checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length) return 1; clc_len = smbCalcSize_LE(smb); - if(4 + len != length) { - cERROR(1, ("Length read does not match RFC1001 length %d",len)); + if (4 + len != length) { + cERROR(1, ("Length read does not match RFC1001 length %d", + len)); return 1; } if (4 + len != clc_len) { /* check if bcc wrapped around for large read responses */ - if((len > 64 * 1024) && (len > clc_len)) { + if ((len > 64 * 1024) && (len > clc_len)) { /* check if lengths match mod 64K */ - if(((4 + len) & 0xFFFF) == (clc_len & 0xFFFF)) - return 0; /* bcc wrapped */ + if (((4 + len) & 0xFFFF) == (clc_len & 0xFFFF)) + return 0; /* bcc wrapped */ } cFYI(1, ("Calculated size %d vs length %d mismatch for mid %d", clc_len, 4 + len, smb->Mid)); /* Windows XP can return a few bytes too much, presumably - an illegal pad, at the end of byte range lock responses + an illegal pad, at the end of byte range lock responses so we allow for that three byte pad, as long as actual received length is as long or longer than calculated length */ - /* We have now had to extend this more, since there is a + /* We have now had to extend this more, since there is a case in which it needs to be bigger still to handle a malformed response to transact2 findfirst from WinXP when access denied is returned and thus bcc and wct are zero but server says length is 0x21 bytes too long as if the server forget to reset the smb rfc1001 length when it reset the wct and bcc to minimum size and drop the t2 parms and data */ - if((4+len > clc_len) && (len <= clc_len + 512)) + if ((4+len > clc_len) && (len <= clc_len + 512)) return 0; else { cERROR(1, ("RFC1001 size %d bigger than SMB for Mid=%d", @@ -495,61 +499,64 @@ checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length) } int is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv) -{ - struct smb_com_lock_req * pSMB = (struct smb_com_lock_req *)buf; +{ + struct smb_com_lock_req *pSMB = (struct smb_com_lock_req *)buf; struct list_head *tmp; struct list_head *tmp1; struct cifsTconInfo *tcon; struct cifsFileInfo *netfile; - cFYI(1,("Checking for oplock break or dnotify response")); - if((pSMB->hdr.Command == SMB_COM_NT_TRANSACT) && + cFYI(1, ("Checking for oplock break or dnotify response")); + if ((pSMB->hdr.Command == SMB_COM_NT_TRANSACT) && (pSMB->hdr.Flags & SMBFLG_RESPONSE)) { - struct smb_com_transaction_change_notify_rsp * pSMBr = + struct smb_com_transaction_change_notify_rsp *pSMBr = (struct smb_com_transaction_change_notify_rsp *)buf; - struct file_notify_information * pnotify; + struct file_notify_information *pnotify; __u32 data_offset = 0; - if(pSMBr->ByteCount > sizeof(struct file_notify_information)) { + if (pSMBr->ByteCount > sizeof(struct file_notify_information)) { data_offset = le32_to_cpu(pSMBr->DataOffset); pnotify = (struct file_notify_information *) ((char *)&pSMBr->hdr.Protocol + data_offset); - cFYI(1,("dnotify on %s Action: 0x%x",pnotify->FileName, + cFYI(1, ("dnotify on %s Action: 0x%x", + pnotify->FileName, pnotify->Action)); /* BB removeme BB */ - /* cifs_dump_mem("Rcvd notify Data: ",buf, + /* cifs_dump_mem("Rcvd notify Data: ",buf, sizeof(struct smb_hdr)+60); */ return TRUE; } - if(pSMBr->hdr.Status.CifsError) { - cFYI(1,("notify err 0x%d",pSMBr->hdr.Status.CifsError)); + if (pSMBr->hdr.Status.CifsError) { + cFYI(1, ("notify err 0x%d", + pSMBr->hdr.Status.CifsError)); return TRUE; } return FALSE; - } - if(pSMB->hdr.Command != SMB_COM_LOCKING_ANDX) + } + if (pSMB->hdr.Command != SMB_COM_LOCKING_ANDX) return FALSE; - if(pSMB->hdr.Flags & SMBFLG_RESPONSE) { + if (pSMB->hdr.Flags & SMBFLG_RESPONSE) { /* no sense logging error on invalid handle on oplock break - harmless race between close request and oplock break response is expected from time to time writing out large dirty files cached on the client */ - if ((NT_STATUS_INVALID_HANDLE) == - le32_to_cpu(pSMB->hdr.Status.CifsError)) { - cFYI(1,("invalid handle on oplock break")); + if ((NT_STATUS_INVALID_HANDLE) == + le32_to_cpu(pSMB->hdr.Status.CifsError)) { + cFYI(1, ("invalid handle on oplock break")); return TRUE; - } else if (ERRbadfid == + } else if (ERRbadfid == le16_to_cpu(pSMB->hdr.Status.DosError.Error)) { - return TRUE; + return TRUE; } else { return FALSE; /* on valid oplock brk we get "request" */ } } - if(pSMB->hdr.WordCount != 8) + if (pSMB->hdr.WordCount != 8) return FALSE; - cFYI(1,(" oplock type 0x%d level 0x%d",pSMB->LockType,pSMB->OplockLevel)); - if(!(pSMB->LockType & LOCKING_ANDX_OPLOCK_RELEASE)) - return FALSE; + cFYI(1, ("oplock type 0x%d level 0x%d", + pSMB->LockType, pSMB->OplockLevel)); + if (!(pSMB->LockType & LOCKING_ANDX_OPLOCK_RELEASE)) + return FALSE; /* look up tcon based on tid & uid */ read_lock(&GlobalSMBSeslock); @@ -557,36 +564,38 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv) tcon = list_entry(tmp, struct cifsTconInfo, cifsConnectionList); if ((tcon->tid == buf->Tid) && (srv == tcon->ses->server)) { cifs_stats_inc(&tcon->num_oplock_brks); - list_for_each(tmp1,&tcon->openFileList){ - netfile = list_entry(tmp1,struct cifsFileInfo, + list_for_each(tmp1, &tcon->openFileList) { + netfile = list_entry(tmp1, struct cifsFileInfo, tlist); - if(pSMB->Fid == netfile->netfid) { + if (pSMB->Fid == netfile->netfid) { struct cifsInodeInfo *pCifsInode; read_unlock(&GlobalSMBSeslock); - cFYI(1,("file id match, oplock break")); - pCifsInode = + cFYI(1, + ("file id match, oplock break")); + pCifsInode = CIFS_I(netfile->pInode); pCifsInode->clientCanCacheAll = FALSE; - if(pSMB->OplockLevel == 0) + if (pSMB->OplockLevel == 0) pCifsInode->clientCanCacheRead = FALSE; pCifsInode->oplockPending = TRUE; AllocOplockQEntry(netfile->pInode, netfile->netfid, tcon); - cFYI(1,("about to wake up oplock thd")); - if(oplockThread) + cFYI(1, + ("about to wake up oplock thread")); + if (oplockThread) wake_up_process(oplockThread); return TRUE; } } read_unlock(&GlobalSMBSeslock); - cFYI(1,("No matching file for oplock break")); + cFYI(1, ("No matching file for oplock break")); return TRUE; } } read_unlock(&GlobalSMBSeslock); - cFYI(1,("Can not process oplock break for non-existent connection")); + cFYI(1, ("Can not process oplock break for non-existent connection")); return TRUE; } @@ -643,13 +652,13 @@ dump_smb(struct smb_hdr *smb_buf, int smb_buf_length) only legal in POSIX-like OS (if they are present in the string). Path names are little endian 16 bit Unicode on the wire */ int -cifs_convertUCSpath(char *target, const __le16 * source, int maxlen, - const struct nls_table * cp) +cifs_convertUCSpath(char *target, const __le16 *source, int maxlen, + const struct nls_table *cp) { - int i,j,len; + int i, j, len; __u16 src_char; - for(i = 0, j = 0; i < maxlen; i++) { + for (i = 0, j = 0; i < maxlen; i++) { src_char = le16_to_cpu(source[i]); switch (src_char) { case 0: @@ -678,10 +687,10 @@ cifs_convertUCSpath(char *target, const __le16 * source, int maxlen, case UNI_LESSTHAN: target[j] = '<'; break; - default: - len = cp->uni2char(src_char, &target[j], + default: + len = cp->uni2char(src_char, &target[j], NLS_MAX_CHARSET_SIZE); - if(len > 0) { + if (len > 0) { j += len; continue; } else { @@ -690,7 +699,7 @@ cifs_convertUCSpath(char *target, const __le16 * source, int maxlen, } j++; /* make sure we do not overrun callers allocated temp buffer */ - if(j >= (2 * NAME_MAX)) + if (j >= (2 * NAME_MAX)) break; } cUCS_out: @@ -703,18 +712,18 @@ cUCS_out: only legal in POSIX-like OS (if they are present in the string). Path names are little endian 16 bit Unicode on the wire */ int -cifsConvertToUCS(__le16 * target, const char *source, int maxlen, - const struct nls_table * cp, int mapChars) +cifsConvertToUCS(__le16 *target, const char *source, int maxlen, + const struct nls_table *cp, int mapChars) { - int i,j,charlen; + int i, j, charlen; int len_remaining = maxlen; char src_char; __u16 temp; - if(!mapChars) + if (!mapChars) return cifs_strtoUCS(target, source, PATH_MAX, cp); - for(i = 0, j = 0; i < maxlen; j++) { + for (i = 0, j = 0; i < maxlen; j++) { src_char = source[i]; switch (src_char) { case 0: @@ -737,7 +746,7 @@ cifsConvertToUCS(__le16 * target, const char *source, int maxlen, break; case '|': target[j] = cpu_to_le16(UNI_PIPE); - break; + break; /* BB We can not handle remapping slash until all the calls to build_path_from_dentry are modified, as they use slash as separator BB */ @@ -749,7 +758,7 @@ cifsConvertToUCS(__le16 * target, const char *source, int maxlen, len_remaining, &temp); /* if no match, use question mark, which at least in some cases servers as wild card */ - if(charlen < 1) { + if (charlen < 1) { target[j] = cpu_to_le16(0x003f); charlen = 1; } else @@ -758,7 +767,7 @@ cifsConvertToUCS(__le16 * target, const char *source, int maxlen, /* character may take more than one byte in the the source string, but will take exactly two bytes in the target string */ - i+= charlen; + i += charlen; continue; } i++; /* move to next char in source string */ diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c index 53e304d59544..2bfed3f45d0f 100644 --- a/fs/cifs/netmisc.c +++ b/fs/cifs/netmisc.c @@ -3,23 +3,22 @@ * * Copyright (c) International Business Machines Corp., 2002 * Author(s): Steve French (sfrench@us.ibm.com) - * + * * Error mapping routines from Samba libsmb/errormap.c * Copyright (C) Andrew Tridgell 2001 * - * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or + * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See * the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software + * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ @@ -30,9 +29,7 @@ #include <linux/fs.h> #include <asm/div64.h> #include <asm/byteorder.h> -#ifdef CONFIG_CIFS_EXPERIMENTAL #include <linux/inet.h> -#endif #include "cifsfs.h" #include "cifspdu.h" #include "cifsglob.h" @@ -67,22 +64,22 @@ static const struct smb_to_posix_error mapping_table_ERRDOS[] = { {ERRbadshare, -ETXTBSY}, {ERRlock, -EACCES}, {ERRunsup, -EINVAL}, - {ERRnosuchshare,-ENXIO}, + {ERRnosuchshare, -ENXIO}, {ERRfilexists, -EEXIST}, {ERRinvparm, -EINVAL}, {ERRdiskfull, -ENOSPC}, {ERRinvname, -ENOENT}, - {ERRinvlevel,-EOPNOTSUPP}, + {ERRinvlevel, -EOPNOTSUPP}, {ERRdirnotempty, -ENOTEMPTY}, {ERRnotlocked, -ENOLCK}, {ERRcancelviolation, -ENOLCK}, {ERRalreadyexists, -EEXIST}, {ERRmoredata, -EOVERFLOW}, - {ERReasnotsupported,-EOPNOTSUPP}, + {ERReasnotsupported, -EOPNOTSUPP}, {ErrQuota, -EDQUOT}, {ErrNotALink, -ENOLINK}, - {ERRnetlogonNotStarted,-ENOPROTOOPT}, - {ErrTooManyLinks,-EMLINK}, + {ERRnetlogonNotStarted, -ENOPROTOOPT}, + {ErrTooManyLinks, -EMLINK}, {0, 0} }; @@ -133,85 +130,24 @@ static const struct smb_to_posix_error mapping_table_ERRHRD[] = { /* returns 0 if invalid address */ int -cifs_inet_pton(int address_family, char *cp,void *dst) +cifs_inet_pton(int address_family, char *cp, void *dst) { -#ifdef CONFIG_CIFS_EXPERIMENTAL int ret = 0; /* calculate length by finding first slash or NULL */ - /* BB Should we convert '/' slash to '\' here since it seems already done - before this */ - if( address_family == AF_INET ){ - ret = in4_pton(cp, -1 /* len */, dst , '\\', NULL); - } else if( address_family == AF_INET6 ){ + /* BB Should we convert '/' slash to '\' here since it seems already + * done before this */ + if ( address_family == AF_INET ) { + ret = in4_pton(cp, -1 /* len */, dst , '\\', NULL); + } else if ( address_family == AF_INET6 ) { ret = in6_pton(cp, -1 /* len */, dst , '\\', NULL); } #ifdef CONFIG_CIFS_DEBUG2 - cFYI(1,("address conversion returned %d for %s", ret, cp)); + cFYI(1, ("address conversion returned %d for %s", ret, cp)); #endif if (ret > 0) ret = 1; return ret; -#else - int value; - int digit; - int i; - char temp; - char bytes[4]; - char *end = bytes; - static const int addr_class_max[4] = - { 0xffffffff, 0xffffff, 0xffff, 0xff }; - - if(address_family != AF_INET) - return -EAFNOSUPPORT; - - for (i = 0; i < 4; i++) { - bytes[i] = 0; - } - - temp = *cp; - - while (TRUE) { - if (!isdigit(temp)) - return 0; - - value = 0; - digit = 0; - for (;;) { - if (isascii(temp) && isdigit(temp)) { - value = (value * 10) + temp - '0'; - temp = *++cp; - digit = 1; - } else - break; - } - - if (temp == '.') { - if ((end > bytes + 2) || (value > 255)) - return 0; - *end++ = value; - temp = *++cp; - } else if (temp == ':') { - cFYI(1,("IPv6 addresses not supported for CIFS mounts yet")); - return -1; - } else - break; - } - - /* check for last characters */ - if (temp != '\0' && (!isascii(temp) || !isspace(temp))) - if (temp != '\\') { - if (temp != '/') - return 0; - else - (*cp = '\\'); /* switch the slash the expected way */ - } - if (value > addr_class_max[end - bytes]) - return 0; - - *((__be32 *)dst) = *((__be32 *) bytes) | htonl(value); - return 1; /* success */ -#endif /* EXPERIMENTAL */ } /***************************************************************************** @@ -246,7 +182,7 @@ static const struct { ERRHRD, ERRgeneral, NT_STATUS_UNRECOGNIZED_MEDIA}, { ERRDOS, 27, NT_STATUS_NONEXISTENT_SECTOR}, /* { This NT error code was 'sqashed' - from NT_STATUS_MORE_PROCESSING_REQUIRED to NT_STATUS_OK + from NT_STATUS_MORE_PROCESSING_REQUIRED to NT_STATUS_OK during the session setup } */ { ERRDOS, ERRnomem, NT_STATUS_NO_MEMORY}, { @@ -261,7 +197,7 @@ static const struct { ERRDOS, 193, NT_STATUS_INVALID_FILE_FOR_SECTION}, { ERRDOS, ERRnoaccess, NT_STATUS_ALREADY_COMMITTED}, /* { This NT error code was 'sqashed' - from NT_STATUS_ACCESS_DENIED to NT_STATUS_TRUSTED_RELATIONSHIP_FAILURE + from NT_STATUS_ACCESS_DENIED to NT_STATUS_TRUSTED_RELATIONSHIP_FAILURE during the session setup } */ { ERRDOS, ERRnoaccess, NT_STATUS_ACCESS_DENIED}, { @@ -331,7 +267,7 @@ static const struct { ERRHRD, ERRgeneral, NT_STATUS_INVALID_ACCOUNT_NAME}, { ERRHRD, ERRgeneral, NT_STATUS_USER_EXISTS}, /* { This NT error code was 'sqashed' - from NT_STATUS_NO_SUCH_USER to NT_STATUS_LOGON_FAILURE + from NT_STATUS_NO_SUCH_USER to NT_STATUS_LOGON_FAILURE during the session setup } */ { ERRDOS, ERRnoaccess, NT_STATUS_NO_SUCH_USER}, { @@ -341,7 +277,7 @@ static const struct { ERRHRD, ERRgeneral, NT_STATUS_MEMBER_NOT_IN_GROUP}, { ERRHRD, ERRgeneral, NT_STATUS_LAST_ADMIN}, /* { This NT error code was 'sqashed' - from NT_STATUS_WRONG_PASSWORD to NT_STATUS_LOGON_FAILURE + from NT_STATUS_WRONG_PASSWORD to NT_STATUS_LOGON_FAILURE during the session setup } */ { ERRSRV, ERRbadpw, NT_STATUS_WRONG_PASSWORD}, { @@ -393,8 +329,8 @@ static const struct { ERRHRD, ERRgeneral, NT_STATUS_FILE_INVALID}, { ERRHRD, ERRgeneral, NT_STATUS_ALLOTTED_SPACE_EXCEEDED}, /* { This NT error code was 'sqashed' - from NT_STATUS_INSUFFICIENT_RESOURCES to NT_STATUS_INSUFF_SERVER_RESOURCES - during the session setup } */ + from NT_STATUS_INSUFFICIENT_RESOURCES to + NT_STATUS_INSUFF_SERVER_RESOURCES during the session setup } */ { ERRDOS, ERRnomem, NT_STATUS_INSUFFICIENT_RESOURCES}, { ERRDOS, ERRbadpath, NT_STATUS_DFS_EXIT_PATH_FOUND}, { @@ -638,8 +574,8 @@ static const struct { ERRDOS, 19, NT_STATUS_TOO_LATE}, { ERRDOS, ERRnoaccess, NT_STATUS_NO_TRUST_LSA_SECRET}, /* { This NT error code was 'sqashed' - from NT_STATUS_NO_TRUST_SAM_ACCOUNT to NT_STATUS_TRUSTED_RELATIONSHIP_FAILURE - during the session setup } */ + from NT_STATUS_NO_TRUST_SAM_ACCOUNT to + NT_STATUS_TRUSTED_RELATIONSHIP_FAILURE during the session setup } */ { ERRDOS, ERRnoaccess, NT_STATUS_NO_TRUST_SAM_ACCOUNT}, { ERRDOS, ERRnoaccess, NT_STATUS_TRUSTED_DOMAIN_FAILURE}, { @@ -658,7 +594,7 @@ static const struct { ERRDOS, ERRnoaccess, NT_STATUS_NOLOGON_WORKSTATION_TRUST_ACCOUNT}, { ERRDOS, ERRnoaccess, NT_STATUS_NOLOGON_SERVER_TRUST_ACCOUNT}, /* { This NT error code was 'sqashed' - from NT_STATUS_DOMAIN_TRUST_INCONSISTENT to NT_STATUS_LOGON_FAILURE + from NT_STATUS_DOMAIN_TRUST_INCONSISTENT to NT_STATUS_LOGON_FAILURE during the session setup } */ { ERRDOS, ERRnoaccess, NT_STATUS_DOMAIN_TRUST_INCONSISTENT}, { @@ -789,7 +725,7 @@ cifs_print_status(__u32 status_code) if (((nt_errs[idx].nt_errcode) & 0xFFFFFF) == (status_code & 0xFFFFFF)) { printk(KERN_NOTICE "Status code returned 0x%08x %s\n", - status_code,nt_errs[idx].nt_errstr); + status_code, nt_errs[idx].nt_errstr); } idx++; } @@ -821,7 +757,7 @@ int map_smb_to_linux_error(struct smb_hdr *smb) { unsigned int i; - int rc = -EIO; /* if transport error smb error may not be set */ + int rc = -EIO; /* if transport error smb error may not be set */ __u8 smberrclass; __u16 smberrcode; @@ -832,9 +768,10 @@ map_smb_to_linux_error(struct smb_hdr *smb) return 0; if (smb->Flags2 & SMBFLG2_ERR_STATUS) { - /* translate the newer STATUS codes to old style errors and then to POSIX errors */ + /* translate the newer STATUS codes to old style SMB errors + * and then to POSIX errors */ __u32 err = le32_to_cpu(smb->Status.CifsError); - if(cifsFYI & CIFS_RC) + if (cifsFYI & CIFS_RC) cifs_print_status(err); ntstatus_to_dos(err, &smberrclass, &smberrcode); } else { @@ -845,38 +782,42 @@ map_smb_to_linux_error(struct smb_hdr *smb) /* old style errors */ /* DOS class smb error codes - map DOS */ - if (smberrclass == ERRDOS) { /* one byte field no need to byte reverse */ + if (smberrclass == ERRDOS) { /* 1 byte field no need to byte reverse */ for (i = 0; i < sizeof (mapping_table_ERRDOS) / sizeof (struct smb_to_posix_error); i++) { if (mapping_table_ERRDOS[i].smb_err == 0) break; - else if (mapping_table_ERRDOS[i].smb_err == smberrcode) { + else if (mapping_table_ERRDOS[i].smb_err == + smberrcode) { rc = mapping_table_ERRDOS[i].posix_code; break; } - /* else try the next error mapping one to see if it will match */ + /* else try next error mapping one to see if match */ } - } else if (smberrclass == ERRSRV) { /* server class of error codes */ + } else if (smberrclass == ERRSRV) { /* server class of error codes */ for (i = 0; i < sizeof (mapping_table_ERRSRV) / sizeof (struct smb_to_posix_error); i++) { if (mapping_table_ERRSRV[i].smb_err == 0) break; - else if (mapping_table_ERRSRV[i].smb_err == smberrcode) { + else if (mapping_table_ERRSRV[i].smb_err == + smberrcode) { rc = mapping_table_ERRSRV[i].posix_code; break; } - /* else try the next error mapping one to see if it will match */ + /* else try next error mapping to see if match */ } } /* else ERRHRD class errors or junk - return EIO */ - cFYI(1, (" !!Mapping smb error code %d to POSIX err %d !!", smberrcode,rc)); + cFYI(1, (" !!Mapping smb error code %d to POSIX err %d !!", + smberrcode, rc)); - /* generic corrective action e.g. reconnect SMB session on ERRbaduid could be added */ + /* generic corrective action e.g. reconnect SMB session on + * ERRbaduid could be added */ return rc; } @@ -910,7 +851,7 @@ smbCalcSize_LE(struct smb_hdr *ptr) struct timespec cifs_NTtimeToUnix(u64 ntutc) { - struct timespec ts; + struct timespec ts; /* BB what about the timezone? BB */ /* Subtract the NTFS time offset, then convert to 1s intervals. */ @@ -918,7 +859,7 @@ cifs_NTtimeToUnix(u64 ntutc) t = ntutc - NTFS_TIME_OFFSET; ts.tv_nsec = do_div(t, 10000000) * 100; - ts.tv_sec = t; + ts.tv_sec = t; return ts; } @@ -946,20 +887,20 @@ struct timespec cnvrtDosUnixTm(__u16 date, __u16 time) SMB_TIME * st = (SMB_TIME *)&time; SMB_DATE * sd = (SMB_DATE *)&date; - cFYI(1,("date %d time %d",date, time)); + cFYI(1, ("date %d time %d", date, time)); sec = 2 * st->TwoSeconds; min = st->Minutes; - if((sec > 59) || (min > 59)) - cERROR(1,("illegal time min %d sec %d", min, sec)); + if ((sec > 59) || (min > 59)) + cERROR(1, ("illegal time min %d sec %d", min, sec)); sec += (min * 60); sec += 60 * 60 * st->Hours; - if(st->Hours > 24) - cERROR(1,("illegal hours %d",st->Hours)); + if (st->Hours > 24) + cERROR(1, ("illegal hours %d", st->Hours)); days = sd->Day; month = sd->Month; - if((days > 31) || (month > 12)) - cERROR(1,("illegal date, month %d day: %d", month, days)); + if ((days > 31) || (month > 12)) + cERROR(1, ("illegal date, month %d day: %d", month, days)); month -= 1; days += total_days_of_prev_months[month]; days += 3652; /* account for difference in days between 1980 and 1970 */ @@ -970,15 +911,15 @@ struct timespec cnvrtDosUnixTm(__u16 date, __u16 time) for years/100 except for years/400, but since the maximum number for DOS year is 2**7, the last year is 1980+127, which means we need only consider 2 special case years, ie the years 2000 and 2100, and only - adjust for the lack of leap year for the year 2100, as 2000 was a + adjust for the lack of leap year for the year 2100, as 2000 was a leap year (divisable by 400) */ - if(year >= 120) /* the year 2100 */ + if (year >= 120) /* the year 2100 */ days = days - 1; /* do not count leap year for the year 2100 */ /* adjust for leap year where we are still before leap day */ - if(year != 120) + if (year != 120) days -= ((year & 0x03) == 0) && (month < 2 ? 1 : 0); - sec += 24 * 60 * 60 * days; + sec += 24 * 60 * 60 * days; ts.tv_sec = sec; @@ -986,4 +927,4 @@ struct timespec cnvrtDosUnixTm(__u16 date, __u16 time) ts.tv_nsec = 0; return ts; -} +} diff --git a/fs/cifs/nterr.c b/fs/cifs/nterr.c index 4da50cd34469..819fd994b121 100644 --- a/fs/cifs/nterr.c +++ b/fs/cifs/nterr.c @@ -1,19 +1,19 @@ -/* +/* * Unix SMB/Netbios implementation. * Version 1.9. * RPC Pipe client / server routines * Copyright (C) Luke Kenneth Casson Leighton 1997-2001. - * + * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * + * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. diff --git a/fs/cifs/nterr.h b/fs/cifs/nterr.h index d2fb06c97dfa..588abbb9d08c 100644 --- a/fs/cifs/nterr.h +++ b/fs/cifs/nterr.h @@ -1,4 +1,4 @@ -/* +/* Unix SMB/Netbios implementation. Version 1.9. NT error code constants @@ -6,17 +6,17 @@ Copyright (C) John H Terpstra 1996-2000 Copyright (C) Luke Kenneth Casson Leighton 1996-2000 Copyright (C) Paul Ashton 1998-2000 - + This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. - + This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - + You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. diff --git a/fs/cifs/ntlmssp.h b/fs/cifs/ntlmssp.h index d39b712a11c5..7170a9b70f1e 100644 --- a/fs/cifs/ntlmssp.h +++ b/fs/cifs/ntlmssp.h @@ -1,7 +1,7 @@ /* * fs/cifs/ntlmssp.h * - * Copyright (c) International Business Machines Corp., 2002,2006 + * Copyright (c) International Business Machines Corp., 2002,2007 * Author(s): Steve French (sfrench@us.ibm.com) * * This library is free software; you can redistribute it and/or modify @@ -16,7 +16,7 @@ * * You should have received a copy of the GNU Lesser General Public License * along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #define NTLMSSP_SIGNATURE "NTLMSSP" @@ -27,18 +27,18 @@ #define UnknownMessage cpu_to_le32(8) /* Negotiate Flags */ -#define NTLMSSP_NEGOTIATE_UNICODE 0x01 // Text strings are in unicode -#define NTLMSSP_NEGOTIATE_OEM 0x02 // Text strings are in OEM -#define NTLMSSP_REQUEST_TARGET 0x04 // Server return its auth realm -#define NTLMSSP_NEGOTIATE_SIGN 0x0010 // Request signature capability -#define NTLMSSP_NEGOTIATE_SEAL 0x0020 // Request confidentiality +#define NTLMSSP_NEGOTIATE_UNICODE 0x01 /* Text strings are in unicode */ +#define NTLMSSP_NEGOTIATE_OEM 0x02 /* Text strings are in OEM */ +#define NTLMSSP_REQUEST_TARGET 0x04 /* Server return its auth realm */ +#define NTLMSSP_NEGOTIATE_SIGN 0x0010 /* Request signature capability */ +#define NTLMSSP_NEGOTIATE_SEAL 0x0020 /* Request confidentiality */ #define NTLMSSP_NEGOTIATE_DGRAM 0x0040 -#define NTLMSSP_NEGOTIATE_LM_KEY 0x0080 // Use LM session key for sign/seal -#define NTLMSSP_NEGOTIATE_NTLM 0x0200 // NTLM authentication +#define NTLMSSP_NEGOTIATE_LM_KEY 0x0080 /* Sign/seal use LM session key */ +#define NTLMSSP_NEGOTIATE_NTLM 0x0200 /* NTLM authentication */ #define NTLMSSP_NEGOTIATE_DOMAIN_SUPPLIED 0x1000 #define NTLMSSP_NEGOTIATE_WORKSTATION_SUPPLIED 0x2000 -#define NTLMSSP_NEGOTIATE_LOCAL_CALL 0x4000 // client/server on same machine -#define NTLMSSP_NEGOTIATE_ALWAYS_SIGN 0x8000 // Sign for all security levels +#define NTLMSSP_NEGOTIATE_LOCAL_CALL 0x4000 /* client/server on same machine */ +#define NTLMSSP_NEGOTIATE_ALWAYS_SIGN 0x8000 /* Sign for all security levels */ #define NTLMSSP_TARGET_TYPE_DOMAIN 0x10000 #define NTLMSSP_TARGET_TYPE_SERVER 0x20000 #define NTLMSSP_TARGET_TYPE_SHARE 0x40000 diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index c08bda9fcac6..916df9431336 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -2,7 +2,7 @@ * fs/cifs/readdir.c * * Directory search handling - * + * * Copyright (C) International Business Machines Corp., 2004, 2007 * Author(s): Steve French (sfrench@us.ibm.com) * @@ -34,24 +34,23 @@ #ifdef CONFIG_CIFS_DEBUG2 static void dump_cifs_file_struct(struct file *file, char *label) { - struct cifsFileInfo * cf; + struct cifsFileInfo *cf; if (file) { cf = file->private_data; if (cf == NULL) { - cFYI(1,("empty cifs private file data")); + cFYI(1, ("empty cifs private file data")); return; } if (cf->invalidHandle) { - cFYI(1,("invalid handle")); + cFYI(1, ("invalid handle")); } if (cf->srch_inf.endOfSearch) { - cFYI(1,("end of search")); + cFYI(1, ("end of search")); } if (cf->srch_inf.emptyDir) { - cFYI(1,("empty dir")); + cFYI(1, ("empty dir")); } - } } #endif /* DEBUG2 */ @@ -73,7 +72,8 @@ static int construct_dentry(struct qstr *qstring, struct file *file, qstring->hash = full_name_hash(qstring->name, qstring->len); tmp_dentry = d_lookup(file->f_path.dentry, qstring); if (tmp_dentry) { - cFYI(0, ("existing dentry with inode 0x%p", tmp_dentry->d_inode)); + cFYI(0, ("existing dentry with inode 0x%p", + tmp_dentry->d_inode)); *ptmp_inode = tmp_dentry->d_inode; /* BB overwrite old name? i.e. tmp_dentry->d_name and tmp_dentry->d_name.len??*/ if (*ptmp_inode == NULL) { @@ -87,7 +87,7 @@ static int construct_dentry(struct qstr *qstring, struct file *file, } else { tmp_dentry = d_alloc(file->f_path.dentry, qstring); if (tmp_dentry == NULL) { - cERROR(1,("Failed allocating dentry")); + cERROR(1, ("Failed allocating dentry")); *ptmp_inode = NULL; return rc; } @@ -100,7 +100,7 @@ static int construct_dentry(struct qstr *qstring, struct file *file, if (*ptmp_inode == NULL) return rc; if (file->f_path.dentry->d_sb->s_flags & MS_NOATIME) - (*ptmp_inode)->i_flags |= S_NOATIME | S_NOCMTIME; + (*ptmp_inode)->i_flags |= S_NOATIME | S_NOCMTIME; rc = 2; } @@ -109,7 +109,7 @@ static int construct_dentry(struct qstr *qstring, struct file *file, return rc; } -static void AdjustForTZ(struct cifsTconInfo * tcon, struct inode * inode) +static void AdjustForTZ(struct cifsTconInfo *tcon, struct inode *inode) { if ((tcon) && (tcon->ses) && (tcon->ses->server)) { inode->i_ctime.tv_sec += tcon->ses->server->timeAdj; @@ -121,7 +121,7 @@ static void AdjustForTZ(struct cifsTconInfo * tcon, struct inode * inode) static void fill_in_inode(struct inode *tmp_inode, int new_buf_type, - char * buf, int *pobject_type, int isNewInode) + char *buf, int *pobject_type, int isNewInode) { loff_t local_size; struct timespec local_mtime; @@ -150,7 +150,7 @@ static void fill_in_inode(struct inode *tmp_inode, int new_buf_type, cifs_NTtimeToUnix(le64_to_cpu(pfindData->ChangeTime)); } else { /* legacy, OS2 and DOS style */ /* struct timespec ts;*/ - FIND_FILE_STANDARD_INFO * pfindData = + FIND_FILE_STANDARD_INFO * pfindData = (FIND_FILE_STANDARD_INFO *)buf; tmp_inode->i_mtime = cnvrtDosUnixTm( @@ -175,7 +175,7 @@ static void fill_in_inode(struct inode *tmp_inode, int new_buf_type, /* treat dos attribute of read-only as read-only mode bit e.g. 555? */ /* 2767 perms - indicate mandatory locking */ - /* BB fill in uid and gid here? with help from winbind? + /* BB fill in uid and gid here? with help from winbind? or retrieve from NTFS stream extended attribute */ if (atomic_read(&cifsInfo->inUse) == 0) { tmp_inode->i_uid = cifs_sb->mnt_uid; @@ -196,7 +196,7 @@ static void fill_in_inode(struct inode *tmp_inode, int new_buf_type, tmp_inode->i_mode = cifs_sb->mnt_dir_mode; } tmp_inode->i_mode |= S_IFDIR; - } else if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) && + } else if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) && (attr & ATTR_SYSTEM)) { if (end_of_file == 0) { *pobject_type = DT_FIFO; @@ -206,13 +206,13 @@ static void fill_in_inode(struct inode *tmp_inode, int new_buf_type, inode as needing revalidate and get the real type (blk vs chr vs. symlink) later ie in lookup */ *pobject_type = DT_REG; - tmp_inode->i_mode |= S_IFREG; - cifsInfo->time = 0; + tmp_inode->i_mode |= S_IFREG; + cifsInfo->time = 0; } /* we no longer mark these because we could not follow them */ /* } else if (attr & ATTR_REPARSE) { - *pobject_type = DT_LNK; - tmp_inode->i_mode |= S_IFLNK; */ + *pobject_type = DT_LNK; + tmp_inode->i_mode |= S_IFLNK; */ } else { *pobject_type = DT_REG; tmp_inode->i_mode |= S_IFREG; @@ -220,7 +220,7 @@ static void fill_in_inode(struct inode *tmp_inode, int new_buf_type, tmp_inode->i_mode &= ~(S_IWUGO); else if ((tmp_inode->i_mode & S_IWUGO) == 0) /* the ATTR_READONLY flag may have been changed on */ - /* server -- set any w bits allowed by mnt_file_mode */ + /* server -- set any w bits allowed by mnt_file_mode */ tmp_inode->i_mode |= (S_IWUGO & cifs_sb->mnt_file_mode); } /* could add code here - to validate if device or weird share type? */ @@ -231,7 +231,7 @@ static void fill_in_inode(struct inode *tmp_inode, int new_buf_type, spin_lock(&tmp_inode->i_lock); if (is_size_safe_to_change(cifsInfo, end_of_file)) { - /* can not safely change the file size here if the + /* can not safely change the file size here if the client is writing to it due to potential races */ i_size_write(tmp_inode, end_of_file); @@ -254,7 +254,6 @@ static void fill_in_inode(struct inode *tmp_inode, int new_buf_type, tmp_inode->i_fop = &cifs_file_direct_nobrl_ops; else tmp_inode->i_fop = &cifs_file_direct_ops; - } else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL) tmp_inode->i_fop = &cifs_file_nobrl_ops; else @@ -322,8 +321,8 @@ static void unix_fill_in_inode(struct inode *tmp_inode, tmp_inode->i_mode = le64_to_cpu(pfindData->Permissions); /* since we set the inode type below we need to mask off type - to avoid strange results if bits above were corrupt */ - tmp_inode->i_mode &= ~S_IFMT; + to avoid strange results if bits above were corrupt */ + tmp_inode->i_mode &= ~S_IFMT; if (type == UNIX_FILE) { *pobject_type = DT_REG; tmp_inode->i_mode |= S_IFREG; @@ -353,7 +352,7 @@ static void unix_fill_in_inode(struct inode *tmp_inode, /* safest to just call it a file */ *pobject_type = DT_REG; tmp_inode->i_mode |= S_IFREG; - cFYI(1,("unknown inode type %d",type)); + cFYI(1, ("unknown inode type %d", type)); } if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID) @@ -368,7 +367,7 @@ static void unix_fill_in_inode(struct inode *tmp_inode, spin_lock(&tmp_inode->i_lock); if (is_size_safe_to_change(cifsInfo, end_of_file)) { - /* can not safely change the file size here if the + /* can not safely change the file size here if the client is writing to it due to potential races */ i_size_write(tmp_inode, end_of_file); @@ -393,15 +392,16 @@ static void unix_fill_in_inode(struct inode *tmp_inode, tmp_inode->i_fop = &cifs_file_ops; if ((cifs_sb->tcon) && (cifs_sb->tcon->ses) && - (cifs_sb->tcon->ses->server->maxBuf < + (cifs_sb->tcon->ses->server->maxBuf < PAGE_CACHE_SIZE + MAX_CIFS_HDR_SIZE)) tmp_inode->i_data.a_ops = &cifs_addr_ops_smallbuf; else tmp_inode->i_data.a_ops = &cifs_addr_ops; if (isNewInode) - return; /* No sense invalidating pages for new inode since we - have not started caching readahead file data yet */ + return; /* No sense invalidating pages for new inode + since we have not started caching readahead + file data for it yet */ if (timespec_equal(&tmp_inode->i_mtime, &local_mtime) && (local_size == tmp_inode->i_size)) { @@ -420,7 +420,7 @@ static void unix_fill_in_inode(struct inode *tmp_inode, tmp_inode->i_op = &cifs_symlink_inode_ops; /* tmp_inode->i_fop = *//* do not need to set to anything */ } else { - cFYI(1, ("Special inode")); + cFYI(1, ("Special inode")); init_special_inode(tmp_inode, tmp_inode->i_mode, tmp_inode->i_rdev); } @@ -429,14 +429,14 @@ static void unix_fill_in_inode(struct inode *tmp_inode, static int initiate_cifs_search(const int xid, struct file *file) { int rc = 0; - char * full_path; - struct cifsFileInfo * cifsFile; + char *full_path; + struct cifsFileInfo *cifsFile; struct cifs_sb_info *cifs_sb; struct cifsTconInfo *pTcon; if (file->private_data == NULL) { - file->private_data = - kzalloc(sizeof(struct cifsFileInfo),GFP_KERNEL); + file->private_data = + kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL); } if (file->private_data == NULL) @@ -463,9 +463,11 @@ static int initiate_cifs_search(const int xid, struct file *file) ffirst_retry: /* test for Unix extensions */ - if (pTcon->ses->capabilities & CAP_UNIX) { + /* but now check for them on the share/mount not on the SMB session */ +/* if (pTcon->ses->capabilities & CAP_UNIX) { */ + if (pTcon->unix_ext) { cifsFile->srch_inf.info_level = SMB_FIND_FILE_UNIX; - } else if ((pTcon->ses->capabilities & + } else if ((pTcon->ses->capabilities & (CAP_NT_SMBS | CAP_NT_FIND)) == 0) { cifsFile->srch_inf.info_level = SMB_FIND_FILE_INFO_STANDARD; } else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) { @@ -474,13 +476,13 @@ ffirst_retry: cifsFile->srch_inf.info_level = SMB_FIND_FILE_DIRECTORY_INFO; } - rc = CIFSFindFirst(xid, pTcon,full_path,cifs_sb->local_nls, + rc = CIFSFindFirst(xid, pTcon, full_path, cifs_sb->local_nls, &cifsFile->netfid, &cifsFile->srch_inf, - cifs_sb->mnt_cifs_flags & + cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR, CIFS_DIR_SEP(cifs_sb)); if (rc == 0) cifsFile->invalidHandle = FALSE; - if ((rc == -EOPNOTSUPP) && + if ((rc == -EOPNOTSUPP) && (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)) { cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_SERVER_INUM; goto ffirst_retry; @@ -495,17 +497,17 @@ static int cifs_unicode_bytelen(char *str) int len; __le16 * ustr = (__le16 *)str; - for(len=0;len <= PATH_MAX;len++) { + for (len = 0; len <= PATH_MAX; len++) { if (ustr[len] == 0) return len << 1; } - cFYI(1,("Unicode string longer than PATH_MAX found")); + cFYI(1, ("Unicode string longer than PATH_MAX found")); return len << 1; } static char *nxt_dir_entry(char *old_entry, char *end_of_smb, int level) { - char * new_entry; + char *new_entry; FILE_DIRECTORY_INFO * pDirInfo = (FILE_DIRECTORY_INFO *)old_entry; if (level == SMB_FIND_FILE_INFO_STANDARD) { @@ -516,21 +518,21 @@ static char *nxt_dir_entry(char *old_entry, char *end_of_smb, int level) pfData->FileNameLength; } else new_entry = old_entry + le32_to_cpu(pDirInfo->NextEntryOffset); - cFYI(1,("new entry %p old entry %p",new_entry,old_entry)); + cFYI(1, ("new entry %p old entry %p", new_entry, old_entry)); /* validate that new_entry is not past end of SMB */ if (new_entry >= end_of_smb) { cERROR(1, ("search entry %p began after end of SMB %p old entry %p", - new_entry, end_of_smb, old_entry)); + new_entry, end_of_smb, old_entry)); return NULL; } else if (((level == SMB_FIND_FILE_INFO_STANDARD) && - (new_entry + sizeof(FIND_FILE_STANDARD_INFO) > end_of_smb)) || - ((level != SMB_FIND_FILE_INFO_STANDARD) && + (new_entry + sizeof(FIND_FILE_STANDARD_INFO) > end_of_smb)) + || ((level != SMB_FIND_FILE_INFO_STANDARD) && (new_entry + sizeof(FILE_DIRECTORY_INFO) > end_of_smb))) { - cERROR(1,("search entry %p extends after end of SMB %p", + cERROR(1, ("search entry %p extends after end of SMB %p", new_entry, end_of_smb)); return NULL; - } else + } else return new_entry; } @@ -541,8 +543,8 @@ static char *nxt_dir_entry(char *old_entry, char *end_of_smb, int level) static int cifs_entry_is_dot(char *current_entry, struct cifsFileInfo *cfile) { int rc = 0; - char * filename = NULL; - int len = 0; + char *filename = NULL; + int len = 0; if (cfile->srch_inf.info_level == SMB_FIND_FILE_UNIX) { FILE_UNIX_INFO * pFindData = (FILE_UNIX_INFO *)current_entry; @@ -554,25 +556,25 @@ static int cifs_entry_is_dot(char *current_entry, struct cifsFileInfo *cfile) len = strnlen(filename, 5); } } else if (cfile->srch_inf.info_level == SMB_FIND_FILE_DIRECTORY_INFO) { - FILE_DIRECTORY_INFO * pFindData = + FILE_DIRECTORY_INFO * pFindData = (FILE_DIRECTORY_INFO *)current_entry; filename = &pFindData->FileName[0]; len = le32_to_cpu(pFindData->FileNameLength); - } else if (cfile->srch_inf.info_level == + } else if (cfile->srch_inf.info_level == SMB_FIND_FILE_FULL_DIRECTORY_INFO) { - FILE_FULL_DIRECTORY_INFO * pFindData = + FILE_FULL_DIRECTORY_INFO * pFindData = (FILE_FULL_DIRECTORY_INFO *)current_entry; filename = &pFindData->FileName[0]; len = le32_to_cpu(pFindData->FileNameLength); } else if (cfile->srch_inf.info_level == SMB_FIND_FILE_ID_FULL_DIR_INFO) { - SEARCH_ID_FULL_DIR_INFO * pFindData = + SEARCH_ID_FULL_DIR_INFO * pFindData = (SEARCH_ID_FULL_DIR_INFO *)current_entry; filename = &pFindData->FileName[0]; len = le32_to_cpu(pFindData->FileNameLength); - } else if (cfile->srch_inf.info_level == + } else if (cfile->srch_inf.info_level == SMB_FIND_FILE_BOTH_DIRECTORY_INFO) { - FILE_BOTH_DIRECTORY_INFO * pFindData = + FILE_BOTH_DIRECTORY_INFO * pFindData = (FILE_BOTH_DIRECTORY_INFO *)current_entry; filename = &pFindData->FileName[0]; len = le32_to_cpu(pFindData->FileNameLength); @@ -582,7 +584,8 @@ static int cifs_entry_is_dot(char *current_entry, struct cifsFileInfo *cfile) filename = &pFindData->FileName[0]; len = pFindData->FileNameLength; } else { - cFYI(1,("Unknown findfirst level %d",cfile->srch_inf.info_level)); + cFYI(1, ("Unknown findfirst level %d", + cfile->srch_inf.info_level)); } if (filename) { @@ -595,15 +598,15 @@ static int cifs_entry_is_dot(char *current_entry, struct cifsFileInfo *cfile) } else if (len == 4) { /* check for .. */ if ((ufilename[0] == UNICODE_DOT) - &&(ufilename[1] == UNICODE_DOT)) + && (ufilename[1] == UNICODE_DOT)) rc = 2; } } else /* ASCII */ { if (len == 1) { - if (filename[0] == '.') + if (filename[0] == '.') rc = 1; } else if (len == 2) { - if((filename[0] == '.') && (filename[1] == '.')) + if ((filename[0] == '.') && (filename[1] == '.')) rc = 2; } } @@ -614,7 +617,7 @@ static int cifs_entry_is_dot(char *current_entry, struct cifsFileInfo *cfile) /* Check if directory that we are searching has changed so we can decide whether we can use the cached search results from the previous search */ -static int is_dir_changed(struct file * file) +static int is_dir_changed(struct file *file) { struct inode *inode = file->f_path.dentry->d_inode; struct cifsInodeInfo *cifsInfo = CIFS_I(inode); @@ -633,22 +636,22 @@ static int is_dir_changed(struct file * file) /* We start counting in the buffer with entry 2 and increment for every entry (do not increment for . or .. entry) */ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon, - struct file *file, char **ppCurrentEntry, int *num_to_ret) + struct file *file, char **ppCurrentEntry, int *num_to_ret) { int rc = 0; int pos_in_buf = 0; loff_t first_entry_in_buffer; loff_t index_to_find = file->f_pos; - struct cifsFileInfo * cifsFile = file->private_data; + struct cifsFileInfo *cifsFile = file->private_data; /* check if index in the buffer */ - - if ((cifsFile == NULL) || (ppCurrentEntry == NULL) || + + if ((cifsFile == NULL) || (ppCurrentEntry == NULL) || (num_to_ret == NULL)) return -ENOENT; - + *ppCurrentEntry = NULL; - first_entry_in_buffer = - cifsFile->srch_inf.index_of_last_entry - + first_entry_in_buffer = + cifsFile->srch_inf.index_of_last_entry - cifsFile->srch_inf.entries_in_buffer; /* if first entry in buf is zero then is first buffer @@ -660,17 +663,17 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon, #ifdef CONFIG_CIFS_DEBUG2 dump_cifs_file_struct(file, "In fce "); #endif - if (((index_to_find < cifsFile->srch_inf.index_of_last_entry) && - is_dir_changed(file)) || + if (((index_to_find < cifsFile->srch_inf.index_of_last_entry) && + is_dir_changed(file)) || (index_to_find < first_entry_in_buffer)) { /* close and restart search */ - cFYI(1,("search backing up - close and restart search")); + cFYI(1, ("search backing up - close and restart search")); cifsFile->invalidHandle = TRUE; CIFSFindClose(xid, pTcon, cifsFile->netfid); kfree(cifsFile->search_resume_name); cifsFile->search_resume_name = NULL; if (cifsFile->srch_inf.ntwrk_buf_start) { - cFYI(1,("freeing SMB ff cache buf on search rewind")); + cFYI(1, ("freeing SMB ff cache buf on search rewind")); if (cifsFile->srch_inf.smallBuf) cifs_small_buf_release(cifsFile->srch_inf. ntwrk_buf_start); @@ -678,17 +681,18 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon, cifs_buf_release(cifsFile->srch_inf. ntwrk_buf_start); } - rc = initiate_cifs_search(xid,file); + rc = initiate_cifs_search(xid, file); if (rc) { - cFYI(1,("error %d reinitiating a search on rewind",rc)); + cFYI(1, ("error %d reinitiating a search on rewind", + rc)); return rc; } } - while((index_to_find >= cifsFile->srch_inf.index_of_last_entry) && - (rc == 0) && (cifsFile->srch_inf.endOfSearch == FALSE)){ - cFYI(1,("calling findnext2")); - rc = CIFSFindNext(xid,pTcon,cifsFile->netfid, + while ((index_to_find >= cifsFile->srch_inf.index_of_last_entry) && + (rc == 0) && (cifsFile->srch_inf.endOfSearch == FALSE)) { + cFYI(1, ("calling findnext2")); + rc = CIFSFindNext(xid, pTcon, cifsFile->netfid, &cifsFile->srch_inf); if (rc) return -ENOENT; @@ -697,8 +701,8 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon, /* we found the buffer that contains the entry */ /* scan and find it */ int i; - char * current_entry; - char * end_of_smb = cifsFile->srch_inf.ntwrk_buf_start + + char *current_entry; + char *end_of_smb = cifsFile->srch_inf.ntwrk_buf_start + smbCalcSize((struct smb_hdr *) cifsFile->srch_inf.ntwrk_buf_start); @@ -706,28 +710,28 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon, first_entry_in_buffer = cifsFile->srch_inf.index_of_last_entry - cifsFile->srch_inf.entries_in_buffer; pos_in_buf = index_to_find - first_entry_in_buffer; - cFYI(1,("found entry - pos_in_buf %d",pos_in_buf)); + cFYI(1, ("found entry - pos_in_buf %d", pos_in_buf)); - for(i=0;(i<(pos_in_buf)) && (current_entry != NULL);i++) { + for (i=0; (i < (pos_in_buf)) && (current_entry != NULL); i++) { /* go entry by entry figuring out which is first */ - current_entry = nxt_dir_entry(current_entry,end_of_smb, + current_entry = nxt_dir_entry(current_entry, end_of_smb, cifsFile->srch_inf.info_level); } - if((current_entry == NULL) && (i < pos_in_buf)) { + if ((current_entry == NULL) && (i < pos_in_buf)) { /* BB fixme - check if we should flag this error */ - cERROR(1,("reached end of buf searching for pos in buf" + cERROR(1, ("reached end of buf searching for pos in buf" " %d index to find %lld rc %d", - pos_in_buf,index_to_find,rc)); + pos_in_buf, index_to_find, rc)); } rc = 0; *ppCurrentEntry = current_entry; } else { - cFYI(1,("index not in buffer - could not findnext into it")); + cFYI(1, ("index not in buffer - could not findnext into it")); return 0; } - if(pos_in_buf >= cifsFile->srch_inf.entries_in_buffer) { - cFYI(1,("can not return entries pos_in_buf beyond last entry")); + if (pos_in_buf >= cifsFile->srch_inf.entries_in_buffer) { + cFYI(1, ("can not return entries pos_in_buf beyond last")); *num_to_ret = 0; } else *num_to_ret = cifsFile->srch_inf.entries_in_buffer - pos_in_buf; @@ -738,81 +742,81 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon, /* inode num, inode type and filename returned */ static int cifs_get_name_from_search_buf(struct qstr *pqst, char *current_entry, __u16 level, unsigned int unicode, - struct cifs_sb_info * cifs_sb, int max_len, ino_t *pinum) + struct cifs_sb_info *cifs_sb, int max_len, ino_t *pinum) { int rc = 0; unsigned int len = 0; - char * filename; - struct nls_table * nlt = cifs_sb->local_nls; + char *filename; + struct nls_table *nlt = cifs_sb->local_nls; *pinum = 0; - if(level == SMB_FIND_FILE_UNIX) { - FILE_UNIX_INFO * pFindData = (FILE_UNIX_INFO *)current_entry; + if (level == SMB_FIND_FILE_UNIX) { + FILE_UNIX_INFO *pFindData = (FILE_UNIX_INFO *)current_entry; filename = &pFindData->FileName[0]; - if(unicode) { + if (unicode) { len = cifs_unicode_bytelen(filename); } else { /* BB should we make this strnlen of PATH_MAX? */ len = strnlen(filename, PATH_MAX); } - /* BB fixme - hash low and high 32 bits if not 64 bit arch BB fixme */ - if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) + /* BB fixme - hash low and high 32 bits if not 64 bit arch BB */ + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) *pinum = pFindData->UniqueId; - } else if(level == SMB_FIND_FILE_DIRECTORY_INFO) { - FILE_DIRECTORY_INFO * pFindData = + } else if (level == SMB_FIND_FILE_DIRECTORY_INFO) { + FILE_DIRECTORY_INFO *pFindData = (FILE_DIRECTORY_INFO *)current_entry; filename = &pFindData->FileName[0]; len = le32_to_cpu(pFindData->FileNameLength); - } else if(level == SMB_FIND_FILE_FULL_DIRECTORY_INFO) { - FILE_FULL_DIRECTORY_INFO * pFindData = + } else if (level == SMB_FIND_FILE_FULL_DIRECTORY_INFO) { + FILE_FULL_DIRECTORY_INFO *pFindData = (FILE_FULL_DIRECTORY_INFO *)current_entry; filename = &pFindData->FileName[0]; len = le32_to_cpu(pFindData->FileNameLength); - } else if(level == SMB_FIND_FILE_ID_FULL_DIR_INFO) { - SEARCH_ID_FULL_DIR_INFO * pFindData = + } else if (level == SMB_FIND_FILE_ID_FULL_DIR_INFO) { + SEARCH_ID_FULL_DIR_INFO *pFindData = (SEARCH_ID_FULL_DIR_INFO *)current_entry; filename = &pFindData->FileName[0]; len = le32_to_cpu(pFindData->FileNameLength); *pinum = pFindData->UniqueId; - } else if(level == SMB_FIND_FILE_BOTH_DIRECTORY_INFO) { - FILE_BOTH_DIRECTORY_INFO * pFindData = + } else if (level == SMB_FIND_FILE_BOTH_DIRECTORY_INFO) { + FILE_BOTH_DIRECTORY_INFO *pFindData = (FILE_BOTH_DIRECTORY_INFO *)current_entry; filename = &pFindData->FileName[0]; len = le32_to_cpu(pFindData->FileNameLength); - } else if(level == SMB_FIND_FILE_INFO_STANDARD) { + } else if (level == SMB_FIND_FILE_INFO_STANDARD) { FIND_FILE_STANDARD_INFO * pFindData = (FIND_FILE_STANDARD_INFO *)current_entry; filename = &pFindData->FileName[0]; /* one byte length, no name conversion */ len = (unsigned int)pFindData->FileNameLength; } else { - cFYI(1,("Unknown findfirst level %d",level)); + cFYI(1, ("Unknown findfirst level %d", level)); return -EINVAL; } - if(len > max_len) { - cERROR(1,("bad search response length %d past smb end", len)); + if (len > max_len) { + cERROR(1, ("bad search response length %d past smb end", len)); return -EINVAL; } - if(unicode) { + if (unicode) { /* BB fixme - test with long names */ /* Note converted filename can be longer than in unicode */ - if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR) + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR) pqst->len = cifs_convertUCSpath((char *)pqst->name, (__le16 *)filename, len/2, nlt); else pqst->len = cifs_strfromUCS_le((char *)pqst->name, - (__le16 *)filename,len/2,nlt); + (__le16 *)filename, len/2, nlt); } else { pqst->name = filename; pqst->len = len; } - pqst->hash = full_name_hash(pqst->name,pqst->len); -/* cFYI(1,("filldir on %s",pqst->name)); */ + pqst->hash = full_name_hash(pqst->name, pqst->len); +/* cFYI(1, ("filldir on %s",pqst->name)); */ return rc; } @@ -821,49 +825,50 @@ static int cifs_filldir(char *pfindEntry, struct file *file, { int rc = 0; struct qstr qstring; - struct cifsFileInfo * pCifsF; + struct cifsFileInfo *pCifsF; unsigned obj_type; ino_t inum; - struct cifs_sb_info * cifs_sb; + struct cifs_sb_info *cifs_sb; struct inode *tmp_inode; struct dentry *tmp_dentry; /* get filename and len into qstring */ /* get dentry */ /* decide whether to create and populate ionde */ - if((direntry == NULL) || (file == NULL)) + if ((direntry == NULL) || (file == NULL)) return -EINVAL; pCifsF = file->private_data; - - if((scratch_buf == NULL) || (pfindEntry == NULL) || (pCifsF == NULL)) + + if ((scratch_buf == NULL) || (pfindEntry == NULL) || (pCifsF == NULL)) return -ENOENT; - rc = cifs_entry_is_dot(pfindEntry,pCifsF); + rc = cifs_entry_is_dot(pfindEntry, pCifsF); /* skip . and .. since we added them first */ - if(rc != 0) + if (rc != 0) return 0; cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); qstring.name = scratch_buf; - rc = cifs_get_name_from_search_buf(&qstring,pfindEntry, + rc = cifs_get_name_from_search_buf(&qstring, pfindEntry, pCifsF->srch_inf.info_level, - pCifsF->srch_inf.unicode,cifs_sb, + pCifsF->srch_inf.unicode, cifs_sb, max_len, &inum /* returned */); - if(rc) + if (rc) return rc; - rc = construct_dentry(&qstring,file,&tmp_inode, &tmp_dentry); - if((tmp_inode == NULL) || (tmp_dentry == NULL)) + rc = construct_dentry(&qstring, file, &tmp_inode, &tmp_dentry); + if ((tmp_inode == NULL) || (tmp_dentry == NULL)) return -ENOMEM; - if(rc) { + if (rc) { /* inode created, we need to hash it with right inode number */ - if(inum != 0) { - /* BB fixme - hash the 2 32 quantities bits together if necessary BB */ + if (inum != 0) { + /* BB fixme - hash the 2 32 quantities bits together if + * necessary BB */ tmp_inode->i_ino = inum; } insert_inode_hash(tmp_inode); @@ -872,27 +877,27 @@ static int cifs_filldir(char *pfindEntry, struct file *file, /* we pass in rc below, indicating whether it is a new inode, so we can figure out whether to invalidate the inode cached data if the file has changed */ - if(pCifsF->srch_inf.info_level == SMB_FIND_FILE_UNIX) + if (pCifsF->srch_inf.info_level == SMB_FIND_FILE_UNIX) unix_fill_in_inode(tmp_inode, (FILE_UNIX_INFO *)pfindEntry, &obj_type, rc); - else if(pCifsF->srch_inf.info_level == SMB_FIND_FILE_INFO_STANDARD) + else if (pCifsF->srch_inf.info_level == SMB_FIND_FILE_INFO_STANDARD) fill_in_inode(tmp_inode, 0 /* old level 1 buffer type */, pfindEntry, &obj_type, rc); else fill_in_inode(tmp_inode, 1 /* NT */, pfindEntry, &obj_type, rc); - if(rc) /* new inode - needs to be tied to dentry */ { + if (rc) /* new inode - needs to be tied to dentry */ { d_instantiate(tmp_dentry, tmp_inode); - if(rc == 2) + if (rc == 2) d_rehash(tmp_dentry); } - - - rc = filldir(direntry,qstring.name,qstring.len,file->f_pos, - tmp_inode->i_ino,obj_type); - if(rc) { - cFYI(1,("filldir rc = %d",rc)); + + + rc = filldir(direntry, qstring.name, qstring.len, file->f_pos, + tmp_inode->i_ino, obj_type); + if (rc) { + cFYI(1, ("filldir rc = %d", rc)); /* we can not return filldir errors to the caller since they are "normal" when the stat blocksize is too small - we return remapped error instead */ @@ -909,57 +914,57 @@ static int cifs_save_resume_key(const char *current_entry, int rc = 0; unsigned int len = 0; __u16 level; - char * filename; + char *filename; - if((cifsFile == NULL) || (current_entry == NULL)) + if ((cifsFile == NULL) || (current_entry == NULL)) return -EINVAL; level = cifsFile->srch_inf.info_level; - if(level == SMB_FIND_FILE_UNIX) { + if (level == SMB_FIND_FILE_UNIX) { FILE_UNIX_INFO * pFindData = (FILE_UNIX_INFO *)current_entry; filename = &pFindData->FileName[0]; - if(cifsFile->srch_inf.unicode) { + if (cifsFile->srch_inf.unicode) { len = cifs_unicode_bytelen(filename); } else { /* BB should we make this strnlen of PATH_MAX? */ len = strnlen(filename, PATH_MAX); } cifsFile->srch_inf.resume_key = pFindData->ResumeKey; - } else if(level == SMB_FIND_FILE_DIRECTORY_INFO) { - FILE_DIRECTORY_INFO * pFindData = + } else if (level == SMB_FIND_FILE_DIRECTORY_INFO) { + FILE_DIRECTORY_INFO *pFindData = (FILE_DIRECTORY_INFO *)current_entry; filename = &pFindData->FileName[0]; len = le32_to_cpu(pFindData->FileNameLength); cifsFile->srch_inf.resume_key = pFindData->FileIndex; - } else if(level == SMB_FIND_FILE_FULL_DIRECTORY_INFO) { - FILE_FULL_DIRECTORY_INFO * pFindData = + } else if (level == SMB_FIND_FILE_FULL_DIRECTORY_INFO) { + FILE_FULL_DIRECTORY_INFO *pFindData = (FILE_FULL_DIRECTORY_INFO *)current_entry; filename = &pFindData->FileName[0]; len = le32_to_cpu(pFindData->FileNameLength); cifsFile->srch_inf.resume_key = pFindData->FileIndex; - } else if(level == SMB_FIND_FILE_ID_FULL_DIR_INFO) { - SEARCH_ID_FULL_DIR_INFO * pFindData = + } else if (level == SMB_FIND_FILE_ID_FULL_DIR_INFO) { + SEARCH_ID_FULL_DIR_INFO *pFindData = (SEARCH_ID_FULL_DIR_INFO *)current_entry; filename = &pFindData->FileName[0]; len = le32_to_cpu(pFindData->FileNameLength); cifsFile->srch_inf.resume_key = pFindData->FileIndex; - } else if(level == SMB_FIND_FILE_BOTH_DIRECTORY_INFO) { - FILE_BOTH_DIRECTORY_INFO * pFindData = + } else if (level == SMB_FIND_FILE_BOTH_DIRECTORY_INFO) { + FILE_BOTH_DIRECTORY_INFO *pFindData = (FILE_BOTH_DIRECTORY_INFO *)current_entry; filename = &pFindData->FileName[0]; len = le32_to_cpu(pFindData->FileNameLength); cifsFile->srch_inf.resume_key = pFindData->FileIndex; - } else if(level == SMB_FIND_FILE_INFO_STANDARD) { - FIND_FILE_STANDARD_INFO * pFindData = + } else if (level == SMB_FIND_FILE_INFO_STANDARD) { + FIND_FILE_STANDARD_INFO *pFindData = (FIND_FILE_STANDARD_INFO *)current_entry; filename = &pFindData->FileName[0]; /* one byte length, no name conversion */ len = (unsigned int)pFindData->FileNameLength; cifsFile->srch_inf.resume_key = pFindData->ResumeKey; } else { - cFYI(1,("Unknown findfirst level %d",level)); + cFYI(1, ("Unknown findfirst level %d", level)); return -EINVAL; } cifsFile->srch_inf.resume_name_len = len; @@ -970,21 +975,21 @@ static int cifs_save_resume_key(const char *current_entry, int cifs_readdir(struct file *file, void *direntry, filldir_t filldir) { int rc = 0; - int xid,i; + int xid, i; struct cifs_sb_info *cifs_sb; struct cifsTconInfo *pTcon; struct cifsFileInfo *cifsFile = NULL; - char * current_entry; + char *current_entry; int num_to_fill = 0; - char * tmp_buf = NULL; - char * end_of_smb; + char *tmp_buf = NULL; + char *end_of_smb; int max_len; xid = GetXid(); cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); pTcon = cifs_sb->tcon; - if(pTcon == NULL) + if (pTcon == NULL) return -EINVAL; switch ((int) file->f_pos) { @@ -1005,27 +1010,27 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir) } file->f_pos++; default: - /* 1) If search is active, - is in current search buffer? + /* 1) If search is active, + is in current search buffer? if it before then restart search if after then keep searching till find it */ - if(file->private_data == NULL) { - rc = initiate_cifs_search(xid,file); - cFYI(1,("initiate cifs search rc %d",rc)); - if(rc) { + if (file->private_data == NULL) { + rc = initiate_cifs_search(xid, file); + cFYI(1, ("initiate cifs search rc %d", rc)); + if (rc) { FreeXid(xid); return rc; } } - if(file->private_data == NULL) { + if (file->private_data == NULL) { rc = -EINVAL; FreeXid(xid); return rc; } cifsFile = file->private_data; if (cifsFile->srch_inf.endOfSearch) { - if(cifsFile->srch_inf.emptyDir) { + if (cifsFile->srch_inf.emptyDir) { cFYI(1, ("End of search, empty dir")); rc = 0; break; @@ -1033,23 +1038,23 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir) } /* else { cifsFile->invalidHandle = TRUE; CIFSFindClose(xid, pTcon, cifsFile->netfid); - } + } kfree(cifsFile->search_resume_name); cifsFile->search_resume_name = NULL; */ - rc = find_cifs_entry(xid,pTcon, file, - ¤t_entry,&num_to_fill); - if(rc) { - cFYI(1,("fce error %d",rc)); + rc = find_cifs_entry(xid, pTcon, file, + ¤t_entry, &num_to_fill); + if (rc) { + cFYI(1, ("fce error %d", rc)); goto rddir2_exit; } else if (current_entry != NULL) { - cFYI(1,("entry %lld found",file->f_pos)); + cFYI(1, ("entry %lld found", file->f_pos)); } else { - cFYI(1,("could not find entry")); + cFYI(1, ("could not find entry")); goto rddir2_exit; } - cFYI(1,("loop through %d times filling dir for net buf %p", - num_to_fill,cifsFile->srch_inf.ntwrk_buf_start)); + cFYI(1, ("loop through %d times filling dir for net buf %p", + num_to_fill, cifsFile->srch_inf.ntwrk_buf_start)); max_len = smbCalcSize((struct smb_hdr *) cifsFile->srch_inf.ntwrk_buf_start); end_of_smb = cifsFile->srch_inf.ntwrk_buf_start + max_len; @@ -1059,8 +1064,8 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir) such multibyte target UTF-8 characters. cifs_unicode.c, which actually does the conversion, has the same limit */ tmp_buf = kmalloc((2 * NAME_MAX) + 4, GFP_KERNEL); - for(i=0;(i<num_to_fill) && (rc == 0);i++) { - if(current_entry == NULL) { + for (i = 0; (i < num_to_fill) && (rc == 0); i++) { + if (current_entry == NULL) { /* evaluate whether this case is an error */ cERROR(1,("past end of SMB num to fill %d i %d", num_to_fill, i)); @@ -1070,20 +1075,20 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir) we want to check for that here? */ rc = cifs_filldir(current_entry, file, filldir, direntry, tmp_buf, max_len); - if(rc == -EOVERFLOW) { + if (rc == -EOVERFLOW) { rc = 0; break; } file->f_pos++; - if(file->f_pos == + if (file->f_pos == cifsFile->srch_inf.index_of_last_entry) { - cFYI(1,("last entry in buf at pos %lld %s", - file->f_pos,tmp_buf)); - cifs_save_resume_key(current_entry,cifsFile); + cFYI(1, ("last entry in buf at pos %lld %s", + file->f_pos, tmp_buf)); + cifs_save_resume_key(current_entry, cifsFile); break; - } else - current_entry = + } else + current_entry = nxt_dir_entry(current_entry, end_of_smb, cifsFile->srch_inf.info_level); } diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 758464630893..2ea027dda215 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -3,7 +3,7 @@ * * SMB/CIFS session setup handling routines * - * Copyright (c) International Business Machines Corp., 2006 + * Copyright (c) International Business Machines Corp., 2006, 2007 * Author(s): Steve French (sfrench@us.ibm.com) * * This library is free software; you can redistribute it and/or modify @@ -31,7 +31,7 @@ #include <linux/utsname.h> extern void SMBNTencrypt(unsigned char *passwd, unsigned char *c8, - unsigned char *p24); + unsigned char *p24); static __u32 cifs_ssetup_hdr(struct cifsSesInfo *ses, SESSION_SETUP_ANDX *pSMB) { @@ -45,13 +45,14 @@ static __u32 cifs_ssetup_hdr(struct cifsSesInfo *ses, SESSION_SETUP_ANDX *pSMB) /* Now no need to set SMBFLG_CASELESS or obsolete CANONICAL PATH */ - /* BB verify whether signing required on neg or just on auth frame + /* BB verify whether signing required on neg or just on auth frame (and NTLM case) */ capabilities = CAP_LARGE_FILES | CAP_NT_SMBS | CAP_LEVEL_II_OPLOCKS | CAP_LARGE_WRITE_X | CAP_LARGE_READ_X; - if(ses->server->secMode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) + if (ses->server->secMode & + (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) pSMB->req.hdr.Flags2 |= SMBFLG2_SECURITY_SIGNATURE; if (ses->capabilities & CAP_UNICODE) { @@ -74,10 +75,10 @@ static __u32 cifs_ssetup_hdr(struct cifsSesInfo *ses, SESSION_SETUP_ANDX *pSMB) return capabilities; } -static void unicode_ssetup_strings(char ** pbcc_area, struct cifsSesInfo *ses, - const struct nls_table * nls_cp) +static void unicode_ssetup_strings(char **pbcc_area, struct cifsSesInfo *ses, + const struct nls_table *nls_cp) { - char * bcc_ptr = *pbcc_area; + char *bcc_ptr = *pbcc_area; int bytes_ret = 0; /* BB FIXME add check that strings total less @@ -89,7 +90,7 @@ static void unicode_ssetup_strings(char ** pbcc_area, struct cifsSesInfo *ses, bcc_ptr++; } */ /* copy user */ - if(ses->userName == NULL) { + if (ses->userName == NULL) { /* null user mount */ *bcc_ptr = 0; *(bcc_ptr+1) = 0; @@ -100,14 +101,14 @@ static void unicode_ssetup_strings(char ** pbcc_area, struct cifsSesInfo *ses, bcc_ptr += 2 * bytes_ret; bcc_ptr += 2; /* account for null termination */ /* copy domain */ - if(ses->domainName == NULL) { + if (ses->domainName == NULL) { /* Sending null domain better than using a bogus domain name (as we did briefly in 2.6.18) since server will use its default */ *bcc_ptr = 0; *(bcc_ptr+1) = 0; bytes_ret = 0; } else - bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, ses->domainName, + bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, ses->domainName, 256, nls_cp); bcc_ptr += 2 * bytes_ret; bcc_ptr += 2; /* account for null terminator */ @@ -122,37 +123,37 @@ static void unicode_ssetup_strings(char ** pbcc_area, struct cifsSesInfo *ses, bcc_ptr += 2; /* trailing null */ bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, CIFS_NETWORK_OPSYS, - 32, nls_cp); + 32, nls_cp); bcc_ptr += 2 * bytes_ret; bcc_ptr += 2; /* trailing null */ *pbcc_area = bcc_ptr; } -static void ascii_ssetup_strings(char ** pbcc_area, struct cifsSesInfo *ses, - const struct nls_table * nls_cp) +static void ascii_ssetup_strings(char **pbcc_area, struct cifsSesInfo *ses, + const struct nls_table *nls_cp) { - char * bcc_ptr = *pbcc_area; + char *bcc_ptr = *pbcc_area; /* copy user */ /* BB what about null user mounts - check that we do this BB */ - /* copy user */ - if(ses->userName == NULL) { - /* BB what about null user mounts - check that we do this BB */ - } else { /* 300 should be long enough for any conceivable user name */ - strncpy(bcc_ptr, ses->userName, 300); - } + /* copy user */ + if (ses->userName == NULL) { + /* BB what about null user mounts - check that we do this BB */ + } else { /* 300 should be long enough for any conceivable user name */ + strncpy(bcc_ptr, ses->userName, 300); + } /* BB improve check for overflow */ - bcc_ptr += strnlen(ses->userName, 300); + bcc_ptr += strnlen(ses->userName, 300); *bcc_ptr = 0; - bcc_ptr++; /* account for null termination */ + bcc_ptr++; /* account for null termination */ - /* copy domain */ - - if(ses->domainName != NULL) { - strncpy(bcc_ptr, ses->domainName, 256); + /* copy domain */ + + if (ses->domainName != NULL) { + strncpy(bcc_ptr, ses->domainName, 256); bcc_ptr += strnlen(ses->domainName, 256); - } /* else we will send a null domain name + } /* else we will send a null domain name so the server will default to its own domain */ *bcc_ptr = 0; bcc_ptr++; @@ -167,19 +168,20 @@ static void ascii_ssetup_strings(char ** pbcc_area, struct cifsSesInfo *ses, strcpy(bcc_ptr, CIFS_NETWORK_OPSYS); bcc_ptr += strlen(CIFS_NETWORK_OPSYS) + 1; - *pbcc_area = bcc_ptr; + *pbcc_area = bcc_ptr; } -static int decode_unicode_ssetup(char ** pbcc_area, int bleft, struct cifsSesInfo *ses, - const struct nls_table * nls_cp) +static int decode_unicode_ssetup(char **pbcc_area, int bleft, + struct cifsSesInfo *ses, + const struct nls_table *nls_cp) { int rc = 0; int words_left, len; - char * data = *pbcc_area; + char *data = *pbcc_area; - cFYI(1,("bleft %d",bleft)); + cFYI(1, ("bleft %d", bleft)); /* SMB header is unaligned, so cifs servers word align start of @@ -189,7 +191,7 @@ static int decode_unicode_ssetup(char ** pbcc_area, int bleft, struct cifsSesInf their final Unicode string - in which case we now will not attempt to decode the byte of junk which follows it */ - + words_left = bleft / 2; /* save off server operating system */ @@ -198,14 +200,14 @@ static int decode_unicode_ssetup(char ** pbcc_area, int bleft, struct cifsSesInf /* We look for obvious messed up bcc or strings in response so we do not go off the end since (at least) WIN2K and Windows XP have a major bug in not null terminating last Unicode string in response */ - if(len >= words_left) + if (len >= words_left) return rc; - if(ses->serverOS) + if (ses->serverOS) kfree(ses->serverOS); /* UTF-8 string will not grow more than four times as big as UCS-16 */ ses->serverOS = kzalloc(4 * len, GFP_KERNEL); - if(ses->serverOS != NULL) { + if (ses->serverOS != NULL) { cifs_strfromUCS_le(ses->serverOS, (__le16 *)data, len, nls_cp); } @@ -215,67 +217,68 @@ static int decode_unicode_ssetup(char ** pbcc_area, int bleft, struct cifsSesInf /* save off server network operating system */ len = UniStrnlen((wchar_t *) data, words_left); - if(len >= words_left) + if (len >= words_left) return rc; - if(ses->serverNOS) + if (ses->serverNOS) kfree(ses->serverNOS); ses->serverNOS = kzalloc(4 * len, GFP_KERNEL); /* BB this is wrong length FIXME BB */ - if(ses->serverNOS != NULL) { + if (ses->serverNOS != NULL) { cifs_strfromUCS_le(ses->serverNOS, (__le16 *)data, len, nls_cp); - if(strncmp(ses->serverNOS, "NT LAN Manager 4",16) == 0) { - cFYI(1,("NT4 server")); + if (strncmp(ses->serverNOS, "NT LAN Manager 4", 16) == 0) { + cFYI(1, ("NT4 server")); ses->flags |= CIFS_SES_NT4; } } data += 2 * (len + 1); words_left -= len + 1; - /* save off server domain */ - len = UniStrnlen((wchar_t *) data, words_left); - - if(len > words_left) - return rc; - - if(ses->serverDomain) - kfree(ses->serverDomain); - ses->serverDomain = kzalloc(2 * (len + 1), GFP_KERNEL); /* BB FIXME wrong length */ - if(ses->serverDomain != NULL) { - cifs_strfromUCS_le(ses->serverDomain, (__le16 *)data, len, - nls_cp); - ses->serverDomain[2*len] = 0; - ses->serverDomain[(2*len) + 1] = 0; - } - data += 2 * (len + 1); - words_left -= len + 1; - - cFYI(1,("words left: %d",words_left)); + /* save off server domain */ + len = UniStrnlen((wchar_t *) data, words_left); + + if (len > words_left) + return rc; + + if (ses->serverDomain) + kfree(ses->serverDomain); + ses->serverDomain = kzalloc(2 * (len + 1), GFP_KERNEL); /* BB FIXME wrong length */ + if (ses->serverDomain != NULL) { + cifs_strfromUCS_le(ses->serverDomain, (__le16 *)data, len, + nls_cp); + ses->serverDomain[2*len] = 0; + ses->serverDomain[(2*len) + 1] = 0; + } + data += 2 * (len + 1); + words_left -= len + 1; + + cFYI(1, ("words left: %d", words_left)); return rc; } -static int decode_ascii_ssetup(char ** pbcc_area, int bleft, struct cifsSesInfo *ses, - const struct nls_table * nls_cp) +static int decode_ascii_ssetup(char **pbcc_area, int bleft, + struct cifsSesInfo *ses, + const struct nls_table *nls_cp) { int rc = 0; int len; - char * bcc_ptr = *pbcc_area; + char *bcc_ptr = *pbcc_area; + + cFYI(1, ("decode sessetup ascii. bleft %d", bleft)); - cFYI(1,("decode sessetup ascii. bleft %d", bleft)); - len = strnlen(bcc_ptr, bleft); - if(len >= bleft) + if (len >= bleft) return rc; - - if(ses->serverOS) + + if (ses->serverOS) kfree(ses->serverOS); ses->serverOS = kzalloc(len + 1, GFP_KERNEL); - if(ses->serverOS) + if (ses->serverOS) strncpy(ses->serverOS, bcc_ptr, len); - if(strncmp(ses->serverOS, "OS/2",4) == 0) { - cFYI(1,("OS/2 server")); + if (strncmp(ses->serverOS, "OS/2", 4) == 0) { + cFYI(1, ("OS/2 server")); ses->flags |= CIFS_SES_OS2; } @@ -283,34 +286,34 @@ static int decode_ascii_ssetup(char ** pbcc_area, int bleft, struct cifsSesInfo bleft -= len + 1; len = strnlen(bcc_ptr, bleft); - if(len >= bleft) + if (len >= bleft) return rc; - if(ses->serverNOS) + if (ses->serverNOS) kfree(ses->serverNOS); ses->serverNOS = kzalloc(len + 1, GFP_KERNEL); - if(ses->serverNOS) + if (ses->serverNOS) strncpy(ses->serverNOS, bcc_ptr, len); bcc_ptr += len + 1; bleft -= len + 1; - len = strnlen(bcc_ptr, bleft); - if(len > bleft) - return rc; + len = strnlen(bcc_ptr, bleft); + if (len > bleft) + return rc; /* No domain field in LANMAN case. Domain is returned by old servers in the SMB negprot response */ /* BB For newer servers which do not support Unicode, but thus do return domain here we could add parsing for it later, but it is not very important */ - cFYI(1,("ascii: bytes left %d",bleft)); + cFYI(1, ("ascii: bytes left %d", bleft)); return rc; } -int +int CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time, const struct nls_table *nls_cp) { @@ -328,13 +331,13 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time, __u16 action; int bytes_remaining; - if(ses == NULL) + if (ses == NULL) return -EINVAL; type = ses->server->secType; - cFYI(1,("sess setup type %d",type)); - if(type == LANMAN) { + cFYI(1, ("sess setup type %d", type)); + if (type == LANMAN) { #ifndef CONFIG_CIFS_WEAK_PW_HASH /* LANMAN and plaintext are less secure and off by default. So we make this explicitly be turned on in kconfig (in the @@ -344,15 +347,15 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time, return -EOPNOTSUPP; #endif wct = 10; /* lanman 2 style sessionsetup */ - } else if((type == NTLM) || (type == NTLMv2)) { + } else if ((type == NTLM) || (type == NTLMv2)) { /* For NTLMv2 failures eventually may need to retry NTLM */ wct = 13; /* old style NTLM sessionsetup */ - } else /* same size for negotiate or auth, NTLMSSP or extended security */ + } else /* same size: negotiate or auth, NTLMSSP or extended security */ wct = 12; rc = small_smb_init_no_tc(SMB_COM_SESSION_SETUP_ANDX, wct, ses, (void **)&smb_buf); - if(rc) + if (rc) return rc; pSMB = (SESSION_SETUP_ANDX *)smb_buf; @@ -364,8 +367,8 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time, second part which will include the strings and rest of bcc area, in order to avoid having to do a large buffer 17K allocation */ - iov[0].iov_base = (char *)pSMB; - iov[0].iov_len = smb_buf->smb_buf_length + 4; + iov[0].iov_base = (char *)pSMB; + iov[0].iov_len = smb_buf->smb_buf_length + 4; /* 2000 big enough to fit max user, domain, NOS name etc. */ str_area = kmalloc(2000, GFP_KERNEL); @@ -373,18 +376,18 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time, ses->flags &= ~CIFS_SES_LANMAN; - if(type == LANMAN) { + if (type == LANMAN) { #ifdef CONFIG_CIFS_WEAK_PW_HASH char lnm_session_key[CIFS_SESS_KEY_SIZE]; /* no capabilities flags in old lanman negotiation */ - pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_SESS_KEY_SIZE); + pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_SESS_KEY_SIZE); /* BB calculate hash with password */ /* and copy into bcc */ calc_lanman_hash(ses, lnm_session_key); - ses->flags |= CIFS_SES_LANMAN; + ses->flags |= CIFS_SES_LANMAN; /* #ifdef CONFIG_CIFS_DEBUG2 cifs_dump_mem("cryptkey: ",ses->server->cryptKey, CIFS_SESS_KEY_SIZE); @@ -397,10 +400,10 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time, changed to do higher than lanman dialect and we reconnected would we ever calc signing_key? */ - cFYI(1,("Negotiating LANMAN setting up strings")); + cFYI(1, ("Negotiating LANMAN setting up strings")); /* Unicode not allowed for LANMAN dialects */ ascii_ssetup_strings(&bcc_ptr, ses, nls_cp); -#endif +#endif } else if (type == NTLM) { char ntlm_session_key[CIFS_SESS_KEY_SIZE]; @@ -409,38 +412,38 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time, cpu_to_le16(CIFS_SESS_KEY_SIZE); pSMB->req_no_secext.CaseSensitivePasswordLength = cpu_to_le16(CIFS_SESS_KEY_SIZE); - + /* calculate session key */ SMBNTencrypt(ses->password, ses->server->cryptKey, ntlm_session_key); - if(first_time) /* should this be moved into common code + if (first_time) /* should this be moved into common code with similar ntlmv2 path? */ - cifs_calculate_mac_key(ses->server->mac_signing_key, + cifs_calculate_mac_key(&ses->server->mac_signing_key, ntlm_session_key, ses->password); /* copy session key */ - memcpy(bcc_ptr, (char *)ntlm_session_key,CIFS_SESS_KEY_SIZE); + memcpy(bcc_ptr, (char *)ntlm_session_key, CIFS_SESS_KEY_SIZE); bcc_ptr += CIFS_SESS_KEY_SIZE; - memcpy(bcc_ptr, (char *)ntlm_session_key,CIFS_SESS_KEY_SIZE); + memcpy(bcc_ptr, (char *)ntlm_session_key, CIFS_SESS_KEY_SIZE); bcc_ptr += CIFS_SESS_KEY_SIZE; - if(ses->capabilities & CAP_UNICODE) { + if (ses->capabilities & CAP_UNICODE) { /* unicode strings must be word aligned */ if (iov[0].iov_len % 2) { *bcc_ptr = 0; - bcc_ptr++; - } + bcc_ptr++; + } unicode_ssetup_strings(&bcc_ptr, ses, nls_cp); } else ascii_ssetup_strings(&bcc_ptr, ses, nls_cp); } else if (type == NTLMv2) { - char * v2_sess_key = + char *v2_sess_key = kmalloc(sizeof(struct ntlmv2_resp), GFP_KERNEL); /* BB FIXME change all users of v2_sess_key to struct ntlmv2_resp */ - if(v2_sess_key == NULL) { + if (v2_sess_key == NULL) { cifs_small_buf_release(smb_buf); return -ENOMEM; } @@ -456,8 +459,8 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time, /* calculate session key */ setup_ntlmv2_rsp(ses, v2_sess_key, nls_cp); - if(first_time) /* should this be moved into common code - with similar ntlmv2 path? */ + if (first_time) /* should this be moved into common code + with similar ntlmv2 path? */ /* cifs_calculate_ntlmv2_mac_key(ses->server->mac_signing_key, response BB FIXME, v2_sess_key); */ @@ -465,11 +468,12 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time, /* memcpy(bcc_ptr, (char *)ntlm_session_key,LM2_SESS_KEY_SIZE); bcc_ptr += LM2_SESS_KEY_SIZE; */ - memcpy(bcc_ptr, (char *)v2_sess_key, sizeof(struct ntlmv2_resp)); + memcpy(bcc_ptr, (char *)v2_sess_key, + sizeof(struct ntlmv2_resp)); bcc_ptr += sizeof(struct ntlmv2_resp); kfree(v2_sess_key); - if(ses->capabilities & CAP_UNICODE) { - if(iov[0].iov_len % 2) { + if (ses->capabilities & CAP_UNICODE) { + if (iov[0].iov_len % 2) { *bcc_ptr = 0; } bcc_ptr++; unicode_ssetup_strings(&bcc_ptr, ses, nls_cp); @@ -488,20 +492,20 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time, BCC_LE(smb_buf) = cpu_to_le16(count); iov[1].iov_base = str_area; - iov[1].iov_len = count; + iov[1].iov_len = count; rc = SendReceive2(xid, ses, iov, 2 /* num_iovecs */, &resp_buf_type, 0); /* SMB request buf freed in SendReceive2 */ - cFYI(1,("ssetup rc from sendrecv2 is %d",rc)); - if(rc) + cFYI(1, ("ssetup rc from sendrecv2 is %d", rc)); + if (rc) goto ssetup_exit; pSMB = (SESSION_SETUP_ANDX *)iov[0].iov_base; smb_buf = (struct smb_hdr *)iov[0].iov_base; - if((smb_buf->WordCount != 3) && (smb_buf->WordCount != 4)) { + if ((smb_buf->WordCount != 3) && (smb_buf->WordCount != 4)) { rc = -EIO; - cERROR(1,("bad word count %d", smb_buf->WordCount)); + cERROR(1, ("bad word count %d", smb_buf->WordCount)); goto ssetup_exit; } action = le16_to_cpu(pSMB->resp.Action); @@ -514,31 +518,32 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time, bytes_remaining = BCC(smb_buf); bcc_ptr = pByteArea(smb_buf); - if(smb_buf->WordCount == 4) { + if (smb_buf->WordCount == 4) { __u16 blob_len; blob_len = le16_to_cpu(pSMB->resp.SecurityBlobLength); bcc_ptr += blob_len; - if(blob_len > bytes_remaining) { - cERROR(1,("bad security blob length %d", blob_len)); + if (blob_len > bytes_remaining) { + cERROR(1, ("bad security blob length %d", blob_len)); rc = -EINVAL; goto ssetup_exit; } bytes_remaining -= blob_len; - } + } /* BB check if Unicode and decode strings */ - if(smb_buf->Flags2 & SMBFLG2_UNICODE) + if (smb_buf->Flags2 & SMBFLG2_UNICODE) rc = decode_unicode_ssetup(&bcc_ptr, bytes_remaining, ses, nls_cp); else - rc = decode_ascii_ssetup(&bcc_ptr, bytes_remaining, ses,nls_cp); - + rc = decode_ascii_ssetup(&bcc_ptr, bytes_remaining, + ses, nls_cp); + ssetup_exit: kfree(str_area); - if(resp_buf_type == CIFS_SMALL_BUFFER) { - cFYI(1,("ssetup freeing small buf %p", iov[0].iov_base)); + if (resp_buf_type == CIFS_SMALL_BUFFER) { + cFYI(1, ("ssetup freeing small buf %p", iov[0].iov_base)); cifs_small_buf_release(iov[0].iov_base); - } else if(resp_buf_type == CIFS_LARGE_BUFFER) + } else if (resp_buf_type == CIFS_LARGE_BUFFER) cifs_buf_release(iov[0].iov_base); return rc; diff --git a/fs/cifs/smbdes.c b/fs/cifs/smbdes.c index 1b1daf63f062..cfa6d21fb4e8 100644 --- a/fs/cifs/smbdes.c +++ b/fs/cifs/smbdes.c @@ -1,32 +1,32 @@ -/* +/* Unix SMB/Netbios implementation. Version 1.9. - a partial implementation of DES designed for use in the + a partial implementation of DES designed for use in the SMB authentication protocol Copyright (C) Andrew Tridgell 1998 Modified by Steve French (sfrench@us.ibm.com) 2002,2004 - + This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. - + This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - + You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ -/* NOTES: +/* NOTES: This code makes no attempt to be fast! In fact, it is a very - slow implementation + slow implementation This code is NOT a complete DES implementation. It implements only the minimum necessary for SMB authentication, as used by all SMB @@ -153,7 +153,7 @@ static uchar sbox[8][4][16] = { }; static void -permute(char *out, char *in, uchar * p, int n) +permute(char *out, char *in, uchar *p, int n) { int i; for (i = 0; i < n; i++) @@ -202,18 +202,18 @@ dohash(char *out, char *in, char *key, int forw) char *rl; /* Have to reduce stack usage */ - pk1 = kmalloc(56+56+64+64,GFP_KERNEL); - if(pk1 == NULL) + pk1 = kmalloc(56+56+64+64, GFP_KERNEL); + if (pk1 == NULL) return; ki = kmalloc(16*48, GFP_KERNEL); - if(ki == NULL) { + if (ki == NULL) { kfree(pk1); return; } cd = pk1 + 56; - pd1= cd + 56; + pd1 = cd + 56; rl = pd1 + 64; permute(pk1, key, perm1, 56); @@ -247,7 +247,7 @@ dohash(char *out, char *in, char *key, int forw) char *r2; /* r2[32] */ er = kmalloc(48+48+32+32+32, GFP_KERNEL); - if(er == NULL) { + if (er == NULL) { kfree(pk1); kfree(ki); return; @@ -327,8 +327,8 @@ smbhash(unsigned char *out, unsigned char *in, unsigned char *key, int forw) char *keyb; /* keyb[64] */ unsigned char key2[8]; - outb = kmalloc(64 * 3,GFP_KERNEL); - if(outb == NULL) + outb = kmalloc(64 * 3, GFP_KERNEL); + if (outb == NULL) return; inb = outb + 64; diff --git a/fs/cifs/smbencrypt.c b/fs/cifs/smbencrypt.c index 4b25ba92180d..90542a39be17 100644 --- a/fs/cifs/smbencrypt.c +++ b/fs/cifs/smbencrypt.c @@ -1,4 +1,4 @@ -/* +/* Unix SMB/Netbios implementation. Version 1.9. SMB parameters and setup @@ -7,17 +7,17 @@ Modified by Jeremy Allison 1995. Copyright (C) Andrew Bartlett <abartlet@samba.org> 2002-2003 Modified by Steve French (sfrench@us.ibm.com) 2002-2003 - + This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. - + This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - + You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. @@ -57,7 +57,7 @@ void SMBNTencrypt(unsigned char *passwd, unsigned char *c8, unsigned char *p24); /* This implements the X/Open SMB password encryption - It takes a password, a 8 byte "crypt key" and puts 24 bytes of + It takes a password, a 8 byte "crypt key" and puts 24 bytes of encrypted password into p24 */ /* Note that password must be uppercased and null terminated */ void @@ -73,9 +73,9 @@ SMBencrypt(unsigned char *passwd, unsigned char *c8, unsigned char *p24) E_P16(p14, p21); SMBOWFencrypt(p21, c8, p24); - - memset(p14,0,15); - memset(p21,0,21); + + memset(p14, 0, 15); + memset(p21, 0, 21); } /* Routines for Windows NT MD4 Hash functions. */ @@ -90,14 +90,14 @@ _my_wcslen(__u16 * str) /* * Convert a string into an NT UNICODE string. - * Note that regardless of processor type + * Note that regardless of processor type * this must be in intel (little-endian) * format. */ static int _my_mbstowcs(__u16 * dst, const unsigned char *src, int len) -{ /* not a very good conversion routine - change/fix */ +{ /* BB not a very good conversion routine - change/fix */ int i; __u16 val; @@ -112,7 +112,7 @@ _my_mbstowcs(__u16 * dst, const unsigned char *src, int len) return i; } -/* +/* * Creates the MD4 Hash of the users password in NT UNICODE. */ @@ -123,7 +123,7 @@ E_md4hash(const unsigned char *passwd, unsigned char *p16) __u16 wpwd[129]; /* Password cannot be longer than 128 characters */ - if(passwd) { + if (passwd) { len = strlen((char *) passwd); if (len > 128) { len = 128; @@ -138,7 +138,7 @@ E_md4hash(const unsigned char *passwd, unsigned char *p16) len = _my_wcslen(wpwd) * sizeof (__u16); mdfour(p16, (unsigned char *) wpwd, len); - memset(wpwd,0,129 * 2); + memset(wpwd, 0, 129 * 2); } #if 0 /* currently unused */ @@ -178,17 +178,17 @@ ntv2_owf_gen(const unsigned char owf[16], const char *user_n, const char *domain_n, unsigned char kr_buf[16], const struct nls_table *nls_codepage) { - wchar_t * user_u; - wchar_t * dom_u; + wchar_t *user_u; + wchar_t *dom_u; int user_l, domain_l; struct HMACMD5Context ctx; /* might as well do one alloc to hold both (user_u and dom_u) */ - user_u = kmalloc(2048 * sizeof(wchar_t),GFP_KERNEL); - if(user_u == NULL) + user_u = kmalloc(2048 * sizeof(wchar_t), GFP_KERNEL); + if (user_u == NULL) return; dom_u = user_u + 1024; - + /* push_ucs2(NULL, user_u, user_n, (user_l+1)*2, STR_UNICODE|STR_NOALIGN|STR_TERMINATE|STR_UPPER); push_ucs2(NULL, dom_u, domain_n, (domain_l+1)*2, STR_UNICODE|STR_NOALIGN|STR_TERMINATE|STR_UPPER); */ @@ -206,7 +206,7 @@ ntv2_owf_gen(const unsigned char owf[16], const char *user_n, kfree(user_u); } -#endif +#endif /* Does the des encryption from the NT or LM MD4 hash. */ static void @@ -256,15 +256,15 @@ SMBNTencrypt(unsigned char *passwd, unsigned char *c8, unsigned char *p24) #if 0 static void SMBOWFencrypt_ntv2(const unsigned char kr[16], - const struct data_blob * srv_chal, - const struct data_blob * cli_chal, unsigned char resp_buf[16]) + const struct data_blob *srv_chal, + const struct data_blob *cli_chal, unsigned char resp_buf[16]) { - struct HMACMD5Context ctx; + struct HMACMD5Context ctx; - hmac_md5_init_limK_to_64(kr, 16, &ctx); - hmac_md5_update(srv_chal->data, srv_chal->length, &ctx); - hmac_md5_update(cli_chal->data, cli_chal->length, &ctx); - hmac_md5_final(resp_buf, &ctx); + hmac_md5_init_limK_to_64(kr, 16, &ctx); + hmac_md5_update(srv_chal->data, srv_chal->length, &ctx); + hmac_md5_update(cli_chal->data, cli_chal->length, &ctx); + hmac_md5_final(resp_buf, &ctx); } static void diff --git a/fs/cifs/smberr.h b/fs/cifs/smberr.h index 212c3c296409..2ef0be288820 100644 --- a/fs/cifs/smberr.h +++ b/fs/cifs/smberr.h @@ -4,8 +4,8 @@ * Copyright (c) International Business Machines Corp., 2002,2004 * Author(s): Steve French (sfrench@us.ibm.com) * - * See Error Codes section of the SNIA CIFS Specification - * for more information + * See Error Codes section of the SNIA CIFS Specification + * for more information * * This library is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published @@ -19,7 +19,7 @@ * * You should have received a copy of the GNU Lesser General Public License * along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #define SUCCESS 0x00 /* The request was successful. */ @@ -110,7 +110,7 @@ /* Below errors are used internally (do not come over the wire) for passthrough from STATUS codes to POSIX only */ -#define ErrTooManyLinks 0xFFFE +#define ErrTooManyLinks 0xFFFE /* Following error codes may be generated with the ERRSRV error class.*/ diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 5f468459a1e2..746bc9405db1 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -1,10 +1,10 @@ /* * fs/cifs/transport.c * - * Copyright (C) International Business Machines Corp., 2002,2005 + * Copyright (C) International Business Machines Corp., 2002,2007 * Author(s): Steve French (sfrench@us.ibm.com) * Jeremy Allison (jra@samba.org) 2006. - * + * * This library is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published * by the Free Software Foundation; either version 2.1 of the License, or @@ -17,7 +17,7 @@ * * You should have received a copy of the GNU Lesser General Public License * along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include <linux/fs.h> @@ -32,7 +32,7 @@ #include "cifsglob.h" #include "cifsproto.h" #include "cifs_debug.h" - + extern mempool_t *cifs_mid_poolp; extern struct kmem_cache *cifs_oplock_cachep; @@ -49,7 +49,7 @@ AllocMidQEntry(const struct smb_hdr *smb_buffer, struct cifsSesInfo *ses) cERROR(1, ("Null TCP session in AllocMidQEntry")); return NULL; } - + temp = (struct mid_q_entry *) mempool_alloc(cifs_mid_poolp, GFP_KERNEL | GFP_NOFS); if (temp == NULL) @@ -86,7 +86,7 @@ DeleteMidQEntry(struct mid_q_entry *midEntry) list_del(&midEntry->qhead); atomic_dec(&midCount); spin_unlock(&GlobalMid_Lock); - if(midEntry->largeBuf) + if (midEntry->largeBuf) cifs_buf_release(midEntry->resp_buf); else cifs_small_buf_release(midEntry->resp_buf); @@ -94,8 +94,8 @@ DeleteMidQEntry(struct mid_q_entry *midEntry) now = jiffies; /* commands taking longer than one second are indications that something is wrong, unless it is quite a slow link or server */ - if((now - midEntry->when_alloc) > HZ) { - if((cifsFYI & CIFS_TIMER) && + if ((now - midEntry->when_alloc) > HZ) { + if ((cifsFYI & CIFS_TIMER) && (midEntry->command != SMB_COM_LOCKING_ANDX)) { printk(KERN_DEBUG " CIFS slow rsp: cmd %d mid %d", midEntry->command, midEntry->mid); @@ -110,10 +110,10 @@ DeleteMidQEntry(struct mid_q_entry *midEntry) } struct oplock_q_entry * -AllocOplockQEntry(struct inode * pinode, __u16 fid, struct cifsTconInfo * tcon) +AllocOplockQEntry(struct inode *pinode, __u16 fid, struct cifsTconInfo *tcon) { struct oplock_q_entry *temp; - if ((pinode== NULL) || (tcon == NULL)) { + if ((pinode == NULL) || (tcon == NULL)) { cERROR(1, ("Null parms passed to AllocOplockQEntry")); return NULL; } @@ -133,9 +133,9 @@ AllocOplockQEntry(struct inode * pinode, __u16 fid, struct cifsTconInfo * tcon) } -void DeleteOplockQEntry(struct oplock_q_entry * oplockEntry) +void DeleteOplockQEntry(struct oplock_q_entry *oplockEntry) { - spin_lock(&GlobalMid_Lock); + spin_lock(&GlobalMid_Lock); /* should we check if list empty first? */ list_del(&oplockEntry->qhead); spin_unlock(&GlobalMid_Lock); @@ -152,7 +152,7 @@ smb_send(struct socket *ssocket, struct smb_hdr *smb_buffer, struct kvec iov; unsigned len = smb_buf_length + 4; - if(ssocket == NULL) + if (ssocket == NULL) return -ENOTSOCK; /* BB eventually add reconnect code here */ iov.iov_base = smb_buffer; iov.iov_len = len; @@ -164,8 +164,8 @@ smb_send(struct socket *ssocket, struct smb_hdr *smb_buffer, smb_msg.msg_flags = MSG_DONTWAIT + MSG_NOSIGNAL; /* BB add more flags?*/ /* smb header is converted in header_assemble. bcc and rest of SMB word - area, and byte area if necessary, is converted to littleendian in - cifssmb.c and RFC1001 len is converted to bigendian in smb_send + area, and byte area if necessary, is converted to littleendian in + cifssmb.c and RFC1001 len is converted to bigendian in smb_send Flags2 is converted in SendReceive */ smb_buffer->smb_buf_length = cpu_to_be32(smb_buffer->smb_buf_length); @@ -177,9 +177,9 @@ smb_send(struct socket *ssocket, struct smb_hdr *smb_buffer, if ((rc == -ENOSPC) || (rc == -EAGAIN)) { i++; /* smaller timeout here than send2 since smaller size */ - /* Although it may not be required, this also is smaller - oplock break time */ - if(i > 12) { + /* Although it may not be required, this also is smaller + oplock break time */ + if (i > 12) { cERROR(1, ("sends on sock %p stuck for 7 seconds", ssocket)); @@ -189,7 +189,7 @@ smb_send(struct socket *ssocket, struct smb_hdr *smb_buffer, msleep(1 << i); continue; } - if (rc < 0) + if (rc < 0) break; else i = 0; /* reset i after each successful send */ @@ -199,7 +199,7 @@ smb_send(struct socket *ssocket, struct smb_hdr *smb_buffer, } if (rc < 0) { - cERROR(1,("Error %d sending data on socket to server", rc)); + cERROR(1, ("Error %d sending data on socket to server", rc)); } else { rc = 0; } @@ -223,8 +223,8 @@ smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec, unsigned int total_len; int first_vec = 0; unsigned int smb_buf_length = smb_buffer->smb_buf_length; - - if(ssocket == NULL) + + if (ssocket == NULL) return -ENOTSOCK; /* BB eventually add reconnect code here */ smb_msg.msg_name = sin; @@ -234,8 +234,8 @@ smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec, smb_msg.msg_flags = MSG_DONTWAIT + MSG_NOSIGNAL; /* BB add more flags?*/ /* smb header is converted in header_assemble. bcc and rest of SMB word - area, and byte area if necessary, is converted to littleendian in - cifssmb.c and RFC1001 len is converted to bigendian in smb_send + area, and byte area if necessary, is converted to littleendian in + cifssmb.c and RFC1001 len is converted to bigendian in smb_send Flags2 is converted in SendReceive */ @@ -252,7 +252,7 @@ smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec, n_vec - first_vec, total_len); if ((rc == -ENOSPC) || (rc == -EAGAIN)) { i++; - if(i >= 14) { + if (i >= 14) { cERROR(1, ("sends on sock %p stuck for 15 seconds", ssocket)); @@ -262,17 +262,17 @@ smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec, msleep(1 << i); continue; } - if (rc < 0) + if (rc < 0) break; if (rc >= total_len) { WARN_ON(rc > total_len); break; } - if(rc == 0) { + if (rc == 0) { /* should never happen, letting socket clear before retrying is our only obvious option here */ - cERROR(1,("tcp sent no data")); + cERROR(1, ("tcp sent no data")); msleep(500); continue; } @@ -295,7 +295,7 @@ smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec, } if (rc < 0) { - cERROR(1,("Error %d sending data on socket to server", rc)); + cERROR(1, ("Error %d sending data on socket to server", rc)); } else rc = 0; @@ -308,13 +308,13 @@ smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec, static int wait_for_free_request(struct cifsSesInfo *ses, const int long_op) { - if(long_op == -1) { + if (long_op == -1) { /* oplock breaks must not be held up */ atomic_inc(&ses->server->inFlight); } else { - spin_lock(&GlobalMid_Lock); - while(1) { - if(atomic_read(&ses->server->inFlight) >= + spin_lock(&GlobalMid_Lock); + while (1) { + if (atomic_read(&ses->server->inFlight) >= cifs_max_pending){ spin_unlock(&GlobalMid_Lock); #ifdef CONFIG_CIFS_STATS2 @@ -328,14 +328,14 @@ static int wait_for_free_request(struct cifsSesInfo *ses, const int long_op) #endif spin_lock(&GlobalMid_Lock); } else { - if(ses->server->tcpStatus == CifsExiting) { + if (ses->server->tcpStatus == CifsExiting) { spin_unlock(&GlobalMid_Lock); return -ENOENT; } - /* can not count locking commands against total since - they are allowed to block on server */ - + /* can not count locking commands against total + as they are allowed to block on server */ + /* update # of requests on the wire to server */ if (long_op < 3) atomic_inc(&ses->server->inFlight); @@ -353,11 +353,11 @@ static int allocate_mid(struct cifsSesInfo *ses, struct smb_hdr *in_buf, if (ses->server->tcpStatus == CifsExiting) { return -ENOENT; } else if (ses->server->tcpStatus == CifsNeedReconnect) { - cFYI(1,("tcp session dead - return to caller to retry")); + cFYI(1, ("tcp session dead - return to caller to retry")); return -EAGAIN; } else if (ses->status != CifsGood) { /* check if SMB session is bad because we are setting it up */ - if((in_buf->Command != SMB_COM_SESSION_SETUP_ANDX) && + if ((in_buf->Command != SMB_COM_SESSION_SETUP_ANDX) && (in_buf->Command != SMB_COM_NEGOTIATE)) { return -EAGAIN; } /* else ok - we are setting up session */ @@ -369,7 +369,7 @@ static int allocate_mid(struct cifsSesInfo *ses, struct smb_hdr *in_buf, return 0; } -static int wait_for_response(struct cifsSesInfo *ses, +static int wait_for_response(struct cifsSesInfo *ses, struct mid_q_entry *midQ, unsigned long timeout, unsigned long time_to_wait) @@ -379,8 +379,8 @@ static int wait_for_response(struct cifsSesInfo *ses, for (;;) { curr_timeout = timeout + jiffies; wait_event(ses->server->response_q, - (!(midQ->midState == MID_REQUEST_SUBMITTED)) || - time_after(jiffies, curr_timeout) || + (!(midQ->midState == MID_REQUEST_SUBMITTED)) || + time_after(jiffies, curr_timeout) || ((ses->server->tcpStatus != CifsGood) && (ses->server->tcpStatus != CifsNew))); @@ -398,16 +398,16 @@ static int wait_for_response(struct cifsSesInfo *ses, spin_unlock(&GlobalMid_Lock); /* Calculate time_to_wait past last receive time. - Although we prefer not to time out if the + Although we prefer not to time out if the server is still responding - we will time - out if the server takes more than 15 (or 45 + out if the server takes more than 15 (or 45 or 180) seconds to respond to this request - and has not responded to any request from + and has not responded to any request from other threads on the client within 10 seconds */ lrt += time_to_wait; if (time_after(jiffies, lrt)) { /* No replies for time_to_wait. */ - cERROR(1,("server not responding")); + cERROR(1, ("server not responding")); return -1; } } else { @@ -417,8 +417,8 @@ static int wait_for_response(struct cifsSesInfo *ses, } int -SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, - struct kvec *iov, int n_vec, int * pRespBufType /* ret */, +SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, + struct kvec *iov, int n_vec, int *pRespBufType /* ret */, const int long_op) { int rc = 0; @@ -426,21 +426,21 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, unsigned long timeout; struct mid_q_entry *midQ; struct smb_hdr *in_buf = iov[0].iov_base; - + *pRespBufType = CIFS_NO_BUFFER; /* no response buf yet */ if ((ses == NULL) || (ses->server == NULL)) { cifs_small_buf_release(in_buf); - cERROR(1,("Null session")); + cERROR(1, ("Null session")); return -EIO; } - if(ses->server->tcpStatus == CifsExiting) { + if (ses->server->tcpStatus == CifsExiting) { cifs_small_buf_release(in_buf); return -ENOENT; } - /* Ensure that we do not send more than 50 overlapping requests + /* Ensure that we do not send more than 50 overlapping requests to the same server. We may make this configurable later or use ses->maxReq */ @@ -450,23 +450,23 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, return rc; } - /* make sure that we sign in the same order that we send on this socket + /* make sure that we sign in the same order that we send on this socket and avoid races inside tcp sendmsg code that could cause corruption of smb data */ - down(&ses->server->tcpSem); + down(&ses->server->tcpSem); rc = allocate_mid(ses, in_buf, &midQ); if (rc) { up(&ses->server->tcpSem); cifs_small_buf_release(in_buf); /* Update # of requests on wire to server */ - atomic_dec(&ses->server->inFlight); + atomic_dec(&ses->server->inFlight); wake_up(&ses->server->request_q); return rc; } - rc = cifs_sign_smb2(iov, n_vec, ses->server, &midQ->sequence_number); + rc = cifs_sign_smb2(iov, n_vec, ses->server, &midQ->sequence_number); midQ->midState = MID_REQUEST_SUBMITTED; #ifdef CONFIG_CIFS_STATS2 @@ -482,7 +482,7 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, up(&ses->server->tcpSem); cifs_small_buf_release(in_buf); - if(rc < 0) + if (rc < 0) goto out; if (long_op == -1) @@ -490,18 +490,18 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, else if (long_op == 2) /* writes past end of file can take loong time */ timeout = 180 * HZ; else if (long_op == 1) - timeout = 45 * HZ; /* should be greater than + timeout = 45 * HZ; /* should be greater than servers oplock break timeout (about 43 seconds) */ else timeout = 15 * HZ; - /* wait for 15 seconds or until woken up due to response arriving or + /* wait for 15 seconds or until woken up due to response arriving or due to last connection to this server being unmounted */ if (signal_pending(current)) { /* if signal pending do not hold up user for full smb timeout but we still give response a chance to complete */ timeout = 2 * HZ; - } + } /* No user interrupts in wait - wreaks havoc with performance */ wait_for_response(ses, midQ, timeout, 10 * HZ); @@ -511,10 +511,10 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, spin_unlock(&GlobalMid_Lock); receive_len = midQ->resp_buf->smb_buf_length; } else { - cERROR(1,("No response to cmd %d mid %d", + cERROR(1, ("No response to cmd %d mid %d", midQ->command, midQ->mid)); - if(midQ->midState == MID_REQUEST_SUBMITTED) { - if(ses->server->tcpStatus == CifsExiting) + if (midQ->midState == MID_REQUEST_SUBMITTED) { + if (ses->server->tcpStatus == CifsExiting) rc = -EHOSTDOWN; else { ses->server->tcpStatus = CifsNeedReconnect; @@ -523,9 +523,9 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, } if (rc != -EHOSTDOWN) { - if(midQ->midState == MID_RETRY_NEEDED) { + if (midQ->midState == MID_RETRY_NEEDED) { rc = -EAGAIN; - cFYI(1,("marking request for retry")); + cFYI(1, ("marking request for retry")); } else { rc = -EIO; } @@ -533,21 +533,21 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, spin_unlock(&GlobalMid_Lock); DeleteMidQEntry(midQ); /* Update # of requests on wire to server */ - atomic_dec(&ses->server->inFlight); + atomic_dec(&ses->server->inFlight); wake_up(&ses->server->request_q); return rc; } - + if (receive_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) { cERROR(1, ("Frame too large received. Length: %d Xid: %d", receive_len, xid)); rc = -EIO; } else { /* rcvd frame is ok */ - if (midQ->resp_buf && + if (midQ->resp_buf && (midQ->midState == MID_RESPONSE_RECEIVED)) { iov[0].iov_base = (char *)midQ->resp_buf; - if(midQ->largeBuf) + if (midQ->largeBuf) *pRespBufType = CIFS_LARGE_BUFFER; else *pRespBufType = CIFS_SMALL_BUFFER; @@ -555,14 +555,14 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, dump_smb(midQ->resp_buf, 80); /* convert the length into a more usable form */ - if((receive_len > 24) && + if ((receive_len > 24) && (ses->server->secMode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))) { rc = cifs_verify_signature(midQ->resp_buf, - ses->server->mac_signing_key, + &ses->server->mac_signing_key, midQ->sequence_number+1); - if(rc) { - cERROR(1,("Unexpected SMB signature")); + if (rc) { + cERROR(1, ("Unexpected SMB signature")); /* BB FIXME add code to kill session */ } } @@ -576,19 +576,19 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, sizeof (struct smb_hdr) - 4 /* do not count RFC1001 header */ + (2 * midQ->resp_buf->WordCount) + 2 /* bcc */ ) - BCC(midQ->resp_buf) = + BCC(midQ->resp_buf) = le16_to_cpu(BCC_LE(midQ->resp_buf)); midQ->resp_buf = NULL; /* mark it so will not be freed by DeleteMidQEntry */ } else { rc = -EIO; - cFYI(1,("Bad MID state?")); + cFYI(1, ("Bad MID state?")); } } out: DeleteMidQEntry(midQ); - atomic_dec(&ses->server->inFlight); + atomic_dec(&ses->server->inFlight); wake_up(&ses->server->request_q); return rc; @@ -605,18 +605,18 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses, struct mid_q_entry *midQ; if (ses == NULL) { - cERROR(1,("Null smb session")); + cERROR(1, ("Null smb session")); return -EIO; } - if(ses->server == NULL) { - cERROR(1,("Null tcp session")); + if (ses->server == NULL) { + cERROR(1, ("Null tcp session")); return -EIO; } - if(ses->server->tcpStatus == CifsExiting) + if (ses->server->tcpStatus == CifsExiting) return -ENOENT; - /* Ensure that we do not send more than 50 overlapping requests + /* Ensure that we do not send more than 50 overlapping requests to the same server. We may make this configurable later or use ses->maxReq */ @@ -624,17 +624,17 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses, if (rc) return rc; - /* make sure that we sign in the same order that we send on this socket + /* make sure that we sign in the same order that we send on this socket and avoid races inside tcp sendmsg code that could cause corruption of smb data */ - down(&ses->server->tcpSem); + down(&ses->server->tcpSem); rc = allocate_mid(ses, in_buf, &midQ); if (rc) { up(&ses->server->tcpSem); /* Update # of requests on wire to server */ - atomic_dec(&ses->server->inFlight); + atomic_dec(&ses->server->inFlight); wake_up(&ses->server->request_q); return rc; } @@ -645,7 +645,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses, DeleteMidQEntry(midQ); up(&ses->server->tcpSem); /* Update # of requests on wire to server */ - atomic_dec(&ses->server->inFlight); + atomic_dec(&ses->server->inFlight); wake_up(&ses->server->request_q); return -EIO; } @@ -664,7 +664,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses, #endif up(&ses->server->tcpSem); - if(rc < 0) + if (rc < 0) goto out; if (long_op == -1) @@ -672,17 +672,17 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses, else if (long_op == 2) /* writes past end of file can take loong time */ timeout = 180 * HZ; else if (long_op == 1) - timeout = 45 * HZ; /* should be greater than + timeout = 45 * HZ; /* should be greater than servers oplock break timeout (about 43 seconds) */ else timeout = 15 * HZ; - /* wait for 15 seconds or until woken up due to response arriving or + /* wait for 15 seconds or until woken up due to response arriving or due to last connection to this server being unmounted */ if (signal_pending(current)) { /* if signal pending do not hold up user for full smb timeout but we still give response a chance to complete */ timeout = 2 * HZ; - } + } /* No user interrupts in wait - wreaks havoc with performance */ wait_for_response(ses, midQ, timeout, 10 * HZ); @@ -692,10 +692,10 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses, spin_unlock(&GlobalMid_Lock); receive_len = midQ->resp_buf->smb_buf_length; } else { - cERROR(1,("No response for cmd %d mid %d", + cERROR(1, ("No response for cmd %d mid %d", midQ->command, midQ->mid)); - if(midQ->midState == MID_REQUEST_SUBMITTED) { - if(ses->server->tcpStatus == CifsExiting) + if (midQ->midState == MID_REQUEST_SUBMITTED) { + if (ses->server->tcpStatus == CifsExiting) rc = -EHOSTDOWN; else { ses->server->tcpStatus = CifsNeedReconnect; @@ -704,9 +704,9 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses, } if (rc != -EHOSTDOWN) { - if(midQ->midState == MID_RETRY_NEEDED) { + if (midQ->midState == MID_RETRY_NEEDED) { rc = -EAGAIN; - cFYI(1,("marking request for retry")); + cFYI(1, ("marking request for retry")); } else { rc = -EIO; } @@ -714,11 +714,11 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses, spin_unlock(&GlobalMid_Lock); DeleteMidQEntry(midQ); /* Update # of requests on wire to server */ - atomic_dec(&ses->server->inFlight); + atomic_dec(&ses->server->inFlight); wake_up(&ses->server->request_q); return rc; } - + if (receive_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) { cERROR(1, ("Frame too large received. Length: %d Xid: %d", receive_len, xid)); @@ -734,14 +734,14 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses, dump_smb(out_buf, 92); /* convert the length into a more usable form */ - if((receive_len > 24) && + if ((receive_len > 24) && (ses->server->secMode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))) { rc = cifs_verify_signature(out_buf, - ses->server->mac_signing_key, + &ses->server->mac_signing_key, midQ->sequence_number+1); - if(rc) { - cERROR(1,("Unexpected SMB signature")); + if (rc) { + cERROR(1, ("Unexpected SMB signature")); /* BB FIXME add code to kill session */ } } @@ -759,13 +759,13 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses, BCC(out_buf) = le16_to_cpu(BCC_LE(out_buf)); } else { rc = -EIO; - cERROR(1,("Bad MID state?")); + cERROR(1, ("Bad MID state?")); } } out: DeleteMidQEntry(midQ); - atomic_dec(&ses->server->inFlight); + atomic_dec(&ses->server->inFlight); wake_up(&ses->server->request_q); return rc; @@ -783,7 +783,7 @@ send_nt_cancel(struct cifsTconInfo *tcon, struct smb_hdr *in_buf, header_assemble(in_buf, SMB_COM_NT_CANCEL, tcon, 0); in_buf->Mid = mid; - down(&ses->server->tcpSem); + down(&ses->server->tcpSem); rc = cifs_sign_smb(in_buf, ses->server, &midQ->sequence_number); if (rc) { up(&ses->server->tcpSem); @@ -832,20 +832,20 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon, struct cifsSesInfo *ses; if (tcon == NULL || tcon->ses == NULL) { - cERROR(1,("Null smb session")); + cERROR(1, ("Null smb session")); return -EIO; } ses = tcon->ses; - if(ses->server == NULL) { - cERROR(1,("Null tcp session")); + if (ses->server == NULL) { + cERROR(1, ("Null tcp session")); return -EIO; } - if(ses->server->tcpStatus == CifsExiting) + if (ses->server->tcpStatus == CifsExiting) return -ENOENT; - /* Ensure that we do not send more than 50 overlapping requests + /* Ensure that we do not send more than 50 overlapping requests to the same server. We may make this configurable later or use ses->maxReq */ @@ -853,11 +853,11 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon, if (rc) return rc; - /* make sure that we sign in the same order that we send on this socket + /* make sure that we sign in the same order that we send on this socket and avoid races inside tcp sendmsg code that could cause corruption of smb data */ - down(&ses->server->tcpSem); + down(&ses->server->tcpSem); rc = allocate_mid(ses, in_buf, &midQ); if (rc) { @@ -887,14 +887,14 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon, #endif up(&ses->server->tcpSem); - if(rc < 0) { + if (rc < 0) { DeleteMidQEntry(midQ); return rc; } /* Wait for a reply - allow signals to interrupt. */ rc = wait_event_interruptible(ses->server->response_q, - (!(midQ->midState == MID_REQUEST_SUBMITTED)) || + (!(midQ->midState == MID_REQUEST_SUBMITTED)) || ((ses->server->tcpStatus != CifsGood) && (ses->server->tcpStatus != CifsNew))); @@ -928,7 +928,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon, } /* Wait 5 seconds for the response. */ - if (wait_for_response(ses, midQ, 5 * HZ, 5 * HZ)==0) { + if (wait_for_response(ses, midQ, 5 * HZ, 5 * HZ) == 0) { /* We got the response - restart system call. */ rstart = 1; } @@ -939,10 +939,10 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon, spin_unlock(&GlobalMid_Lock); receive_len = midQ->resp_buf->smb_buf_length; } else { - cERROR(1,("No response for cmd %d mid %d", + cERROR(1, ("No response for cmd %d mid %d", midQ->command, midQ->mid)); - if(midQ->midState == MID_REQUEST_SUBMITTED) { - if(ses->server->tcpStatus == CifsExiting) + if (midQ->midState == MID_REQUEST_SUBMITTED) { + if (ses->server->tcpStatus == CifsExiting) rc = -EHOSTDOWN; else { ses->server->tcpStatus = CifsNeedReconnect; @@ -951,9 +951,9 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon, } if (rc != -EHOSTDOWN) { - if(midQ->midState == MID_RETRY_NEEDED) { + if (midQ->midState == MID_RETRY_NEEDED) { rc = -EAGAIN; - cFYI(1,("marking request for retry")); + cFYI(1, ("marking request for retry")); } else { rc = -EIO; } @@ -962,7 +962,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon, DeleteMidQEntry(midQ); return rc; } - + if (receive_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) { cERROR(1, ("Frame too large received. Length: %d Xid: %d", receive_len, xid)); @@ -978,14 +978,14 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon, dump_smb(out_buf, 92); /* convert the length into a more usable form */ - if((receive_len > 24) && + if ((receive_len > 24) && (ses->server->secMode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))) { rc = cifs_verify_signature(out_buf, - ses->server->mac_signing_key, + &ses->server->mac_signing_key, midQ->sequence_number+1); - if(rc) { - cERROR(1,("Unexpected SMB signature")); + if (rc) { + cERROR(1, ("Unexpected SMB signature")); /* BB FIXME add code to kill session */ } } @@ -1003,7 +1003,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon, BCC(out_buf) = le16_to_cpu(BCC_LE(out_buf)); } else { rc = -EIO; - cERROR(1,("Bad MID state?")); + cERROR(1, ("Bad MID state?")); } } DeleteMidQEntry(midQ); diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c index 18fcec190f8b..f61e433d281c 100644 --- a/fs/cifs/xattr.c +++ b/fs/cifs/xattr.c @@ -1,7 +1,7 @@ /* * fs/cifs/xattr.c * - * Copyright (c) International Business Machines Corp., 2003 + * Copyright (c) International Business Machines Corp., 2003, 2007 * Author(s): Steve French (sfrench@us.ibm.com) * * This library is free software; you can redistribute it and/or modify @@ -37,50 +37,52 @@ #define XATTR_TRUSTED_PREFIX_LEN 8 #define XATTR_SECURITY_PREFIX_LEN 9 /* BB need to add server (Samba e.g) support for security and trusted prefix */ - -int cifs_removexattr(struct dentry * direntry, const char * ea_name) + +int cifs_removexattr(struct dentry *direntry, const char *ea_name) { int rc = -EOPNOTSUPP; #ifdef CONFIG_CIFS_XATTR int xid; struct cifs_sb_info *cifs_sb; struct cifsTconInfo *pTcon; - struct super_block * sb; - char * full_path; - - if(direntry == NULL) + struct super_block *sb; + char *full_path; + + if (direntry == NULL) return -EIO; - if(direntry->d_inode == NULL) + if (direntry->d_inode == NULL) return -EIO; sb = direntry->d_inode->i_sb; - if(sb == NULL) + if (sb == NULL) return -EIO; xid = GetXid(); - + cifs_sb = CIFS_SB(sb); pTcon = cifs_sb->tcon; - + full_path = build_path_from_dentry(direntry); - if(full_path == NULL) { + if (full_path == NULL) { FreeXid(xid); return -ENOMEM; } - if(ea_name == NULL) { - cFYI(1,("Null xattr names not supported")); - } else if(strncmp(ea_name,CIFS_XATTR_USER_PREFIX,5) - && (strncmp(ea_name,CIFS_XATTR_OS2_PREFIX,4))) { - cFYI(1,("illegal xattr namespace %s (only user namespace supported)",ea_name)); + if (ea_name == NULL) { + cFYI(1, ("Null xattr names not supported")); + } else if (strncmp(ea_name, CIFS_XATTR_USER_PREFIX, 5) + && (strncmp(ea_name, CIFS_XATTR_OS2_PREFIX, 4))) { + cFYI(1, + ("illegal xattr request %s (only user namespace supported)", + ea_name)); /* BB what if no namespace prefix? */ /* Should we just pass them to server, except for system and perhaps security prefixes? */ } else { - if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) goto remove_ea_exit; - ea_name+=5; /* skip past user. prefix */ - rc = CIFSSMBSetEA(xid,pTcon,full_path,ea_name,NULL, + ea_name += 5; /* skip past user. prefix */ + rc = CIFSSMBSetEA(xid, pTcon, full_path, ea_name, NULL, (__u16)0, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); } @@ -91,23 +93,23 @@ remove_ea_exit: return rc; } -int cifs_setxattr(struct dentry * direntry, const char * ea_name, - const void * ea_value, size_t value_size, int flags) +int cifs_setxattr(struct dentry *direntry, const char *ea_name, + const void *ea_value, size_t value_size, int flags) { int rc = -EOPNOTSUPP; #ifdef CONFIG_CIFS_XATTR int xid; struct cifs_sb_info *cifs_sb; struct cifsTconInfo *pTcon; - struct super_block * sb; - char * full_path; + struct super_block *sb; + char *full_path; - if(direntry == NULL) + if (direntry == NULL) return -EIO; - if(direntry->d_inode == NULL) + if (direntry->d_inode == NULL) return -EIO; sb = direntry->d_inode->i_sb; - if(sb == NULL) + if (sb == NULL) return -EIO; xid = GetXid(); @@ -115,7 +117,7 @@ int cifs_setxattr(struct dentry * direntry, const char * ea_name, pTcon = cifs_sb->tcon; full_path = build_path_from_dentry(direntry); - if(full_path == NULL) { + if (full_path == NULL) { FreeXid(xid); return -ENOMEM; } @@ -123,67 +125,69 @@ int cifs_setxattr(struct dentry * direntry, const char * ea_name, /* return alt name if available as pseudo attr */ /* if proc/fs/cifs/streamstoxattr is set then - search server for EAs or streams to + search server for EAs or streams to returns as xattrs */ - if(value_size > MAX_EA_VALUE_SIZE) { - cFYI(1,("size of EA value too large")); + if (value_size > MAX_EA_VALUE_SIZE) { + cFYI(1, ("size of EA value too large")); kfree(full_path); FreeXid(xid); return -EOPNOTSUPP; } - if(ea_name == NULL) { - cFYI(1,("Null xattr names not supported")); - } else if(strncmp(ea_name,CIFS_XATTR_USER_PREFIX,5) == 0) { - if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) + if (ea_name == NULL) { + cFYI(1, ("Null xattr names not supported")); + } else if (strncmp(ea_name, CIFS_XATTR_USER_PREFIX, 5) == 0) { + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) goto set_ea_exit; - if(strncmp(ea_name,CIFS_XATTR_DOS_ATTRIB,14) == 0) { - cFYI(1,("attempt to set cifs inode metadata")); + if (strncmp(ea_name, CIFS_XATTR_DOS_ATTRIB, 14) == 0) { + cFYI(1, ("attempt to set cifs inode metadata")); } ea_name += 5; /* skip past user. prefix */ - rc = CIFSSMBSetEA(xid,pTcon,full_path,ea_name,ea_value, + rc = CIFSSMBSetEA(xid, pTcon, full_path, ea_name, ea_value, (__u16)value_size, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); - } else if(strncmp(ea_name, CIFS_XATTR_OS2_PREFIX,4) == 0) { - if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) + } else if (strncmp(ea_name, CIFS_XATTR_OS2_PREFIX, 4) == 0) { + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) goto set_ea_exit; ea_name += 4; /* skip past os2. prefix */ - rc = CIFSSMBSetEA(xid,pTcon,full_path,ea_name,ea_value, + rc = CIFSSMBSetEA(xid, pTcon, full_path, ea_name, ea_value, (__u16)value_size, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); } else { - int temp; - temp = strncmp(ea_name,POSIX_ACL_XATTR_ACCESS, + int temp; + temp = strncmp(ea_name, POSIX_ACL_XATTR_ACCESS, strlen(POSIX_ACL_XATTR_ACCESS)); if (temp == 0) { #ifdef CONFIG_CIFS_POSIX - if(sb->s_flags & MS_POSIXACL) - rc = CIFSSMBSetPosixACL(xid, pTcon,full_path, - ea_value, (const int)value_size, - ACL_TYPE_ACCESS,cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & + if (sb->s_flags & MS_POSIXACL) + rc = CIFSSMBSetPosixACL(xid, pTcon, full_path, + ea_value, (const int)value_size, + ACL_TYPE_ACCESS, cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); - cFYI(1,("set POSIX ACL rc %d",rc)); + cFYI(1, ("set POSIX ACL rc %d", rc)); #else - cFYI(1,("set POSIX ACL not supported")); + cFYI(1, ("set POSIX ACL not supported")); #endif - } else if(strncmp(ea_name,POSIX_ACL_XATTR_DEFAULT,strlen(POSIX_ACL_XATTR_DEFAULT)) == 0) { + } else if (strncmp(ea_name, POSIX_ACL_XATTR_DEFAULT, + strlen(POSIX_ACL_XATTR_DEFAULT)) == 0) { #ifdef CONFIG_CIFS_POSIX - if(sb->s_flags & MS_POSIXACL) - rc = CIFSSMBSetPosixACL(xid, pTcon,full_path, - ea_value, (const int)value_size, + if (sb->s_flags & MS_POSIXACL) + rc = CIFSSMBSetPosixACL(xid, pTcon, full_path, + ea_value, (const int)value_size, ACL_TYPE_DEFAULT, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & + cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); - cFYI(1,("set POSIX default ACL rc %d",rc)); + cFYI(1, ("set POSIX default ACL rc %d", rc)); #else - cFYI(1,("set default POSIX ACL not supported")); + cFYI(1, ("set default POSIX ACL not supported")); #endif } else { - cFYI(1,("illegal xattr request %s (only user namespace supported)",ea_name)); + cFYI(1, ("illegal xattr request %s (only user namespace" + " supported)", ea_name)); /* BB what if no namespace prefix? */ - /* Should we just pass them to server, except for + /* Should we just pass them to server, except for system and perhaps security prefixes? */ } } @@ -195,23 +199,23 @@ set_ea_exit: return rc; } -ssize_t cifs_getxattr(struct dentry * direntry, const char * ea_name, - void * ea_value, size_t buf_size) +ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name, + void *ea_value, size_t buf_size) { ssize_t rc = -EOPNOTSUPP; #ifdef CONFIG_CIFS_XATTR int xid; struct cifs_sb_info *cifs_sb; struct cifsTconInfo *pTcon; - struct super_block * sb; - char * full_path; + struct super_block *sb; + char *full_path; - if(direntry == NULL) + if (direntry == NULL) return -EIO; - if(direntry->d_inode == NULL) + if (direntry->d_inode == NULL) return -EIO; sb = direntry->d_inode->i_sb; - if(sb == NULL) + if (sb == NULL) return -EIO; xid = GetXid(); @@ -220,42 +224,42 @@ ssize_t cifs_getxattr(struct dentry * direntry, const char * ea_name, pTcon = cifs_sb->tcon; full_path = build_path_from_dentry(direntry); - if(full_path == NULL) { + if (full_path == NULL) { FreeXid(xid); return -ENOMEM; } /* return dos attributes as pseudo xattr */ /* return alt name if available as pseudo attr */ - if(ea_name == NULL) { - cFYI(1,("Null xattr names not supported")); - } else if(strncmp(ea_name,CIFS_XATTR_USER_PREFIX,5) == 0) { - if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) + if (ea_name == NULL) { + cFYI(1, ("Null xattr names not supported")); + } else if (strncmp(ea_name, CIFS_XATTR_USER_PREFIX, 5) == 0) { + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) goto get_ea_exit; - if(strncmp(ea_name,CIFS_XATTR_DOS_ATTRIB,14) == 0) { - cFYI(1,("attempt to query cifs inode metadata")); + if (strncmp(ea_name, CIFS_XATTR_DOS_ATTRIB, 14) == 0) { + cFYI(1, ("attempt to query cifs inode metadata")); /* revalidate/getattr then populate from inode */ } /* BB add else when above is implemented */ ea_name += 5; /* skip past user. prefix */ - rc = CIFSSMBQueryEA(xid,pTcon,full_path,ea_name,ea_value, + rc = CIFSSMBQueryEA(xid, pTcon, full_path, ea_name, ea_value, buf_size, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); - } else if(strncmp(ea_name, CIFS_XATTR_OS2_PREFIX,4) == 0) { - if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) + } else if (strncmp(ea_name, CIFS_XATTR_OS2_PREFIX, 4) == 0) { + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) goto get_ea_exit; ea_name += 4; /* skip past os2. prefix */ - rc = CIFSSMBQueryEA(xid,pTcon,full_path,ea_name,ea_value, + rc = CIFSSMBQueryEA(xid, pTcon, full_path, ea_name, ea_value, buf_size, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); - } else if(strncmp(ea_name,POSIX_ACL_XATTR_ACCESS, + } else if (strncmp(ea_name, POSIX_ACL_XATTR_ACCESS, strlen(POSIX_ACL_XATTR_ACCESS)) == 0) { #ifdef CONFIG_CIFS_POSIX - if(sb->s_flags & MS_POSIXACL) + if (sb->s_flags & MS_POSIXACL) rc = CIFSSMBGetPosixACL(xid, pTcon, full_path, - ea_value, buf_size, ACL_TYPE_ACCESS, + ea_value, buf_size, ACL_TYPE_ACCESS, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & + cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); /* else if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) { __u16 fid; @@ -272,39 +276,40 @@ ssize_t cifs_getxattr(struct dentry * direntry, const char * ea_name, CIFSSMBClose(xid, pTcon, fid); } } */ /* BB enable after fixing up return data */ - -#else - cFYI(1,("query POSIX ACL not supported yet")); +#else + cFYI(1, ("query POSIX ACL not supported yet")); #endif /* CONFIG_CIFS_POSIX */ - } else if(strncmp(ea_name,POSIX_ACL_XATTR_DEFAULT, + } else if (strncmp(ea_name, POSIX_ACL_XATTR_DEFAULT, strlen(POSIX_ACL_XATTR_DEFAULT)) == 0) { #ifdef CONFIG_CIFS_POSIX - if(sb->s_flags & MS_POSIXACL) + if (sb->s_flags & MS_POSIXACL) rc = CIFSSMBGetPosixACL(xid, pTcon, full_path, - ea_value, buf_size, ACL_TYPE_DEFAULT, + ea_value, buf_size, ACL_TYPE_DEFAULT, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & + cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); -#else - cFYI(1,("query POSIX default ACL not supported yet")); +#else + cFYI(1, ("query POSIX default ACL not supported yet")); #endif - } else if(strncmp(ea_name, - CIFS_XATTR_TRUSTED_PREFIX,XATTR_TRUSTED_PREFIX_LEN) == 0) { - cFYI(1,("Trusted xattr namespace not supported yet")); - } else if(strncmp(ea_name, - CIFS_XATTR_SECURITY_PREFIX,XATTR_SECURITY_PREFIX_LEN) == 0) { - cFYI(1,("Security xattr namespace not supported yet")); + } else if (strncmp(ea_name, + CIFS_XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) == 0) { + cFYI(1, ("Trusted xattr namespace not supported yet")); + } else if (strncmp(ea_name, + CIFS_XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) == 0) { + cFYI(1, ("Security xattr namespace not supported yet")); } else { - cFYI(1,("illegal xattr name request %s (only user namespace supported)",ea_name)); + cFYI(1, + ("illegal xattr request %s (only user namespace supported)", + ea_name)); } - /* We could add an additional check for streams ie + /* We could add an additional check for streams ie if proc/fs/cifs/streamstoxattr is set then - search server for EAs or streams to + search server for EAs or streams to returns as xattrs */ - if(rc == -EINVAL) - rc = -EOPNOTSUPP; + if (rc == -EINVAL) + rc = -EOPNOTSUPP; get_ea_exit: kfree(full_path); @@ -313,34 +318,34 @@ get_ea_exit: return rc; } -ssize_t cifs_listxattr(struct dentry * direntry, char * data, size_t buf_size) +ssize_t cifs_listxattr(struct dentry *direntry, char *data, size_t buf_size) { ssize_t rc = -EOPNOTSUPP; #ifdef CONFIG_CIFS_XATTR int xid; struct cifs_sb_info *cifs_sb; struct cifsTconInfo *pTcon; - struct super_block * sb; - char * full_path; + struct super_block *sb; + char *full_path; - if(direntry == NULL) + if (direntry == NULL) return -EIO; - if(direntry->d_inode == NULL) + if (direntry->d_inode == NULL) return -EIO; sb = direntry->d_inode->i_sb; - if(sb == NULL) + if (sb == NULL) return -EIO; cifs_sb = CIFS_SB(sb); pTcon = cifs_sb->tcon; - if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) return -EOPNOTSUPP; xid = GetXid(); full_path = build_path_from_dentry(direntry); - if(full_path == NULL) { + if (full_path == NULL) { FreeXid(xid); return -ENOMEM; } @@ -348,11 +353,11 @@ ssize_t cifs_listxattr(struct dentry * direntry, char * data, size_t buf_size) /* return alt name if available as pseudo attr */ /* if proc/fs/cifs/streamstoxattr is set then - search server for EAs or streams to + search server for EAs or streams to returns as xattrs */ - rc = CIFSSMBQAllEAs(xid,pTcon,full_path,data,buf_size, + rc = CIFSSMBQAllEAs(xid, pTcon, full_path, data, buf_size, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & + cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); kfree(full_path); diff --git a/fs/coda/cache.c b/fs/coda/cache.c index fcb88fa8d2f2..8a2370341c7a 100644 --- a/fs/coda/cache.c +++ b/fs/coda/cache.c @@ -43,17 +43,12 @@ void coda_cache_enter(struct inode *inode, int mask) void coda_cache_clear_inode(struct inode *inode) { struct coda_inode_info *cii = ITOC(inode); - cii->c_cached_perm = 0; + cii->c_cached_epoch = atomic_read(&permission_epoch) - 1; } /* remove all acl caches */ void coda_cache_clear_all(struct super_block *sb) { - struct coda_sb_info *sbi; - - sbi = coda_sbp(sb); - BUG_ON(!sbi); - atomic_inc(&permission_epoch); } diff --git a/fs/coda/cnode.c b/fs/coda/cnode.c index 28c872747f81..a7a780929eec 100644 --- a/fs/coda/cnode.c +++ b/fs/coda/cnode.c @@ -55,11 +55,6 @@ static int coda_set_inode(struct inode *inode, void *data) return 0; } -static int coda_fail_inode(struct inode *inode, void *data) -{ - return -1; -} - struct inode * coda_iget(struct super_block * sb, struct CodaFid * fid, struct coda_vattr * attr) { @@ -141,7 +136,7 @@ struct inode *coda_fid_to_inode(struct CodaFid *fid, struct super_block *sb) return NULL; } - inode = iget5_locked(sb, hash, coda_test_inode, coda_fail_inode, fid); + inode = ilookup5(sb, hash, coda_test_inode, fid); if ( !inode ) return NULL; diff --git a/fs/coda/coda_int.h b/fs/coda/coda_int.h index 9e6338fea514..8ccd5ed81d9c 100644 --- a/fs/coda/coda_int.h +++ b/fs/coda/coda_int.h @@ -1,12 +1,19 @@ #ifndef _CODA_INT_ #define _CODA_INT_ +struct dentry; + extern struct file_system_type coda_fs_type; +extern unsigned long coda_timeout; +extern int coda_hard; +extern int coda_fake_statfs; void coda_destroy_inodecache(void); int coda_init_inodecache(void); int coda_fsync(struct file *coda_file, struct dentry *coda_dentry, int datasync); +void coda_sysctl_init(void); +void coda_sysctl_clean(void); #endif /* _CODA_INT_ */ diff --git a/fs/coda/dir.c b/fs/coda/dir.c index 898a86dde8f5..04a3dd84c993 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -25,7 +25,6 @@ #include <linux/coda_psdev.h> #include <linux/coda_fs_i.h> #include <linux/coda_cache.h> -#include <linux/coda_proc.h> #include "coda_int.h" @@ -43,15 +42,15 @@ static int coda_rename(struct inode *old_inode, struct dentry *old_dentry, struct inode *new_inode, struct dentry *new_dentry); /* dir file-ops */ -static int coda_readdir(struct file *file, void *dirent, filldir_t filldir); +static int coda_readdir(struct file *file, void *buf, filldir_t filldir); /* dentry ops */ static int coda_dentry_revalidate(struct dentry *de, struct nameidata *nd); static int coda_dentry_delete(struct dentry *); /* support routines */ -static int coda_venus_readdir(struct file *filp, filldir_t filldir, - void *dirent, struct dentry *dir); +static int coda_venus_readdir(struct file *coda_file, void *buf, + filldir_t filldir); /* same as fs/bad_inode.c */ static int coda_return_EIO(void) @@ -97,58 +96,45 @@ const struct file_operations coda_dir_operations = { /* access routines: lookup, readlink, permission */ static struct dentry *coda_lookup(struct inode *dir, struct dentry *entry, struct nameidata *nd) { - struct inode *res_inode = NULL; + struct inode *inode = NULL; struct CodaFid resfid = { { 0, } }; - int dropme = 0; /* to indicate entry should not be cached */ int type = 0; int error = 0; const char *name = entry->d_name.name; size_t length = entry->d_name.len; - - if ( length > CODA_MAXNAMLEN ) { - printk("name too long: lookup, %s (%*s)\n", + + if (length > CODA_MAXNAMLEN) { + printk(KERN_ERR "name too long: lookup, %s (%*s)\n", coda_i2s(dir), (int)length, name); return ERR_PTR(-ENAMETOOLONG); } + /* control object, create inode on the fly */ + if (coda_isroot(dir) && coda_iscontrol(name, length)) { + error = coda_cnode_makectl(&inode, dir->i_sb); + type = CODA_NOCACHE; + goto exit; + } + lock_kernel(); - /* control object, create inode on the fly */ - if (coda_isroot(dir) && coda_iscontrol(name, length)) { - error = coda_cnode_makectl(&res_inode, dir->i_sb); - dropme = 1; - goto exit; - } - error = venus_lookup(dir->i_sb, coda_i2f(dir), - (const char *)name, length, &type, &resfid); + error = venus_lookup(dir->i_sb, coda_i2f(dir), name, length, + &type, &resfid); + if (!error) + error = coda_cnode_make(&inode, &resfid, dir->i_sb); - res_inode = NULL; - if (!error) { - if (type & CODA_NOCACHE) { - type &= (~CODA_NOCACHE); - dropme = 1; - } + unlock_kernel(); - error = coda_cnode_make(&res_inode, &resfid, dir->i_sb); - if (error) { - unlock_kernel(); - return ERR_PTR(error); - } - } else if (error != -ENOENT) { - unlock_kernel(); + if (error && error != -ENOENT) return ERR_PTR(error); - } exit: - entry->d_time = 0; entry->d_op = &coda_dentry_operations; - d_add(entry, res_inode); - if ( dropme ) { - d_drop(entry); - coda_flag_inode(res_inode, C_VATTR); - } - unlock_kernel(); - return NULL; + + if (inode && (type & CODA_NOCACHE)) + coda_flag_inode(inode, C_VATTR | C_PURGE); + + return d_splice_alias(inode, entry); } @@ -161,8 +147,6 @@ int coda_permission(struct inode *inode, int mask, struct nameidata *nd) lock_kernel(); - coda_vfs_stat.permission++; - if (coda_cache_check(inode, mask)) goto out; @@ -173,12 +157,11 @@ int coda_permission(struct inode *inode, int mask, struct nameidata *nd) out: unlock_kernel(); - - return error; + return error; } -static inline void coda_dir_changed(struct inode *dir, int link) +static inline void coda_dir_update_mtime(struct inode *dir) { #ifdef REQUERY_VENUS_FOR_MTIME /* invalidate the directory cnode's attributes so we refetch the @@ -186,12 +169,27 @@ static inline void coda_dir_changed(struct inode *dir, int link) coda_flag_inode(dir, C_VATTR); #else /* optimistically we can also act as if our nose bleeds. The - * granularity of the mtime is coarse anyways so we might actually be - * right most of the time. Note: we only do this for directories. */ + * granularity of the mtime is coarse anyways so we might actually be + * right most of the time. Note: we only do this for directories. */ dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; #endif - if (link) - dir->i_nlink += link; +} + +/* we have to wrap inc_nlink/drop_nlink because sometimes userspace uses a + * trick to fool GNU find's optimizations. If we can't be sure of the link + * (because of volume mount points) we set i_nlink to 1 which forces find + * to consider every child as a possible directory. We should also never + * see an increment or decrement for deleted directories where i_nlink == 0 */ +static inline void coda_dir_inc_nlink(struct inode *dir) +{ + if (dir->i_nlink >= 2) + inc_nlink(dir); +} + +static inline void coda_dir_drop_nlink(struct inode *dir) +{ + if (dir->i_nlink > 2) + drop_nlink(dir); } /* creation routines: create, mknod, mkdir, link, symlink */ @@ -205,7 +203,6 @@ static int coda_create(struct inode *dir, struct dentry *de, int mode, struct na struct coda_vattr attrs; lock_kernel(); - coda_vfs_stat.create++; if (coda_isroot(dir) && coda_iscontrol(name, length)) { unlock_kernel(); @@ -229,10 +226,10 @@ static int coda_create(struct inode *dir, struct dentry *de, int mode, struct na } /* invalidate the directory cnode's attributes */ - coda_dir_changed(dir, 0); + coda_dir_update_mtime(dir); unlock_kernel(); d_instantiate(de, inode); - return 0; + return 0; } static int coda_mkdir(struct inode *dir, struct dentry *de, int mode) @@ -245,7 +242,6 @@ static int coda_mkdir(struct inode *dir, struct dentry *de, int mode) struct CodaFid newfid; lock_kernel(); - coda_vfs_stat.mkdir++; if (coda_isroot(dir) && coda_iscontrol(name, len)) { unlock_kernel(); @@ -268,12 +264,13 @@ static int coda_mkdir(struct inode *dir, struct dentry *de, int mode) d_drop(de); return PTR_ERR(inode); } - + /* invalidate the directory cnode's attributes */ - coda_dir_changed(dir, 1); + coda_dir_inc_nlink(dir); + coda_dir_update_mtime(dir); unlock_kernel(); d_instantiate(de, inode); - return 0; + return 0; } /* try to make de an entry in dir_inodde linked to source_de */ @@ -286,7 +283,6 @@ static int coda_link(struct dentry *source_de, struct inode *dir_inode, int error; lock_kernel(); - coda_vfs_stat.link++; if (coda_isroot(dir_inode) && coda_iscontrol(name, len)) { unlock_kernel(); @@ -296,16 +292,16 @@ static int coda_link(struct dentry *source_de, struct inode *dir_inode, error = venus_link(dir_inode->i_sb, coda_i2f(inode), coda_i2f(dir_inode), (const char *)name, len); - if (error) { + if (error) { d_drop(de); goto out; } - coda_dir_changed(dir_inode, 0); + coda_dir_update_mtime(dir_inode); atomic_inc(&inode->i_count); d_instantiate(de, inode); inc_nlink(inode); - + out: unlock_kernel(); return(error); @@ -318,10 +314,9 @@ static int coda_symlink(struct inode *dir_inode, struct dentry *de, const char *name = de->d_name.name; int len = de->d_name.len; int symlen; - int error=0; - + int error = 0; + lock_kernel(); - coda_vfs_stat.symlink++; if (coda_isroot(dir_inode) && coda_iscontrol(name, len)) { unlock_kernel(); @@ -336,18 +331,18 @@ static int coda_symlink(struct inode *dir_inode, struct dentry *de, /* * This entry is now negative. Since we do not create - * an inode for the entry we have to drop it. + * an inode for the entry we have to drop it. */ d_drop(de); - error = venus_symlink(dir_inode->i_sb, coda_i2f(dir_inode), name, len, + error = venus_symlink(dir_inode->i_sb, coda_i2f(dir_inode), name, len, symname, symlen); /* mtime is no good anymore */ if ( !error ) - coda_dir_changed(dir_inode, 0); + coda_dir_update_mtime(dir_inode); unlock_kernel(); - return error; + return error; } /* destruction routines: unlink, rmdir */ @@ -358,79 +353,70 @@ int coda_unlink(struct inode *dir, struct dentry *de) int len = de->d_name.len; lock_kernel(); - coda_vfs_stat.unlink++; - error = venus_remove(dir->i_sb, coda_i2f(dir), name, len); - if ( error ) { + error = venus_remove(dir->i_sb, coda_i2f(dir), name, len); + if ( error ) { unlock_kernel(); - return error; - } + return error; + } - coda_dir_changed(dir, 0); + coda_dir_update_mtime(dir); drop_nlink(de->d_inode); unlock_kernel(); - - return 0; + return 0; } int coda_rmdir(struct inode *dir, struct dentry *de) { const char *name = de->d_name.name; int len = de->d_name.len; - int error; + int error; lock_kernel(); - coda_vfs_stat.rmdir++; - if (!d_unhashed(de)) { - unlock_kernel(); - return -EBUSY; - } error = venus_rmdir(dir->i_sb, coda_i2f(dir), name, len); + if (!error) { + /* VFS may delete the child */ + if (de->d_inode) + de->d_inode->i_nlink = 0; - if ( error ) { - unlock_kernel(); - return error; - } - - coda_dir_changed(dir, -1); - drop_nlink(de->d_inode); - d_delete(de); + /* fix the link count of the parent */ + coda_dir_drop_nlink(dir); + coda_dir_update_mtime(dir); + } unlock_kernel(); - - return 0; + return error; } /* rename */ -static int coda_rename(struct inode *old_dir, struct dentry *old_dentry, +static int coda_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry) { - const char *old_name = old_dentry->d_name.name; - const char *new_name = new_dentry->d_name.name; + const char *old_name = old_dentry->d_name.name; + const char *new_name = new_dentry->d_name.name; int old_length = old_dentry->d_name.len; int new_length = new_dentry->d_name.len; - int link_adjust = 0; - int error; + int error; lock_kernel(); - coda_vfs_stat.rename++; - error = venus_rename(old_dir->i_sb, coda_i2f(old_dir), - coda_i2f(new_dir), old_length, new_length, + error = venus_rename(old_dir->i_sb, coda_i2f(old_dir), + coda_i2f(new_dir), old_length, new_length, (const char *) old_name, (const char *)new_name); - if ( !error ) { + if ( !error ) { if ( new_dentry->d_inode ) { - if ( S_ISDIR(new_dentry->d_inode->i_mode) ) - link_adjust = 1; - - coda_dir_changed(old_dir, -link_adjust); - coda_dir_changed(new_dir, link_adjust); + if ( S_ISDIR(new_dentry->d_inode->i_mode) ) { + coda_dir_drop_nlink(old_dir); + coda_dir_inc_nlink(new_dir); + } + coda_dir_update_mtime(old_dir); + coda_dir_update_mtime(new_dir); coda_flag_inode(new_dentry->d_inode, C_VATTR); } else { coda_flag_inode(old_dir, C_VATTR); coda_flag_inode(new_dir, C_VATTR); - } + } } unlock_kernel(); @@ -439,44 +425,41 @@ static int coda_rename(struct inode *old_dir, struct dentry *old_dentry, /* file operations for directories */ -int coda_readdir(struct file *coda_file, void *dirent, filldir_t filldir) +int coda_readdir(struct file *coda_file, void *buf, filldir_t filldir) { - struct dentry *coda_dentry = coda_file->f_path.dentry; struct coda_file_info *cfi; struct file *host_file; - struct inode *host_inode; int ret; cfi = CODA_FTOC(coda_file); BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC); host_file = cfi->cfi_container; - coda_vfs_stat.readdir++; + if (!host_file->f_op) + return -ENOTDIR; - host_inode = host_file->f_path.dentry->d_inode; - mutex_lock(&host_inode->i_mutex); - host_file->f_pos = coda_file->f_pos; + if (host_file->f_op->readdir) + { + /* potemkin case: we were handed a directory inode. + * We can't use vfs_readdir because we have to keep the file + * position in sync between the coda_file and the host_file. + * and as such we need grab the inode mutex. */ + struct inode *host_inode = host_file->f_path.dentry->d_inode; - if (!host_file->f_op->readdir) { - /* Venus: we must read Venus dirents from the file */ - ret = coda_venus_readdir(host_file, filldir, dirent, coda_dentry); - } else { - /* potemkin case: we were handed a directory inode. */ - /* Yuk, we can't call vfs_readdir because we are already - * holding the inode semaphore. */ - ret = -ENOTDIR; - if (!host_file->f_op || !host_file->f_op->readdir) - goto out; + mutex_lock(&host_inode->i_mutex); + host_file->f_pos = coda_file->f_pos; ret = -ENOENT; if (!IS_DEADDIR(host_inode)) { - ret = host_file->f_op->readdir(host_file, dirent, filldir); + ret = host_file->f_op->readdir(host_file, buf, filldir); file_accessed(host_file); } + + coda_file->f_pos = host_file->f_pos; + mutex_unlock(&host_inode->i_mutex); } -out: - coda_file->f_pos = host_file->f_pos; - mutex_unlock(&host_inode->i_mutex); + else /* Venus: we must read Venus dirents from a file */ + ret = coda_venus_readdir(coda_file, buf, filldir); return ret; } @@ -501,57 +484,68 @@ static inline unsigned int CDT2DT(unsigned char cdt) } /* support routines */ -static int coda_venus_readdir(struct file *filp, filldir_t filldir, - void *dirent, struct dentry *dir) +static int coda_venus_readdir(struct file *coda_file, void *buf, + filldir_t filldir) { int result = 0; /* # of entries returned */ + struct coda_file_info *cfi; + struct coda_inode_info *cii; + struct file *host_file; + struct dentry *de; struct venus_dirent *vdir; unsigned long vdir_size = (unsigned long)(&((struct venus_dirent *)0)->d_name); unsigned int type; struct qstr name; ino_t ino; - int ret, i; + int ret; + + cfi = CODA_FTOC(coda_file); + BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC); + host_file = cfi->cfi_container; + + de = coda_file->f_path.dentry; + cii = ITOC(de->d_inode); vdir = kmalloc(sizeof(*vdir), GFP_KERNEL); if (!vdir) return -ENOMEM; - i = filp->f_pos; - switch(i) { + switch (coda_file->f_pos) { case 0: - ret = filldir(dirent, ".", 1, 0, dir->d_inode->i_ino, DT_DIR); + ret = filldir(buf, ".", 1, 0, de->d_inode->i_ino, DT_DIR); if (ret < 0) break; result++; - filp->f_pos++; + coda_file->f_pos++; /* fallthrough */ case 1: - ret = filldir(dirent, "..", 2, 1, dir->d_parent->d_inode->i_ino, DT_DIR); + ret = filldir(buf, "..", 2, 1, de->d_parent->d_inode->i_ino, DT_DIR); if (ret < 0) break; result++; - filp->f_pos++; + coda_file->f_pos++; /* fallthrough */ default: while (1) { /* read entries from the directory file */ - ret = kernel_read(filp, filp->f_pos - 2, (char *)vdir, + ret = kernel_read(host_file, coda_file->f_pos - 2, (char *)vdir, sizeof(*vdir)); if (ret < 0) { - printk("coda_venus_readdir: read dir failed %d\n", ret); + printk(KERN_ERR "coda readdir: read dir %s failed %d\n", + coda_f2s(&cii->c_fid), ret); break; } if (ret == 0) break; /* end of directory file reached */ /* catch truncated reads */ if (ret < vdir_size || ret < vdir_size + vdir->d_namlen) { - printk("coda_venus_readdir: short read: %ld\n", - filp->f_path.dentry->d_inode->i_ino); + printk(KERN_ERR "coda readdir: short read on %s\n", + coda_f2s(&cii->c_fid)); ret = -EBADF; break; } /* validate whether the directory file actually makes sense */ if (vdir->d_reclen < vdir_size + vdir->d_namlen) { - printk("coda_venus_readdir: Invalid dir: %ld\n", - filp->f_path.dentry->d_inode->i_ino); + printk(KERN_ERR "coda readdir: invalid dir %s\n", + coda_f2s(&cii->c_fid)); ret = -EBADF; break; } @@ -570,21 +564,21 @@ static int coda_venus_readdir(struct file *filp, filldir_t filldir, * userspace doesn't have to worry about breaking * getcwd by having mismatched inode numbers for * internal volume mountpoints. */ - ino = find_inode_number(dir, &name); + ino = find_inode_number(de, &name); if (!ino) ino = vdir->d_fileno; type = CDT2DT(vdir->d_type); - ret = filldir(dirent, name.name, name.len, filp->f_pos, - ino, type); + ret = filldir(buf, name.name, name.len, + coda_file->f_pos, ino, type); /* failure means no space for filling in this round */ if (ret < 0) break; result++; } /* we'll always have progress because d_reclen is unsigned and * we've already established it is non-zero. */ - filp->f_pos += vdir->d_reclen; + coda_file->f_pos += vdir->d_reclen; + } } - } kfree(vdir); return result ? result : ret; } diff --git a/fs/coda/file.c b/fs/coda/file.c index 99dbe866816d..7594962604c2 100644 --- a/fs/coda/file.c +++ b/fs/coda/file.c @@ -22,7 +22,6 @@ #include <linux/coda_linux.h> #include <linux/coda_fs_i.h> #include <linux/coda_psdev.h> -#include <linux/coda_proc.h> #include "coda_int.h" @@ -134,8 +133,6 @@ int coda_open(struct inode *coda_inode, struct file *coda_file) unsigned short coda_flags = coda_flags_to_cflags(flags); struct coda_file_info *cfi; - coda_vfs_stat.open++; - cfi = kmalloc(sizeof(struct coda_file_info), GFP_KERNEL); if (!cfi) return -ENOMEM; @@ -143,8 +140,11 @@ int coda_open(struct inode *coda_inode, struct file *coda_file) lock_kernel(); error = venus_open(coda_inode->i_sb, coda_i2f(coda_inode), coda_flags, - &host_file); - if (error || !host_file) { + &host_file); + if (!host_file) + error = -EIO; + + if (error) { kfree(cfi); unlock_kernel(); return error; @@ -173,8 +173,6 @@ int coda_flush(struct file *coda_file, fl_owner_t id) lock_kernel(); - coda_vfs_stat.flush++; - /* last close semantics */ fcnt = file_count(coda_file); if (fcnt > 1) @@ -216,8 +214,7 @@ int coda_release(struct inode *coda_inode, struct file *coda_file) int err = 0; lock_kernel(); - coda_vfs_stat.release++; - + if (!use_coda_close) { err = venus_release(coda_inode->i_sb, coda_i2f(coda_inode), coda_flags); @@ -268,8 +265,6 @@ int coda_fsync(struct file *coda_file, struct dentry *coda_dentry, int datasync) BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC); host_file = cfi->cfi_container; - coda_vfs_stat.fsync++; - if (host_file->f_op && host_file->f_op->fsync) { host_dentry = host_file->f_path.dentry; host_inode = host_dentry->d_inode; diff --git a/fs/coda/inode.c b/fs/coda/inode.c index dbff1bd4fb96..6771a4271e33 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c @@ -83,7 +83,7 @@ void coda_destroy_inodecache(void) static int coda_remount(struct super_block *sb, int *flags, char *data) { - *flags |= MS_NODIRATIME; + *flags |= MS_NOATIME; return 0; } @@ -141,11 +141,10 @@ static int get_device_index(struct coda_mount_data *data) static int coda_fill_super(struct super_block *sb, void *data, int silent) { - struct inode *root = NULL; - struct coda_sb_info *sbi = NULL; + struct inode *root = NULL; struct venus_comm *vc = NULL; struct CodaFid fid; - int error; + int error; int idx; idx = get_device_index((struct coda_mount_data *) data); @@ -167,21 +166,14 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent) return -EBUSY; } - sbi = kmalloc(sizeof(struct coda_sb_info), GFP_KERNEL); - if(!sbi) { - return -ENOMEM; - } - vc->vc_sb = sb; - sbi->sbi_vcomm = vc; - - sb->s_fs_info = sbi; - sb->s_flags |= MS_NODIRATIME; /* probably even noatime */ - sb->s_blocksize = 1024; /* XXXXX what do we put here?? */ - sb->s_blocksize_bits = 10; - sb->s_magic = CODA_SUPER_MAGIC; - sb->s_op = &coda_super_operations; + sb->s_fs_info = vc; + sb->s_flags |= MS_NOATIME; + sb->s_blocksize = 4096; /* XXXXX what do we put here?? */ + sb->s_blocksize_bits = 12; + sb->s_magic = CODA_SUPER_MAGIC; + sb->s_op = &coda_super_operations; /* get root fid from Venus: this needs the root inode */ error = venus_rootfid(sb, &fid); @@ -207,26 +199,20 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent) return 0; error: - if (sbi) { - kfree(sbi); - if(vc) - vc->vc_sb = NULL; - } if (root) - iput(root); + iput(root); + if (vc) + vc->vc_sb = NULL; - return -EINVAL; + return -EINVAL; } static void coda_put_super(struct super_block *sb) { - struct coda_sb_info *sbi; - - sbi = coda_sbp(sb); - sbi->sbi_vcomm->vc_sb = NULL; + coda_vcp(sb)->vc_sb = NULL; + sb->s_fs_info = NULL; printk("Coda: Bye bye.\n"); - kfree(sbi); } static void coda_clear_inode(struct inode *inode) @@ -296,7 +282,7 @@ static int coda_statfs(struct dentry *dentry, struct kstatfs *buf) /* and fill in the rest */ buf->f_type = CODA_SUPER_MAGIC; - buf->f_bsize = 1024; + buf->f_bsize = 4096; buf->f_namelen = CODA_MAXNAMLEN; return 0; diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c index 803aacf0d49c..dcc6aead70f5 100644 --- a/fs/coda/psdev.c +++ b/fs/coda/psdev.c @@ -45,12 +45,9 @@ #include <linux/coda_linux.h> #include <linux/coda_fs_i.h> #include <linux/coda_psdev.h> -#include <linux/coda_proc.h> #include "coda_int.h" -#define upc_free(r) kfree(r) - /* statistics */ int coda_hard; /* allows signals during upcalls */ unsigned long coda_timeout = 30; /* .. secs, then signals will dequeue */ @@ -195,7 +192,8 @@ static ssize_t coda_psdev_write(struct file *file, const char __user *buf, if (req->uc_opcode == CODA_OPEN_BY_FD) { struct coda_open_by_fd_out *outp = (struct coda_open_by_fd_out *)req->uc_data; - outp->fh = fget(outp->fd); + if (!outp->oh.result) + outp->fh = fget(outp->fd); } wake_up(&req->uc_sleep); @@ -263,7 +261,7 @@ static ssize_t coda_psdev_read(struct file * file, char __user * buf, } CODA_FREE(req->uc_data, sizeof(struct coda_in_hdr)); - upc_free(req); + kfree(req); out: unlock_kernel(); return (count ? count : retval); @@ -271,71 +269,70 @@ out: static int coda_psdev_open(struct inode * inode, struct file * file) { - struct venus_comm *vcp; - int idx; + struct venus_comm *vcp; + int idx, err; - lock_kernel(); idx = iminor(inode); - if(idx >= MAX_CODADEVS) { - unlock_kernel(); + if (idx < 0 || idx >= MAX_CODADEVS) return -ENODEV; - } + lock_kernel(); + + err = -EBUSY; vcp = &coda_comms[idx]; - if(vcp->vc_inuse) { - unlock_kernel(); - return -EBUSY; - } - - if (!vcp->vc_inuse++) { + if (!vcp->vc_inuse) { + vcp->vc_inuse++; + INIT_LIST_HEAD(&vcp->vc_pending); INIT_LIST_HEAD(&vcp->vc_processing); init_waitqueue_head(&vcp->vc_waitq); vcp->vc_sb = NULL; vcp->vc_seq = 0; + + file->private_data = vcp; + err = 0; } - - file->private_data = vcp; unlock_kernel(); - return 0; + return err; } static int coda_psdev_release(struct inode * inode, struct file * file) { - struct venus_comm *vcp = (struct venus_comm *) file->private_data; - struct upc_req *req, *tmp; + struct venus_comm *vcp = (struct venus_comm *) file->private_data; + struct upc_req *req, *tmp; - lock_kernel(); - if ( !vcp->vc_inuse ) { - unlock_kernel(); + if (!vcp || !vcp->vc_inuse ) { printk("psdev_release: Not open.\n"); return -1; } - if (--vcp->vc_inuse) { - unlock_kernel(); - return 0; - } - - /* Wakeup clients so they can return. */ + lock_kernel(); + + /* Wakeup clients so they can return. */ list_for_each_entry_safe(req, tmp, &vcp->vc_pending, uc_chain) { + list_del(&req->uc_chain); + /* Async requests need to be freed here */ if (req->uc_flags & REQ_ASYNC) { CODA_FREE(req->uc_data, sizeof(struct coda_in_hdr)); - upc_free(req); + kfree(req); continue; } req->uc_flags |= REQ_ABORT; wake_up(&req->uc_sleep); - } - - list_for_each_entry(req, &vcp->vc_processing, uc_chain) { + } + + list_for_each_entry_safe(req, tmp, &vcp->vc_processing, uc_chain) { + list_del(&req->uc_chain); + req->uc_flags |= REQ_ABORT; - wake_up(&req->uc_sleep); - } + wake_up(&req->uc_sleep); + } + file->private_data = NULL; + vcp->vc_inuse--; unlock_kernel(); return 0; } @@ -376,21 +373,20 @@ out: return err; } - -MODULE_AUTHOR("Peter J. Braam <braam@cs.cmu.edu>"); +MODULE_AUTHOR("Jan Harkes, Peter J. Braam"); +MODULE_DESCRIPTION("Coda Distributed File System VFS interface"); +MODULE_ALIAS_CHARDEV_MAJOR(CODA_PSDEV_MAJOR); MODULE_LICENSE("GPL"); +#ifdef CONFIG_CODA_FS_OLD_API +MODULE_VERSION("5.3.21"); +#else +MODULE_VERSION("6.6"); +#endif static int __init init_coda(void) { int status; int i; - printk(KERN_INFO "Coda Kernel/Venus communications, " -#ifdef CONFIG_CODA_FS_OLD_API - "v5.3.20" -#else - "v6.0.0" -#endif - ", coda@cs.cmu.edu\n"); status = coda_init_inodecache(); if (status) diff --git a/fs/coda/symlink.c b/fs/coda/symlink.c index 76e00a65a75b..4513b7258458 100644 --- a/fs/coda/symlink.c +++ b/fs/coda/symlink.c @@ -20,7 +20,6 @@ #include <linux/coda_linux.h> #include <linux/coda_psdev.h> #include <linux/coda_fs_i.h> -#include <linux/coda_proc.h> static int coda_symlink_filler(struct file *file, struct page *page) { @@ -32,7 +31,6 @@ static int coda_symlink_filler(struct file *file, struct page *page) lock_kernel(); cii = ITOC(inode); - coda_vfs_stat.follow_link++; error = venus_readlink(inode->i_sb, &cii->c_fid, p, &len); unlock_kernel(); diff --git a/fs/coda/sysctl.c b/fs/coda/sysctl.c index c57a1fa7cf23..81b7771c6465 100644 --- a/fs/coda/sysctl.c +++ b/fs/coda/sysctl.c @@ -5,181 +5,14 @@ * * Carnegie Mellon encourages users to contribute improvements to * the Coda project. Contact Peter Braam (coda@cs.cmu.edu). - * - * CODA operation statistics - * (c) March, 1998 Zhanyong Wan <zhanyong.wan@yale.edu> - * */ -#include <linux/time.h> -#include <linux/mm.h> #include <linux/sysctl.h> -#include <linux/proc_fs.h> -#include <linux/seq_file.h> -#include <linux/slab.h> -#include <linux/stat.h> -#include <linux/ctype.h> -#include <linux/bitops.h> -#include <asm/uaccess.h> -#include <linux/utsname.h> -#include <linux/module.h> -#include <linux/coda.h> -#include <linux/coda_linux.h> -#include <linux/coda_fs_i.h> -#include <linux/coda_psdev.h> -#include <linux/coda_cache.h> -#include <linux/coda_proc.h> +#include "coda_int.h" static struct ctl_table_header *fs_table_header; -#define CODA_TIMEOUT 3 /* timeout on upcalls to become intrble */ -#define CODA_HARD 5 /* mount type "hard" or "soft" */ -#define CODA_VFS 6 /* vfs statistics */ -#define CODA_CACHE_INV 9 /* cache invalidation statistics */ -#define CODA_FAKE_STATFS 10 /* don't query venus for actual cache usage */ - -struct coda_vfs_stats coda_vfs_stat; -static struct coda_cache_inv_stats coda_cache_inv_stat; - -static void reset_coda_vfs_stats( void ) -{ - memset( &coda_vfs_stat, 0, sizeof( coda_vfs_stat ) ); -} - -static void reset_coda_cache_inv_stats( void ) -{ - memset( &coda_cache_inv_stat, 0, sizeof( coda_cache_inv_stat ) ); -} - -static int do_reset_coda_vfs_stats( ctl_table * table, int write, - struct file * filp, void __user * buffer, - size_t * lenp, loff_t * ppos ) -{ - if ( write ) { - reset_coda_vfs_stats(); - - *ppos += *lenp; - } else { - *lenp = 0; - } - - return 0; -} - -static int do_reset_coda_cache_inv_stats( ctl_table * table, int write, - struct file * filp, - void __user * buffer, - size_t * lenp, loff_t * ppos ) -{ - if ( write ) { - reset_coda_cache_inv_stats(); - - *ppos += *lenp; - } else { - *lenp = 0; - } - - return 0; -} - -static int proc_vfs_stats_show(struct seq_file *m, void *v) -{ - struct coda_vfs_stats * ps = & coda_vfs_stat; - - seq_printf(m, - "Coda VFS statistics\n" - "===================\n\n" - "File Operations:\n" - "\topen\t\t%9d\n" - "\tflush\t\t%9d\n" - "\trelease\t\t%9d\n" - "\tfsync\t\t%9d\n\n" - "Dir Operations:\n" - "\treaddir\t\t%9d\n\n" - "Inode Operations\n" - "\tcreate\t\t%9d\n" - "\tlookup\t\t%9d\n" - "\tlink\t\t%9d\n" - "\tunlink\t\t%9d\n" - "\tsymlink\t\t%9d\n" - "\tmkdir\t\t%9d\n" - "\trmdir\t\t%9d\n" - "\trename\t\t%9d\n" - "\tpermission\t%9d\n", - - /* file operations */ - ps->open, - ps->flush, - ps->release, - ps->fsync, - - /* dir operations */ - ps->readdir, - - /* inode operations */ - ps->create, - ps->lookup, - ps->link, - ps->unlink, - ps->symlink, - ps->mkdir, - ps->rmdir, - ps->rename, - ps->permission); - return 0; -} - -static int proc_cache_inv_stats_show(struct seq_file *m, void *v) -{ - struct coda_cache_inv_stats * ps = & coda_cache_inv_stat; - - seq_printf(m, - "Coda cache invalidation statistics\n" - "==================================\n\n" - "flush\t\t%9d\n" - "purge user\t%9d\n" - "zap_dir\t\t%9d\n" - "zap_file\t%9d\n" - "zap_vnode\t%9d\n" - "purge_fid\t%9d\n" - "replace\t\t%9d\n", - ps->flush, - ps->purge_user, - ps->zap_dir, - ps->zap_file, - ps->zap_vnode, - ps->purge_fid, - ps->replace ); - return 0; -} - -static int proc_vfs_stats_open(struct inode *inode, struct file *file) -{ - return single_open(file, proc_vfs_stats_show, NULL); -} - -static int proc_cache_inv_stats_open(struct inode *inode, struct file *file) -{ - return single_open(file, proc_cache_inv_stats_show, NULL); -} - -static const struct file_operations proc_vfs_stats_fops = { - .owner = THIS_MODULE, - .open = proc_vfs_stats_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static const struct file_operations proc_cache_inv_stats_fops = { - .owner = THIS_MODULE, - .open = proc_cache_inv_stats_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static ctl_table coda_table[] = { { .ctl_name = CTL_UNNUMBERED, @@ -199,22 +32,6 @@ static ctl_table coda_table[] = { }, { .ctl_name = CTL_UNNUMBERED, - .procname = "vfs_stats", - .data = NULL, - .maxlen = 0, - .mode = 0644, - .proc_handler = &do_reset_coda_vfs_stats - }, - { - .ctl_name = CTL_UNNUMBERED, - .procname = "cache_inv_stats", - .data = NULL, - .maxlen = 0, - .mode = 0644, - .proc_handler = &do_reset_coda_cache_inv_stats - }, - { - .ctl_name = CTL_UNNUMBERED, .procname = "fake_statfs", .data = &coda_fake_statfs, .maxlen = sizeof(int), @@ -235,59 +52,20 @@ static ctl_table fs_table[] = { }; -#ifdef CONFIG_PROC_FS - -/* - target directory structure: - /proc/fs (see linux/fs/proc/root.c) - /proc/fs/coda - /proc/fs/coda/{vfs_stats, - -*/ - -static struct proc_dir_entry* proc_fs_coda; - -#endif - void coda_sysctl_init(void) { - reset_coda_vfs_stats(); - reset_coda_cache_inv_stats(); - -#ifdef CONFIG_PROC_FS - proc_fs_coda = proc_mkdir("coda", proc_root_fs); - if (proc_fs_coda) { - struct proc_dir_entry *pde; - - proc_fs_coda->owner = THIS_MODULE; - pde = create_proc_entry("vfs_stats", 0, proc_fs_coda); - if (pde) - pde->proc_fops = &proc_vfs_stats_fops; - pde = create_proc_entry("cache_inv_stats", 0, proc_fs_coda); - if (pde) - pde->proc_fops = &proc_cache_inv_stats_fops; - } -#endif - #ifdef CONFIG_SYSCTL if ( !fs_table_header ) fs_table_header = register_sysctl_table(fs_table); -#endif +#endif } -void coda_sysctl_clean(void) +void coda_sysctl_clean(void) { - #ifdef CONFIG_SYSCTL if ( fs_table_header ) { unregister_sysctl_table(fs_table_header); fs_table_header = NULL; } #endif - -#ifdef CONFIG_PROC_FS - remove_proc_entry("cache_inv_stats", proc_fs_coda); - remove_proc_entry("vfs_stats", proc_fs_coda); - remove_proc_entry("coda", proc_root_fs); -#endif } diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c index 5faacdb1a479..e4e766e5557c 100644 --- a/fs/coda/upcall.c +++ b/fs/coda/upcall.c @@ -35,12 +35,10 @@ #include <linux/coda_psdev.h> #include <linux/coda_fs_i.h> #include <linux/coda_cache.h> -#include <linux/coda_proc.h> -#define upc_alloc() kmalloc(sizeof(struct upc_req), GFP_KERNEL) -#define upc_free(r) kfree(r) +#include "coda_int.h" -static int coda_upcall(struct coda_sb_info *mntinfo, int inSize, int *outSize, +static int coda_upcall(struct venus_comm *vc, int inSize, int *outSize, union inputArgs *buffer); static void *alloc_upcall(int opcode, int size) @@ -86,13 +84,9 @@ int venus_rootfid(struct super_block *sb, struct CodaFid *fidp) insize = SIZE(root); UPARG(CODA_ROOT); - error = coda_upcall(coda_sbp(sb), insize, &outsize, inp); - - if (error) { - printk("coda_get_rootfid: error %d\n", error); - } else { + error = coda_upcall(coda_vcp(sb), insize, &outsize, inp); + if (!error) *fidp = outp->coda_root.VFid; - } CODA_FREE(inp, insize); return error; @@ -109,9 +103,9 @@ int venus_getattr(struct super_block *sb, struct CodaFid *fid, UPARG(CODA_GETATTR); inp->coda_getattr.VFid = *fid; - error = coda_upcall(coda_sbp(sb), insize, &outsize, inp); - - *attr = outp->coda_getattr.attr; + error = coda_upcall(coda_vcp(sb), insize, &outsize, inp); + if (!error) + *attr = outp->coda_getattr.attr; CODA_FREE(inp, insize); return error; @@ -130,7 +124,7 @@ int venus_setattr(struct super_block *sb, struct CodaFid *fid, inp->coda_setattr.VFid = *fid; inp->coda_setattr.attr = *vattr; - error = coda_upcall(coda_sbp(sb), insize, &outsize, inp); + error = coda_upcall(coda_vcp(sb), insize, &outsize, inp); CODA_FREE(inp, insize); return error; @@ -156,10 +150,11 @@ int venus_lookup(struct super_block *sb, struct CodaFid *fid, memcpy((char *)(inp) + offset, name, length); *((char *)inp + offset + length) = '\0'; - error = coda_upcall(coda_sbp(sb), insize, &outsize, inp); - - *resfid = outp->coda_lookup.VFid; - *type = outp->coda_lookup.vtype; + error = coda_upcall(coda_vcp(sb), insize, &outsize, inp); + if (!error) { + *resfid = outp->coda_lookup.VFid; + *type = outp->coda_lookup.vtype; + } CODA_FREE(inp, insize); return error; @@ -188,7 +183,7 @@ int venus_store(struct super_block *sb, struct CodaFid *fid, int flags, inp->coda_store.VFid = *fid; inp->coda_store.flags = flags; - error = coda_upcall(coda_sbp(sb), insize, &outsize, inp); + error = coda_upcall(coda_vcp(sb), insize, &outsize, inp); CODA_FREE(inp, insize); return error; @@ -206,7 +201,7 @@ int venus_release(struct super_block *sb, struct CodaFid *fid, int flags) inp->coda_release.VFid = *fid; inp->coda_release.flags = flags; - error = coda_upcall(coda_sbp(sb), insize, &outsize, inp); + error = coda_upcall(coda_vcp(sb), insize, &outsize, inp); CODA_FREE(inp, insize); return error; @@ -235,7 +230,7 @@ int venus_close(struct super_block *sb, struct CodaFid *fid, int flags, inp->coda_close.VFid = *fid; inp->coda_close.flags = flags; - error = coda_upcall(coda_sbp(sb), insize, &outsize, inp); + error = coda_upcall(coda_vcp(sb), insize, &outsize, inp); CODA_FREE(inp, insize); return error; @@ -251,12 +246,12 @@ int venus_open(struct super_block *sb, struct CodaFid *fid, insize = SIZE(open_by_fd); UPARG(CODA_OPEN_BY_FD); - inp->coda_open.VFid = *fid; - inp->coda_open.flags = flags; + inp->coda_open_by_fd.VFid = *fid; + inp->coda_open_by_fd.flags = flags; - error = coda_upcall(coda_sbp(sb), insize, &outsize, inp); - - *fh = outp->coda_open_by_fd.fh; + error = coda_upcall(coda_vcp(sb), insize, &outsize, inp); + if (!error) + *fh = outp->coda_open_by_fd.fh; CODA_FREE(inp, insize); return error; @@ -281,11 +276,12 @@ int venus_mkdir(struct super_block *sb, struct CodaFid *dirfid, /* Venus must get null terminated string */ memcpy((char *)(inp) + offset, name, length); *((char *)inp + offset + length) = '\0'; - - error = coda_upcall(coda_sbp(sb), insize, &outsize, inp); - *attrs = outp->coda_mkdir.attr; - *newfid = outp->coda_mkdir.VFid; + error = coda_upcall(coda_vcp(sb), insize, &outsize, inp); + if (!error) { + *attrs = outp->coda_mkdir.attr; + *newfid = outp->coda_mkdir.VFid; + } CODA_FREE(inp, insize); return error; @@ -323,7 +319,7 @@ int venus_rename(struct super_block *sb, struct CodaFid *old_fid, memcpy((char *)(inp) + offset, new_name, new_length); *((char *)inp + offset + new_length) = '\0'; - error = coda_upcall(coda_sbp(sb), insize, &outsize, inp); + error = coda_upcall(coda_vcp(sb), insize, &outsize, inp); CODA_FREE(inp, insize); return error; @@ -351,11 +347,12 @@ int venus_create(struct super_block *sb, struct CodaFid *dirfid, /* Venus must get null terminated string */ memcpy((char *)(inp) + offset, name, length); *((char *)inp + offset + length) = '\0'; - - error = coda_upcall(coda_sbp(sb), insize, &outsize, inp); - *attrs = outp->coda_create.attr; - *newfid = outp->coda_create.VFid; + error = coda_upcall(coda_vcp(sb), insize, &outsize, inp); + if (!error) { + *attrs = outp->coda_create.attr; + *newfid = outp->coda_create.VFid; + } CODA_FREE(inp, insize); return error; @@ -377,8 +374,8 @@ int venus_rmdir(struct super_block *sb, struct CodaFid *dirfid, inp->coda_rmdir.name = offset; memcpy((char *)(inp) + offset, name, length); *((char *)inp + offset + length) = '\0'; - - error = coda_upcall(coda_sbp(sb), insize, &outsize, inp); + + error = coda_upcall(coda_vcp(sb), insize, &outsize, inp); CODA_FREE(inp, insize); return error; @@ -399,8 +396,8 @@ int venus_remove(struct super_block *sb, struct CodaFid *dirfid, inp->coda_remove.name = offset; memcpy((char *)(inp) + offset, name, length); *((char *)inp + offset + length) = '\0'; - - error = coda_upcall(coda_sbp(sb), insize, &outsize, inp); + + error = coda_upcall(coda_vcp(sb), insize, &outsize, inp); CODA_FREE(inp, insize); return error; @@ -420,19 +417,18 @@ int venus_readlink(struct super_block *sb, struct CodaFid *fid, UPARG(CODA_READLINK); inp->coda_readlink.VFid = *fid; - - error = coda_upcall(coda_sbp(sb), insize, &outsize, inp); - - if (! error) { - retlen = outp->coda_readlink.count; + + error = coda_upcall(coda_vcp(sb), insize, &outsize, inp); + if (!error) { + retlen = outp->coda_readlink.count; if ( retlen > *length ) - retlen = *length; + retlen = *length; *length = retlen; result = (char *)outp + (long)outp->coda_readlink.data; memcpy(buffer, result, retlen); *(buffer + retlen) = '\0'; } - + CODA_FREE(inp, insize); return error; } @@ -458,8 +454,8 @@ int venus_link(struct super_block *sb, struct CodaFid *fid, /* make sure strings are null terminated */ memcpy((char *)(inp) + offset, name, len); *((char *)inp + offset + len) = '\0'; - - error = coda_upcall(coda_sbp(sb), insize, &outsize, inp); + + error = coda_upcall(coda_vcp(sb), insize, &outsize, inp); CODA_FREE(inp, insize); return error; @@ -494,7 +490,7 @@ int venus_symlink(struct super_block *sb, struct CodaFid *fid, memcpy((char *)(inp) + offset, name, len); *((char *)inp + offset + len) = '\0'; - error = coda_upcall(coda_sbp(sb), insize, &outsize, inp); + error = coda_upcall(coda_vcp(sb), insize, &outsize, inp); CODA_FREE(inp, insize); return error; @@ -509,9 +505,9 @@ int venus_fsync(struct super_block *sb, struct CodaFid *fid) insize=SIZE(fsync); UPARG(CODA_FSYNC); - inp->coda_fsync.VFid = *fid; - error = coda_upcall(coda_sbp(sb), sizeof(union inputArgs), - &outsize, inp); + inp->coda_fsync.VFid = *fid; + error = coda_upcall(coda_vcp(sb), sizeof(union inputArgs), + &outsize, inp); CODA_FREE(inp, insize); return error; @@ -529,7 +525,7 @@ int venus_access(struct super_block *sb, struct CodaFid *fid, int mask) inp->coda_access.VFid = *fid; inp->coda_access.flags = mask; - error = coda_upcall(coda_sbp(sb), insize, &outsize, inp); + error = coda_upcall(coda_vcp(sb), insize, &outsize, inp); CODA_FREE(inp, insize); return error; @@ -578,9 +574,9 @@ int venus_pioctl(struct super_block *sb, struct CodaFid *fid, goto exit; } - error = coda_upcall(coda_sbp(sb), SIZE(ioctl) + data->vi.in_size, - &outsize, inp); - + error = coda_upcall(coda_vcp(sb), SIZE(ioctl) + data->vi.in_size, + &outsize, inp); + if (error) { printk("coda_pioctl: Venus returns: %d for %s\n", error, coda_f2s(fid)); @@ -620,16 +616,13 @@ int venus_statfs(struct dentry *dentry, struct kstatfs *sfs) insize = max_t(unsigned int, INSIZE(statfs), OUTSIZE(statfs)); UPARG(CODA_STATFS); - error = coda_upcall(coda_sbp(dentry->d_sb), insize, &outsize, inp); - - if (!error) { + error = coda_upcall(coda_vcp(dentry->d_sb), insize, &outsize, inp); + if (!error) { sfs->f_blocks = outp->coda_statfs.stat.f_blocks; sfs->f_bfree = outp->coda_statfs.stat.f_bfree; sfs->f_bavail = outp->coda_statfs.stat.f_bavail; sfs->f_files = outp->coda_statfs.stat.f_files; sfs->f_ffree = outp->coda_statfs.stat.f_ffree; - } else { - printk("coda_statfs: Venus returns: %d\n", error); } CODA_FREE(inp, insize); @@ -638,96 +631,129 @@ int venus_statfs(struct dentry *dentry, struct kstatfs *sfs) /* * coda_upcall and coda_downcall routines. - * */ +static void block_signals(sigset_t *old) +{ + spin_lock_irq(¤t->sighand->siglock); + *old = current->blocked; + + sigfillset(¤t->blocked); + sigdelset(¤t->blocked, SIGKILL); + sigdelset(¤t->blocked, SIGSTOP); + sigdelset(¤t->blocked, SIGINT); + + recalc_sigpending(); + spin_unlock_irq(¤t->sighand->siglock); +} + +static void unblock_signals(sigset_t *old) +{ + spin_lock_irq(¤t->sighand->siglock); + current->blocked = *old; + recalc_sigpending(); + spin_unlock_irq(¤t->sighand->siglock); +} + +/* Don't allow signals to interrupt the following upcalls before venus + * has seen them, + * - CODA_CLOSE or CODA_RELEASE upcall (to avoid reference count problems) + * - CODA_STORE (to avoid data loss) + */ +#define CODA_INTERRUPTIBLE(r) (!coda_hard && \ + (((r)->uc_opcode != CODA_CLOSE && \ + (r)->uc_opcode != CODA_STORE && \ + (r)->uc_opcode != CODA_RELEASE) || \ + (r)->uc_flags & REQ_READ)) -static inline void coda_waitfor_upcall(struct upc_req *vmp, - struct venus_comm *vcommp) +static inline void coda_waitfor_upcall(struct upc_req *req) { DECLARE_WAITQUEUE(wait, current); + unsigned long timeout = jiffies + coda_timeout * HZ; + sigset_t old; + int blocked; - vmp->uc_posttime = jiffies; + block_signals(&old); + blocked = 1; - add_wait_queue(&vmp->uc_sleep, &wait); + add_wait_queue(&req->uc_sleep, &wait); for (;;) { - if ( !coda_hard && vmp->uc_opcode != CODA_CLOSE ) + if (CODA_INTERRUPTIBLE(req)) set_current_state(TASK_INTERRUPTIBLE); else set_current_state(TASK_UNINTERRUPTIBLE); - /* venus died */ - if ( !vcommp->vc_inuse ) - break; - /* got a reply */ - if ( vmp->uc_flags & ( REQ_WRITE | REQ_ABORT ) ) + if (req->uc_flags & (REQ_WRITE | REQ_ABORT)) break; - if ( !coda_hard && vmp->uc_opcode != CODA_CLOSE && signal_pending(current) ) { - /* if this process really wants to die, let it go */ - if ( sigismember(&(current->pending.signal), SIGKILL) || - sigismember(&(current->pending.signal), SIGINT) ) - break; - /* signal is present: after timeout always return - really smart idea, probably useless ... */ - if ( jiffies - vmp->uc_posttime > coda_timeout * HZ ) - break; + if (blocked && time_after(jiffies, timeout) && + CODA_INTERRUPTIBLE(req)) + { + unblock_signals(&old); + blocked = 0; + } + + if (signal_pending(current)) { + list_del(&req->uc_chain); + break; } - schedule(); + + if (blocked) + schedule_timeout(HZ); + else + schedule(); } - remove_wait_queue(&vmp->uc_sleep, &wait); - set_current_state(TASK_RUNNING); + if (blocked) + unblock_signals(&old); - return; + remove_wait_queue(&req->uc_sleep, &wait); + set_current_state(TASK_RUNNING); } -/* - * coda_upcall will return an error in the case of +/* + * coda_upcall will return an error in the case of * failed communication with Venus _or_ will peek at Venus * reply and return Venus' error. * * As venus has 2 types of errors, normal errors (positive) and internal * errors (negative), normal errors are negated, while internal errors * are all mapped to -EINTR, while showing a nice warning message. (jh) - * */ -static int coda_upcall(struct coda_sb_info *sbi, - int inSize, int *outSize, - union inputArgs *buffer) +static int coda_upcall(struct venus_comm *vcp, + int inSize, int *outSize, + union inputArgs *buffer) { - struct venus_comm *vcommp; union outputArgs *out; - struct upc_req *req; + union inputArgs *sig_inputArgs; + struct upc_req *req, *sig_req; int error = 0; - vcommp = sbi->sbi_vcomm; - if ( !vcommp->vc_inuse ) { - printk("No pseudo device in upcall comms at %p\n", vcommp); - return -ENXIO; + if (!vcp->vc_inuse) { + printk(KERN_NOTICE "coda: Venus dead, not sending upcall\n"); + return -ENXIO; } /* Format the request message. */ - req = upc_alloc(); - if (!req) { - printk("Failed to allocate upc_req structure\n"); + req = kmalloc(sizeof(struct upc_req), GFP_KERNEL); + if (!req) return -ENOMEM; - } + req->uc_data = (void *)buffer; req->uc_flags = 0; req->uc_inSize = inSize; req->uc_outSize = *outSize ? *outSize : inSize; req->uc_opcode = ((union inputArgs *)buffer)->ih.opcode; - req->uc_unique = ++vcommp->vc_seq; + req->uc_unique = ++vcp->vc_seq; init_waitqueue_head(&req->uc_sleep); - + /* Fill in the common input args. */ ((union inputArgs *)buffer)->ih.unique = req->uc_unique; /* Append msg to pending queue and poke Venus. */ - list_add_tail(&(req->uc_chain), &vcommp->vc_pending); - - wake_up_interruptible(&vcommp->vc_waitq); + list_add_tail(&req->uc_chain, &vcp->vc_pending); + + wake_up_interruptible(&vcp->vc_waitq); /* We can be interrupted while we wait for Venus to process * our request. If the interrupt occurs before Venus has read * the request, we dequeue and return. If it occurs after the @@ -738,67 +764,60 @@ static int coda_upcall(struct coda_sb_info *sbi, * ENODEV. */ /* Go to sleep. Wake up on signals only after the timeout. */ - coda_waitfor_upcall(req, vcommp); + coda_waitfor_upcall(req); - if (vcommp->vc_inuse) { /* i.e. Venus is still alive */ - /* Op went through, interrupt or not... */ - if (req->uc_flags & REQ_WRITE) { + /* Op went through, interrupt or not... */ + if (req->uc_flags & REQ_WRITE) { out = (union outputArgs *)req->uc_data; /* here we map positive Venus errors to kernel errors */ error = -out->oh.result; *outSize = req->uc_outSize; goto exit; - } - if ( !(req->uc_flags & REQ_READ) && signal_pending(current)) { - /* Interrupted before venus read it. */ - list_del(&(req->uc_chain)); - /* perhaps the best way to convince the app to - give up? */ - error = -EINTR; + } + + error = -EINTR; + if ((req->uc_flags & REQ_ABORT) || !signal_pending(current)) { + printk(KERN_WARNING "coda: Unexpected interruption.\n"); goto exit; - } - if ( (req->uc_flags & REQ_READ) && signal_pending(current) ) { - /* interrupted after Venus did its read, send signal */ - union inputArgs *sig_inputArgs; - struct upc_req *sig_req; - - list_del(&(req->uc_chain)); - error = -ENOMEM; - sig_req = upc_alloc(); - if (!sig_req) goto exit; - - CODA_ALLOC((sig_req->uc_data), char *, sizeof(struct coda_in_hdr)); - if (!sig_req->uc_data) { - upc_free(sig_req); - goto exit; - } - - error = -EINTR; - sig_inputArgs = (union inputArgs *)sig_req->uc_data; - sig_inputArgs->ih.opcode = CODA_SIGNAL; - sig_inputArgs->ih.unique = req->uc_unique; - - sig_req->uc_flags = REQ_ASYNC; - sig_req->uc_opcode = sig_inputArgs->ih.opcode; - sig_req->uc_unique = sig_inputArgs->ih.unique; - sig_req->uc_inSize = sizeof(struct coda_in_hdr); - sig_req->uc_outSize = sizeof(struct coda_in_hdr); - - /* insert at head of queue! */ - list_add(&(sig_req->uc_chain), &vcommp->vc_pending); - wake_up_interruptible(&vcommp->vc_waitq); - } else { - printk("Coda: Strange interruption..\n"); - error = -EINTR; - } - } else { /* If venus died i.e. !VC_OPEN(vcommp) */ - printk("coda_upcall: Venus dead on (op,un) (%d.%d) flags %d\n", - req->uc_opcode, req->uc_unique, req->uc_flags); - error = -ENODEV; } - exit: - upc_free(req); + /* Interrupted before venus read it. */ + if (!(req->uc_flags & REQ_READ)) + goto exit; + + /* Venus saw the upcall, make sure we can send interrupt signal */ + if (!vcp->vc_inuse) { + printk(KERN_INFO "coda: Venus dead, not sending signal.\n"); + goto exit; + } + + error = -ENOMEM; + sig_req = kmalloc(sizeof(struct upc_req), GFP_KERNEL); + if (!sig_req) goto exit; + + CODA_ALLOC((sig_req->uc_data), char *, sizeof(struct coda_in_hdr)); + if (!sig_req->uc_data) { + kfree(sig_req); + goto exit; + } + + error = -EINTR; + sig_inputArgs = (union inputArgs *)sig_req->uc_data; + sig_inputArgs->ih.opcode = CODA_SIGNAL; + sig_inputArgs->ih.unique = req->uc_unique; + + sig_req->uc_flags = REQ_ASYNC; + sig_req->uc_opcode = sig_inputArgs->ih.opcode; + sig_req->uc_unique = sig_inputArgs->ih.unique; + sig_req->uc_inSize = sizeof(struct coda_in_hdr); + sig_req->uc_outSize = sizeof(struct coda_in_hdr); + + /* insert at head of queue! */ + list_add(&(sig_req->uc_chain), &vcp->vc_pending); + wake_up_interruptible(&vcp->vc_waitq); + +exit: + kfree(req); return error; } @@ -838,77 +857,66 @@ static int coda_upcall(struct coda_sb_info *sbi, int coda_downcall(int opcode, union outputArgs * out, struct super_block *sb) { + struct inode *inode = NULL; + struct CodaFid *fid, *newfid; + /* Handle invalidation requests. */ - if ( !sb || !sb->s_root || !sb->s_root->d_inode) - return 0; - - switch (opcode) { - - case CODA_FLUSH : { - coda_cache_clear_all(sb); - shrink_dcache_sb(sb); - coda_flag_inode(sb->s_root->d_inode, C_FLUSH); - return(0); - } - - case CODA_PURGEUSER : { - coda_cache_clear_all(sb); - return(0); - } - - case CODA_ZAPDIR : { - struct inode *inode; - struct CodaFid *fid = &out->coda_zapdir.CodaFid; - - inode = coda_fid_to_inode(fid, sb); - if (inode) { - coda_flag_inode_children(inode, C_PURGE); - coda_flag_inode(inode, C_VATTR); - iput(inode); - } - - return(0); - } - - case CODA_ZAPFILE : { - struct inode *inode; - struct CodaFid *fid = &out->coda_zapfile.CodaFid; - inode = coda_fid_to_inode(fid, sb); - if ( inode ) { - coda_flag_inode(inode, C_VATTR); - iput(inode); - } - return 0; - } - - case CODA_PURGEFID : { - struct inode *inode; - struct CodaFid *fid = &out->coda_purgefid.CodaFid; - inode = coda_fid_to_inode(fid, sb); - if ( inode ) { + if ( !sb || !sb->s_root) + return 0; + + switch (opcode) { + case CODA_FLUSH: + coda_cache_clear_all(sb); + shrink_dcache_sb(sb); + if (sb->s_root->d_inode) + coda_flag_inode(sb->s_root->d_inode, C_FLUSH); + break; + + case CODA_PURGEUSER: + coda_cache_clear_all(sb); + break; + + case CODA_ZAPDIR: + fid = &out->coda_zapdir.CodaFid; + inode = coda_fid_to_inode(fid, sb); + if (inode) { + coda_flag_inode_children(inode, C_PURGE); + coda_flag_inode(inode, C_VATTR); + } + break; + + case CODA_ZAPFILE: + fid = &out->coda_zapfile.CodaFid; + inode = coda_fid_to_inode(fid, sb); + if (inode) + coda_flag_inode(inode, C_VATTR); + break; + + case CODA_PURGEFID: + fid = &out->coda_purgefid.CodaFid; + inode = coda_fid_to_inode(fid, sb); + if (inode) { coda_flag_inode_children(inode, C_PURGE); /* catch the dentries later if some are still busy */ coda_flag_inode(inode, C_PURGE); d_prune_aliases(inode); - iput(inode); - } - return 0; - } - - case CODA_REPLACE : { - struct inode *inode; - struct CodaFid *oldfid = &out->coda_replace.OldFid; - struct CodaFid *newfid = &out->coda_replace.NewFid; - inode = coda_fid_to_inode(oldfid, sb); - if ( inode ) { - coda_replace_fid(inode, oldfid, newfid); - iput(inode); - } - return 0; - } - } - return 0; + } + break; + + case CODA_REPLACE: + fid = &out->coda_replace.OldFid; + newfid = &out->coda_replace.NewFid; + inode = coda_fid_to_inode(fid, sb); + if (inode) + coda_replace_fid(inode, fid, newfid); + break; + } + + if (inode) + iput(inode); + + return 0; } diff --git a/fs/compat.c b/fs/compat.c index 4db6216e5266..15078ce4c04a 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -1257,6 +1257,7 @@ static int compat_copy_strings(int argc, compat_uptr_t __user *argv, { struct page *kmapped_page = NULL; char *kaddr = NULL; + unsigned long kpos = 0; int ret; while (argc-- > 0) { @@ -1265,92 +1266,84 @@ static int compat_copy_strings(int argc, compat_uptr_t __user *argv, unsigned long pos; if (get_user(str, argv+argc) || - !(len = strnlen_user(compat_ptr(str), bprm->p))) { + !(len = strnlen_user(compat_ptr(str), MAX_ARG_STRLEN))) { ret = -EFAULT; goto out; } - if (bprm->p < len) { + if (len > MAX_ARG_STRLEN) { ret = -E2BIG; goto out; } - bprm->p -= len; - /* XXX: add architecture specific overflow check here. */ + /* We're going to work our way backwords. */ pos = bprm->p; + str += len; + bprm->p -= len; while (len > 0) { - int i, new, err; int offset, bytes_to_copy; - struct page *page; offset = pos % PAGE_SIZE; - i = pos/PAGE_SIZE; - page = bprm->page[i]; - new = 0; - if (!page) { - page = alloc_page(GFP_HIGHUSER); - bprm->page[i] = page; - if (!page) { - ret = -ENOMEM; + if (offset == 0) + offset = PAGE_SIZE; + + bytes_to_copy = offset; + if (bytes_to_copy > len) + bytes_to_copy = len; + + offset -= bytes_to_copy; + pos -= bytes_to_copy; + str -= bytes_to_copy; + len -= bytes_to_copy; + + if (!kmapped_page || kpos != (pos & PAGE_MASK)) { + struct page *page; + +#ifdef CONFIG_STACK_GROWSUP + ret = expand_stack_downwards(bprm->vma, pos); + if (ret < 0) { + /* We've exceed the stack rlimit. */ + ret = -E2BIG; + goto out; + } +#endif + ret = get_user_pages(current, bprm->mm, pos, + 1, 1, 1, &page, NULL); + if (ret <= 0) { + /* We've exceed the stack rlimit. */ + ret = -E2BIG; goto out; } - new = 1; - } - if (page != kmapped_page) { - if (kmapped_page) + if (kmapped_page) { + flush_kernel_dcache_page(kmapped_page); kunmap(kmapped_page); + put_page(kmapped_page); + } kmapped_page = page; kaddr = kmap(kmapped_page); + kpos = pos & PAGE_MASK; + flush_cache_page(bprm->vma, kpos, + page_to_pfn(kmapped_page)); } - if (new && offset) - memset(kaddr, 0, offset); - bytes_to_copy = PAGE_SIZE - offset; - if (bytes_to_copy > len) { - bytes_to_copy = len; - if (new) - memset(kaddr+offset+len, 0, - PAGE_SIZE-offset-len); - } - err = copy_from_user(kaddr+offset, compat_ptr(str), - bytes_to_copy); - if (err) { + if (copy_from_user(kaddr+offset, compat_ptr(str), + bytes_to_copy)) { ret = -EFAULT; goto out; } - - pos += bytes_to_copy; - str += bytes_to_copy; - len -= bytes_to_copy; } } ret = 0; out: - if (kmapped_page) + if (kmapped_page) { + flush_kernel_dcache_page(kmapped_page); kunmap(kmapped_page); - return ret; -} - -#ifdef CONFIG_MMU - -#define free_arg_pages(bprm) do { } while (0) - -#else - -static inline void free_arg_pages(struct linux_binprm *bprm) -{ - int i; - - for (i = 0; i < MAX_ARG_PAGES; i++) { - if (bprm->page[i]) - __free_page(bprm->page[i]); - bprm->page[i] = NULL; + put_page(kmapped_page); } + return ret; } -#endif /* CONFIG_MMU */ - /* * compat_do_execve() is mostly a copy of do_execve(), with the exception * that it processes 32 bit argv and envp pointers. @@ -1363,7 +1356,6 @@ int compat_do_execve(char * filename, struct linux_binprm *bprm; struct file *file; int retval; - int i; retval = -ENOMEM; bprm = kzalloc(sizeof(*bprm), GFP_KERNEL); @@ -1377,24 +1369,19 @@ int compat_do_execve(char * filename, sched_exec(); - bprm->p = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *); bprm->file = file; bprm->filename = filename; bprm->interp = filename; - bprm->mm = mm_alloc(); - retval = -ENOMEM; - if (!bprm->mm) - goto out_file; - retval = init_new_context(current, bprm->mm); - if (retval < 0) - goto out_mm; + retval = bprm_mm_init(bprm); + if (retval) + goto out_file; - bprm->argc = compat_count(argv, bprm->p / sizeof(compat_uptr_t)); + bprm->argc = compat_count(argv, MAX_ARG_STRINGS); if ((retval = bprm->argc) < 0) goto out_mm; - bprm->envc = compat_count(envp, bprm->p / sizeof(compat_uptr_t)); + bprm->envc = compat_count(envp, MAX_ARG_STRINGS); if ((retval = bprm->envc) < 0) goto out_mm; @@ -1421,8 +1408,6 @@ int compat_do_execve(char * filename, retval = search_binary_handler(bprm, regs); if (retval >= 0) { - free_arg_pages(bprm); - /* execve success */ security_bprm_free(bprm); acct_update_integrals(current); @@ -1431,19 +1416,12 @@ int compat_do_execve(char * filename, } out: - /* Something went wrong, return the inode and free the argument pages*/ - for (i = 0 ; i < MAX_ARG_PAGES ; i++) { - struct page * page = bprm->page[i]; - if (page) - __free_page(page); - } - if (bprm->security) security_bprm_free(bprm); out_mm: if (bprm->mm) - mmdrop(bprm->mm); + mmput(bprm->mm); out_file: if (bprm->file) { diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 1d533a2ec3a6..11be8a325e26 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -345,11 +345,6 @@ void debugfs_remove(struct dentry *dentry) switch (dentry->d_inode->i_mode & S_IFMT) { case S_IFDIR: ret = simple_rmdir(parent->d_inode, dentry); - if (ret) - printk(KERN_ERR - "DebugFS rmdir on %s failed : " - "directory not empty.\n", - dentry->d_name.name); break; case S_IFLNK: kfree(dentry->d_inode->i_private); diff --git a/fs/dlm/memory.c b/fs/dlm/memory.c index f858fef6e41c..fb9e2ee998ae 100644 --- a/fs/dlm/memory.c +++ b/fs/dlm/memory.c @@ -39,9 +39,7 @@ char *allocate_lvb(struct dlm_ls *ls) { char *p; - p = kmalloc(ls->ls_lvblen, GFP_KERNEL); - if (p) - memset(p, 0, ls->ls_lvblen); + p = kzalloc(ls->ls_lvblen, GFP_KERNEL); return p; } @@ -59,9 +57,7 @@ struct dlm_rsb *allocate_rsb(struct dlm_ls *ls, int namelen) DLM_ASSERT(namelen <= DLM_RESNAME_MAXLEN,); - r = kmalloc(sizeof(*r) + namelen, GFP_KERNEL); - if (r) - memset(r, 0, sizeof(*r) + namelen); + r = kzalloc(sizeof(*r) + namelen, GFP_KERNEL); return r; } @@ -101,9 +97,7 @@ struct dlm_direntry *allocate_direntry(struct dlm_ls *ls, int namelen) DLM_ASSERT(namelen <= DLM_RESNAME_MAXLEN, printk("namelen = %d\n", namelen);); - de = kmalloc(sizeof(*de) + namelen, GFP_KERNEL); - if (de) - memset(de, 0, sizeof(*de) + namelen); + de = kzalloc(sizeof(*de) + namelen, GFP_KERNEL); return de; } diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index e77a2ec71aa5..0a50942b4378 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -902,8 +902,9 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia) mutex_lock(&crypt_stat->cs_mutex); if (S_ISDIR(dentry->d_inode->i_mode)) crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED); - else if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED) - || !(crypt_stat->flags & ECRYPTFS_KEY_VALID)) { + else if (S_ISREG(dentry->d_inode->i_mode) + && (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED) + || !(crypt_stat->flags & ECRYPTFS_KEY_VALID))) { struct vfsmount *lower_mnt; struct file *lower_file = NULL; struct ecryptfs_mount_crypt_stat *mount_crypt_stat; diff --git a/fs/exec.c b/fs/exec.c index f20561ff4528..7bdea7937ee8 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -54,6 +54,7 @@ #include <asm/uaccess.h> #include <asm/mmu_context.h> +#include <asm/tlb.h> #ifdef CONFIG_KMOD #include <linux/kmod.h> @@ -178,6 +179,207 @@ exit: goto out; } +#ifdef CONFIG_MMU + +static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, + int write) +{ + struct page *page; + int ret; + +#ifdef CONFIG_STACK_GROWSUP + if (write) { + ret = expand_stack_downwards(bprm->vma, pos); + if (ret < 0) + return NULL; + } +#endif + ret = get_user_pages(current, bprm->mm, pos, + 1, write, 1, &page, NULL); + if (ret <= 0) + return NULL; + + if (write) { + struct rlimit *rlim = current->signal->rlim; + unsigned long size = bprm->vma->vm_end - bprm->vma->vm_start; + + /* + * Limit to 1/4-th the stack size for the argv+env strings. + * This ensures that: + * - the remaining binfmt code will not run out of stack space, + * - the program will have a reasonable amount of stack left + * to work from. + */ + if (size > rlim[RLIMIT_STACK].rlim_cur / 4) { + put_page(page); + return NULL; + } + } + + return page; +} + +static void put_arg_page(struct page *page) +{ + put_page(page); +} + +static void free_arg_page(struct linux_binprm *bprm, int i) +{ +} + +static void free_arg_pages(struct linux_binprm *bprm) +{ +} + +static void flush_arg_page(struct linux_binprm *bprm, unsigned long pos, + struct page *page) +{ + flush_cache_page(bprm->vma, pos, page_to_pfn(page)); +} + +static int __bprm_mm_init(struct linux_binprm *bprm) +{ + int err = -ENOMEM; + struct vm_area_struct *vma = NULL; + struct mm_struct *mm = bprm->mm; + + bprm->vma = vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); + if (!vma) + goto err; + + down_write(&mm->mmap_sem); + vma->vm_mm = mm; + + /* + * Place the stack at the largest stack address the architecture + * supports. Later, we'll move this to an appropriate place. We don't + * use STACK_TOP because that can depend on attributes which aren't + * configured yet. + */ + vma->vm_end = STACK_TOP_MAX; + vma->vm_start = vma->vm_end - PAGE_SIZE; + + vma->vm_flags = VM_STACK_FLAGS; + vma->vm_page_prot = protection_map[vma->vm_flags & 0x7]; + err = insert_vm_struct(mm, vma); + if (err) { + up_write(&mm->mmap_sem); + goto err; + } + + mm->stack_vm = mm->total_vm = 1; + up_write(&mm->mmap_sem); + + bprm->p = vma->vm_end - sizeof(void *); + + return 0; + +err: + if (vma) { + bprm->vma = NULL; + kmem_cache_free(vm_area_cachep, vma); + } + + return err; +} + +static bool valid_arg_len(struct linux_binprm *bprm, long len) +{ + return len <= MAX_ARG_STRLEN; +} + +#else + +static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, + int write) +{ + struct page *page; + + page = bprm->page[pos / PAGE_SIZE]; + if (!page && write) { + page = alloc_page(GFP_HIGHUSER|__GFP_ZERO); + if (!page) + return NULL; + bprm->page[pos / PAGE_SIZE] = page; + } + + return page; +} + +static void put_arg_page(struct page *page) +{ +} + +static void free_arg_page(struct linux_binprm *bprm, int i) +{ + if (bprm->page[i]) { + __free_page(bprm->page[i]); + bprm->page[i] = NULL; + } +} + +static void free_arg_pages(struct linux_binprm *bprm) +{ + int i; + + for (i = 0; i < MAX_ARG_PAGES; i++) + free_arg_page(bprm, i); +} + +static void flush_arg_page(struct linux_binprm *bprm, unsigned long pos, + struct page *page) +{ +} + +static int __bprm_mm_init(struct linux_binprm *bprm) +{ + bprm->p = PAGE_SIZE * MAX_ARG_PAGES - sizeof(void *); + return 0; +} + +static bool valid_arg_len(struct linux_binprm *bprm, long len) +{ + return len <= bprm->p; +} + +#endif /* CONFIG_MMU */ + +/* + * Create a new mm_struct and populate it with a temporary stack + * vm_area_struct. We don't have enough context at this point to set the stack + * flags, permissions, and offset, so we use temporary values. We'll update + * them later in setup_arg_pages(). + */ +int bprm_mm_init(struct linux_binprm *bprm) +{ + int err; + struct mm_struct *mm = NULL; + + bprm->mm = mm = mm_alloc(); + err = -ENOMEM; + if (!mm) + goto err; + + err = init_new_context(current, mm); + if (err) + goto err; + + err = __bprm_mm_init(bprm); + if (err) + goto err; + + return 0; + +err: + if (mm) { + bprm->mm = NULL; + mmdrop(mm); + } + + return err; +} + /* * count() counts the number of strings in array ARGV. */ @@ -203,15 +405,16 @@ static int count(char __user * __user * argv, int max) } /* - * 'copy_strings()' copies argument/environment strings from user - * memory to free pages in kernel mem. These are in a format ready - * to be put directly into the top of new user memory. + * 'copy_strings()' copies argument/environment strings from the old + * processes's memory to the new process's stack. The call to get_user_pages() + * ensures the destination page is created and not swapped out. */ static int copy_strings(int argc, char __user * __user * argv, struct linux_binprm *bprm) { struct page *kmapped_page = NULL; char *kaddr = NULL; + unsigned long kpos = 0; int ret; while (argc-- > 0) { @@ -220,69 +423,69 @@ static int copy_strings(int argc, char __user * __user * argv, unsigned long pos; if (get_user(str, argv+argc) || - !(len = strnlen_user(str, bprm->p))) { + !(len = strnlen_user(str, MAX_ARG_STRLEN))) { ret = -EFAULT; goto out; } - if (bprm->p < len) { + if (!valid_arg_len(bprm, len)) { ret = -E2BIG; goto out; } - bprm->p -= len; - /* XXX: add architecture specific overflow check here. */ + /* We're going to work our way backwords. */ pos = bprm->p; + str += len; + bprm->p -= len; while (len > 0) { - int i, new, err; int offset, bytes_to_copy; - struct page *page; offset = pos % PAGE_SIZE; - i = pos/PAGE_SIZE; - page = bprm->page[i]; - new = 0; - if (!page) { - page = alloc_page(GFP_HIGHUSER); - bprm->page[i] = page; + if (offset == 0) + offset = PAGE_SIZE; + + bytes_to_copy = offset; + if (bytes_to_copy > len) + bytes_to_copy = len; + + offset -= bytes_to_copy; + pos -= bytes_to_copy; + str -= bytes_to_copy; + len -= bytes_to_copy; + + if (!kmapped_page || kpos != (pos & PAGE_MASK)) { + struct page *page; + + page = get_arg_page(bprm, pos, 1); if (!page) { - ret = -ENOMEM; + ret = -E2BIG; goto out; } - new = 1; - } - if (page != kmapped_page) { - if (kmapped_page) + if (kmapped_page) { + flush_kernel_dcache_page(kmapped_page); kunmap(kmapped_page); + put_arg_page(kmapped_page); + } kmapped_page = page; kaddr = kmap(kmapped_page); + kpos = pos & PAGE_MASK; + flush_arg_page(bprm, kpos, kmapped_page); } - if (new && offset) - memset(kaddr, 0, offset); - bytes_to_copy = PAGE_SIZE - offset; - if (bytes_to_copy > len) { - bytes_to_copy = len; - if (new) - memset(kaddr+offset+len, 0, - PAGE_SIZE-offset-len); - } - err = copy_from_user(kaddr+offset, str, bytes_to_copy); - if (err) { + if (copy_from_user(kaddr+offset, str, bytes_to_copy)) { ret = -EFAULT; goto out; } - - pos += bytes_to_copy; - str += bytes_to_copy; - len -= bytes_to_copy; } } ret = 0; out: - if (kmapped_page) + if (kmapped_page) { + flush_kernel_dcache_page(kmapped_page); kunmap(kmapped_page); + put_arg_page(kmapped_page); + } return ret; } @@ -298,181 +501,172 @@ int copy_strings_kernel(int argc,char ** argv, struct linux_binprm *bprm) set_fs(oldfs); return r; } - EXPORT_SYMBOL(copy_strings_kernel); #ifdef CONFIG_MMU + /* - * This routine is used to map in a page into an address space: needed by - * execve() for the initial stack and environment pages. + * During bprm_mm_init(), we create a temporary stack at STACK_TOP_MAX. Once + * the binfmt code determines where the new stack should reside, we shift it to + * its final location. The process proceeds as follows: * - * vma->vm_mm->mmap_sem is held for writing. + * 1) Use shift to calculate the new vma endpoints. + * 2) Extend vma to cover both the old and new ranges. This ensures the + * arguments passed to subsequent functions are consistent. + * 3) Move vma's page tables to the new range. + * 4) Free up any cleared pgd range. + * 5) Shrink the vma to cover only the new range. */ -void install_arg_page(struct vm_area_struct *vma, - struct page *page, unsigned long address) +static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift) { struct mm_struct *mm = vma->vm_mm; - pte_t * pte; - spinlock_t *ptl; + unsigned long old_start = vma->vm_start; + unsigned long old_end = vma->vm_end; + unsigned long length = old_end - old_start; + unsigned long new_start = old_start - shift; + unsigned long new_end = old_end - shift; + struct mmu_gather *tlb; - if (unlikely(anon_vma_prepare(vma))) - goto out; + BUG_ON(new_start > new_end); - flush_dcache_page(page); - pte = get_locked_pte(mm, address, &ptl); - if (!pte) - goto out; - if (!pte_none(*pte)) { - pte_unmap_unlock(pte, ptl); - goto out; + /* + * ensure there are no vmas between where we want to go + * and where we are + */ + if (vma != find_vma(mm, new_start)) + return -EFAULT; + + /* + * cover the whole range: [new_start, old_end) + */ + vma_adjust(vma, new_start, old_end, vma->vm_pgoff, NULL); + + /* + * move the page tables downwards, on failure we rely on + * process cleanup to remove whatever mess we made. + */ + if (length != move_page_tables(vma, old_start, + vma, new_start, length)) + return -ENOMEM; + + lru_add_drain(); + tlb = tlb_gather_mmu(mm, 0); + if (new_end > old_start) { + /* + * when the old and new regions overlap clear from new_end. + */ + free_pgd_range(&tlb, new_end, old_end, new_end, + vma->vm_next ? vma->vm_next->vm_start : 0); + } else { + /* + * otherwise, clean from old_start; this is done to not touch + * the address space in [new_end, old_start) some architectures + * have constraints on va-space that make this illegal (IA64) - + * for the others its just a little faster. + */ + free_pgd_range(&tlb, old_start, old_end, new_end, + vma->vm_next ? vma->vm_next->vm_start : 0); } - inc_mm_counter(mm, anon_rss); - lru_cache_add_active(page); - set_pte_at(mm, address, pte, pte_mkdirty(pte_mkwrite(mk_pte( - page, vma->vm_page_prot)))); - page_add_new_anon_rmap(page, vma, address); - pte_unmap_unlock(pte, ptl); - - /* no need for flush_tlb */ - return; -out: - __free_page(page); - force_sig(SIGKILL, current); + tlb_finish_mmu(tlb, new_end, old_end); + + /* + * shrink the vma to just the new range. + */ + vma_adjust(vma, new_start, new_end, vma->vm_pgoff, NULL); + + return 0; } #define EXTRA_STACK_VM_PAGES 20 /* random */ +/* + * Finalizes the stack vm_area_struct. The flags and permissions are updated, + * the stack is optionally relocated, and some extra space is added. + */ int setup_arg_pages(struct linux_binprm *bprm, unsigned long stack_top, int executable_stack) { - unsigned long stack_base; - struct vm_area_struct *mpnt; + unsigned long ret; + unsigned long stack_shift; struct mm_struct *mm = current->mm; - int i, ret; - long arg_size; + struct vm_area_struct *vma = bprm->vma; + struct vm_area_struct *prev = NULL; + unsigned long vm_flags; + unsigned long stack_base; #ifdef CONFIG_STACK_GROWSUP - /* Move the argument and environment strings to the bottom of the - * stack space. - */ - int offset, j; - char *to, *from; - - /* Start by shifting all the pages down */ - i = 0; - for (j = 0; j < MAX_ARG_PAGES; j++) { - struct page *page = bprm->page[j]; - if (!page) - continue; - bprm->page[i++] = page; - } - - /* Now move them within their pages */ - offset = bprm->p % PAGE_SIZE; - to = kmap(bprm->page[0]); - for (j = 1; j < i; j++) { - memmove(to, to + offset, PAGE_SIZE - offset); - from = kmap(bprm->page[j]); - memcpy(to + PAGE_SIZE - offset, from, offset); - kunmap(bprm->page[j - 1]); - to = from; - } - memmove(to, to + offset, PAGE_SIZE - offset); - kunmap(bprm->page[j - 1]); - /* Limit stack size to 1GB */ stack_base = current->signal->rlim[RLIMIT_STACK].rlim_max; if (stack_base > (1 << 30)) stack_base = 1 << 30; - stack_base = PAGE_ALIGN(stack_top - stack_base); - /* Adjust bprm->p to point to the end of the strings. */ - bprm->p = stack_base + PAGE_SIZE * i - offset; + /* Make sure we didn't let the argument array grow too large. */ + if (vma->vm_end - vma->vm_start > stack_base) + return -ENOMEM; - mm->arg_start = stack_base; - arg_size = i << PAGE_SHIFT; + stack_base = PAGE_ALIGN(stack_top - stack_base); - /* zero pages that were copied above */ - while (i < MAX_ARG_PAGES) - bprm->page[i++] = NULL; + stack_shift = vma->vm_start - stack_base; + mm->arg_start = bprm->p - stack_shift; + bprm->p = vma->vm_end - stack_shift; #else - stack_base = arch_align_stack(stack_top - MAX_ARG_PAGES*PAGE_SIZE); - stack_base = PAGE_ALIGN(stack_base); - bprm->p += stack_base; + stack_top = arch_align_stack(stack_top); + stack_top = PAGE_ALIGN(stack_top); + stack_shift = vma->vm_end - stack_top; + + bprm->p -= stack_shift; mm->arg_start = bprm->p; - arg_size = stack_top - (PAGE_MASK & (unsigned long) mm->arg_start); #endif - arg_size += EXTRA_STACK_VM_PAGES * PAGE_SIZE; - if (bprm->loader) - bprm->loader += stack_base; - bprm->exec += stack_base; - - mpnt = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); - if (!mpnt) - return -ENOMEM; + bprm->loader -= stack_shift; + bprm->exec -= stack_shift; down_write(&mm->mmap_sem); - { - mpnt->vm_mm = mm; -#ifdef CONFIG_STACK_GROWSUP - mpnt->vm_start = stack_base; - mpnt->vm_end = stack_base + arg_size; -#else - mpnt->vm_end = stack_top; - mpnt->vm_start = mpnt->vm_end - arg_size; -#endif - /* Adjust stack execute permissions; explicitly enable - * for EXSTACK_ENABLE_X, disable for EXSTACK_DISABLE_X - * and leave alone (arch default) otherwise. */ - if (unlikely(executable_stack == EXSTACK_ENABLE_X)) - mpnt->vm_flags = VM_STACK_FLAGS | VM_EXEC; - else if (executable_stack == EXSTACK_DISABLE_X) - mpnt->vm_flags = VM_STACK_FLAGS & ~VM_EXEC; - else - mpnt->vm_flags = VM_STACK_FLAGS; - mpnt->vm_flags |= mm->def_flags; - mpnt->vm_page_prot = protection_map[mpnt->vm_flags & 0x7]; - if ((ret = insert_vm_struct(mm, mpnt))) { + vm_flags = vma->vm_flags; + + /* + * Adjust stack execute permissions; explicitly enable for + * EXSTACK_ENABLE_X, disable for EXSTACK_DISABLE_X and leave alone + * (arch default) otherwise. + */ + if (unlikely(executable_stack == EXSTACK_ENABLE_X)) + vm_flags |= VM_EXEC; + else if (executable_stack == EXSTACK_DISABLE_X) + vm_flags &= ~VM_EXEC; + vm_flags |= mm->def_flags; + + ret = mprotect_fixup(vma, &prev, vma->vm_start, vma->vm_end, + vm_flags); + if (ret) + goto out_unlock; + BUG_ON(prev != vma); + + /* Move stack pages down in memory. */ + if (stack_shift) { + ret = shift_arg_pages(vma, stack_shift); + if (ret) { up_write(&mm->mmap_sem); - kmem_cache_free(vm_area_cachep, mpnt); return ret; } - mm->stack_vm = mm->total_vm = vma_pages(mpnt); } - for (i = 0 ; i < MAX_ARG_PAGES ; i++) { - struct page *page = bprm->page[i]; - if (page) { - bprm->page[i] = NULL; - install_arg_page(mpnt, page, stack_base); - } - stack_base += PAGE_SIZE; - } +#ifdef CONFIG_STACK_GROWSUP + stack_base = vma->vm_end + EXTRA_STACK_VM_PAGES * PAGE_SIZE; +#else + stack_base = vma->vm_start - EXTRA_STACK_VM_PAGES * PAGE_SIZE; +#endif + ret = expand_stack(vma, stack_base); + if (ret) + ret = -EFAULT; + +out_unlock: up_write(&mm->mmap_sem); - return 0; } - EXPORT_SYMBOL(setup_arg_pages); -#define free_arg_pages(bprm) do { } while (0) - -#else - -static inline void free_arg_pages(struct linux_binprm *bprm) -{ - int i; - - for (i = 0; i < MAX_ARG_PAGES; i++) { - if (bprm->page[i]) - __free_page(bprm->page[i]); - bprm->page[i] = NULL; - } -} - #endif /* CONFIG_MMU */ struct file *open_exec(const char *name) @@ -864,9 +1058,9 @@ int flush_old_exec(struct linux_binprm * bprm) current->sas_ss_sp = current->sas_ss_size = 0; if (current->euid == current->uid && current->egid == current->gid) - current->mm->dumpable = 1; + set_dumpable(current->mm, 1); else - current->mm->dumpable = suid_dumpable; + set_dumpable(current->mm, suid_dumpable); name = bprm->filename; @@ -894,7 +1088,7 @@ int flush_old_exec(struct linux_binprm * bprm) file_permission(bprm->file, MAY_READ) || (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP)) { suid_keys(current); - current->mm->dumpable = suid_dumpable; + set_dumpable(current->mm, suid_dumpable); } /* An exec changes our domain. We are no longer part of the thread @@ -1000,43 +1194,42 @@ EXPORT_SYMBOL(compute_creds); * points to; chop off the first by relocating brpm->p to right after * the first '\0' encountered. */ -void remove_arg_zero(struct linux_binprm *bprm) +int remove_arg_zero(struct linux_binprm *bprm) { - if (bprm->argc) { - char ch; + int ret = 0; + unsigned long offset; + char *kaddr; + struct page *page; - do { - unsigned long offset; - unsigned long index; - char *kaddr; - struct page *page; - - offset = bprm->p & ~PAGE_MASK; - index = bprm->p >> PAGE_SHIFT; + if (!bprm->argc) + return 0; - page = bprm->page[index]; - kaddr = kmap_atomic(page, KM_USER0); + do { + offset = bprm->p & ~PAGE_MASK; + page = get_arg_page(bprm, bprm->p, 0); + if (!page) { + ret = -EFAULT; + goto out; + } + kaddr = kmap_atomic(page, KM_USER0); - /* run through page until we reach end or find NUL */ - do { - ch = *(kaddr + offset); + for (; offset < PAGE_SIZE && kaddr[offset]; + offset++, bprm->p++) + ; - /* discard that character... */ - bprm->p++; - offset++; - } while (offset < PAGE_SIZE && ch != '\0'); + kunmap_atomic(kaddr, KM_USER0); + put_arg_page(page); - kunmap_atomic(kaddr, KM_USER0); + if (offset == PAGE_SIZE) + free_arg_page(bprm, (bprm->p >> PAGE_SHIFT) - 1); + } while (offset == PAGE_SIZE); - /* free the old page */ - if (offset == PAGE_SIZE) { - __free_page(page); - bprm->page[index] = NULL; - } - } while (ch != '\0'); + bprm->p++; + bprm->argc--; + ret = 0; - bprm->argc--; - } +out: + return ret; } EXPORT_SYMBOL(remove_arg_zero); @@ -1062,7 +1255,7 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) fput(bprm->file); bprm->file = NULL; - loader = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *); + loader = bprm->vma->vm_end - sizeof(void *); file = open_exec("/sbin/loader"); retval = PTR_ERR(file); @@ -1154,8 +1347,8 @@ int do_execve(char * filename, { struct linux_binprm *bprm; struct file *file; + unsigned long env_p; int retval; - int i; retval = -ENOMEM; bprm = kzalloc(sizeof(*bprm), GFP_KERNEL); @@ -1169,25 +1362,19 @@ int do_execve(char * filename, sched_exec(); - bprm->p = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *); - bprm->file = file; bprm->filename = filename; bprm->interp = filename; - bprm->mm = mm_alloc(); - retval = -ENOMEM; - if (!bprm->mm) - goto out_file; - retval = init_new_context(current, bprm->mm); - if (retval < 0) - goto out_mm; + retval = bprm_mm_init(bprm); + if (retval) + goto out_file; - bprm->argc = count(argv, bprm->p / sizeof(void *)); + bprm->argc = count(argv, MAX_ARG_STRINGS); if ((retval = bprm->argc) < 0) goto out_mm; - bprm->envc = count(envp, bprm->p / sizeof(void *)); + bprm->envc = count(envp, MAX_ARG_STRINGS); if ((retval = bprm->envc) < 0) goto out_mm; @@ -1208,15 +1395,16 @@ int do_execve(char * filename, if (retval < 0) goto out; + env_p = bprm->p; retval = copy_strings(bprm->argc, argv, bprm); if (retval < 0) goto out; + bprm->argv_len = env_p - bprm->p; retval = search_binary_handler(bprm,regs); if (retval >= 0) { - free_arg_pages(bprm); - /* execve success */ + free_arg_pages(bprm); security_bprm_free(bprm); acct_update_integrals(current); kfree(bprm); @@ -1224,26 +1412,19 @@ int do_execve(char * filename, } out: - /* Something went wrong, return the inode and free the argument pages*/ - for (i = 0 ; i < MAX_ARG_PAGES ; i++) { - struct page * page = bprm->page[i]; - if (page) - __free_page(page); - } - + free_arg_pages(bprm); if (bprm->security) security_bprm_free(bprm); out_mm: if (bprm->mm) - mmdrop(bprm->mm); + mmput (bprm->mm); out_file: if (bprm->file) { allow_write_access(bprm->file); fput(bprm->file); } - out_kfree: kfree(bprm); @@ -1484,6 +1665,56 @@ fail: return core_waiters; } +/* + * set_dumpable converts traditional three-value dumpable to two flags and + * stores them into mm->flags. It modifies lower two bits of mm->flags, but + * these bits are not changed atomically. So get_dumpable can observe the + * intermediate state. To avoid doing unexpected behavior, get get_dumpable + * return either old dumpable or new one by paying attention to the order of + * modifying the bits. + * + * dumpable | mm->flags (binary) + * old new | initial interim final + * ---------+----------------------- + * 0 1 | 00 01 01 + * 0 2 | 00 10(*) 11 + * 1 0 | 01 00 00 + * 1 2 | 01 11 11 + * 2 0 | 11 10(*) 00 + * 2 1 | 11 11 01 + * + * (*) get_dumpable regards interim value of 10 as 11. + */ +void set_dumpable(struct mm_struct *mm, int value) +{ + switch (value) { + case 0: + clear_bit(MMF_DUMPABLE, &mm->flags); + smp_wmb(); + clear_bit(MMF_DUMP_SECURELY, &mm->flags); + break; + case 1: + set_bit(MMF_DUMPABLE, &mm->flags); + smp_wmb(); + clear_bit(MMF_DUMP_SECURELY, &mm->flags); + break; + case 2: + set_bit(MMF_DUMP_SECURELY, &mm->flags); + smp_wmb(); + set_bit(MMF_DUMPABLE, &mm->flags); + break; + } +} +EXPORT_SYMBOL_GPL(set_dumpable); + +int get_dumpable(struct mm_struct *mm) +{ + int ret; + + ret = mm->flags & 0x3; + return (ret >= 2) ? 2 : ret; +} + int do_coredump(long signr, int exit_code, struct pt_regs * regs) { char corename[CORENAME_MAX_SIZE + 1]; @@ -1502,7 +1733,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs) if (!binfmt || !binfmt->core_dump) goto fail; down_write(&mm->mmap_sem); - if (!mm->dumpable) { + if (!get_dumpable(mm)) { up_write(&mm->mmap_sem); goto fail; } @@ -1512,11 +1743,11 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs) * process nor do we know its entire history. We only know it * was tainted so we dump it as root in mode 2. */ - if (mm->dumpable == 2) { /* Setuid core dump mode */ + if (get_dumpable(mm) == 2) { /* Setuid core dump mode */ flag = O_EXCL; /* Stop rewrite attacks */ current->fsuid = 0; /* Dump root private */ } - mm->dumpable = 0; + set_dumpable(mm, 0); retval = coredump_wait(exit_code); if (retval < 0) diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 3eefa97fe204..a6b1072daea7 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -883,13 +883,11 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount; } bgl_lock_init(&sbi->s_blockgroup_lock); - sbi->s_debts = kmalloc(sbi->s_groups_count * sizeof(*sbi->s_debts), - GFP_KERNEL); + sbi->s_debts = kcalloc(sbi->s_groups_count, sizeof(*sbi->s_debts), GFP_KERNEL); if (!sbi->s_debts) { printk ("EXT2-fs: not enough memory\n"); goto failed_mount_group_desc; } - memset(sbi->s_debts, 0, sbi->s_groups_count * sizeof(*sbi->s_debts)); for (i = 0; i < db_count; i++) { block = descriptor_loc(sb, logic_sb_block, i); sbi->s_group_desc[i] = sb_bread(sb, block); diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c index 852869840f24..c00723a99f44 100644 --- a/fs/ext3/dir.c +++ b/fs/ext3/dir.c @@ -136,12 +136,14 @@ static int ext3_readdir(struct file * filp, err = ext3_get_blocks_handle(NULL, inode, blk, 1, &map_bh, 0, 0); if (err > 0) { - page_cache_readahead(sb->s_bdev->bd_inode->i_mapping, - &filp->f_ra, - filp, - map_bh.b_blocknr >> - (PAGE_CACHE_SHIFT - inode->i_blkbits), - 1); + pgoff_t index = map_bh.b_blocknr >> + (PAGE_CACHE_SHIFT - inode->i_blkbits); + if (!ra_has_index(&filp->f_ra, index)) + page_cache_sync_readahead( + sb->s_bdev->bd_inode->i_mapping, + &filp->f_ra, filp, + index, 1); + filp->f_ra.prev_index = index; bh = ext3_bread(NULL, inode, blk, 0, &err); } diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index e8ad06e28318..3ab01c04e00c 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -135,12 +135,14 @@ static int ext4_readdir(struct file * filp, map_bh.b_state = 0; err = ext4_get_blocks_wrap(NULL, inode, blk, 1, &map_bh, 0, 0); if (err > 0) { - page_cache_readahead(sb->s_bdev->bd_inode->i_mapping, - &filp->f_ra, - filp, - map_bh.b_blocknr >> - (PAGE_CACHE_SHIFT - inode->i_blkbits), - 1); + pgoff_t index = map_bh.b_blocknr >> + (PAGE_CACHE_SHIFT - inode->i_blkbits); + if (!ra_has_index(&filp->f_ra, index)) + page_cache_sync_readahead( + sb->s_bdev->bd_inode->i_mapping, + &filp->f_ra, filp, + index, 1); + filp->f_ra.prev_index = index; bh = ext4_bread(NULL, inode, blk, 0, &err); } diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index de26c25d6a18..a4848e04a5ed 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2903,7 +2903,7 @@ int ext4_write_inode(struct inode *inode, int wait) return 0; if (ext4_journal_current_handle()) { - jbd_debug(0, "called recursively, non-PF_MEMALLOC!\n"); + jbd_debug(1, "called recursively, non-PF_MEMALLOC!\n"); dump_stack(); return -EIO; } diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c index 26c888890c24..ce90032c010e 100644 --- a/fs/gfs2/ops_address.c +++ b/fs/gfs2/ops_address.c @@ -251,7 +251,7 @@ static int gfs2_readpage(struct file *file, struct page *page) if (file) { gf = file->private_data; if (test_bit(GFF_EXLOCK, &gf->f_flags)) - /* gfs2_sharewrite_nopage has grabbed the ip->i_gl already */ + /* gfs2_sharewrite_fault has grabbed the ip->i_gl already */ goto skip_lock; } gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME|LM_FLAG_TRY_1CB, &gh); diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c index 196d83266e34..1a5e8e893d75 100644 --- a/fs/gfs2/ops_file.c +++ b/fs/gfs2/ops_file.c @@ -489,6 +489,29 @@ static int gfs2_fsync(struct file *file, struct dentry *dentry, int datasync) } /** + * gfs2_setlease - acquire/release a file lease + * @file: the file pointer + * @arg: lease type + * @fl: file lock + * + * Returns: errno + */ + +static int gfs2_setlease(struct file *file, long arg, struct file_lock **fl) +{ + struct gfs2_sbd *sdp = GFS2_SB(file->f_mapping->host); + + /* + * We don't currently have a way to enforce a lease across the whole + * cluster; until we do, disable leases (by just returning -EINVAL), + * unless the administrator has requested purely local locking. + */ + if (!sdp->sd_args.ar_localflocks) + return -EINVAL; + return setlease(file, arg, fl); +} + +/** * gfs2_lock - acquire/release a posix lock on a file * @file: the file pointer * @cmd: either modify or retrieve lock state, possibly wait @@ -638,6 +661,7 @@ const struct file_operations gfs2_file_fops = { .flock = gfs2_flock, .splice_read = generic_file_splice_read, .splice_write = generic_file_splice_write, + .setlease = gfs2_setlease, }; const struct file_operations gfs2_dir_fops = { diff --git a/fs/gfs2/ops_vm.c b/fs/gfs2/ops_vm.c index 404b7cc9f8c4..927d739d4685 100644 --- a/fs/gfs2/ops_vm.c +++ b/fs/gfs2/ops_vm.c @@ -27,13 +27,12 @@ #include "trans.h" #include "util.h" -static struct page *gfs2_private_nopage(struct vm_area_struct *area, - unsigned long address, int *type) +static int gfs2_private_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { - struct gfs2_inode *ip = GFS2_I(area->vm_file->f_mapping->host); + struct gfs2_inode *ip = GFS2_I(vma->vm_file->f_mapping->host); set_bit(GIF_PAGED, &ip->i_flags); - return filemap_nopage(area, address, type); + return filemap_fault(vma, vmf); } static int alloc_page_backing(struct gfs2_inode *ip, struct page *page) @@ -104,58 +103,67 @@ out: return error; } -static struct page *gfs2_sharewrite_nopage(struct vm_area_struct *area, - unsigned long address, int *type) +static int gfs2_sharewrite_fault(struct vm_area_struct *vma, + struct vm_fault *vmf) { - struct file *file = area->vm_file; + struct file *file = vma->vm_file; struct gfs2_file *gf = file->private_data; struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); struct gfs2_holder i_gh; - struct page *result = NULL; - unsigned long index = ((address - area->vm_start) >> PAGE_CACHE_SHIFT) + - area->vm_pgoff; int alloc_required; int error; + int ret = 0; error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh); if (error) - return NULL; + goto out; set_bit(GIF_PAGED, &ip->i_flags); set_bit(GIF_SW_PAGED, &ip->i_flags); - error = gfs2_write_alloc_required(ip, (u64)index << PAGE_CACHE_SHIFT, - PAGE_CACHE_SIZE, &alloc_required); - if (error) - goto out; + error = gfs2_write_alloc_required(ip, + (u64)vmf->pgoff << PAGE_CACHE_SHIFT, + PAGE_CACHE_SIZE, &alloc_required); + if (error) { + ret = VM_FAULT_OOM; /* XXX: are these right? */ + goto out_unlock; + } set_bit(GFF_EXLOCK, &gf->f_flags); - result = filemap_nopage(area, address, type); + ret = filemap_fault(vma, vmf); clear_bit(GFF_EXLOCK, &gf->f_flags); - if (!result || result == NOPAGE_OOM) - goto out; + if (ret & VM_FAULT_ERROR) + goto out_unlock; if (alloc_required) { - error = alloc_page_backing(ip, result); + /* XXX: do we need to drop page lock around alloc_page_backing?*/ + error = alloc_page_backing(ip, vmf->page); if (error) { - page_cache_release(result); - result = NULL; - goto out; + /* + * VM_FAULT_LOCKED should always be the case for + * filemap_fault, but it may not be in a future + * implementation. + */ + if (ret & VM_FAULT_LOCKED) + unlock_page(vmf->page); + page_cache_release(vmf->page); + ret = VM_FAULT_OOM; + goto out_unlock; } - set_page_dirty(result); + set_page_dirty(vmf->page); } -out: +out_unlock: gfs2_glock_dq_uninit(&i_gh); - - return result; +out: + return ret; } struct vm_operations_struct gfs2_vm_ops_private = { - .nopage = gfs2_private_nopage, + .fault = gfs2_private_fault, }; struct vm_operations_struct gfs2_vm_ops_sharewrite = { - .nopage = gfs2_sharewrite_nopage, + .fault = gfs2_sharewrite_fault, }; diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index e7730a045b93..b50be8a044eb 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c @@ -251,10 +251,10 @@ int jbd2_journal_recover(journal_t *journal) if (!err) err = do_one_pass(journal, &info, PASS_REPLAY); - jbd_debug(0, "JBD: recovery, exit status %d, " + jbd_debug(1, "JBD: recovery, exit status %d, " "recovered transactions %u to %u\n", err, info.start_transaction, info.end_transaction); - jbd_debug(0, "JBD: Replayed %d and revoked %d/%d blocks\n", + jbd_debug(1, "JBD: Replayed %d and revoked %d/%d blocks\n", info.nr_replays, info.nr_revoke_hits, info.nr_revokes); /* Restart the log at the next transaction ID, thus invalidating @@ -298,7 +298,7 @@ int jbd2_journal_skip_recovery(journal_t *journal) #ifdef CONFIG_JBD2_DEBUG int dropped = info.end_transaction - be32_to_cpu(sb->s_sequence); #endif - jbd_debug(0, + jbd_debug(1, "JBD: ignoring %d transaction%s from the journal.\n", dropped, (dropped == 1) ? "" : "s"); journal->j_transaction_sequence = ++info.end_transaction; diff --git a/fs/locks.c b/fs/locks.c index 431a8b871fce..4f2d749ac624 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -458,22 +458,20 @@ static int lease_init(struct file *filp, int type, struct file_lock *fl) } /* Allocate a file_lock initialised to this type of lease */ -static int lease_alloc(struct file *filp, int type, struct file_lock **flp) +static struct file_lock *lease_alloc(struct file *filp, int type) { struct file_lock *fl = locks_alloc_lock(); int error = -ENOMEM; if (fl == NULL) - goto out; + return ERR_PTR(error); error = lease_init(filp, type, fl); if (error) { locks_free_lock(fl); - fl = NULL; + return ERR_PTR(error); } -out: - *flp = fl; - return error; + return fl; } /* Check if two locks overlap each other. @@ -661,7 +659,7 @@ static int locks_block_on_timeout(struct file_lock *blocker, struct file_lock *w return result; } -int +void posix_test_lock(struct file *filp, struct file_lock *fl) { struct file_lock *cfl; @@ -673,14 +671,12 @@ posix_test_lock(struct file *filp, struct file_lock *fl) if (posix_locks_conflict(cfl, fl)) break; } - if (cfl) { + if (cfl) __locks_copy_lock(fl, cfl); - unlock_kernel(); - return 1; - } else + else fl->fl_type = F_UNLCK; unlock_kernel(); - return 0; + return; } EXPORT_SYMBOL(posix_test_lock); @@ -1169,9 +1165,9 @@ static void time_out_leases(struct inode *inode) * @inode: the inode of the file to return * @mode: the open mode (read or write) * - * break_lease (inlined for speed) has checked there already - * is a lease on this file. Leases are broken on a call to open() - * or truncate(). This function can sleep unless you + * break_lease (inlined for speed) has checked there already is at least + * some kind of lock (maybe a lease) on this file. Leases are broken on + * a call to open() or truncate(). This function can sleep unless you * specified %O_NONBLOCK to your open(). */ int __break_lease(struct inode *inode, unsigned int mode) @@ -1179,12 +1175,10 @@ int __break_lease(struct inode *inode, unsigned int mode) int error = 0, future; struct file_lock *new_fl, *flock; struct file_lock *fl; - int alloc_err; unsigned long break_time; int i_have_this_lease = 0; - alloc_err = lease_alloc(NULL, mode & FMODE_WRITE ? F_WRLCK : F_RDLCK, - &new_fl); + new_fl = lease_alloc(NULL, mode & FMODE_WRITE ? F_WRLCK : F_RDLCK); lock_kernel(); @@ -1212,8 +1206,9 @@ int __break_lease(struct inode *inode, unsigned int mode) goto out; } - if (alloc_err && !i_have_this_lease && ((mode & O_NONBLOCK) == 0)) { - error = alloc_err; + if (IS_ERR(new_fl) && !i_have_this_lease + && ((mode & O_NONBLOCK) == 0)) { + error = PTR_ERR(new_fl); goto out; } @@ -1260,7 +1255,7 @@ restart: out: unlock_kernel(); - if (!alloc_err) + if (!IS_ERR(new_fl)) locks_free_lock(new_fl); return error; } @@ -1329,7 +1324,7 @@ int fcntl_getlease(struct file *filp) } /** - * __setlease - sets a lease on an open file + * setlease - sets a lease on an open file * @filp: file pointer * @arg: type of lease to obtain * @flp: input - file_lock to use, output - file_lock inserted @@ -1339,18 +1334,24 @@ int fcntl_getlease(struct file *filp) * * Called with kernel lock held. */ -static int __setlease(struct file *filp, long arg, struct file_lock **flp) +int setlease(struct file *filp, long arg, struct file_lock **flp) { struct file_lock *fl, **before, **my_before = NULL, *lease; struct dentry *dentry = filp->f_path.dentry; struct inode *inode = dentry->d_inode; int error, rdlease_count = 0, wrlease_count = 0; + if ((current->fsuid != inode->i_uid) && !capable(CAP_LEASE)) + return -EACCES; + if (!S_ISREG(inode->i_mode)) + return -EINVAL; + error = security_file_lock(filp, arg); + if (error) + return error; + time_out_leases(inode); - error = -EINVAL; - if (!flp || !(*flp) || !(*flp)->fl_lmops || !(*flp)->fl_lmops->fl_break) - goto out; + BUG_ON(!(*flp)->fl_lmops->fl_break); lease = *flp; @@ -1418,39 +1419,49 @@ static int __setlease(struct file *filp, long arg, struct file_lock **flp) out: return error; } +EXPORT_SYMBOL(setlease); /** - * setlease - sets a lease on an open file + * vfs_setlease - sets a lease on an open file * @filp: file pointer * @arg: type of lease to obtain * @lease: file_lock to use * * Call this to establish a lease on the file. - * The fl_lmops fl_break function is required by break_lease + * The (*lease)->fl_lmops->fl_break operation must be set; if not, + * break_lease will oops! + * + * This will call the filesystem's setlease file method, if + * defined. Note that there is no getlease method; instead, the + * filesystem setlease method should call back to setlease() to + * add a lease to the inode's lease list, where fcntl_getlease() can + * find it. Since fcntl_getlease() only reports whether the current + * task holds a lease, a cluster filesystem need only do this for + * leases held by processes on this node. + * + * There is also no break_lease method; filesystems that + * handle their own leases shoud break leases themselves from the + * filesystem's open, create, and (on truncate) setattr methods. + * + * Warning: the only current setlease methods exist only to disable + * leases in certain cases. More vfs changes may be required to + * allow a full filesystem lease implementation. */ -int setlease(struct file *filp, long arg, struct file_lock **lease) +int vfs_setlease(struct file *filp, long arg, struct file_lock **lease) { - struct dentry *dentry = filp->f_path.dentry; - struct inode *inode = dentry->d_inode; int error; - if ((current->fsuid != inode->i_uid) && !capable(CAP_LEASE)) - return -EACCES; - if (!S_ISREG(inode->i_mode)) - return -EINVAL; - error = security_file_lock(filp, arg); - if (error) - return error; - lock_kernel(); - error = __setlease(filp, arg, lease); + if (filp->f_op && filp->f_op->setlease) + error = filp->f_op->setlease(filp, arg, lease); + else + error = setlease(filp, arg, lease); unlock_kernel(); return error; } - -EXPORT_SYMBOL(setlease); +EXPORT_SYMBOL_GPL(vfs_setlease); /** * fcntl_setlease - sets a lease on an open file @@ -1469,14 +1480,6 @@ int fcntl_setlease(unsigned int fd, struct file *filp, long arg) struct inode *inode = dentry->d_inode; int error; - if ((current->fsuid != inode->i_uid) && !capable(CAP_LEASE)) - return -EACCES; - if (!S_ISREG(inode->i_mode)) - return -EINVAL; - error = security_file_lock(filp, arg); - if (error) - return error; - locks_init_lock(&fl); error = lease_init(filp, arg, &fl); if (error) @@ -1484,15 +1487,15 @@ int fcntl_setlease(unsigned int fd, struct file *filp, long arg) lock_kernel(); - error = __setlease(filp, arg, &flp); + error = vfs_setlease(filp, arg, &flp); if (error || arg == F_UNLCK) goto out_unlock; error = fasync_helper(fd, filp, 1, &flp->fl_fasync); if (error < 0) { - /* remove lease just inserted by __setlease */ + /* remove lease just inserted by setlease */ flp->fl_type = F_UNLCK | F_INPROGRESS; - flp->fl_break_time = jiffies- 10; + flp->fl_break_time = jiffies - 10; time_out_leases(inode); goto out_unlock; } @@ -1597,8 +1600,7 @@ asmlinkage long sys_flock(unsigned int fd, unsigned int cmd) /** * vfs_test_lock - test file byte range lock * @filp: The file to test lock for - * @fl: The lock to test - * @conf: Place to return a copy of the conflicting lock, if found + * @fl: The lock to test; also used to hold result * * Returns -ERRNO on failure. Indicates presence of conflicting lock by * setting conf->fl_type to something other than F_UNLCK. diff --git a/fs/namei.c b/fs/namei.c index defaa47c11d4..a83160acd748 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -107,6 +107,8 @@ * any extra contention... */ +static int fastcall link_path_walk(const char *name, struct nameidata *nd); + /* In order to reduce some races, while at the same time doing additional * checking and hopefully speeding things up, we copy filenames to the * kernel data space before using them.. @@ -998,7 +1000,7 @@ return_err: * Retry the whole path once, forcing real lookup requests * instead of relying on the dcache. */ -int fastcall link_path_walk(const char *name, struct nameidata *nd) +static int fastcall link_path_walk(const char *name, struct nameidata *nd) { struct nameidata save = *nd; int result; @@ -1022,7 +1024,7 @@ int fastcall link_path_walk(const char *name, struct nameidata *nd) return result; } -int fastcall path_walk(const char * name, struct nameidata *nd) +static int fastcall path_walk(const char * name, struct nameidata *nd) { current->total_link_count = 0; return link_path_walk(name, nd); @@ -1172,6 +1174,37 @@ int fastcall path_lookup(const char *name, unsigned int flags, return do_path_lookup(AT_FDCWD, name, flags, nd); } +/** + * vfs_path_lookup - lookup a file path relative to a dentry-vfsmount pair + * @dentry: pointer to dentry of the base directory + * @mnt: pointer to vfs mount of the base directory + * @name: pointer to file name + * @flags: lookup flags + * @nd: pointer to nameidata + */ +int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt, + const char *name, unsigned int flags, + struct nameidata *nd) +{ + int retval; + + /* same as do_path_lookup */ + nd->last_type = LAST_ROOT; + nd->flags = flags; + nd->depth = 0; + + nd->mnt = mntget(mnt); + nd->dentry = dget(dentry); + + retval = path_walk(name, nd); + if (unlikely(!retval && !audit_dummy_context() && nd->dentry && + nd->dentry->d_inode)) + audit_inode(name, nd->dentry->d_inode); + + return retval; + +} + static int __path_lookup_intent_open(int dfd, const char *name, unsigned int lookup_flags, struct nameidata *nd, int open_flags, int create_mode) @@ -2774,8 +2807,8 @@ EXPORT_SYMBOL(__page_symlink); EXPORT_SYMBOL(page_symlink); EXPORT_SYMBOL(page_symlink_inode_operations); EXPORT_SYMBOL(path_lookup); +EXPORT_SYMBOL(vfs_path_lookup); EXPORT_SYMBOL(path_release); -EXPORT_SYMBOL(path_walk); EXPORT_SYMBOL(permission); EXPORT_SYMBOL(vfs_permission); EXPORT_SYMBOL(file_permission); diff --git a/fs/ncpfs/mmap.c b/fs/ncpfs/mmap.c index 70a69115500f..a94473d3072c 100644 --- a/fs/ncpfs/mmap.c +++ b/fs/ncpfs/mmap.c @@ -24,31 +24,35 @@ /* * Fill in the supplied page for mmap + * XXX: how are we excluding truncate/invalidate here? Maybe need to lock + * page? */ -static struct page* ncp_file_mmap_nopage(struct vm_area_struct *area, - unsigned long address, int *type) +static int ncp_file_mmap_fault(struct vm_area_struct *area, + struct vm_fault *vmf) { struct file *file = area->vm_file; struct dentry *dentry = file->f_path.dentry; struct inode *inode = dentry->d_inode; - struct page* page; char *pg_addr; unsigned int already_read; unsigned int count; int bufsize; - int pos; + int pos; /* XXX: loff_t ? */ - page = alloc_page(GFP_HIGHUSER); /* ncpfs has nothing against high pages - as long as recvmsg and memset works on it */ - if (!page) - return page; - pg_addr = kmap(page); - address &= PAGE_MASK; - pos = address - area->vm_start + (area->vm_pgoff << PAGE_SHIFT); + /* + * ncpfs has nothing against high pages as long + * as recvmsg and memset works on it + */ + vmf->page = alloc_page(GFP_HIGHUSER); + if (!vmf->page) + return VM_FAULT_OOM; + pg_addr = kmap(vmf->page); + pos = vmf->pgoff << PAGE_SHIFT; count = PAGE_SIZE; - if (address + PAGE_SIZE > area->vm_end) { - count = area->vm_end - address; + if ((unsigned long)vmf->virtual_address + PAGE_SIZE > area->vm_end) { + WARN_ON(1); /* shouldn't happen? */ + count = area->vm_end - (unsigned long)vmf->virtual_address; } /* what we can read in one go */ bufsize = NCP_SERVER(inode)->buffer_size; @@ -83,23 +87,21 @@ static struct page* ncp_file_mmap_nopage(struct vm_area_struct *area, if (already_read < PAGE_SIZE) memset(pg_addr + already_read, 0, PAGE_SIZE - already_read); - flush_dcache_page(page); - kunmap(page); + flush_dcache_page(vmf->page); + kunmap(vmf->page); /* * If I understand ncp_read_kernel() properly, the above always * fetches from the network, here the analogue of disk. * -- wli */ - if (type) - *type = VM_FAULT_MAJOR; count_vm_event(PGMAJFAULT); - return page; + return VM_FAULT_MAJOR; } static struct vm_operations_struct ncp_file_mmap = { - .nopage = ncp_file_mmap_nopage, + .fault = ncp_file_mmap_fault, }; diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 8689b736fdd9..c87dc713b5d7 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -53,6 +53,7 @@ static int nfs_fsync(struct file *, struct dentry *dentry, int datasync); static int nfs_check_flags(int flags); static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl); static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl); +static int nfs_setlease(struct file *file, long arg, struct file_lock **fl); const struct file_operations nfs_file_operations = { .llseek = nfs_file_llseek, @@ -69,6 +70,7 @@ const struct file_operations nfs_file_operations = { .flock = nfs_flock, .splice_read = nfs_file_splice_read, .check_flags = nfs_check_flags, + .setlease = nfs_setlease, }; const struct inode_operations nfs_file_inode_operations = { @@ -400,7 +402,9 @@ static int do_getlk(struct file *filp, int cmd, struct file_lock *fl) lock_kernel(); /* Try local locking first */ - if (posix_test_lock(filp, fl)) { + posix_test_lock(filp, fl); + if (fl->fl_type != F_UNLCK) { + /* found a conflict */ goto out; } @@ -558,3 +562,13 @@ static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl) return do_unlk(filp, cmd, fl); return do_setlk(filp, cmd, fl); } + +static int nfs_setlease(struct file *file, long arg, struct file_lock **fl) +{ + /* + * There is no protocol support for leases, so we have no way + * to implement them correctly in the face of opens by other + * clients. + */ + return -EINVAL; +} diff --git a/fs/nfsctl.c b/fs/nfsctl.c index c043136a82ca..51f1b31acbf6 100644 --- a/fs/nfsctl.c +++ b/fs/nfsctl.c @@ -23,19 +23,15 @@ static struct file *do_open(char *name, int flags) { struct nameidata nd; + struct vfsmount *mnt; int error; - nd.mnt = do_kern_mount("nfsd", 0, "nfsd", NULL); + mnt = do_kern_mount("nfsd", 0, "nfsd", NULL); + if (IS_ERR(mnt)) + return (struct file *)mnt; - if (IS_ERR(nd.mnt)) - return (struct file *)nd.mnt; - - nd.dentry = dget(nd.mnt->mnt_root); - nd.last_type = LAST_ROOT; - nd.flags = 0; - nd.depth = 0; - - error = path_walk(name, &nd); + error = vfs_path_lookup(mnt->mnt_root, mnt, name, 0, &nd); + mntput(mnt); /* drop do_kern_mount reference */ if (error) return ERR_PTR(error); diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c index cf61dc8ae942..21928056e35e 100644 --- a/fs/nfsd/auth.c +++ b/fs/nfsd/auth.c @@ -9,10 +9,11 @@ #include <linux/sunrpc/svc.h> #include <linux/sunrpc/svcauth.h> #include <linux/nfsd/nfsd.h> +#include <linux/nfsd/export.h> #define CAP_NFSD_MASK (CAP_FS_MASK|CAP_TO_MASK(CAP_SYS_RESOURCE)) -static int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp) +int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp) { struct exp_flavor_info *f; struct exp_flavor_info *end = exp->ex_flavors + exp->ex_nflavors; diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index c7bbf460b009..6ab8de40904c 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -1265,7 +1265,7 @@ struct svc_export * rqst_exp_get_by_name(struct svc_rqst *rqstp, struct vfsmount *mnt, struct dentry *dentry) { - struct svc_export *gssexp, *exp = NULL; + struct svc_export *gssexp, *exp = ERR_PTR(-ENOENT); if (rqstp->rq_client == NULL) goto gss; @@ -1288,7 +1288,7 @@ gss: &rqstp->rq_chandle); if (PTR_ERR(gssexp) == -ENOENT) return exp; - if (exp && !IS_ERR(exp)) + if (!IS_ERR(exp)) exp_put(exp); return gssexp; } @@ -1296,7 +1296,7 @@ gss: struct svc_export * rqst_exp_find(struct svc_rqst *rqstp, int fsid_type, u32 *fsidv) { - struct svc_export *gssexp, *exp = NULL; + struct svc_export *gssexp, *exp = ERR_PTR(-ENOENT); if (rqstp->rq_client == NULL) goto gss; @@ -1318,7 +1318,7 @@ gss: &rqstp->rq_chandle); if (PTR_ERR(gssexp) == -ENOENT) return exp; - if (exp && !IS_ERR(exp)) + if (!IS_ERR(exp)) exp_put(exp); return gssexp; } diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index e4a4c87ec8c6..6284807bd37e 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -256,7 +256,7 @@ nfs4_close_delegation(struct nfs4_delegation *dp) /* The following nfsd_close may not actually close the file, * but we want to remove the lease in any case. */ if (dp->dl_flock) - setlease(filp, F_UNLCK, &dp->dl_flock); + vfs_setlease(filp, F_UNLCK, &dp->dl_flock); nfsd_close(filp); } @@ -1402,7 +1402,7 @@ void nfsd_release_deleg_cb(struct file_lock *fl) /* * Set the delegation file_lock back pointer. * - * Called from __setlease() with lock_kernel() held. + * Called from setlease() with lock_kernel() held. */ static void nfsd_copy_lock_deleg_cb(struct file_lock *new, struct file_lock *fl) @@ -1416,7 +1416,7 @@ void nfsd_copy_lock_deleg_cb(struct file_lock *new, struct file_lock *fl) } /* - * Called from __setlease() with lock_kernel() held + * Called from setlease() with lock_kernel() held */ static int nfsd_same_client_deleg_cb(struct file_lock *onlist, struct file_lock *try) @@ -1716,10 +1716,10 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta fl.fl_file = stp->st_vfs_file; fl.fl_pid = current->tgid; - /* setlease checks to see if delegation should be handed out. + /* vfs_setlease checks to see if delegation should be handed out. * the lock_manager callbacks fl_mylease and fl_change are used */ - if ((status = setlease(stp->st_vfs_file, + if ((status = vfs_setlease(stp->st_vfs_file, flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK, &flp))) { dprintk("NFSD: setlease failed [%d], no delegation\n", status); unhash_delegation(dp); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index e90f4a8a1d01..ee96a897a29e 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -120,14 +120,14 @@ nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp, mntput(mnt); goto out; } - if (exp2 && ((exp->ex_flags & NFSEXP_CROSSMOUNT) || EX_NOHIDE(exp2))) { + if ((exp->ex_flags & NFSEXP_CROSSMOUNT) || EX_NOHIDE(exp2)) { /* successfully crossed mount point */ exp_put(exp); *expp = exp2; dput(dentry); *dpp = mounts; } else { - if (exp2) exp_put(exp2); + exp_put(exp2); dput(mounts); } mntput(mnt); @@ -1797,6 +1797,11 @@ nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat) return err; } +static int exp_rdonly(struct svc_rqst *rqstp, struct svc_export *exp) +{ + return nfsexp_flags(rqstp, exp) & NFSEXP_READONLY; +} + /* * Check for a user's access permissions to this inode. */ @@ -1833,7 +1838,7 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp, */ if (!(acc & MAY_LOCAL_ACCESS)) if (acc & (MAY_WRITE | MAY_SATTR | MAY_TRUNC)) { - if (EX_RDONLY(exp, rqstp) || IS_RDONLY(inode)) + if (exp_rdonly(rqstp, exp) || IS_RDONLY(inode)) return nfserr_rofs; if (/* (acc & MAY_WRITE) && */ IS_IMMUTABLE(inode)) return nfserr_perm; @@ -1916,7 +1921,7 @@ nfsd_racache_init(int cache_size) raparm_hash[i].pb_head = NULL; spin_lock_init(&raparm_hash[i].pb_lock); } - nperbucket = cache_size >> RAPARM_HASH_BITS; + nperbucket = DIV_ROUND_UP(cache_size, RAPARM_HASH_SIZE); for (i = 0; i < cache_size - 1; i++) { if (i % nperbucket == 0) raparm_hash[j++].pb_head = raparml + i; diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 84bf6e79de23..460d440310f2 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -232,7 +232,7 @@ static int ocfs2_readpage(struct file *file, struct page *page) * might now be discovering a truncate that hit on another node. * block_read_full_page->get_block freaks out if it is asked to read * beyond the end of a file, so we check here. Callers - * (generic_file_read, fault->nopage) are clever enough to check i_size + * (generic_file_read, vm_ops->fault) are clever enough to check i_size * and notice that the page they just read isn't needed. * * XXX sys_readahead() seems to get that wrong? diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c index d79aa12137d2..ee64749e2eeb 100644 --- a/fs/ocfs2/mmap.c +++ b/fs/ocfs2/mmap.c @@ -60,31 +60,28 @@ static inline int ocfs2_vm_op_unblock_sigs(sigset_t *oldset) return sigprocmask(SIG_SETMASK, oldset, NULL); } -static struct page *ocfs2_nopage(struct vm_area_struct * area, - unsigned long address, - int *type) +static int ocfs2_fault(struct vm_area_struct *area, struct vm_fault *vmf) { - struct page *page = NOPAGE_SIGBUS; sigset_t blocked, oldset; - int ret; + int error, ret; - mlog_entry("(area=%p, address=%lu, type=%p)\n", area, address, - type); + mlog_entry("(area=%p, page offset=%lu)\n", area, vmf->pgoff); - ret = ocfs2_vm_op_block_sigs(&blocked, &oldset); - if (ret < 0) { - mlog_errno(ret); + error = ocfs2_vm_op_block_sigs(&blocked, &oldset); + if (error < 0) { + mlog_errno(error); + ret = VM_FAULT_SIGBUS; goto out; } - page = filemap_nopage(area, address, type); + ret = filemap_fault(area, vmf); - ret = ocfs2_vm_op_unblock_sigs(&oldset); - if (ret < 0) - mlog_errno(ret); + error = ocfs2_vm_op_unblock_sigs(&oldset); + if (error < 0) + mlog_errno(error); out: - mlog_exit_ptr(page); - return page; + mlog_exit_ptr(vmf->page); + return ret; } static int __ocfs2_page_mkwrite(struct inode *inode, struct buffer_head *di_bh, @@ -209,7 +206,7 @@ out: } static struct vm_operations_struct ocfs2_file_vm_ops = { - .nopage = ocfs2_nopage, + .fault = ocfs2_fault, .page_mkwrite = ocfs2_page_mkwrite, }; @@ -226,6 +223,7 @@ int ocfs2_mmap(struct file *file, struct vm_area_struct *vma) ocfs2_meta_unlock(file->f_dentry->d_inode, lock_level); out: vma->vm_ops = &ocfs2_file_vm_ops; + vma->vm_flags |= VM_CAN_NONLINEAR; return 0; } diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 98e0b85a9bb2..783c57ec07d3 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -372,11 +372,10 @@ void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len, { struct hd_struct *p; - p = kmalloc(sizeof(*p), GFP_KERNEL); + p = kzalloc(sizeof(*p), GFP_KERNEL); if (!p) return; - memset(p, 0, sizeof(*p)); p->start_sect = start; p->nr_sects = len; p->partno = part; diff --git a/fs/proc/base.c b/fs/proc/base.c index 42cb4f5613b6..3c77d5a64e7c 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -72,6 +72,7 @@ #include <linux/poll.h> #include <linux/nsproxy.h> #include <linux/oom.h> +#include <linux/elf.h> #include "internal.h" /* NOTE: @@ -1014,7 +1015,7 @@ static int task_dumpable(struct task_struct *task) task_lock(task); mm = task->mm; if (mm) - dumpable = mm->dumpable; + dumpable = get_dumpable(mm); task_unlock(task); if(dumpable == 1) return 1; @@ -1785,6 +1786,91 @@ static const struct inode_operations proc_attr_dir_inode_operations = { #endif +#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE) +static ssize_t proc_coredump_filter_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct task_struct *task = get_proc_task(file->f_dentry->d_inode); + struct mm_struct *mm; + char buffer[PROC_NUMBUF]; + size_t len; + int ret; + + if (!task) + return -ESRCH; + + ret = 0; + mm = get_task_mm(task); + if (mm) { + len = snprintf(buffer, sizeof(buffer), "%08lx\n", + ((mm->flags & MMF_DUMP_FILTER_MASK) >> + MMF_DUMP_FILTER_SHIFT)); + mmput(mm); + ret = simple_read_from_buffer(buf, count, ppos, buffer, len); + } + + put_task_struct(task); + + return ret; +} + +static ssize_t proc_coredump_filter_write(struct file *file, + const char __user *buf, + size_t count, + loff_t *ppos) +{ + struct task_struct *task; + struct mm_struct *mm; + char buffer[PROC_NUMBUF], *end; + unsigned int val; + int ret; + int i; + unsigned long mask; + + ret = -EFAULT; + memset(buffer, 0, sizeof(buffer)); + if (count > sizeof(buffer) - 1) + count = sizeof(buffer) - 1; + if (copy_from_user(buffer, buf, count)) + goto out_no_task; + + ret = -EINVAL; + val = (unsigned int)simple_strtoul(buffer, &end, 0); + if (*end == '\n') + end++; + if (end - buffer == 0) + goto out_no_task; + + ret = -ESRCH; + task = get_proc_task(file->f_dentry->d_inode); + if (!task) + goto out_no_task; + + ret = end - buffer; + mm = get_task_mm(task); + if (!mm) + goto out_no_mm; + + for (i = 0, mask = 1; i < MMF_DUMP_FILTER_BITS; i++, mask <<= 1) { + if (val & mask) + set_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags); + else + clear_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags); + } + + mmput(mm); + out_no_mm: + put_task_struct(task); + out_no_task: + return ret; +} + +static const struct file_operations proc_coredump_filter_operations = { + .read = proc_coredump_filter_read, + .write = proc_coredump_filter_write, +}; +#endif + /* * /proc/self: */ @@ -2005,6 +2091,9 @@ static const struct pid_entry tgid_base_stuff[] = { #ifdef CONFIG_FAULT_INJECTION REG("make-it-fail", S_IRUGO|S_IWUSR, fault_inject), #endif +#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE) + REG("coredump_filter", S_IRUGO|S_IWUSR, coredump_filter), +#endif #ifdef CONFIG_TASK_IO_ACCOUNTING INF("io", S_IRUGO, pid_io_accounting), #endif diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index d24b8d46059a..f133afebed7a 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -445,6 +445,11 @@ static int show_stat(struct seq_file *p, void *v) cputime64_t user, nice, system, idle, iowait, irq, softirq, steal; u64 sum = 0; struct timespec boottime; + unsigned int *per_irq_sum; + + per_irq_sum = kzalloc(sizeof(unsigned int)*NR_IRQS, GFP_KERNEL); + if (!per_irq_sum) + return -ENOMEM; user = nice = system = idle = iowait = irq = softirq = steal = cputime64_zero; @@ -462,8 +467,11 @@ static int show_stat(struct seq_file *p, void *v) irq = cputime64_add(irq, kstat_cpu(i).cpustat.irq); softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq); steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal); - for (j = 0 ; j < NR_IRQS ; j++) - sum += kstat_cpu(i).irqs[j]; + for (j = 0; j < NR_IRQS; j++) { + unsigned int temp = kstat_cpu(i).irqs[j]; + sum += temp; + per_irq_sum[j] += temp; + } } seq_printf(p, "cpu %llu %llu %llu %llu %llu %llu %llu %llu\n", @@ -501,7 +509,7 @@ static int show_stat(struct seq_file *p, void *v) #if !defined(CONFIG_PPC64) && !defined(CONFIG_ALPHA) && !defined(CONFIG_IA64) for (i = 0; i < NR_IRQS; i++) - seq_printf(p, " %u", kstat_irqs(i)); + seq_printf(p, " %u", per_irq_sum[i]); #endif seq_printf(p, @@ -516,6 +524,7 @@ static int show_stat(struct seq_file *p, void *v) nr_running(), nr_iowait()); + kfree(per_irq_sum); return 0; } diff --git a/fs/splice.c b/fs/splice.c index 53fc2082a468..22496d2a73fa 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -265,7 +265,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, unsigned int flags) { struct address_space *mapping = in->f_mapping; - unsigned int loff, nr_pages; + unsigned int loff, nr_pages, req_pages; struct page *pages[PIPE_BUFFERS]; struct partial_page partial[PIPE_BUFFERS]; struct page *page; @@ -281,28 +281,24 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, index = *ppos >> PAGE_CACHE_SHIFT; loff = *ppos & ~PAGE_CACHE_MASK; - nr_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - - if (nr_pages > PIPE_BUFFERS) - nr_pages = PIPE_BUFFERS; - - /* - * Don't try to 2nd guess the read-ahead logic, call into - * page_cache_readahead() like the page cache reads would do. - */ - page_cache_readahead(mapping, &in->f_ra, in, index, nr_pages); + req_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + nr_pages = min(req_pages, (unsigned)PIPE_BUFFERS); /* * Lookup the (hopefully) full range of pages we need. */ spd.nr_pages = find_get_pages_contig(mapping, index, nr_pages, pages); + index += spd.nr_pages; /* * If find_get_pages_contig() returned fewer pages than we needed, - * allocate the rest and fill in the holes. + * readahead/allocate the rest and fill in the holes. */ + if (spd.nr_pages < nr_pages) + page_cache_sync_readahead(mapping, &in->f_ra, in, + index, req_pages - spd.nr_pages); + error = 0; - index += spd.nr_pages; while (spd.nr_pages < nr_pages) { /* * Page could be there, find_get_pages_contig() breaks on @@ -311,12 +307,6 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, page = find_get_page(mapping, index); if (!page) { /* - * Make sure the read-ahead engine is notified - * about this failure. - */ - handle_ra_miss(mapping, &in->f_ra, index); - - /* * page didn't exist, allocate one. */ page = page_cache_alloc_cold(mapping); @@ -361,6 +351,10 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, this_len = min_t(unsigned long, len, PAGE_CACHE_SIZE - loff); page = pages[page_nr]; + if (PageReadahead(page)) + page_cache_async_readahead(mapping, &in->f_ra, in, + page, index, req_pages - page_nr); + /* * If the page isn't uptodate, we may need to start io on it */ @@ -453,6 +447,7 @@ fill_it: */ while (page_nr < nr_pages) page_cache_release(pages[page_nr++]); + in->f_ra.prev_index = index; if (spd.nr_pages) return splice_to_pipe(pipe, &spd); diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index aee966c44aac..048e6054c2fd 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -361,20 +361,20 @@ static struct dentry_operations sysfs_dentry_ops = { struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type) { char *dup_name = NULL; - struct sysfs_dirent *sd = NULL; + struct sysfs_dirent *sd; if (type & SYSFS_COPY_NAME) { name = dup_name = kstrdup(name, GFP_KERNEL); if (!name) - goto err_out; + return NULL; } sd = kmem_cache_zalloc(sysfs_dir_cachep, GFP_KERNEL); if (!sd) - goto err_out; + goto err_out1; if (sysfs_alloc_ino(&sd->s_ino)) - goto err_out; + goto err_out2; atomic_set(&sd->s_count, 1); atomic_set(&sd->s_active, 0); @@ -386,9 +386,10 @@ struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type) return sd; - err_out: - kfree(dup_name); + err_out2: kmem_cache_free(sysfs_dir_cachep, sd); + err_out1: + kfree(dup_name); return NULL; } @@ -698,17 +699,19 @@ static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd, /* link in */ sysfs_addrm_start(&acxt, parent_sd); + if (!sysfs_find_dirent(parent_sd, name)) { sysfs_add_one(&acxt, sd); sysfs_link_sibling(sd); } - if (sysfs_addrm_finish(&acxt)) { - *p_sd = sd; - return 0; + + if (!sysfs_addrm_finish(&acxt)) { + sysfs_put(sd); + return -EEXIST; } - sysfs_put(sd); - return -EEXIST; + *p_sd = sd; + return 0; } int sysfs_create_subdir(struct kobject *kobj, const char *name, diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index cc497994b2a8..3e1cc062a740 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -410,11 +410,12 @@ int sysfs_add_file(struct sysfs_dirent *dir_sd, const struct attribute *attr, sysfs_link_sibling(sd); } - if (sysfs_addrm_finish(&acxt)) - return 0; + if (!sysfs_addrm_finish(&acxt)) { + sysfs_put(sd); + return -EEXIST; + } - sysfs_put(sd); - return -EEXIST; + return 0; } diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c index 3756e152285a..10d1b52899f1 100644 --- a/fs/sysfs/inode.c +++ b/fs/sysfs/inode.c @@ -133,7 +133,7 @@ static inline void set_inode_attr(struct inode * inode, struct iattr * iattr) */ static struct lock_class_key sysfs_inode_imutex_key; -void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode) +static void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode) { inode->i_blocks = 0; inode->i_mapping->a_ops = &sysfs_aops; diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index 402cc356203c..60714d075c2f 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c @@ -43,19 +43,19 @@ static int sysfs_fill_super(struct super_block *sb, void *data, int silent) sb->s_time_gran = 1; sysfs_sb = sb; - inode = new_inode(sysfs_sb); + /* get root inode, initialize and unlock it */ + inode = sysfs_get_inode(&sysfs_root); if (!inode) { pr_debug("sysfs: could not get root inode\n"); return -ENOMEM; } - sysfs_init_inode(&sysfs_root, inode); - inode->i_op = &sysfs_dir_inode_operations; inode->i_fop = &sysfs_dir_operations; - /* directory inodes start off with i_nlink == 2 (for "." entry) */ - inc_nlink(inode); + inc_nlink(inode); /* directory, account for "." */ + unlock_new_inode(inode); + /* instantiate and link root dentry */ root = d_alloc_root(inode); if (!root) { pr_debug("%s: could not get root dentry!\n",__FUNCTION__); diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c index 2f86e0422290..4ce687f0b5d0 100644 --- a/fs/sysfs/symlink.c +++ b/fs/sysfs/symlink.c @@ -86,7 +86,9 @@ int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char sd = sysfs_new_dirent(name, S_IFLNK|S_IRWXUGO, SYSFS_KOBJ_LINK); if (!sd) goto out_put; + sd->s_elem.symlink.target_sd = target_sd; + target_sd = NULL; /* reference is now owned by the symlink */ sysfs_addrm_start(&acxt, parent_sd); @@ -95,11 +97,13 @@ int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char sysfs_link_sibling(sd); } - if (sysfs_addrm_finish(&acxt)) - return 0; + if (!sysfs_addrm_finish(&acxt)) { + error = -EEXIST; + goto out_put; + } + + return 0; - error = -EEXIST; - /* fall through */ out_put: sysfs_put(target_sd); sysfs_put(sd); diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h index 6a37f2386a8d..6b8c8d76d308 100644 --- a/fs/sysfs/sysfs.h +++ b/fs/sysfs/sysfs.h @@ -71,7 +71,6 @@ extern void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, extern int sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt); extern void sysfs_delete_inode(struct inode *inode); -extern void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode); extern struct inode * sysfs_get_inode(struct sysfs_dirent *sd); extern void sysfs_instantiate(struct dentry *dentry, struct inode *inode); diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c index 4cec91015681..ef48d094dd2b 100644 --- a/fs/udf/balloc.c +++ b/fs/udf/balloc.c @@ -41,18 +41,17 @@ #define uint(x) xuint(x) #define xuint(x) __le ## x -static inline int find_next_one_bit (void * addr, int size, int offset) +static inline int find_next_one_bit(void *addr, int size, int offset) { - uintBPL_t * p = ((uintBPL_t *) addr) + (offset / BITS_PER_LONG); - int result = offset & ~(BITS_PER_LONG-1); + uintBPL_t *p = ((uintBPL_t *) addr) + (offset / BITS_PER_LONG); + int result = offset & ~(BITS_PER_LONG - 1); unsigned long tmp; if (offset >= size) return size; size -= result; - offset &= (BITS_PER_LONG-1); - if (offset) - { + offset &= (BITS_PER_LONG - 1); + if (offset) { tmp = leBPL_to_cpup(p++); tmp &= ~0UL << offset; if (size < BITS_PER_LONG) @@ -62,8 +61,7 @@ static inline int find_next_one_bit (void * addr, int size, int offset) size -= BITS_PER_LONG; result += BITS_PER_LONG; } - while (size & ~(BITS_PER_LONG-1)) - { + while (size & ~(BITS_PER_LONG - 1)) { if ((tmp = leBPL_to_cpup(p++))) goto found_middle; result += BITS_PER_LONG; @@ -72,17 +70,18 @@ static inline int find_next_one_bit (void * addr, int size, int offset) if (!size) return result; tmp = leBPL_to_cpup(p); -found_first: - tmp &= ~0UL >> (BITS_PER_LONG-size); -found_middle: + found_first: + tmp &= ~0UL >> (BITS_PER_LONG - size); + found_middle: return result + ffz(~tmp); } #define find_first_one_bit(addr, size)\ find_next_one_bit((addr), (size), 0) -static int read_block_bitmap(struct super_block * sb, - struct udf_bitmap *bitmap, unsigned int block, unsigned long bitmap_nr) +static int read_block_bitmap(struct super_block *sb, + struct udf_bitmap *bitmap, unsigned int block, + unsigned long bitmap_nr) { struct buffer_head *bh = NULL; int retval = 0; @@ -92,38 +91,39 @@ static int read_block_bitmap(struct super_block * sb, loc.partitionReferenceNum = UDF_SB_PARTITION(sb); bh = udf_tread(sb, udf_get_lb_pblock(sb, loc, block)); - if (!bh) - { + if (!bh) { retval = -EIO; } bitmap->s_block_bitmap[bitmap_nr] = bh; return retval; } -static int __load_block_bitmap(struct super_block * sb, - struct udf_bitmap *bitmap, unsigned int block_group) +static int __load_block_bitmap(struct super_block *sb, + struct udf_bitmap *bitmap, + unsigned int block_group) { int retval = 0; int nr_groups = bitmap->s_nr_groups; - if (block_group >= nr_groups) - { - udf_debug("block_group (%d) > nr_groups (%d)\n", block_group, nr_groups); + if (block_group >= nr_groups) { + udf_debug("block_group (%d) > nr_groups (%d)\n", block_group, + nr_groups); } if (bitmap->s_block_bitmap[block_group]) return block_group; - else - { - retval = read_block_bitmap(sb, bitmap, block_group, block_group); + else { + retval = + read_block_bitmap(sb, bitmap, block_group, block_group); if (retval < 0) return retval; return block_group; } } -static inline int load_block_bitmap(struct super_block * sb, - struct udf_bitmap *bitmap, unsigned int block_group) +static inline int load_block_bitmap(struct super_block *sb, + struct udf_bitmap *bitmap, + unsigned int block_group) { int slot; @@ -138,13 +138,14 @@ static inline int load_block_bitmap(struct super_block * sb, return slot; } -static void udf_bitmap_free_blocks(struct super_block * sb, - struct inode * inode, - struct udf_bitmap *bitmap, - kernel_lb_addr bloc, uint32_t offset, uint32_t count) +static void udf_bitmap_free_blocks(struct super_block *sb, + struct inode *inode, + struct udf_bitmap *bitmap, + kernel_lb_addr bloc, uint32_t offset, + uint32_t count) { struct udf_sb_info *sbi = UDF_SB(sb); - struct buffer_head * bh = NULL; + struct buffer_head *bh = NULL; unsigned long block; unsigned long block_group; unsigned long bit; @@ -154,17 +155,22 @@ static void udf_bitmap_free_blocks(struct super_block * sb, mutex_lock(&sbi->s_alloc_mutex); if (bloc.logicalBlockNum < 0 || - (bloc.logicalBlockNum + count) > UDF_SB_PARTLEN(sb, bloc.partitionReferenceNum)) - { - udf_debug("%d < %d || %d + %d > %d\n", - bloc.logicalBlockNum, 0, bloc.logicalBlockNum, count, - UDF_SB_PARTLEN(sb, bloc.partitionReferenceNum)); + (bloc.logicalBlockNum + count) > UDF_SB_PARTLEN(sb, + bloc. + partitionReferenceNum)) + { + udf_debug("%d < %d || %d + %d > %d\n", bloc.logicalBlockNum, 0, + bloc.logicalBlockNum, count, UDF_SB_PARTLEN(sb, + bloc. + partitionReferenceNum)); goto error_return; } - block = bloc.logicalBlockNum + offset + (sizeof(struct spaceBitmapDesc) << 3); + block = + bloc.logicalBlockNum + offset + + (sizeof(struct spaceBitmapDesc) << 3); -do_more: + do_more: overflow = 0; block_group = block >> (sb->s_blocksize_bits + 3); bit = block % (sb->s_blocksize << 3); @@ -172,8 +178,7 @@ do_more: /* * Check to see if we are freeing blocks across a group boundary. */ - if (bit + count > (sb->s_blocksize << 3)) - { + if (bit + count > (sb->s_blocksize << 3)) { overflow = bit + count - (sb->s_blocksize << 3); count -= overflow; } @@ -182,32 +187,31 @@ do_more: goto error_return; bh = bitmap->s_block_bitmap[bitmap_nr]; - for (i=0; i < count; i++) - { - if (udf_set_bit(bit + i, bh->b_data)) - { + for (i = 0; i < count; i++) { + if (udf_set_bit(bit + i, bh->b_data)) { udf_debug("bit %ld already set\n", bit + i); - udf_debug("byte=%2x\n", ((char *)bh->b_data)[(bit + i) >> 3]); - } - else - { + udf_debug("byte=%2x\n", + ((char *)bh->b_data)[(bit + i) >> 3]); + } else { if (inode) DQUOT_FREE_BLOCK(inode, 1); - if (UDF_SB_LVIDBH(sb)) - { - UDF_SB_LVID(sb)->freeSpaceTable[UDF_SB_PARTITION(sb)] = - cpu_to_le32(le32_to_cpu(UDF_SB_LVID(sb)->freeSpaceTable[UDF_SB_PARTITION(sb)])+1); + if (UDF_SB_LVIDBH(sb)) { + UDF_SB_LVID(sb)-> + freeSpaceTable[UDF_SB_PARTITION(sb)] = + cpu_to_le32(le32_to_cpu + (UDF_SB_LVID(sb)-> + freeSpaceTable[UDF_SB_PARTITION + (sb)]) + 1); } } } mark_buffer_dirty(bh); - if (overflow) - { + if (overflow) { block += count; count = overflow; goto do_more; } -error_return: + error_return: sb->s_dirt = 1; if (UDF_SB_LVIDBH(sb)) mark_buffer_dirty(UDF_SB_LVIDBH(sb)); @@ -215,10 +219,11 @@ error_return: return; } -static int udf_bitmap_prealloc_blocks(struct super_block * sb, - struct inode * inode, - struct udf_bitmap *bitmap, uint16_t partition, uint32_t first_block, - uint32_t block_count) +static int udf_bitmap_prealloc_blocks(struct super_block *sb, + struct inode *inode, + struct udf_bitmap *bitmap, + uint16_t partition, uint32_t first_block, + uint32_t block_count) { struct udf_sb_info *sbi = UDF_SB(sb); int alloc_count = 0; @@ -233,9 +238,10 @@ static int udf_bitmap_prealloc_blocks(struct super_block * sb, if (first_block + block_count > UDF_SB_PARTLEN(sb, partition)) block_count = UDF_SB_PARTLEN(sb, partition) - first_block; -repeat: + repeat: nr_groups = (UDF_SB_PARTLEN(sb, partition) + - (sizeof(struct spaceBitmapDesc) << 3) + (sb->s_blocksize * 8) - 1) / (sb->s_blocksize * 8); + (sizeof(struct spaceBitmapDesc) << 3) + + (sb->s_blocksize * 8) - 1) / (sb->s_blocksize * 8); block = first_block + (sizeof(struct spaceBitmapDesc) << 3); block_group = block >> (sb->s_blocksize_bits + 3); group_start = block_group ? 0 : sizeof(struct spaceBitmapDesc); @@ -247,31 +253,30 @@ repeat: bit = block % (sb->s_blocksize << 3); - while (bit < (sb->s_blocksize << 3) && block_count > 0) - { + while (bit < (sb->s_blocksize << 3) && block_count > 0) { if (!udf_test_bit(bit, bh->b_data)) goto out; else if (DQUOT_PREALLOC_BLOCK(inode, 1)) goto out; - else if (!udf_clear_bit(bit, bh->b_data)) - { + else if (!udf_clear_bit(bit, bh->b_data)) { udf_debug("bit already cleared for block %d\n", bit); DQUOT_FREE_BLOCK(inode, 1); goto out; } - block_count --; - alloc_count ++; - bit ++; - block ++; + block_count--; + alloc_count++; + bit++; + block++; } mark_buffer_dirty(bh); if (block_count > 0) goto repeat; -out: - if (UDF_SB_LVIDBH(sb)) - { + out: + if (UDF_SB_LVIDBH(sb)) { UDF_SB_LVID(sb)->freeSpaceTable[partition] = - cpu_to_le32(le32_to_cpu(UDF_SB_LVID(sb)->freeSpaceTable[partition])-alloc_count); + cpu_to_le32(le32_to_cpu + (UDF_SB_LVID(sb)->freeSpaceTable[partition]) - + alloc_count); mark_buffer_dirty(UDF_SB_LVIDBH(sb)); } sb->s_dirt = 1; @@ -279,12 +284,13 @@ out: return alloc_count; } -static int udf_bitmap_new_block(struct super_block * sb, - struct inode * inode, - struct udf_bitmap *bitmap, uint16_t partition, uint32_t goal, int *err) +static int udf_bitmap_new_block(struct super_block *sb, + struct inode *inode, + struct udf_bitmap *bitmap, uint16_t partition, + uint32_t goal, int *err) { struct udf_sb_info *sbi = UDF_SB(sb); - int newbit, bit=0, block, block_group, group_start; + int newbit, bit = 0, block, block_group, group_start; int end_goal, nr_groups, bitmap_nr, i; struct buffer_head *bh = NULL; char *ptr; @@ -293,7 +299,7 @@ static int udf_bitmap_new_block(struct super_block * sb, *err = -ENOSPC; mutex_lock(&sbi->s_alloc_mutex); -repeat: + repeat: if (goal < 0 || goal >= UDF_SB_PARTLEN(sb, partition)) goal = 0; @@ -306,38 +312,39 @@ repeat: if (bitmap_nr < 0) goto error_return; bh = bitmap->s_block_bitmap[bitmap_nr]; - ptr = memscan((char *)bh->b_data + group_start, 0xFF, sb->s_blocksize - group_start); + ptr = + memscan((char *)bh->b_data + group_start, 0xFF, + sb->s_blocksize - group_start); - if ((ptr - ((char *)bh->b_data)) < sb->s_blocksize) - { + if ((ptr - ((char *)bh->b_data)) < sb->s_blocksize) { bit = block % (sb->s_blocksize << 3); - if (udf_test_bit(bit, bh->b_data)) - { + if (udf_test_bit(bit, bh->b_data)) { goto got_block; } end_goal = (bit + 63) & ~63; bit = udf_find_next_one_bit(bh->b_data, end_goal, bit); if (bit < end_goal) goto got_block; - ptr = memscan((char *)bh->b_data + (bit >> 3), 0xFF, sb->s_blocksize - ((bit + 7) >> 3)); + ptr = + memscan((char *)bh->b_data + (bit >> 3), 0xFF, + sb->s_blocksize - ((bit + 7) >> 3)); newbit = (ptr - ((char *)bh->b_data)) << 3; - if (newbit < sb->s_blocksize << 3) - { + if (newbit < sb->s_blocksize << 3) { bit = newbit; goto search_back; } - newbit = udf_find_next_one_bit(bh->b_data, sb->s_blocksize << 3, bit); - if (newbit < sb->s_blocksize << 3) - { + newbit = + udf_find_next_one_bit(bh->b_data, sb->s_blocksize << 3, + bit); + if (newbit < sb->s_blocksize << 3) { bit = newbit; goto got_block; } } - for (i=0; i<(nr_groups*2); i++) - { - block_group ++; + for (i = 0; i < (nr_groups * 2); i++) { + block_group++; if (block_group >= nr_groups) block_group = 0; group_start = block_group ? 0 : sizeof(struct spaceBitmapDesc); @@ -346,67 +353,69 @@ repeat: if (bitmap_nr < 0) goto error_return; bh = bitmap->s_block_bitmap[bitmap_nr]; - if (i < nr_groups) - { - ptr = memscan((char *)bh->b_data + group_start, 0xFF, sb->s_blocksize - group_start); - if ((ptr - ((char *)bh->b_data)) < sb->s_blocksize) - { + if (i < nr_groups) { + ptr = + memscan((char *)bh->b_data + group_start, 0xFF, + sb->s_blocksize - group_start); + if ((ptr - ((char *)bh->b_data)) < sb->s_blocksize) { bit = (ptr - ((char *)bh->b_data)) << 3; break; } - } - else - { - bit = udf_find_next_one_bit((char *)bh->b_data, sb->s_blocksize << 3, group_start << 3); + } else { + bit = + udf_find_next_one_bit((char *)bh->b_data, + sb->s_blocksize << 3, + group_start << 3); if (bit < sb->s_blocksize << 3) break; } } - if (i >= (nr_groups*2)) - { + if (i >= (nr_groups * 2)) { mutex_unlock(&sbi->s_alloc_mutex); return newblock; } if (bit < sb->s_blocksize << 3) goto search_back; else - bit = udf_find_next_one_bit(bh->b_data, sb->s_blocksize << 3, group_start << 3); - if (bit >= sb->s_blocksize << 3) - { + bit = + udf_find_next_one_bit(bh->b_data, sb->s_blocksize << 3, + group_start << 3); + if (bit >= sb->s_blocksize << 3) { mutex_unlock(&sbi->s_alloc_mutex); return 0; } -search_back: - for (i=0; i<7 && bit > (group_start << 3) && udf_test_bit(bit - 1, bh->b_data); i++, bit--); + search_back: + for (i = 0; + i < 7 && bit > (group_start << 3) + && udf_test_bit(bit - 1, bh->b_data); i++, bit--) ; -got_block: + got_block: /* * Check quota for allocation of this block. */ - if (inode && DQUOT_ALLOC_BLOCK(inode, 1)) - { + if (inode && DQUOT_ALLOC_BLOCK(inode, 1)) { mutex_unlock(&sbi->s_alloc_mutex); *err = -EDQUOT; return 0; } newblock = bit + (block_group << (sb->s_blocksize_bits + 3)) - - (sizeof(struct spaceBitmapDesc) << 3); + (sizeof(struct spaceBitmapDesc) << 3); - if (!udf_clear_bit(bit, bh->b_data)) - { + if (!udf_clear_bit(bit, bh->b_data)) { udf_debug("bit already cleared for block %d\n", bit); goto repeat; } mark_buffer_dirty(bh); - if (UDF_SB_LVIDBH(sb)) - { + if (UDF_SB_LVIDBH(sb)) { UDF_SB_LVID(sb)->freeSpaceTable[partition] = - cpu_to_le32(le32_to_cpu(UDF_SB_LVID(sb)->freeSpaceTable[partition])-1); + cpu_to_le32(le32_to_cpu + (UDF_SB_LVID(sb)->freeSpaceTable[partition]) - + 1); mark_buffer_dirty(UDF_SB_LVIDBH(sb)); } sb->s_dirt = 1; @@ -414,16 +423,17 @@ got_block: *err = 0; return newblock; -error_return: + error_return: *err = -EIO; mutex_unlock(&sbi->s_alloc_mutex); return 0; } -static void udf_table_free_blocks(struct super_block * sb, - struct inode * inode, - struct inode * table, - kernel_lb_addr bloc, uint32_t offset, uint32_t count) +static void udf_table_free_blocks(struct super_block *sb, + struct inode *inode, + struct inode *table, + kernel_lb_addr bloc, uint32_t offset, + uint32_t count) { struct udf_sb_info *sbi = UDF_SB(sb); uint32_t start, end; @@ -435,11 +445,14 @@ static void udf_table_free_blocks(struct super_block * sb, mutex_lock(&sbi->s_alloc_mutex); if (bloc.logicalBlockNum < 0 || - (bloc.logicalBlockNum + count) > UDF_SB_PARTLEN(sb, bloc.partitionReferenceNum)) - { - udf_debug("%d < %d || %d + %d > %d\n", - bloc.logicalBlockNum, 0, bloc.logicalBlockNum, count, - UDF_SB_PARTLEN(sb, bloc.partitionReferenceNum)); + (bloc.logicalBlockNum + count) > UDF_SB_PARTLEN(sb, + bloc. + partitionReferenceNum)) + { + udf_debug("%d < %d || %d + %d > %d\n", bloc.logicalBlockNum, 0, + bloc.logicalBlockNum, count, UDF_SB_PARTLEN(sb, + bloc. + partitionReferenceNum)); goto error_return; } @@ -447,10 +460,11 @@ static void udf_table_free_blocks(struct super_block * sb, but.. oh well */ if (inode) DQUOT_FREE_BLOCK(inode, count); - if (UDF_SB_LVIDBH(sb)) - { + if (UDF_SB_LVIDBH(sb)) { UDF_SB_LVID(sb)->freeSpaceTable[UDF_SB_PARTITION(sb)] = - cpu_to_le32(le32_to_cpu(UDF_SB_LVID(sb)->freeSpaceTable[UDF_SB_PARTITION(sb)])+count); + cpu_to_le32(le32_to_cpu + (UDF_SB_LVID(sb)-> + freeSpaceTable[UDF_SB_PARTITION(sb)]) + count); mark_buffer_dirty(UDF_SB_LVIDBH(sb)); } @@ -463,73 +477,75 @@ static void udf_table_free_blocks(struct super_block * sb, epos.bh = oepos.bh = NULL; while (count && (etype = - udf_next_aext(table, &epos, &eloc, &elen, 1)) != -1) - { + udf_next_aext(table, &epos, &eloc, &elen, 1)) != -1) { if (((eloc.logicalBlockNum + (elen >> sb->s_blocksize_bits)) == - start)) - { - if ((0x3FFFFFFF - elen) < (count << sb->s_blocksize_bits)) - { - count -= ((0x3FFFFFFF - elen) >> sb->s_blocksize_bits); - start += ((0x3FFFFFFF - elen) >> sb->s_blocksize_bits); - elen = (etype << 30) | (0x40000000 - sb->s_blocksize); - } - else - { + start)) { + if ((0x3FFFFFFF - elen) < + (count << sb->s_blocksize_bits)) { + count -= + ((0x3FFFFFFF - + elen) >> sb->s_blocksize_bits); + start += + ((0x3FFFFFFF - + elen) >> sb->s_blocksize_bits); + elen = + (etype << 30) | (0x40000000 - + sb->s_blocksize); + } else { elen = (etype << 30) | - (elen + (count << sb->s_blocksize_bits)); + (elen + (count << sb->s_blocksize_bits)); start += count; count = 0; } udf_write_aext(table, &oepos, eloc, elen, 1); - } - else if (eloc.logicalBlockNum == (end + 1)) - { - if ((0x3FFFFFFF - elen) < (count << sb->s_blocksize_bits)) - { - count -= ((0x3FFFFFFF - elen) >> sb->s_blocksize_bits); - end -= ((0x3FFFFFFF - elen) >> sb->s_blocksize_bits); + } else if (eloc.logicalBlockNum == (end + 1)) { + if ((0x3FFFFFFF - elen) < + (count << sb->s_blocksize_bits)) { + count -= + ((0x3FFFFFFF - + elen) >> sb->s_blocksize_bits); + end -= + ((0x3FFFFFFF - + elen) >> sb->s_blocksize_bits); eloc.logicalBlockNum -= - ((0x3FFFFFFF - elen) >> sb->s_blocksize_bits); - elen = (etype << 30) | (0x40000000 - sb->s_blocksize); - } - else - { + ((0x3FFFFFFF - + elen) >> sb->s_blocksize_bits); + elen = + (etype << 30) | (0x40000000 - + sb->s_blocksize); + } else { eloc.logicalBlockNum = start; elen = (etype << 30) | - (elen + (count << sb->s_blocksize_bits)); + (elen + (count << sb->s_blocksize_bits)); end -= count; count = 0; } udf_write_aext(table, &oepos, eloc, elen, 1); } - if (epos.bh != oepos.bh) - { + if (epos.bh != oepos.bh) { i = -1; oepos.block = epos.block; brelse(oepos.bh); get_bh(epos.bh); oepos.bh = epos.bh; oepos.offset = 0; - } - else + } else oepos.offset = epos.offset; } - if (count) - { + if (count) { /* NOTE: we CANNOT use udf_add_aext here, as it can try to allocate - a new block, and since we hold the super block lock already - very bad things would happen :) + a new block, and since we hold the super block lock already + very bad things would happen :) - We copy the behavior of udf_add_aext, but instead of - trying to allocate a new block close to the existing one, - we just steal a block from the extent we are trying to add. + We copy the behavior of udf_add_aext, but instead of + trying to allocate a new block close to the existing one, + we just steal a block from the extent we are trying to add. - It would be nice if the blocks were close together, but it - isn't required. - */ + It would be nice if the blocks were close together, but it + isn't required. + */ int adsize; short_ad *sad = NULL; @@ -537,121 +553,124 @@ static void udf_table_free_blocks(struct super_block * sb, struct allocExtDesc *aed; eloc.logicalBlockNum = start; - elen = EXT_RECORDED_ALLOCATED | - (count << sb->s_blocksize_bits); + elen = EXT_RECORDED_ALLOCATED | (count << sb->s_blocksize_bits); if (UDF_I_ALLOCTYPE(table) == ICBTAG_FLAG_AD_SHORT) adsize = sizeof(short_ad); else if (UDF_I_ALLOCTYPE(table) == ICBTAG_FLAG_AD_LONG) adsize = sizeof(long_ad); - else - { + else { brelse(oepos.bh); brelse(epos.bh); goto error_return; } - if (epos.offset + (2 * adsize) > sb->s_blocksize) - { + if (epos.offset + (2 * adsize) > sb->s_blocksize) { char *sptr, *dptr; int loffset; - + brelse(oepos.bh); oepos = epos; /* Steal a block from the extent being free'd */ epos.block.logicalBlockNum = eloc.logicalBlockNum; - eloc.logicalBlockNum ++; + eloc.logicalBlockNum++; elen -= sb->s_blocksize; if (!(epos.bh = udf_tread(sb, - udf_get_lb_pblock(sb, epos.block, 0)))) - { + udf_get_lb_pblock(sb, + epos.block, + 0)))) { brelse(oepos.bh); goto error_return; } aed = (struct allocExtDesc *)(epos.bh->b_data); - aed->previousAllocExtLocation = cpu_to_le32(oepos.block.logicalBlockNum); - if (epos.offset + adsize > sb->s_blocksize) - { + aed->previousAllocExtLocation = + cpu_to_le32(oepos.block.logicalBlockNum); + if (epos.offset + adsize > sb->s_blocksize) { loffset = epos.offset; aed->lengthAllocDescs = cpu_to_le32(adsize); sptr = UDF_I_DATA(inode) + epos.offset - - udf_file_entry_alloc_offset(inode) + - UDF_I_LENEATTR(inode) - adsize; - dptr = epos.bh->b_data + sizeof(struct allocExtDesc); + udf_file_entry_alloc_offset(inode) + + UDF_I_LENEATTR(inode) - adsize; + dptr = + epos.bh->b_data + + sizeof(struct allocExtDesc); memcpy(dptr, sptr, adsize); - epos.offset = sizeof(struct allocExtDesc) + adsize; - } - else - { + epos.offset = + sizeof(struct allocExtDesc) + adsize; + } else { loffset = epos.offset + adsize; aed->lengthAllocDescs = cpu_to_le32(0); sptr = oepos.bh->b_data + epos.offset; epos.offset = sizeof(struct allocExtDesc); - if (oepos.bh) - { - aed = (struct allocExtDesc *)oepos.bh->b_data; + if (oepos.bh) { + aed = + (struct allocExtDesc *)oepos.bh-> + b_data; aed->lengthAllocDescs = - cpu_to_le32(le32_to_cpu(aed->lengthAllocDescs) + adsize); - } - else - { + cpu_to_le32(le32_to_cpu + (aed-> + lengthAllocDescs) + + adsize); + } else { UDF_I_LENALLOC(table) += adsize; mark_inode_dirty(table); } } if (UDF_SB_UDFREV(sb) >= 0x0200) - udf_new_tag(epos.bh->b_data, TAG_IDENT_AED, 3, 1, - epos.block.logicalBlockNum, sizeof(tag)); + udf_new_tag(epos.bh->b_data, TAG_IDENT_AED, 3, + 1, epos.block.logicalBlockNum, + sizeof(tag)); else - udf_new_tag(epos.bh->b_data, TAG_IDENT_AED, 2, 1, - epos.block.logicalBlockNum, sizeof(tag)); - switch (UDF_I_ALLOCTYPE(table)) - { - case ICBTAG_FLAG_AD_SHORT: + udf_new_tag(epos.bh->b_data, TAG_IDENT_AED, 2, + 1, epos.block.logicalBlockNum, + sizeof(tag)); + switch (UDF_I_ALLOCTYPE(table)) { + case ICBTAG_FLAG_AD_SHORT: { - sad = (short_ad *)sptr; - sad->extLength = cpu_to_le32( - EXT_NEXT_EXTENT_ALLOCDECS | - sb->s_blocksize); - sad->extPosition = cpu_to_le32(epos.block.logicalBlockNum); + sad = (short_ad *) sptr; + sad->extLength = + cpu_to_le32 + (EXT_NEXT_EXTENT_ALLOCDECS | sb-> + s_blocksize); + sad->extPosition = + cpu_to_le32(epos.block. + logicalBlockNum); break; } - case ICBTAG_FLAG_AD_LONG: + case ICBTAG_FLAG_AD_LONG: { - lad = (long_ad *)sptr; - lad->extLength = cpu_to_le32( - EXT_NEXT_EXTENT_ALLOCDECS | - sb->s_blocksize); - lad->extLocation = cpu_to_lelb(epos.block); + lad = (long_ad *) sptr; + lad->extLength = + cpu_to_le32 + (EXT_NEXT_EXTENT_ALLOCDECS | sb-> + s_blocksize); + lad->extLocation = + cpu_to_lelb(epos.block); break; } } - if (oepos.bh) - { + if (oepos.bh) { udf_update_tag(oepos.bh->b_data, loffset); mark_buffer_dirty(oepos.bh); - } - else + } else mark_inode_dirty(table); } - if (elen) /* It's possible that stealing the block emptied the extent */ - { + if (elen) { /* It's possible that stealing the block emptied the extent */ udf_write_aext(table, &epos, eloc, elen, 1); - if (!epos.bh) - { + if (!epos.bh) { UDF_I_LENALLOC(table) += adsize; mark_inode_dirty(table); - } - else - { + } else { aed = (struct allocExtDesc *)epos.bh->b_data; aed->lengthAllocDescs = - cpu_to_le32(le32_to_cpu(aed->lengthAllocDescs) + adsize); + cpu_to_le32(le32_to_cpu + (aed->lengthAllocDescs) + + adsize); udf_update_tag(epos.bh->b_data, epos.offset); mark_buffer_dirty(epos.bh); } @@ -661,16 +680,16 @@ static void udf_table_free_blocks(struct super_block * sb, brelse(epos.bh); brelse(oepos.bh); -error_return: + error_return: sb->s_dirt = 1; mutex_unlock(&sbi->s_alloc_mutex); return; } -static int udf_table_prealloc_blocks(struct super_block * sb, - struct inode * inode, - struct inode *table, uint16_t partition, uint32_t first_block, - uint32_t block_count) +static int udf_table_prealloc_blocks(struct super_block *sb, + struct inode *inode, + struct inode *table, uint16_t partition, + uint32_t first_block, uint32_t block_count) { struct udf_sb_info *sbi = UDF_SB(sb); int alloc_count = 0; @@ -696,39 +715,46 @@ static int udf_table_prealloc_blocks(struct super_block * sb, eloc.logicalBlockNum = 0xFFFFFFFF; while (first_block != eloc.logicalBlockNum && (etype = - udf_next_aext(table, &epos, &eloc, &elen, 1)) != -1) - { + udf_next_aext(table, + &epos, + &eloc, + &elen, + 1)) != + -1) { udf_debug("eloc=%d, elen=%d, first_block=%d\n", - eloc.logicalBlockNum, elen, first_block); - ; /* empty loop body */ + eloc.logicalBlockNum, elen, first_block); + ; /* empty loop body */ } - if (first_block == eloc.logicalBlockNum) - { + if (first_block == eloc.logicalBlockNum) { epos.offset -= adsize; alloc_count = (elen >> sb->s_blocksize_bits); - if (inode && DQUOT_PREALLOC_BLOCK(inode, alloc_count > block_count ? block_count : alloc_count)) + if (inode + && DQUOT_PREALLOC_BLOCK(inode, + alloc_count > + block_count ? block_count : + alloc_count)) alloc_count = 0; - else if (alloc_count > block_count) - { + else if (alloc_count > block_count) { alloc_count = block_count; eloc.logicalBlockNum += alloc_count; elen -= (alloc_count << sb->s_blocksize_bits); - udf_write_aext(table, &epos, eloc, (etype << 30) | elen, 1); - } - else - udf_delete_aext(table, epos, eloc, (etype << 30) | elen); - } - else + udf_write_aext(table, &epos, eloc, (etype << 30) | elen, + 1); + } else + udf_delete_aext(table, epos, eloc, + (etype << 30) | elen); + } else alloc_count = 0; brelse(epos.bh); - if (alloc_count && UDF_SB_LVIDBH(sb)) - { + if (alloc_count && UDF_SB_LVIDBH(sb)) { UDF_SB_LVID(sb)->freeSpaceTable[partition] = - cpu_to_le32(le32_to_cpu(UDF_SB_LVID(sb)->freeSpaceTable[partition])-alloc_count); + cpu_to_le32(le32_to_cpu + (UDF_SB_LVID(sb)->freeSpaceTable[partition]) - + alloc_count); mark_buffer_dirty(UDF_SB_LVIDBH(sb)); sb->s_dirt = 1; } @@ -736,9 +762,10 @@ static int udf_table_prealloc_blocks(struct super_block * sb, return alloc_count; } -static int udf_table_new_block(struct super_block * sb, - struct inode * inode, - struct inode *table, uint16_t partition, uint32_t goal, int *err) +static int udf_table_new_block(struct super_block *sb, + struct inode *inode, + struct inode *table, uint16_t partition, + uint32_t goal, int *err) { struct udf_sb_info *sbi = UDF_SB(sb); uint32_t spread = 0xFFFFFFFF, nspread = 0xFFFFFFFF; @@ -765,30 +792,27 @@ static int udf_table_new_block(struct super_block * sb, we stop. Otherwise we keep going till we run out of extents. We store the buffer_head, bloc, and extoffset of the current closest match and use that when we are done. - */ + */ epos.offset = sizeof(struct unallocSpaceEntry); epos.block = UDF_I_LOCATION(table); epos.bh = goal_epos.bh = NULL; while (spread && (etype = - udf_next_aext(table, &epos, &eloc, &elen, 1)) != -1) - { - if (goal >= eloc.logicalBlockNum) - { - if (goal < eloc.logicalBlockNum + (elen >> sb->s_blocksize_bits)) + udf_next_aext(table, &epos, &eloc, &elen, 1)) != -1) { + if (goal >= eloc.logicalBlockNum) { + if (goal < + eloc.logicalBlockNum + + (elen >> sb->s_blocksize_bits)) nspread = 0; else nspread = goal - eloc.logicalBlockNum - - (elen >> sb->s_blocksize_bits); - } - else + (elen >> sb->s_blocksize_bits); + } else nspread = eloc.logicalBlockNum - goal; - if (nspread < spread) - { + if (nspread < spread) { spread = nspread; - if (goal_epos.bh != epos.bh) - { + if (goal_epos.bh != epos.bh) { brelse(goal_epos.bh); goal_epos.bh = epos.bh; get_bh(goal_epos.bh); @@ -802,8 +826,7 @@ static int udf_table_new_block(struct super_block * sb, brelse(epos.bh); - if (spread == 0xFFFFFFFF) - { + if (spread == 0xFFFFFFFF) { brelse(goal_epos.bh); mutex_unlock(&sbi->s_alloc_mutex); return 0; @@ -815,11 +838,10 @@ static int udf_table_new_block(struct super_block * sb, /* This works, but very poorly.... */ newblock = goal_eloc.logicalBlockNum; - goal_eloc.logicalBlockNum ++; + goal_eloc.logicalBlockNum++; goal_elen -= sb->s_blocksize; - if (inode && DQUOT_ALLOC_BLOCK(inode, 1)) - { + if (inode && DQUOT_ALLOC_BLOCK(inode, 1)) { brelse(goal_epos.bh); mutex_unlock(&sbi->s_alloc_mutex); *err = -EDQUOT; @@ -832,10 +854,11 @@ static int udf_table_new_block(struct super_block * sb, udf_delete_aext(table, goal_epos, goal_eloc, goal_elen); brelse(goal_epos.bh); - if (UDF_SB_LVIDBH(sb)) - { + if (UDF_SB_LVIDBH(sb)) { UDF_SB_LVID(sb)->freeSpaceTable[partition] = - cpu_to_le32(le32_to_cpu(UDF_SB_LVID(sb)->freeSpaceTable[partition])-1); + cpu_to_le32(le32_to_cpu + (UDF_SB_LVID(sb)->freeSpaceTable[partition]) - + 1); mark_buffer_dirty(UDF_SB_LVIDBH(sb)); } @@ -845,105 +868,99 @@ static int udf_table_new_block(struct super_block * sb, return newblock; } -inline void udf_free_blocks(struct super_block * sb, - struct inode * inode, - kernel_lb_addr bloc, uint32_t offset, uint32_t count) +inline void udf_free_blocks(struct super_block *sb, + struct inode *inode, + kernel_lb_addr bloc, uint32_t offset, + uint32_t count) { uint16_t partition = bloc.partitionReferenceNum; - if (UDF_SB_PARTFLAGS(sb, partition) & UDF_PART_FLAG_UNALLOC_BITMAP) - { + if (UDF_SB_PARTFLAGS(sb, partition) & UDF_PART_FLAG_UNALLOC_BITMAP) { return udf_bitmap_free_blocks(sb, inode, - UDF_SB_PARTMAPS(sb)[partition].s_uspace.s_bitmap, - bloc, offset, count); - } - else if (UDF_SB_PARTFLAGS(sb, partition) & UDF_PART_FLAG_UNALLOC_TABLE) - { + UDF_SB_PARTMAPS(sb)[partition]. + s_uspace.s_bitmap, bloc, offset, + count); + } else if (UDF_SB_PARTFLAGS(sb, partition) & + UDF_PART_FLAG_UNALLOC_TABLE) { return udf_table_free_blocks(sb, inode, - UDF_SB_PARTMAPS(sb)[partition].s_uspace.s_table, - bloc, offset, count); - } - else if (UDF_SB_PARTFLAGS(sb, partition) & UDF_PART_FLAG_FREED_BITMAP) - { + UDF_SB_PARTMAPS(sb)[partition]. + s_uspace.s_table, bloc, offset, + count); + } else if (UDF_SB_PARTFLAGS(sb, partition) & UDF_PART_FLAG_FREED_BITMAP) { return udf_bitmap_free_blocks(sb, inode, - UDF_SB_PARTMAPS(sb)[partition].s_fspace.s_bitmap, - bloc, offset, count); - } - else if (UDF_SB_PARTFLAGS(sb, partition) & UDF_PART_FLAG_FREED_TABLE) - { + UDF_SB_PARTMAPS(sb)[partition]. + s_fspace.s_bitmap, bloc, offset, + count); + } else if (UDF_SB_PARTFLAGS(sb, partition) & UDF_PART_FLAG_FREED_TABLE) { return udf_table_free_blocks(sb, inode, - UDF_SB_PARTMAPS(sb)[partition].s_fspace.s_table, - bloc, offset, count); - } - else + UDF_SB_PARTMAPS(sb)[partition]. + s_fspace.s_table, bloc, offset, + count); + } else return; } -inline int udf_prealloc_blocks(struct super_block * sb, - struct inode * inode, - uint16_t partition, uint32_t first_block, uint32_t block_count) +inline int udf_prealloc_blocks(struct super_block *sb, + struct inode *inode, + uint16_t partition, uint32_t first_block, + uint32_t block_count) { - if (UDF_SB_PARTFLAGS(sb, partition) & UDF_PART_FLAG_UNALLOC_BITMAP) - { + if (UDF_SB_PARTFLAGS(sb, partition) & UDF_PART_FLAG_UNALLOC_BITMAP) { return udf_bitmap_prealloc_blocks(sb, inode, - UDF_SB_PARTMAPS(sb)[partition].s_uspace.s_bitmap, - partition, first_block, block_count); - } - else if (UDF_SB_PARTFLAGS(sb, partition) & UDF_PART_FLAG_UNALLOC_TABLE) - { + UDF_SB_PARTMAPS(sb) + [partition].s_uspace.s_bitmap, + partition, first_block, + block_count); + } else if (UDF_SB_PARTFLAGS(sb, partition) & + UDF_PART_FLAG_UNALLOC_TABLE) { return udf_table_prealloc_blocks(sb, inode, - UDF_SB_PARTMAPS(sb)[partition].s_uspace.s_table, - partition, first_block, block_count); - } - else if (UDF_SB_PARTFLAGS(sb, partition) & UDF_PART_FLAG_FREED_BITMAP) - { + UDF_SB_PARTMAPS(sb)[partition]. + s_uspace.s_table, partition, + first_block, block_count); + } else if (UDF_SB_PARTFLAGS(sb, partition) & UDF_PART_FLAG_FREED_BITMAP) { return udf_bitmap_prealloc_blocks(sb, inode, - UDF_SB_PARTMAPS(sb)[partition].s_fspace.s_bitmap, - partition, first_block, block_count); - } - else if (UDF_SB_PARTFLAGS(sb, partition) & UDF_PART_FLAG_FREED_TABLE) - { + UDF_SB_PARTMAPS(sb) + [partition].s_fspace.s_bitmap, + partition, first_block, + block_count); + } else if (UDF_SB_PARTFLAGS(sb, partition) & UDF_PART_FLAG_FREED_TABLE) { return udf_table_prealloc_blocks(sb, inode, - UDF_SB_PARTMAPS(sb)[partition].s_fspace.s_table, - partition, first_block, block_count); - } - else + UDF_SB_PARTMAPS(sb)[partition]. + s_fspace.s_table, partition, + first_block, block_count); + } else return 0; } -inline int udf_new_block(struct super_block * sb, - struct inode * inode, - uint16_t partition, uint32_t goal, int *err) +inline int udf_new_block(struct super_block *sb, + struct inode *inode, + uint16_t partition, uint32_t goal, int *err) { int ret; - if (UDF_SB_PARTFLAGS(sb, partition) & UDF_PART_FLAG_UNALLOC_BITMAP) - { + if (UDF_SB_PARTFLAGS(sb, partition) & UDF_PART_FLAG_UNALLOC_BITMAP) { ret = udf_bitmap_new_block(sb, inode, - UDF_SB_PARTMAPS(sb)[partition].s_uspace.s_bitmap, - partition, goal, err); + UDF_SB_PARTMAPS(sb)[partition]. + s_uspace.s_bitmap, partition, goal, + err); return ret; - } - else if (UDF_SB_PARTFLAGS(sb, partition) & UDF_PART_FLAG_UNALLOC_TABLE) - { + } else if (UDF_SB_PARTFLAGS(sb, partition) & + UDF_PART_FLAG_UNALLOC_TABLE) { return udf_table_new_block(sb, inode, - UDF_SB_PARTMAPS(sb)[partition].s_uspace.s_table, - partition, goal, err); - } - else if (UDF_SB_PARTFLAGS(sb, partition) & UDF_PART_FLAG_FREED_BITMAP) - { + UDF_SB_PARTMAPS(sb)[partition]. + s_uspace.s_table, partition, goal, + err); + } else if (UDF_SB_PARTFLAGS(sb, partition) & UDF_PART_FLAG_FREED_BITMAP) { return udf_bitmap_new_block(sb, inode, - UDF_SB_PARTMAPS(sb)[partition].s_fspace.s_bitmap, - partition, goal, err); - } - else if (UDF_SB_PARTFLAGS(sb, partition) & UDF_PART_FLAG_FREED_TABLE) - { + UDF_SB_PARTMAPS(sb)[partition]. + s_fspace.s_bitmap, partition, goal, + err); + } else if (UDF_SB_PARTFLAGS(sb, partition) & UDF_PART_FLAG_FREED_TABLE) { return udf_table_new_block(sb, inode, - UDF_SB_PARTMAPS(sb)[partition].s_fspace.s_table, - partition, goal, err); - } - else - { + UDF_SB_PARTMAPS(sb)[partition]. + s_fspace.s_table, partition, goal, + err); + } else { *err = -EIO; return 0; } diff --git a/fs/udf/crc.c b/fs/udf/crc.c index ef2bfaa19d75..ae3d49790941 100644 --- a/fs/udf/crc.c +++ b/fs/udf/crc.c @@ -79,8 +79,7 @@ static uint16_t crc_table[256] = { * July 21, 1997 - Andrew E. Mileski * Adapted from OSTA-UDF(tm) 1.50 standard. */ -uint16_t -udf_crc(uint8_t *data, uint32_t size, uint16_t crc) +uint16_t udf_crc(uint8_t * data, uint32_t size, uint16_t crc) { while (size--) crc = crc_table[(crc >> 8 ^ *(data++)) & 0xffU] ^ (crc << 8); @@ -112,7 +111,7 @@ int main(void) return 0; } -#endif /* defined(TEST) */ +#endif /* defined(TEST) */ /****************************************************************************/ #if defined(GENERATE) @@ -138,7 +137,7 @@ int main(int argc, char **argv) /* Get the polynomial */ sscanf(argv[1], "%lo", &poly); - if (poly & 0xffff0000U){ + if (poly & 0xffff0000U) { fprintf(stderr, "polynomial is too large\en"); exit(1); } @@ -147,22 +146,22 @@ int main(int argc, char **argv) /* Create a table */ printf("static unsigned short crc_table[256] = {\n"); - for (n = 0; n < 256; n++){ + for (n = 0; n < 256; n++) { if (n % 8 == 0) printf("\t"); crc = n << 8; - for (i = 0; i < 8; i++){ - if(crc & 0x8000U) + for (i = 0; i < 8; i++) { + if (crc & 0x8000U) crc = (crc << 1) ^ poly; else crc <<= 1; - crc &= 0xFFFFU; + crc &= 0xFFFFU; } if (n == 255) printf("0x%04xU ", crc); else printf("0x%04xU, ", crc); - if(n % 8 == 7) + if (n % 8 == 7) printf("\n"); } printf("};\n"); @@ -170,4 +169,4 @@ int main(int argc, char **argv) return 0; } -#endif /* defined(GENERATE) */ +#endif /* defined(GENERATE) */ diff --git a/fs/udf/dir.c b/fs/udf/dir.c index e45f86b5e7b0..79bab9fe120c 100644 --- a/fs/udf/dir.c +++ b/fs/udf/dir.c @@ -43,10 +43,10 @@ static int do_udf_readdir(struct inode *, struct file *, filldir_t, void *); /* readdir and lookup functions */ const struct file_operations udf_dir_operations = { - .read = generic_read_dir, - .readdir = udf_readdir, - .ioctl = udf_ioctl, - .fsync = udf_fsync_file, + .read = generic_read_dir, + .readdir = udf_readdir, + .ioctl = udf_ioctl, + .fsync = udf_fsync_file, }; /* @@ -82,26 +82,26 @@ int udf_readdir(struct file *filp, void *dirent, filldir_t filldir) lock_kernel(); - if ( filp->f_pos == 0 ) - { - if (filldir(dirent, ".", 1, filp->f_pos, dir->i_ino, DT_DIR) < 0) - { + if (filp->f_pos == 0) { + if (filldir(dirent, ".", 1, filp->f_pos, dir->i_ino, DT_DIR) < + 0) { unlock_kernel(); return 0; } - filp->f_pos ++; + filp->f_pos++; } result = do_udf_readdir(dir, filp, filldir, dirent); unlock_kernel(); - return result; + return result; } -static int -do_udf_readdir(struct inode * dir, struct file *filp, filldir_t filldir, void *dirent) +static int +do_udf_readdir(struct inode *dir, struct file *filp, filldir_t filldir, + void *dirent) { struct udf_fileident_bh fibh; - struct fileIdentDesc *fi=NULL; + struct fileIdentDesc *fi = NULL; struct fileIdentDesc cfi; int block, iblock; loff_t nf_pos = filp->f_pos - 1; @@ -117,7 +117,7 @@ do_udf_readdir(struct inode * dir, struct file *filp, filldir_t filldir, void *d sector_t offset; int i, num; unsigned int dt_type; - struct extent_position epos = { NULL, 0, {0, 0}}; + struct extent_position epos = { NULL, 0, {0, 0} }; if (nf_pos >= size) return 0; @@ -125,65 +125,61 @@ do_udf_readdir(struct inode * dir, struct file *filp, filldir_t filldir, void *d if (nf_pos == 0) nf_pos = (udf_ext0_offset(dir) >> 2); - fibh.soffset = fibh.eoffset = (nf_pos & ((dir->i_sb->s_blocksize - 1) >> 2)) << 2; + fibh.soffset = fibh.eoffset = + (nf_pos & ((dir->i_sb->s_blocksize - 1) >> 2)) << 2; if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_IN_ICB) fibh.sbh = fibh.ebh = NULL; else if (inode_bmap(dir, nf_pos >> (dir->i_sb->s_blocksize_bits - 2), - &epos, &eloc, &elen, &offset) == (EXT_RECORDED_ALLOCATED >> 30)) - { + &epos, &eloc, &elen, + &offset) == (EXT_RECORDED_ALLOCATED >> 30)) { block = udf_get_lb_pblock(dir->i_sb, eloc, offset); - if ((++offset << dir->i_sb->s_blocksize_bits) < elen) - { + if ((++offset << dir->i_sb->s_blocksize_bits) < elen) { if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_SHORT) epos.offset -= sizeof(short_ad); else if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_LONG) epos.offset -= sizeof(long_ad); - } - else + } else offset = 0; - if (!(fibh.sbh = fibh.ebh = udf_tread(dir->i_sb, block))) - { + if (!(fibh.sbh = fibh.ebh = udf_tread(dir->i_sb, block))) { brelse(epos.bh); return -EIO; } - - if (!(offset & ((16 >> (dir->i_sb->s_blocksize_bits - 9))-1))) - { + + if (!(offset & ((16 >> (dir->i_sb->s_blocksize_bits - 9)) - 1))) { i = 16 >> (dir->i_sb->s_blocksize_bits - 9); - if (i+offset > (elen >> dir->i_sb->s_blocksize_bits)) - i = (elen >> dir->i_sb->s_blocksize_bits)-offset; - for (num=0; i>0; i--) - { - block = udf_get_lb_pblock(dir->i_sb, eloc, offset+i); + if (i + offset > (elen >> dir->i_sb->s_blocksize_bits)) + i = (elen >> dir->i_sb->s_blocksize_bits) - + offset; + for (num = 0; i > 0; i--) { + block = + udf_get_lb_pblock(dir->i_sb, eloc, + offset + i); tmp = udf_tgetblk(dir->i_sb, block); - if (tmp && !buffer_uptodate(tmp) && !buffer_locked(tmp)) + if (tmp && !buffer_uptodate(tmp) + && !buffer_locked(tmp)) bha[num++] = tmp; else brelse(tmp); } - if (num) - { + if (num) { ll_rw_block(READA, num, bha); - for (i=0; i<num; i++) + for (i = 0; i < num; i++) brelse(bha[i]); } } - } - else - { + } else { brelse(epos.bh); return -ENOENT; } - while ( nf_pos < size ) - { + while (nf_pos < size) { filp->f_pos = nf_pos + 1; - fi = udf_fileident_read(dir, &nf_pos, &fibh, &cfi, &epos, &eloc, &elen, &offset); + fi = udf_fileident_read(dir, &nf_pos, &fibh, &cfi, &epos, &eloc, + &elen, &offset); - if (!fi) - { + if (!fi) { if (fibh.sbh != fibh.ebh) brelse(fibh.ebh); brelse(fibh.sbh); @@ -196,43 +192,41 @@ do_udf_readdir(struct inode * dir, struct file *filp, filldir_t filldir, void *d if (fibh.sbh == fibh.ebh) nameptr = fi->fileIdent + liu; - else - { + else { int poffset; /* Unpaded ending offset */ - poffset = fibh.soffset + sizeof(struct fileIdentDesc) + liu + lfi; + poffset = + fibh.soffset + sizeof(struct fileIdentDesc) + liu + + lfi; if (poffset >= lfi) - nameptr = (char *)(fibh.ebh->b_data + poffset - lfi); - else - { + nameptr = + (char *)(fibh.ebh->b_data + poffset - lfi); + else { nameptr = fname; - memcpy(nameptr, fi->fileIdent + liu, lfi - poffset); - memcpy(nameptr + lfi - poffset, fibh.ebh->b_data, poffset); + memcpy(nameptr, fi->fileIdent + liu, + lfi - poffset); + memcpy(nameptr + lfi - poffset, + fibh.ebh->b_data, poffset); } } - if ( (cfi.fileCharacteristics & FID_FILE_CHAR_DELETED) != 0 ) - { - if ( !UDF_QUERY_FLAG(dir->i_sb, UDF_FLAG_UNDELETE) ) + if ((cfi.fileCharacteristics & FID_FILE_CHAR_DELETED) != 0) { + if (!UDF_QUERY_FLAG(dir->i_sb, UDF_FLAG_UNDELETE)) continue; } - - if ( (cfi.fileCharacteristics & FID_FILE_CHAR_HIDDEN) != 0 ) - { - if ( !UDF_QUERY_FLAG(dir->i_sb, UDF_FLAG_UNHIDE) ) + + if ((cfi.fileCharacteristics & FID_FILE_CHAR_HIDDEN) != 0) { + if (!UDF_QUERY_FLAG(dir->i_sb, UDF_FLAG_UNHIDE)) continue; } - if ( cfi.fileCharacteristics & FID_FILE_CHAR_PARENT ) - { + if (cfi.fileCharacteristics & FID_FILE_CHAR_PARENT) { iblock = parent_ino(filp->f_path.dentry); flen = 2; memcpy(fname, "..", flen); dt_type = DT_DIR; - } - else - { + } else { kernel_lb_addr tloc = lelb_to_cpu(cfi.icb.extLocation); iblock = udf_get_lb_pblock(dir->i_sb, tloc, 0); @@ -240,18 +234,18 @@ do_udf_readdir(struct inode * dir, struct file *filp, filldir_t filldir, void *d dt_type = DT_UNKNOWN; } - if (flen) - { - if (filldir(dirent, fname, flen, filp->f_pos, iblock, dt_type) < 0) - { + if (flen) { + if (filldir + (dirent, fname, flen, filp->f_pos, iblock, + dt_type) < 0) { if (fibh.sbh != fibh.ebh) brelse(fibh.ebh); brelse(fibh.sbh); brelse(epos.bh); - return 0; + return 0; } } - } /* end while */ + } /* end while */ filp->f_pos = nf_pos + 1; diff --git a/fs/udf/directory.c b/fs/udf/directory.c index 198caa33027a..8adc77c1d579 100644 --- a/fs/udf/directory.c +++ b/fs/udf/directory.c @@ -19,10 +19,10 @@ #include <linux/buffer_head.h> #if 0 -static uint8_t * -udf_filead_read(struct inode *dir, uint8_t *tmpad, uint8_t ad_size, - kernel_lb_addr fe_loc, int *pos, int *offset, - struct buffer_head **bh, int *error) +static uint8_t *udf_filead_read(struct inode *dir, uint8_t * tmpad, + uint8_t ad_size, kernel_lb_addr fe_loc, + int *pos, int *offset, struct buffer_head **bh, + int *error) { int loffset = *offset; int block; @@ -31,31 +31,27 @@ udf_filead_read(struct inode *dir, uint8_t *tmpad, uint8_t ad_size, *error = 0; - ad = (uint8_t *)(*bh)->b_data + *offset; + ad = (uint8_t *) (*bh)->b_data + *offset; *offset += ad_size; - if (!ad) - { + if (!ad) { brelse(*bh); *error = 1; return NULL; } - if (*offset == dir->i_sb->s_blocksize) - { + if (*offset == dir->i_sb->s_blocksize) { brelse(*bh); block = udf_get_lb_pblock(dir->i_sb, fe_loc, ++*pos); if (!block) return NULL; if (!(*bh = udf_tread(dir->i_sb, block))) return NULL; - } - else if (*offset > dir->i_sb->s_blocksize) - { + } else if (*offset > dir->i_sb->s_blocksize) { ad = tmpad; remainder = dir->i_sb->s_blocksize - loffset; - memcpy((uint8_t *)ad, (*bh)->b_data + loffset, remainder); + memcpy((uint8_t *) ad, (*bh)->b_data + loffset, remainder); brelse(*bh); block = udf_get_lb_pblock(dir->i_sb, fe_loc, ++*pos); @@ -64,56 +60,56 @@ udf_filead_read(struct inode *dir, uint8_t *tmpad, uint8_t ad_size, if (!((*bh) = udf_tread(dir->i_sb, block))) return NULL; - memcpy((uint8_t *)ad + remainder, (*bh)->b_data, ad_size - remainder); + memcpy((uint8_t *) ad + remainder, (*bh)->b_data, + ad_size - remainder); *offset = ad_size - remainder; } return ad; } #endif -struct fileIdentDesc * -udf_fileident_read(struct inode *dir, loff_t *nf_pos, - struct udf_fileident_bh *fibh, - struct fileIdentDesc *cfi, - struct extent_position *epos, - kernel_lb_addr *eloc, uint32_t *elen, - sector_t *offset) +struct fileIdentDesc *udf_fileident_read(struct inode *dir, loff_t * nf_pos, + struct udf_fileident_bh *fibh, + struct fileIdentDesc *cfi, + struct extent_position *epos, + kernel_lb_addr * eloc, uint32_t * elen, + sector_t * offset) { struct fileIdentDesc *fi; int i, num, block; - struct buffer_head * tmp, * bha[16]; + struct buffer_head *tmp, *bha[16]; fibh->soffset = fibh->eoffset; - if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_IN_ICB) - { + if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_IN_ICB) { fi = udf_get_fileident(UDF_I_DATA(dir) - - (UDF_I_EFE(dir) ? - sizeof(struct extendedFileEntry) : - sizeof(struct fileEntry)), - dir->i_sb->s_blocksize, &(fibh->eoffset)); + (UDF_I_EFE(dir) ? + sizeof(struct extendedFileEntry) : + sizeof(struct fileEntry)), + dir->i_sb->s_blocksize, + &(fibh->eoffset)); if (!fi) return NULL; *nf_pos += ((fibh->eoffset - fibh->soffset) >> 2); - memcpy((uint8_t *)cfi, (uint8_t *)fi, sizeof(struct fileIdentDesc)); + memcpy((uint8_t *) cfi, (uint8_t *) fi, + sizeof(struct fileIdentDesc)); return fi; } - if (fibh->eoffset == dir->i_sb->s_blocksize) - { + if (fibh->eoffset == dir->i_sb->s_blocksize) { int lextoffset = epos->offset; if (udf_next_aext(dir, epos, eloc, elen, 1) != - (EXT_RECORDED_ALLOCATED >> 30)) + (EXT_RECORDED_ALLOCATED >> 30)) return NULL; block = udf_get_lb_pblock(dir->i_sb, *eloc, *offset); - (*offset) ++; + (*offset)++; if ((*offset << dir->i_sb->s_blocksize_bits) >= *elen) *offset = 0; @@ -125,57 +121,57 @@ udf_fileident_read(struct inode *dir, loff_t *nf_pos, return NULL; fibh->soffset = fibh->eoffset = 0; - if (!(*offset & ((16 >> (dir->i_sb->s_blocksize_bits - 9))-1))) + if (! + (*offset & ((16 >> (dir->i_sb->s_blocksize_bits - 9)) - 1))) { i = 16 >> (dir->i_sb->s_blocksize_bits - 9); - if (i+*offset > (*elen >> dir->i_sb->s_blocksize_bits)) - i = (*elen >> dir->i_sb->s_blocksize_bits)-*offset; - for (num=0; i>0; i--) - { - block = udf_get_lb_pblock(dir->i_sb, *eloc, *offset+i); + if (i + *offset > + (*elen >> dir->i_sb->s_blocksize_bits)) + i = (*elen >> dir->i_sb->s_blocksize_bits) - + *offset; + for (num = 0; i > 0; i--) { + block = + udf_get_lb_pblock(dir->i_sb, *eloc, + *offset + i); tmp = udf_tgetblk(dir->i_sb, block); - if (tmp && !buffer_uptodate(tmp) && !buffer_locked(tmp)) + if (tmp && !buffer_uptodate(tmp) + && !buffer_locked(tmp)) bha[num++] = tmp; else brelse(tmp); } - if (num) - { + if (num) { ll_rw_block(READA, num, bha); - for (i=0; i<num; i++) + for (i = 0; i < num; i++) brelse(bha[i]); } } - } - else if (fibh->sbh != fibh->ebh) - { + } else if (fibh->sbh != fibh->ebh) { brelse(fibh->sbh); fibh->sbh = fibh->ebh; } fi = udf_get_fileident(fibh->sbh->b_data, dir->i_sb->s_blocksize, - &(fibh->eoffset)); + &(fibh->eoffset)); if (!fi) return NULL; *nf_pos += ((fibh->eoffset - fibh->soffset) >> 2); - if (fibh->eoffset <= dir->i_sb->s_blocksize) - { - memcpy((uint8_t *)cfi, (uint8_t *)fi, sizeof(struct fileIdentDesc)); - } - else if (fibh->eoffset > dir->i_sb->s_blocksize) - { + if (fibh->eoffset <= dir->i_sb->s_blocksize) { + memcpy((uint8_t *) cfi, (uint8_t *) fi, + sizeof(struct fileIdentDesc)); + } else if (fibh->eoffset > dir->i_sb->s_blocksize) { int lextoffset = epos->offset; if (udf_next_aext(dir, epos, eloc, elen, 1) != - (EXT_RECORDED_ALLOCATED >> 30)) + (EXT_RECORDED_ALLOCATED >> 30)) return NULL; block = udf_get_lb_pblock(dir->i_sb, *eloc, *offset); - (*offset) ++; + (*offset)++; if ((*offset << dir->i_sb->s_blocksize_bits) >= *elen) *offset = 0; @@ -188,62 +184,62 @@ udf_fileident_read(struct inode *dir, loff_t *nf_pos, if (!(fibh->ebh = udf_tread(dir->i_sb, block))) return NULL; - if (sizeof(struct fileIdentDesc) > - fibh->soffset) - { + if (sizeof(struct fileIdentDesc) > -fibh->soffset) { int fi_len; - memcpy((uint8_t *)cfi, (uint8_t *)fi, - fibh->soffset); - memcpy((uint8_t *)cfi - fibh->soffset, fibh->ebh->b_data, - sizeof(struct fileIdentDesc) + fibh->soffset); + memcpy((uint8_t *) cfi, (uint8_t *) fi, -fibh->soffset); + memcpy((uint8_t *) cfi - fibh->soffset, + fibh->ebh->b_data, + sizeof(struct fileIdentDesc) + fibh->soffset); - fi_len = (sizeof(struct fileIdentDesc) + cfi->lengthFileIdent + - le16_to_cpu(cfi->lengthOfImpUse) + 3) & ~3; + fi_len = + (sizeof(struct fileIdentDesc) + + cfi->lengthFileIdent + + le16_to_cpu(cfi->lengthOfImpUse) + 3) & ~3; - *nf_pos += ((fi_len - (fibh->eoffset - fibh->soffset)) >> 2); + *nf_pos += + ((fi_len - (fibh->eoffset - fibh->soffset)) >> 2); fibh->eoffset = fibh->soffset + fi_len; - } - else - { - memcpy((uint8_t *)cfi, (uint8_t *)fi, sizeof(struct fileIdentDesc)); + } else { + memcpy((uint8_t *) cfi, (uint8_t *) fi, + sizeof(struct fileIdentDesc)); } } return fi; } -struct fileIdentDesc * -udf_get_fileident(void * buffer, int bufsize, int * offset) +struct fileIdentDesc *udf_get_fileident(void *buffer, int bufsize, int *offset) { struct fileIdentDesc *fi; int lengthThisIdent; - uint8_t * ptr; + uint8_t *ptr; int padlen; - if ( (!buffer) || (!offset) ) { - udf_debug("invalidparms\n, buffer=%p, offset=%p\n", buffer, offset); + if ((!buffer) || (!offset)) { + udf_debug("invalidparms\n, buffer=%p, offset=%p\n", buffer, + offset); return NULL; } ptr = buffer; - if ( (*offset > 0) && (*offset < bufsize) ) { + if ((*offset > 0) && (*offset < bufsize)) { ptr += *offset; } - fi=(struct fileIdentDesc *)ptr; - if (le16_to_cpu(fi->descTag.tagIdent) != TAG_IDENT_FID) - { + fi = (struct fileIdentDesc *)ptr; + if (le16_to_cpu(fi->descTag.tagIdent) != TAG_IDENT_FID) { udf_debug("0x%x != TAG_IDENT_FID\n", - le16_to_cpu(fi->descTag.tagIdent)); + le16_to_cpu(fi->descTag.tagIdent)); udf_debug("offset: %u sizeof: %lu bufsize: %u\n", - *offset, (unsigned long)sizeof(struct fileIdentDesc), bufsize); + *offset, (unsigned long)sizeof(struct fileIdentDesc), + bufsize); return NULL; } - if ( (*offset + sizeof(struct fileIdentDesc)) > bufsize ) - { + if ((*offset + sizeof(struct fileIdentDesc)) > bufsize) { lengthThisIdent = sizeof(struct fileIdentDesc); - } - else + } else lengthThisIdent = sizeof(struct fileIdentDesc) + - fi->lengthFileIdent + le16_to_cpu(fi->lengthOfImpUse); + fi->lengthFileIdent + le16_to_cpu(fi->lengthOfImpUse); /* we need to figure padding, too! */ padlen = lengthThisIdent % UDF_NAME_PAD; @@ -255,56 +251,53 @@ udf_get_fileident(void * buffer, int bufsize, int * offset) } #if 0 -static extent_ad * -udf_get_fileextent(void * buffer, int bufsize, int * offset) +static extent_ad *udf_get_fileextent(void *buffer, int bufsize, int *offset) { - extent_ad * ext; + extent_ad *ext; struct fileEntry *fe; - uint8_t * ptr; + uint8_t *ptr; - if ( (!buffer) || (!offset) ) - { + if ((!buffer) || (!offset)) { printk(KERN_ERR "udf: udf_get_fileextent() invalidparms\n"); return NULL; } fe = (struct fileEntry *)buffer; - if ( le16_to_cpu(fe->descTag.tagIdent) != TAG_IDENT_FE ) - { + if (le16_to_cpu(fe->descTag.tagIdent) != TAG_IDENT_FE) { udf_debug("0x%x != TAG_IDENT_FE\n", - le16_to_cpu(fe->descTag.tagIdent)); + le16_to_cpu(fe->descTag.tagIdent)); return NULL; } - ptr=(uint8_t *)(fe->extendedAttr) + le32_to_cpu(fe->lengthExtendedAttr); + ptr = + (uint8_t *) (fe->extendedAttr) + + le32_to_cpu(fe->lengthExtendedAttr); - if ( (*offset > 0) && (*offset < le32_to_cpu(fe->lengthAllocDescs)) ) - { + if ((*offset > 0) && (*offset < le32_to_cpu(fe->lengthAllocDescs))) { ptr += *offset; } - ext = (extent_ad *)ptr; + ext = (extent_ad *) ptr; *offset = *offset + sizeof(extent_ad); return ext; } #endif -short_ad * -udf_get_fileshortad(uint8_t *ptr, int maxoffset, int *offset, int inc) +short_ad *udf_get_fileshortad(uint8_t * ptr, int maxoffset, int *offset, + int inc) { short_ad *sa; - if ( (!ptr) || (!offset) ) - { + if ((!ptr) || (!offset)) { printk(KERN_ERR "udf: udf_get_fileshortad() invalidparms\n"); return NULL; } - if ( (*offset < 0) || ((*offset + sizeof(short_ad)) > maxoffset) ) + if ((*offset < 0) || ((*offset + sizeof(short_ad)) > maxoffset)) return NULL; - else if ((sa = (short_ad *)ptr)->extLength == 0) + else if ((sa = (short_ad *) ptr)->extLength == 0) return NULL; if (inc) @@ -312,20 +305,18 @@ udf_get_fileshortad(uint8_t *ptr, int maxoffset, int *offset, int inc) return sa; } -long_ad * -udf_get_filelongad(uint8_t *ptr, int maxoffset, int * offset, int inc) +long_ad *udf_get_filelongad(uint8_t * ptr, int maxoffset, int *offset, int inc) { long_ad *la; - if ( (!ptr) || (!offset) ) - { + if ((!ptr) || (!offset)) { printk(KERN_ERR "udf: udf_get_filelongad() invalidparms\n"); return NULL; } - if ( (*offset < 0) || ((*offset + sizeof(long_ad)) > maxoffset) ) + if ((*offset < 0) || ((*offset + sizeof(long_ad)) > maxoffset)) return NULL; - else if ((la = (long_ad *)ptr)->extLength == 0) + else if ((la = (long_ad *) ptr)->extLength == 0) return NULL; if (inc) diff --git a/fs/udf/ecma_167.h b/fs/udf/ecma_167.h index f81f2ebbf508..294ce2daa03a 100644 --- a/fs/udf/ecma_167.h +++ b/fs/udf/ecma_167.h @@ -38,10 +38,9 @@ #define _ECMA_167_H 1 /* Character set specification (ECMA 167r3 1/7.2.1) */ -typedef struct -{ - uint8_t charSetType; - uint8_t charSetInfo[63]; +typedef struct { + uint8_t charSetType; + uint8_t charSetInfo[63]; } __attribute__ ((packed)) charspec; /* Character Set Type (ECMA 167r3 1/7.2.1.1) */ @@ -55,35 +54,33 @@ typedef struct #define CHARSPEC_TYPE_CS7 0x07 /* (1/7.2.9) */ #define CHARSPEC_TYPE_CS8 0x08 /* (1/7.2.10) */ -typedef uint8_t dstring; +typedef uint8_t dstring; /* Timestamp (ECMA 167r3 1/7.3) */ -typedef struct -{ - __le16 typeAndTimezone; - __le16 year; - uint8_t month; - uint8_t day; - uint8_t hour; - uint8_t minute; - uint8_t second; - uint8_t centiseconds; - uint8_t hundredsOfMicroseconds; - uint8_t microseconds; +typedef struct { + __le16 typeAndTimezone; + __le16 year; + uint8_t month; + uint8_t day; + uint8_t hour; + uint8_t minute; + uint8_t second; + uint8_t centiseconds; + uint8_t hundredsOfMicroseconds; + uint8_t microseconds; } __attribute__ ((packed)) timestamp; -typedef struct -{ - uint16_t typeAndTimezone; - int16_t year; - uint8_t month; - uint8_t day; - uint8_t hour; - uint8_t minute; - uint8_t second; - uint8_t centiseconds; - uint8_t hundredsOfMicroseconds; - uint8_t microseconds; +typedef struct { + uint16_t typeAndTimezone; + int16_t year; + uint8_t month; + uint8_t day; + uint8_t hour; + uint8_t minute; + uint8_t second; + uint8_t centiseconds; + uint8_t hundredsOfMicroseconds; + uint8_t microseconds; } __attribute__ ((packed)) kernel_timestamp; /* Type and Time Zone (ECMA 167r3 1/7.3.1) */ @@ -94,11 +91,10 @@ typedef struct #define TIMESTAMP_TIMEZONE_MASK 0x0FFF /* Entity identifier (ECMA 167r3 1/7.4) */ -typedef struct -{ - uint8_t flags; - uint8_t ident[23]; - uint8_t identSuffix[8]; +typedef struct { + uint8_t flags; + uint8_t ident[23]; + uint8_t identSuffix[8]; } __attribute__ ((packed)) regid; /* Flags (ECMA 167r3 1/7.4.1) */ @@ -107,12 +103,11 @@ typedef struct /* Volume Structure Descriptor (ECMA 167r3 2/9.1) */ #define VSD_STD_ID_LEN 5 -struct volStructDesc -{ - uint8_t structType; - uint8_t stdIdent[VSD_STD_ID_LEN]; - uint8_t structVersion; - uint8_t structData[2041]; +struct volStructDesc { + uint8_t structType; + uint8_t stdIdent[VSD_STD_ID_LEN]; + uint8_t structVersion; + uint8_t structData[2041]; } __attribute__ ((packed)); /* Standard Identifier (EMCA 167r2 2/9.1.2) */ @@ -127,69 +122,63 @@ struct volStructDesc #define VSD_STD_ID_TEA01 "TEA01" /* (2/9.3) */ /* Beginning Extended Area Descriptor (ECMA 167r3 2/9.2) */ -struct beginningExtendedAreaDesc -{ - uint8_t structType; - uint8_t stdIdent[VSD_STD_ID_LEN]; - uint8_t structVersion; - uint8_t structData[2041]; +struct beginningExtendedAreaDesc { + uint8_t structType; + uint8_t stdIdent[VSD_STD_ID_LEN]; + uint8_t structVersion; + uint8_t structData[2041]; } __attribute__ ((packed)); /* Terminating Extended Area Descriptor (ECMA 167r3 2/9.3) */ -struct terminatingExtendedAreaDesc -{ - uint8_t structType; - uint8_t stdIdent[VSD_STD_ID_LEN]; - uint8_t structVersion; - uint8_t structData[2041]; +struct terminatingExtendedAreaDesc { + uint8_t structType; + uint8_t stdIdent[VSD_STD_ID_LEN]; + uint8_t structVersion; + uint8_t structData[2041]; } __attribute__ ((packed)); /* Boot Descriptor (ECMA 167r3 2/9.4) */ -struct bootDesc -{ - uint8_t structType; - uint8_t stdIdent[VSD_STD_ID_LEN]; - uint8_t structVersion; - uint8_t reserved1; - regid archType; - regid bootIdent; - __le32 bootExtLocation; - __le32 bootExtLength; - __le64 loadAddress; - __le64 startAddress; - timestamp descCreationDateAndTime; - __le16 flags; - uint8_t reserved2[32]; - uint8_t bootUse[1906]; +struct bootDesc { + uint8_t structType; + uint8_t stdIdent[VSD_STD_ID_LEN]; + uint8_t structVersion; + uint8_t reserved1; + regid archType; + regid bootIdent; + __le32 bootExtLocation; + __le32 bootExtLength; + __le64 loadAddress; + __le64 startAddress; + timestamp descCreationDateAndTime; + __le16 flags; + uint8_t reserved2[32]; + uint8_t bootUse[1906]; } __attribute__ ((packed)); /* Flags (ECMA 167r3 2/9.4.12) */ #define BOOT_FLAGS_ERASE 0x01 /* Extent Descriptor (ECMA 167r3 3/7.1) */ -typedef struct -{ - __le32 extLength; - __le32 extLocation; +typedef struct { + __le32 extLength; + __le32 extLocation; } __attribute__ ((packed)) extent_ad; -typedef struct -{ - uint32_t extLength; - uint32_t extLocation; +typedef struct { + uint32_t extLength; + uint32_t extLocation; } kernel_extent_ad; /* Descriptor Tag (ECMA 167r3 3/7.2) */ -typedef struct -{ - __le16 tagIdent; - __le16 descVersion; - uint8_t tagChecksum; - uint8_t reserved; - __le16 tagSerialNum; - __le16 descCRC; - __le16 descCRCLength; - __le32 tagLocation; +typedef struct { + __le16 tagIdent; + __le16 descVersion; + uint8_t tagChecksum; + uint8_t reserved; + __le16 tagSerialNum; + __le16 descCRC; + __le16 descCRCLength; + __le32 tagLocation; } __attribute__ ((packed)) tag; /* Tag Identifier (ECMA 167r3 3/7.2.1) */ @@ -204,87 +193,81 @@ typedef struct #define TAG_IDENT_LVID 0x0009 /* NSR Descriptor (ECMA 167r3 3/9.1) */ -struct NSRDesc -{ - uint8_t structType; - uint8_t stdIdent[VSD_STD_ID_LEN]; - uint8_t structVersion; - uint8_t reserved; - uint8_t structData[2040]; -} __attribute__ ((packed)); - +struct NSRDesc { + uint8_t structType; + uint8_t stdIdent[VSD_STD_ID_LEN]; + uint8_t structVersion; + uint8_t reserved; + uint8_t structData[2040]; +} __attribute__ ((packed)); + /* Primary Volume Descriptor (ECMA 167r3 3/10.1) */ -struct primaryVolDesc -{ - tag descTag; - __le32 volDescSeqNum; - __le32 primaryVolDescNum; - dstring volIdent[32]; - __le16 volSeqNum; - __le16 maxVolSeqNum; - __le16 interchangeLvl; - __le16 maxInterchangeLvl; - __le32 charSetList; - __le32 maxCharSetList; - dstring volSetIdent[128]; - charspec descCharSet; - charspec explanatoryCharSet; - extent_ad volAbstract; - extent_ad volCopyright; - regid appIdent; - timestamp recordingDateAndTime; - regid impIdent; - uint8_t impUse[64]; - __le32 predecessorVolDescSeqLocation; - __le16 flags; - uint8_t reserved[22]; +struct primaryVolDesc { + tag descTag; + __le32 volDescSeqNum; + __le32 primaryVolDescNum; + dstring volIdent[32]; + __le16 volSeqNum; + __le16 maxVolSeqNum; + __le16 interchangeLvl; + __le16 maxInterchangeLvl; + __le32 charSetList; + __le32 maxCharSetList; + dstring volSetIdent[128]; + charspec descCharSet; + charspec explanatoryCharSet; + extent_ad volAbstract; + extent_ad volCopyright; + regid appIdent; + timestamp recordingDateAndTime; + regid impIdent; + uint8_t impUse[64]; + __le32 predecessorVolDescSeqLocation; + __le16 flags; + uint8_t reserved[22]; } __attribute__ ((packed)); /* Flags (ECMA 167r3 3/10.1.21) */ #define PVD_FLAGS_VSID_COMMON 0x0001 /* Anchor Volume Descriptor Pointer (ECMA 167r3 3/10.2) */ -struct anchorVolDescPtr -{ - tag descTag; - extent_ad mainVolDescSeqExt; - extent_ad reserveVolDescSeqExt; - uint8_t reserved[480]; +struct anchorVolDescPtr { + tag descTag; + extent_ad mainVolDescSeqExt; + extent_ad reserveVolDescSeqExt; + uint8_t reserved[480]; } __attribute__ ((packed)); /* Volume Descriptor Pointer (ECMA 167r3 3/10.3) */ -struct volDescPtr -{ - tag descTag; - __le32 volDescSeqNum; - extent_ad nextVolDescSeqExt; - uint8_t reserved[484]; +struct volDescPtr { + tag descTag; + __le32 volDescSeqNum; + extent_ad nextVolDescSeqExt; + uint8_t reserved[484]; } __attribute__ ((packed)); /* Implementation Use Volume Descriptor (ECMA 167r3 3/10.4) */ -struct impUseVolDesc -{ - tag descTag; - __le32 volDescSeqNum; - regid impIdent; - uint8_t impUse[460]; +struct impUseVolDesc { + tag descTag; + __le32 volDescSeqNum; + regid impIdent; + uint8_t impUse[460]; } __attribute__ ((packed)); /* Partition Descriptor (ECMA 167r3 3/10.5) */ -struct partitionDesc -{ - tag descTag; - __le32 volDescSeqNum; - __le16 partitionFlags; - __le16 partitionNumber; - regid partitionContents; - uint8_t partitionContentsUse[128]; - __le32 accessType; - __le32 partitionStartingLocation; - __le32 partitionLength; - regid impIdent; - uint8_t impUse[128]; - uint8_t reserved[156]; +struct partitionDesc { + tag descTag; + __le32 volDescSeqNum; + __le16 partitionFlags; + __le16 partitionNumber; + regid partitionContents; + uint8_t partitionContentsUse[128]; + __le32 accessType; + __le32 partitionStartingLocation; + __le32 partitionLength; + regid impIdent; + uint8_t impUse[128]; + uint8_t reserved[156]; } __attribute__ ((packed)); /* Partition Flags (ECMA 167r3 3/10.5.3) */ @@ -307,29 +290,27 @@ struct partitionDesc #define PD_ACCESS_TYPE_OVERWRITABLE 0x00000004 /* Logical Volume Descriptor (ECMA 167r3 3/10.6) */ -struct logicalVolDesc -{ - tag descTag; - __le32 volDescSeqNum; - charspec descCharSet; - dstring logicalVolIdent[128]; - __le32 logicalBlockSize; - regid domainIdent; - uint8_t logicalVolContentsUse[16]; - __le32 mapTableLength; - __le32 numPartitionMaps; - regid impIdent; - uint8_t impUse[128]; - extent_ad integritySeqExt; - uint8_t partitionMaps[0]; +struct logicalVolDesc { + tag descTag; + __le32 volDescSeqNum; + charspec descCharSet; + dstring logicalVolIdent[128]; + __le32 logicalBlockSize; + regid domainIdent; + uint8_t logicalVolContentsUse[16]; + __le32 mapTableLength; + __le32 numPartitionMaps; + regid impIdent; + uint8_t impUse[128]; + extent_ad integritySeqExt; + uint8_t partitionMaps[0]; } __attribute__ ((packed)); /* Generic Partition Map (ECMA 167r3 3/10.7.1) */ -struct genericPartitionMap -{ - uint8_t partitionMapType; - uint8_t partitionMapLength; - uint8_t partitionMapping[0]; +struct genericPartitionMap { + uint8_t partitionMapType; + uint8_t partitionMapLength; + uint8_t partitionMapping[0]; } __attribute__ ((packed)); /* Partition Map Type (ECMA 167r3 3/10.7.1.1) */ @@ -338,51 +319,46 @@ struct genericPartitionMap #define GP_PARTITION_MAP_TYPE_2 0x02 /* Type 1 Partition Map (ECMA 167r3 3/10.7.2) */ -struct genericPartitionMap1 -{ - uint8_t partitionMapType; - uint8_t partitionMapLength; - __le16 volSeqNum; - __le16 partitionNum; +struct genericPartitionMap1 { + uint8_t partitionMapType; + uint8_t partitionMapLength; + __le16 volSeqNum; + __le16 partitionNum; } __attribute__ ((packed)); /* Type 2 Partition Map (ECMA 167r3 3/10.7.3) */ -struct genericPartitionMap2 -{ - uint8_t partitionMapType; - uint8_t partitionMapLength; - uint8_t partitionIdent[62]; +struct genericPartitionMap2 { + uint8_t partitionMapType; + uint8_t partitionMapLength; + uint8_t partitionIdent[62]; } __attribute__ ((packed)); /* Unallocated Space Descriptor (ECMA 167r3 3/10.8) */ -struct unallocSpaceDesc -{ - tag descTag; - __le32 volDescSeqNum; - __le32 numAllocDescs; - extent_ad allocDescs[0]; +struct unallocSpaceDesc { + tag descTag; + __le32 volDescSeqNum; + __le32 numAllocDescs; + extent_ad allocDescs[0]; } __attribute__ ((packed)); /* Terminating Descriptor (ECMA 167r3 3/10.9) */ -struct terminatingDesc -{ - tag descTag; - uint8_t reserved[496]; +struct terminatingDesc { + tag descTag; + uint8_t reserved[496]; } __attribute__ ((packed)); /* Logical Volume Integrity Descriptor (ECMA 167r3 3/10.10) */ -struct logicalVolIntegrityDesc -{ - tag descTag; - timestamp recordingDateAndTime; - __le32 integrityType; - extent_ad nextIntegrityExt; - uint8_t logicalVolContentsUse[32]; - __le32 numOfPartitions; - __le32 lengthOfImpUse; - __le32 freeSpaceTable[0]; - __le32 sizeTable[0]; - uint8_t impUse[0]; +struct logicalVolIntegrityDesc { + tag descTag; + timestamp recordingDateAndTime; + __le32 integrityType; + extent_ad nextIntegrityExt; + uint8_t logicalVolContentsUse[32]; + __le32 numOfPartitions; + __le32 lengthOfImpUse; + __le32 freeSpaceTable[0]; + __le32 sizeTable[0]; + uint8_t impUse[0]; } __attribute__ ((packed)); /* Integrity Type (ECMA 167r3 3/10.10.3) */ @@ -390,56 +366,49 @@ struct logicalVolIntegrityDesc #define LVID_INTEGRITY_TYPE_CLOSE 0x00000001 /* Recorded Address (ECMA 167r3 4/7.1) */ -typedef struct -{ - __le32 logicalBlockNum; - __le16 partitionReferenceNum; +typedef struct { + __le32 logicalBlockNum; + __le16 partitionReferenceNum; } __attribute__ ((packed)) lb_addr; /* ... and its in-core analog */ -typedef struct -{ - uint32_t logicalBlockNum; - uint16_t partitionReferenceNum; +typedef struct { + uint32_t logicalBlockNum; + uint16_t partitionReferenceNum; } kernel_lb_addr; /* Short Allocation Descriptor (ECMA 167r3 4/14.14.1) */ -typedef struct -{ - __le32 extLength; - __le32 extPosition; +typedef struct { + __le32 extLength; + __le32 extPosition; } __attribute__ ((packed)) short_ad; /* Long Allocation Descriptor (ECMA 167r3 4/14.14.2) */ -typedef struct -{ - __le32 extLength; - lb_addr extLocation; - uint8_t impUse[6]; +typedef struct { + __le32 extLength; + lb_addr extLocation; + uint8_t impUse[6]; } __attribute__ ((packed)) long_ad; -typedef struct -{ - uint32_t extLength; - kernel_lb_addr extLocation; - uint8_t impUse[6]; +typedef struct { + uint32_t extLength; + kernel_lb_addr extLocation; + uint8_t impUse[6]; } kernel_long_ad; /* Extended Allocation Descriptor (ECMA 167r3 4/14.14.3) */ -typedef struct -{ - __le32 extLength; - __le32 recordedLength; - __le32 informationLength; - lb_addr extLocation; +typedef struct { + __le32 extLength; + __le32 recordedLength; + __le32 informationLength; + lb_addr extLocation; } __attribute__ ((packed)) ext_ad; -typedef struct -{ - uint32_t extLength; - uint32_t recordedLength; - uint32_t informationLength; - kernel_lb_addr extLocation; +typedef struct { + uint32_t extLength; + uint32_t recordedLength; + uint32_t informationLength; + kernel_lb_addr extLocation; } kernel_ext_ad; /* Descriptor Tag (ECMA 167r3 4/7.2 - See 3/7.2) */ @@ -458,52 +427,49 @@ typedef struct #define TAG_IDENT_EFE 0x010A /* File Set Descriptor (ECMA 167r3 4/14.1) */ -struct fileSetDesc -{ - tag descTag; - timestamp recordingDateAndTime; - __le16 interchangeLvl; - __le16 maxInterchangeLvl; - __le32 charSetList; - __le32 maxCharSetList; - __le32 fileSetNum; - __le32 fileSetDescNum; - charspec logicalVolIdentCharSet; - dstring logicalVolIdent[128]; - charspec fileSetCharSet; - dstring fileSetIdent[32]; - dstring copyrightFileIdent[32]; - dstring abstractFileIdent[32]; - long_ad rootDirectoryICB; - regid domainIdent; - long_ad nextExt; - long_ad streamDirectoryICB; - uint8_t reserved[32]; +struct fileSetDesc { + tag descTag; + timestamp recordingDateAndTime; + __le16 interchangeLvl; + __le16 maxInterchangeLvl; + __le32 charSetList; + __le32 maxCharSetList; + __le32 fileSetNum; + __le32 fileSetDescNum; + charspec logicalVolIdentCharSet; + dstring logicalVolIdent[128]; + charspec fileSetCharSet; + dstring fileSetIdent[32]; + dstring copyrightFileIdent[32]; + dstring abstractFileIdent[32]; + long_ad rootDirectoryICB; + regid domainIdent; + long_ad nextExt; + long_ad streamDirectoryICB; + uint8_t reserved[32]; } __attribute__ ((packed)); /* Partition Header Descriptor (ECMA 167r3 4/14.3) */ -struct partitionHeaderDesc -{ - short_ad unallocSpaceTable; - short_ad unallocSpaceBitmap; - short_ad partitionIntegrityTable; - short_ad freedSpaceTable; - short_ad freedSpaceBitmap; - uint8_t reserved[88]; +struct partitionHeaderDesc { + short_ad unallocSpaceTable; + short_ad unallocSpaceBitmap; + short_ad partitionIntegrityTable; + short_ad freedSpaceTable; + short_ad freedSpaceBitmap; + uint8_t reserved[88]; } __attribute__ ((packed)); /* File Identifier Descriptor (ECMA 167r3 4/14.4) */ -struct fileIdentDesc -{ - tag descTag; - __le16 fileVersionNum; - uint8_t fileCharacteristics; - uint8_t lengthFileIdent; - long_ad icb; - __le16 lengthOfImpUse; - uint8_t impUse[0]; - uint8_t fileIdent[0]; - uint8_t padding[0]; +struct fileIdentDesc { + tag descTag; + __le16 fileVersionNum; + uint8_t fileCharacteristics; + uint8_t lengthFileIdent; + long_ad icb; + __le16 lengthOfImpUse; + uint8_t impUse[0]; + uint8_t fileIdent[0]; + uint8_t padding[0]; } __attribute__ ((packed)); /* File Characteristics (ECMA 167r3 4/14.4.3) */ @@ -514,24 +480,22 @@ struct fileIdentDesc #define FID_FILE_CHAR_METADATA 0x10 /* Allocation Ext Descriptor (ECMA 167r3 4/14.5) */ -struct allocExtDesc -{ - tag descTag; - __le32 previousAllocExtLocation; - __le32 lengthAllocDescs; +struct allocExtDesc { + tag descTag; + __le32 previousAllocExtLocation; + __le32 lengthAllocDescs; } __attribute__ ((packed)); /* ICB Tag (ECMA 167r3 4/14.6) */ -typedef struct -{ - __le32 priorRecordedNumDirectEntries; - __le16 strategyType; - __le16 strategyParameter; - __le16 numEntries; - uint8_t reserved; - uint8_t fileType; - lb_addr parentICBLocation; - __le16 flags; +typedef struct { + __le32 priorRecordedNumDirectEntries; + __le16 strategyType; + __le16 strategyParameter; + __le16 numEntries; + uint8_t reserved; + uint8_t fileType; + lb_addr parentICBLocation; + __le16 flags; } __attribute__ ((packed)) icbtag; /* Strategy Type (ECMA 167r3 4/14.6.2) */ @@ -576,45 +540,42 @@ typedef struct #define ICBTAG_FLAG_STREAM 0x2000 /* Indirect Entry (ECMA 167r3 4/14.7) */ -struct indirectEntry -{ - tag descTag; - icbtag icbTag; - long_ad indirectICB; +struct indirectEntry { + tag descTag; + icbtag icbTag; + long_ad indirectICB; } __attribute__ ((packed)); /* Terminal Entry (ECMA 167r3 4/14.8) */ -struct terminalEntry -{ - tag descTag; - icbtag icbTag; +struct terminalEntry { + tag descTag; + icbtag icbTag; } __attribute__ ((packed)); /* File Entry (ECMA 167r3 4/14.9) */ -struct fileEntry -{ - tag descTag; - icbtag icbTag; - __le32 uid; - __le32 gid; - __le32 permissions; - __le16 fileLinkCount; - uint8_t recordFormat; - uint8_t recordDisplayAttr; - __le32 recordLength; - __le64 informationLength; - __le64 logicalBlocksRecorded; - timestamp accessTime; - timestamp modificationTime; - timestamp attrTime; - __le32 checkpoint; - long_ad extendedAttrICB; - regid impIdent; - __le64 uniqueID; - __le32 lengthExtendedAttr; - __le32 lengthAllocDescs; - uint8_t extendedAttr[0]; - uint8_t allocDescs[0]; +struct fileEntry { + tag descTag; + icbtag icbTag; + __le32 uid; + __le32 gid; + __le32 permissions; + __le16 fileLinkCount; + uint8_t recordFormat; + uint8_t recordDisplayAttr; + __le32 recordLength; + __le64 informationLength; + __le64 logicalBlocksRecorded; + timestamp accessTime; + timestamp modificationTime; + timestamp attrTime; + __le32 checkpoint; + long_ad extendedAttrICB; + regid impIdent; + __le64 uniqueID; + __le32 lengthExtendedAttr; + __le32 lengthAllocDescs; + uint8_t extendedAttr[0]; + uint8_t allocDescs[0]; } __attribute__ ((packed)); /* Permissions (ECMA 167r3 4/14.9.5) */ @@ -655,57 +616,52 @@ struct fileEntry #define FE_RECORD_DISPLAY_ATTR_3 0x03 /* Extended Attribute Header Descriptor (ECMA 167r3 4/14.10.1) */ -struct extendedAttrHeaderDesc -{ - tag descTag; - __le32 impAttrLocation; - __le32 appAttrLocation; +struct extendedAttrHeaderDesc { + tag descTag; + __le32 impAttrLocation; + __le32 appAttrLocation; } __attribute__ ((packed)); /* Generic Format (ECMA 167r3 4/14.10.2) */ -struct genericFormat -{ - __le32 attrType; - uint8_t attrSubtype; - uint8_t reserved[3]; - __le32 attrLength; - uint8_t attrData[0]; +struct genericFormat { + __le32 attrType; + uint8_t attrSubtype; + uint8_t reserved[3]; + __le32 attrLength; + uint8_t attrData[0]; } __attribute__ ((packed)); /* Character Set Information (ECMA 167r3 4/14.10.3) */ -struct charSetInfo -{ - __le32 attrType; - uint8_t attrSubtype; - uint8_t reserved[3]; - __le32 attrLength; - __le32 escapeSeqLength; - uint8_t charSetType; - uint8_t escapeSeq[0]; +struct charSetInfo { + __le32 attrType; + uint8_t attrSubtype; + uint8_t reserved[3]; + __le32 attrLength; + __le32 escapeSeqLength; + uint8_t charSetType; + uint8_t escapeSeq[0]; } __attribute__ ((packed)); /* Alternate Permissions (ECMA 167r3 4/14.10.4) */ -struct altPerms -{ - __le32 attrType; - uint8_t attrSubtype; - uint8_t reserved[3]; - __le32 attrLength; - __le16 ownerIdent; - __le16 groupIdent; - __le16 permission; +struct altPerms { + __le32 attrType; + uint8_t attrSubtype; + uint8_t reserved[3]; + __le32 attrLength; + __le16 ownerIdent; + __le16 groupIdent; + __le16 permission; } __attribute__ ((packed)); /* File Times Extended Attribute (ECMA 167r3 4/14.10.5) */ -struct fileTimesExtAttr -{ - __le32 attrType; - uint8_t attrSubtype; - uint8_t reserved[3]; - __le32 attrLength; - __le32 dataLength; - __le32 fileTimeExistence; - uint8_t fileTimes; +struct fileTimesExtAttr { + __le32 attrType; + uint8_t attrSubtype; + uint8_t reserved[3]; + __le32 attrLength; + __le32 dataLength; + __le32 fileTimeExistence; + uint8_t fileTimes; } __attribute__ ((packed)); /* FileTimeExistence (ECMA 167r3 4/14.10.5.6) */ @@ -715,52 +671,48 @@ struct fileTimesExtAttr #define FTE_BACKUP 0x00000002 /* Information Times Extended Attribute (ECMA 167r3 4/14.10.6) */ -struct infoTimesExtAttr -{ - __le32 attrType; - uint8_t attrSubtype; - uint8_t reserved[3]; - __le32 attrLength; - __le32 dataLength; - __le32 infoTimeExistence; - uint8_t infoTimes[0]; +struct infoTimesExtAttr { + __le32 attrType; + uint8_t attrSubtype; + uint8_t reserved[3]; + __le32 attrLength; + __le32 dataLength; + __le32 infoTimeExistence; + uint8_t infoTimes[0]; } __attribute__ ((packed)); /* Device Specification (ECMA 167r3 4/14.10.7) */ -struct deviceSpec -{ - __le32 attrType; - uint8_t attrSubtype; - uint8_t reserved[3]; - __le32 attrLength; - __le32 impUseLength; - __le32 majorDeviceIdent; - __le32 minorDeviceIdent; - uint8_t impUse[0]; +struct deviceSpec { + __le32 attrType; + uint8_t attrSubtype; + uint8_t reserved[3]; + __le32 attrLength; + __le32 impUseLength; + __le32 majorDeviceIdent; + __le32 minorDeviceIdent; + uint8_t impUse[0]; } __attribute__ ((packed)); /* Implementation Use Extended Attr (ECMA 167r3 4/14.10.8) */ -struct impUseExtAttr -{ - __le32 attrType; - uint8_t attrSubtype; - uint8_t reserved[3]; - __le32 attrLength; - __le32 impUseLength; - regid impIdent; - uint8_t impUse[0]; +struct impUseExtAttr { + __le32 attrType; + uint8_t attrSubtype; + uint8_t reserved[3]; + __le32 attrLength; + __le32 impUseLength; + regid impIdent; + uint8_t impUse[0]; } __attribute__ ((packed)); /* Application Use Extended Attribute (ECMA 167r3 4/14.10.9) */ -struct appUseExtAttr -{ - __le32 attrType; - uint8_t attrSubtype; - uint8_t reserved[3]; - __le32 attrLength; - __le32 appUseLength; - regid appIdent; - uint8_t appUse[0]; +struct appUseExtAttr { + __le32 attrType; + uint8_t attrSubtype; + uint8_t reserved[3]; + __le32 attrLength; + __le32 appUseLength; + regid appIdent; + uint8_t appUse[0]; } __attribute__ ((packed)); #define EXTATTR_CHAR_SET 1 @@ -771,35 +723,31 @@ struct appUseExtAttr #define EXTATTR_IMP_USE 2048 #define EXTATTR_APP_USE 65536 - /* Unallocated Space Entry (ECMA 167r3 4/14.11) */ -struct unallocSpaceEntry -{ - tag descTag; - icbtag icbTag; - __le32 lengthAllocDescs; - uint8_t allocDescs[0]; +struct unallocSpaceEntry { + tag descTag; + icbtag icbTag; + __le32 lengthAllocDescs; + uint8_t allocDescs[0]; } __attribute__ ((packed)); /* Space Bitmap Descriptor (ECMA 167r3 4/14.12) */ -struct spaceBitmapDesc -{ - tag descTag; - __le32 numOfBits; - __le32 numOfBytes; - uint8_t bitmap[0]; +struct spaceBitmapDesc { + tag descTag; + __le32 numOfBits; + __le32 numOfBytes; + uint8_t bitmap[0]; } __attribute__ ((packed)); /* Partition Integrity Entry (ECMA 167r3 4/14.13) */ -struct partitionIntegrityEntry -{ - tag descTag; - icbtag icbTag; - timestamp recordingDateAndTime; - uint8_t integrityType; - uint8_t reserved[175]; - regid impIdent; - uint8_t impUse[256]; +struct partitionIntegrityEntry { + tag descTag; + icbtag icbTag; + timestamp recordingDateAndTime; + uint8_t integrityType; + uint8_t reserved[175]; + regid impIdent; + uint8_t impUse[256]; } __attribute__ ((packed)); /* Short Allocation Descriptor (ECMA 167r3 4/14.14.1) */ @@ -815,50 +763,47 @@ struct partitionIntegrityEntry /* Extended Allocation Descriptor (ECMA 167r3 4/14.14.3) */ /* Logical Volume Header Descriptor (ECMA 167r3 4/14.15) */ -struct logicalVolHeaderDesc -{ - __le64 uniqueID; - uint8_t reserved[24]; +struct logicalVolHeaderDesc { + __le64 uniqueID; + uint8_t reserved[24]; } __attribute__ ((packed)); /* Path Component (ECMA 167r3 4/14.16.1) */ -struct pathComponent -{ - uint8_t componentType; - uint8_t lengthComponentIdent; - __le16 componentFileVersionNum; - dstring componentIdent[0]; +struct pathComponent { + uint8_t componentType; + uint8_t lengthComponentIdent; + __le16 componentFileVersionNum; + dstring componentIdent[0]; } __attribute__ ((packed)); /* File Entry (ECMA 167r3 4/14.17) */ -struct extendedFileEntry -{ - tag descTag; - icbtag icbTag; - __le32 uid; - __le32 gid; - __le32 permissions; - __le16 fileLinkCount; - uint8_t recordFormat; - uint8_t recordDisplayAttr; - __le32 recordLength; - __le64 informationLength; - __le64 objectSize; - __le64 logicalBlocksRecorded; - timestamp accessTime; - timestamp modificationTime; - timestamp createTime; - timestamp attrTime; - __le32 checkpoint; - __le32 reserved; - long_ad extendedAttrICB; - long_ad streamDirectoryICB; - regid impIdent; - __le64 uniqueID; - __le32 lengthExtendedAttr; - __le32 lengthAllocDescs; - uint8_t extendedAttr[0]; - uint8_t allocDescs[0]; -} __attribute__ ((packed)); - -#endif /* _ECMA_167_H */ +struct extendedFileEntry { + tag descTag; + icbtag icbTag; + __le32 uid; + __le32 gid; + __le32 permissions; + __le16 fileLinkCount; + uint8_t recordFormat; + uint8_t recordDisplayAttr; + __le32 recordLength; + __le64 informationLength; + __le64 objectSize; + __le64 logicalBlocksRecorded; + timestamp accessTime; + timestamp modificationTime; + timestamp createTime; + timestamp attrTime; + __le32 checkpoint; + __le32 reserved; + long_ad extendedAttrICB; + long_ad streamDirectoryICB; + regid impIdent; + __le64 uniqueID; + __le32 lengthExtendedAttr; + __le32 lengthAllocDescs; + uint8_t extendedAttr[0]; + uint8_t allocDescs[0]; +} __attribute__ ((packed)); + +#endif /* _ECMA_167_H */ diff --git a/fs/udf/file.c b/fs/udf/file.c index df070bee8d4f..67bf36bd3e6e 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -30,7 +30,7 @@ #include <linux/udf_fs.h> #include <asm/uaccess.h> #include <linux/kernel.h> -#include <linux/string.h> /* memset */ +#include <linux/string.h> /* memset */ #include <linux/capability.h> #include <linux/errno.h> #include <linux/smp_lock.h> @@ -41,7 +41,7 @@ #include "udf_i.h" #include "udf_sb.h" -static int udf_adinicb_readpage(struct file *file, struct page * page) +static int udf_adinicb_readpage(struct file *file, struct page *page) { struct inode *inode = page->mapping->host; char *kaddr; @@ -58,7 +58,8 @@ static int udf_adinicb_readpage(struct file *file, struct page * page) return 0; } -static int udf_adinicb_writepage(struct page *page, struct writeback_control *wbc) +static int udf_adinicb_writepage(struct page *page, + struct writeback_control *wbc) { struct inode *inode = page->mapping->host; char *kaddr; @@ -74,19 +75,21 @@ static int udf_adinicb_writepage(struct page *page, struct writeback_control *wb return 0; } -static int udf_adinicb_prepare_write(struct file *file, struct page *page, unsigned offset, unsigned to) +static int udf_adinicb_prepare_write(struct file *file, struct page *page, + unsigned offset, unsigned to) { kmap(page); return 0; } -static int udf_adinicb_commit_write(struct file *file, struct page *page, unsigned offset, unsigned to) +static int udf_adinicb_commit_write(struct file *file, struct page *page, + unsigned offset, unsigned to) { struct inode *inode = page->mapping->host; char *kaddr = page_address(page); memcpy(UDF_I_DATA(inode) + UDF_I_LENEATTR(inode) + offset, - kaddr + offset, to - offset); + kaddr + offset, to - offset); mark_inode_dirty(inode); SetPageUptodate(page); kunmap(page); @@ -97,15 +100,15 @@ static int udf_adinicb_commit_write(struct file *file, struct page *page, unsign } const struct address_space_operations udf_adinicb_aops = { - .readpage = udf_adinicb_readpage, - .writepage = udf_adinicb_writepage, - .sync_page = block_sync_page, - .prepare_write = udf_adinicb_prepare_write, - .commit_write = udf_adinicb_commit_write, + .readpage = udf_adinicb_readpage, + .writepage = udf_adinicb_writepage, + .sync_page = block_sync_page, + .prepare_write = udf_adinicb_prepare_write, + .commit_write = udf_adinicb_commit_write, }; static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t ppos) + unsigned long nr_segs, loff_t ppos) { ssize_t retval; struct file *file = iocb->ki_filp; @@ -113,25 +116,20 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov, int err, pos; size_t count = iocb->ki_left; - if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_IN_ICB) - { + if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_IN_ICB) { if (file->f_flags & O_APPEND) pos = inode->i_size; else pos = ppos; - if (inode->i_sb->s_blocksize < (udf_file_entry_alloc_offset(inode) + - pos + count)) - { + if (inode->i_sb->s_blocksize < + (udf_file_entry_alloc_offset(inode) + pos + count)) { udf_expand_file_adinicb(inode, pos + count, &err); - if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_IN_ICB) - { + if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_IN_ICB) { udf_debug("udf_expand_adinicb: err=%d\n", err); return err; } - } - else - { + } else { if (pos + count > inode->i_size) UDF_I_LENALLOC(inode) = pos + count; else @@ -181,48 +179,47 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov, * Written, tested, and released. */ int udf_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, - unsigned long arg) + unsigned long arg) { int result = -EINVAL; - if ( file_permission(filp, MAY_READ) != 0 ) - { - udf_debug("no permission to access inode %lu\n", - inode->i_ino); + if (file_permission(filp, MAY_READ) != 0) { + udf_debug("no permission to access inode %lu\n", inode->i_ino); return -EPERM; } - if ( !arg ) - { + if (!arg) { udf_debug("invalid argument to udf_ioctl\n"); return -EINVAL; } - switch (cmd) - { - case UDF_GETVOLIDENT: - return copy_to_user((char __user *)arg, - UDF_SB_VOLIDENT(inode->i_sb), 32) ? -EFAULT : 0; - case UDF_RELOCATE_BLOCKS: + switch (cmd) { + case UDF_GETVOLIDENT: + return copy_to_user((char __user *)arg, + UDF_SB_VOLIDENT(inode->i_sb), + 32) ? -EFAULT : 0; + case UDF_RELOCATE_BLOCKS: { long old, new; - if (!capable(CAP_SYS_ADMIN)) return -EACCES; - if (get_user(old, (long __user *)arg)) return -EFAULT; + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + if (get_user(old, (long __user *)arg)) + return -EFAULT; if ((result = udf_relocate_blocks(inode->i_sb, - old, &new)) == 0) + old, &new)) == 0) result = put_user(new, (long __user *)arg); return result; } - case UDF_GETEASIZE: - result = put_user(UDF_I_LENEATTR(inode), (int __user *)arg); - break; + case UDF_GETEASIZE: + result = put_user(UDF_I_LENEATTR(inode), (int __user *)arg); + break; - case UDF_GETEABLOCK: - result = copy_to_user((char __user *)arg, UDF_I_DATA(inode), - UDF_I_LENEATTR(inode)) ? -EFAULT : 0; - break; + case UDF_GETEABLOCK: + result = copy_to_user((char __user *)arg, UDF_I_DATA(inode), + UDF_I_LENEATTR(inode)) ? -EFAULT : 0; + break; } return result; @@ -240,10 +237,9 @@ int udf_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, * HISTORY * */ -static int udf_release_file(struct inode * inode, struct file * filp) +static int udf_release_file(struct inode *inode, struct file *filp) { - if (filp->f_mode & FMODE_WRITE) - { + if (filp->f_mode & FMODE_WRITE) { lock_kernel(); udf_discard_prealloc(inode); unlock_kernel(); @@ -252,18 +248,18 @@ static int udf_release_file(struct inode * inode, struct file * filp) } const struct file_operations udf_file_operations = { - .read = do_sync_read, - .aio_read = generic_file_aio_read, - .ioctl = udf_ioctl, - .open = generic_file_open, - .mmap = generic_file_mmap, - .write = do_sync_write, - .aio_write = udf_file_aio_write, - .release = udf_release_file, - .fsync = udf_fsync_file, - .splice_read = generic_file_splice_read, + .read = do_sync_read, + .aio_read = generic_file_aio_read, + .ioctl = udf_ioctl, + .open = generic_file_open, + .mmap = generic_file_mmap, + .write = do_sync_write, + .aio_write = udf_file_aio_write, + .release = udf_release_file, + .fsync = udf_fsync_file, + .splice_read = generic_file_splice_read, }; const struct inode_operations udf_file_inode_operations = { - .truncate = udf_truncate, + .truncate = udf_truncate, }; diff --git a/fs/udf/fsync.c b/fs/udf/fsync.c index 6ded93e7c44f..7f0901c4f1f1 100644 --- a/fs/udf/fsync.c +++ b/fs/udf/fsync.c @@ -29,7 +29,7 @@ static int udf_fsync_inode(struct inode *, int); * even pass file to fsync ? */ -int udf_fsync_file(struct file * file, struct dentry *dentry, int datasync) +int udf_fsync_file(struct file *file, struct dentry *dentry, int datasync) { struct inode *inode = dentry->d_inode; return udf_fsync_inode(inode, datasync); @@ -45,6 +45,6 @@ static int udf_fsync_inode(struct inode *inode, int datasync) if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) return err; - err |= udf_sync_inode (inode); + err |= udf_sync_inode(inode); return err ? -EIO : 0; } diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c index 10f3188738af..2eb503806bce 100644 --- a/fs/udf/ialloc.c +++ b/fs/udf/ialloc.c @@ -28,7 +28,7 @@ #include "udf_i.h" #include "udf_sb.h" -void udf_free_inode(struct inode * inode) +void udf_free_inode(struct inode *inode) { struct super_block *sb = inode->i_sb; struct udf_sb_info *sbi = UDF_SB(sb); @@ -46,10 +46,12 @@ void udf_free_inode(struct inode * inode) if (sbi->s_lvidbh) { if (S_ISDIR(inode->i_mode)) UDF_SB_LVIDIU(sb)->numDirs = - cpu_to_le32(le32_to_cpu(UDF_SB_LVIDIU(sb)->numDirs) - 1); + cpu_to_le32(le32_to_cpu(UDF_SB_LVIDIU(sb)->numDirs) + - 1); else UDF_SB_LVIDIU(sb)->numFiles = - cpu_to_le32(le32_to_cpu(UDF_SB_LVIDIU(sb)->numFiles) - 1); + cpu_to_le32(le32_to_cpu(UDF_SB_LVIDIU(sb)->numFiles) + - 1); mark_buffer_dirty(sbi->s_lvidbh); } @@ -58,18 +60,17 @@ void udf_free_inode(struct inode * inode) udf_free_blocks(sb, NULL, UDF_I_LOCATION(inode), 0, 1); } -struct inode * udf_new_inode (struct inode *dir, int mode, int * err) +struct inode *udf_new_inode(struct inode *dir, int mode, int *err) { struct super_block *sb = dir->i_sb; struct udf_sb_info *sbi = UDF_SB(sb); - struct inode * inode; + struct inode *inode; int block; uint32_t start = UDF_I_LOCATION(dir).logicalBlockNum; inode = new_inode(sb); - if (!inode) - { + if (!inode) { *err = -ENOMEM; return NULL; } @@ -81,26 +82,30 @@ struct inode * udf_new_inode (struct inode *dir, int mode, int * err) UDF_I_NEXT_ALLOC_GOAL(inode) = 0; UDF_I_STRAT4096(inode) = 0; - block = udf_new_block(dir->i_sb, NULL, UDF_I_LOCATION(dir).partitionReferenceNum, - start, err); - if (*err) - { + block = + udf_new_block(dir->i_sb, NULL, + UDF_I_LOCATION(dir).partitionReferenceNum, start, + err); + if (*err) { iput(inode); return NULL; } mutex_lock(&sbi->s_alloc_mutex); - if (UDF_SB_LVIDBH(sb)) - { + if (UDF_SB_LVIDBH(sb)) { struct logicalVolHeaderDesc *lvhd; uint64_t uniqueID; - lvhd = (struct logicalVolHeaderDesc *)(UDF_SB_LVID(sb)->logicalVolContentsUse); + lvhd = + (struct logicalVolHeaderDesc *)(UDF_SB_LVID(sb)-> + logicalVolContentsUse); if (S_ISDIR(mode)) UDF_SB_LVIDIU(sb)->numDirs = - cpu_to_le32(le32_to_cpu(UDF_SB_LVIDIU(sb)->numDirs) + 1); + cpu_to_le32(le32_to_cpu(UDF_SB_LVIDIU(sb)->numDirs) + + 1); else UDF_SB_LVIDIU(sb)->numFiles = - cpu_to_le32(le32_to_cpu(UDF_SB_LVIDIU(sb)->numFiles) + 1); + cpu_to_le32(le32_to_cpu(UDF_SB_LVIDIU(sb)->numFiles) + + 1); UDF_I_UNIQUE(inode) = uniqueID = le64_to_cpu(lvhd->uniqueID); if (!(++uniqueID & 0x00000000FFFFFFFFUL)) uniqueID += 16; @@ -109,35 +114,34 @@ struct inode * udf_new_inode (struct inode *dir, int mode, int * err) } inode->i_mode = mode; inode->i_uid = current->fsuid; - if (dir->i_mode & S_ISGID) - { + if (dir->i_mode & S_ISGID) { inode->i_gid = dir->i_gid; if (S_ISDIR(mode)) mode |= S_ISGID; - } - else + } else inode->i_gid = current->fsgid; UDF_I_LOCATION(inode).logicalBlockNum = block; - UDF_I_LOCATION(inode).partitionReferenceNum = UDF_I_LOCATION(dir).partitionReferenceNum; + UDF_I_LOCATION(inode).partitionReferenceNum = + UDF_I_LOCATION(dir).partitionReferenceNum; inode->i_ino = udf_get_lb_pblock(sb, UDF_I_LOCATION(inode), 0); inode->i_blocks = 0; UDF_I_LENEATTR(inode) = 0; UDF_I_LENALLOC(inode) = 0; UDF_I_USE(inode) = 0; - if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_EXTENDED_FE)) - { + if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_EXTENDED_FE)) { UDF_I_EFE(inode) = 1; UDF_UPDATE_UDFREV(inode->i_sb, UDF_VERS_USE_EXTENDED_FE); - UDF_I_DATA(inode) = kzalloc(inode->i_sb->s_blocksize - sizeof(struct extendedFileEntry), GFP_KERNEL); - } - else - { + UDF_I_DATA(inode) = + kzalloc(inode->i_sb->s_blocksize - + sizeof(struct extendedFileEntry), GFP_KERNEL); + } else { UDF_I_EFE(inode) = 0; - UDF_I_DATA(inode) = kzalloc(inode->i_sb->s_blocksize - sizeof(struct fileEntry), GFP_KERNEL); + UDF_I_DATA(inode) = + kzalloc(inode->i_sb->s_blocksize - sizeof(struct fileEntry), + GFP_KERNEL); } - if (!UDF_I_DATA(inode)) - { + if (!UDF_I_DATA(inode)) { iput(inode); *err = -ENOMEM; mutex_unlock(&sbi->s_alloc_mutex); @@ -150,13 +154,12 @@ struct inode * udf_new_inode (struct inode *dir, int mode, int * err) else UDF_I_ALLOCTYPE(inode) = ICBTAG_FLAG_AD_LONG; inode->i_mtime = inode->i_atime = inode->i_ctime = - UDF_I_CRTIME(inode) = current_fs_time(inode->i_sb); + UDF_I_CRTIME(inode) = current_fs_time(inode->i_sb); insert_inode_hash(inode); mark_inode_dirty(inode); mutex_unlock(&sbi->s_alloc_mutex); - if (DQUOT_ALLOC_INODE(inode)) - { + if (DQUOT_ALLOC_INODE(inode)) { DQUOT_DROP(inode); inode->i_flags |= S_NOQUOTA; inode->i_nlink = 0; diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 5b82e489af78..be6326f449a1 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -51,18 +51,18 @@ static int udf_update_inode(struct inode *, int); static void udf_fill_inode(struct inode *, struct buffer_head *); static int udf_alloc_i_data(struct inode *inode, size_t size); static struct buffer_head *inode_getblk(struct inode *, sector_t, int *, - long *, int *); + long *, int *); static int8_t udf_insert_aext(struct inode *, struct extent_position, - kernel_lb_addr, uint32_t); + kernel_lb_addr, uint32_t); static void udf_split_extents(struct inode *, int *, int, int, - kernel_long_ad [EXTENT_MERGE_SIZE], int *); + kernel_long_ad[EXTENT_MERGE_SIZE], int *); static void udf_prealloc_extents(struct inode *, int, int, - kernel_long_ad [EXTENT_MERGE_SIZE], int *); + kernel_long_ad[EXTENT_MERGE_SIZE], int *); static void udf_merge_extents(struct inode *, - kernel_long_ad [EXTENT_MERGE_SIZE], int *); + kernel_long_ad[EXTENT_MERGE_SIZE], int *); static void udf_update_extents(struct inode *, - kernel_long_ad [EXTENT_MERGE_SIZE], int, int, - struct extent_position *); + kernel_long_ad[EXTENT_MERGE_SIZE], int, int, + struct extent_position *); static int udf_get_block(struct inode *, sector_t, struct buffer_head *, int); /* @@ -81,7 +81,7 @@ static int udf_get_block(struct inode *, sector_t, struct buffer_head *, int); * * Called at the last iput() if i_nlink is zero. */ -void udf_delete_inode(struct inode * inode) +void udf_delete_inode(struct inode *inode) { truncate_inode_pages(&inode->i_data, 0); @@ -97,7 +97,7 @@ void udf_delete_inode(struct inode * inode) unlock_kernel(); return; -no_delete: + no_delete: clear_inode(inode); } @@ -132,26 +132,27 @@ static int udf_readpage(struct file *file, struct page *page) return block_read_full_page(page, udf_get_block); } -static int udf_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) +static int udf_prepare_write(struct file *file, struct page *page, + unsigned from, unsigned to) { return block_prepare_write(page, from, to, udf_get_block); } static sector_t udf_bmap(struct address_space *mapping, sector_t block) { - return generic_block_bmap(mapping,block,udf_get_block); + return generic_block_bmap(mapping, block, udf_get_block); } const struct address_space_operations udf_aops = { - .readpage = udf_readpage, - .writepage = udf_writepage, - .sync_page = block_sync_page, - .prepare_write = udf_prepare_write, - .commit_write = generic_commit_write, - .bmap = udf_bmap, + .readpage = udf_readpage, + .writepage = udf_writepage, + .sync_page = block_sync_page, + .prepare_write = udf_prepare_write, + .commit_write = generic_commit_write, + .bmap = udf_bmap, }; -void udf_expand_file_adinicb(struct inode * inode, int newsize, int * err) +void udf_expand_file_adinicb(struct inode *inode, int newsize, int *err) { struct page *page; char *kaddr; @@ -163,8 +164,7 @@ void udf_expand_file_adinicb(struct inode * inode, int newsize, int * err) /* from now on we have normal address_space methods */ inode->i_data.a_ops = &udf_aops; - if (!UDF_I_LENALLOC(inode)) - { + if (!UDF_I_LENALLOC(inode)) { if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_SHORT_AD)) UDF_I_ALLOCTYPE(inode) = ICBTAG_FLAG_AD_SHORT; else @@ -176,19 +176,18 @@ void udf_expand_file_adinicb(struct inode * inode, int newsize, int * err) page = grab_cache_page(inode->i_mapping, 0); BUG_ON(!PageLocked(page)); - if (!PageUptodate(page)) - { + if (!PageUptodate(page)) { kaddr = kmap(page); memset(kaddr + UDF_I_LENALLOC(inode), 0x00, - PAGE_CACHE_SIZE - UDF_I_LENALLOC(inode)); + PAGE_CACHE_SIZE - UDF_I_LENALLOC(inode)); memcpy(kaddr, UDF_I_DATA(inode) + UDF_I_LENEATTR(inode), - UDF_I_LENALLOC(inode)); + UDF_I_LENALLOC(inode)); flush_dcache_page(page); SetPageUptodate(page); kunmap(page); } memset(UDF_I_DATA(inode) + UDF_I_LENEATTR(inode), 0x00, - UDF_I_LENALLOC(inode)); + UDF_I_LENALLOC(inode)); UDF_I_LENALLOC(inode) = 0; if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_SHORT_AD)) UDF_I_ALLOCTYPE(inode) = ICBTAG_FLAG_AD_SHORT; @@ -201,7 +200,8 @@ void udf_expand_file_adinicb(struct inode * inode, int newsize, int * err) mark_inode_dirty(inode); } -struct buffer_head * udf_expand_dir_adinicb(struct inode *inode, int *block, int *err) +struct buffer_head *udf_expand_dir_adinicb(struct inode *inode, int *block, + int *err) { int newblock; struct buffer_head *dbh = NULL; @@ -220,8 +220,7 @@ struct buffer_head * udf_expand_dir_adinicb(struct inode *inode, int *block, int else alloctype = ICBTAG_FLAG_AD_LONG; - if (!inode->i_size) - { + if (!inode->i_size) { UDF_I_ALLOCTYPE(inode) = alloctype; mark_inode_dirty(inode); return NULL; @@ -229,13 +228,14 @@ struct buffer_head * udf_expand_dir_adinicb(struct inode *inode, int *block, int /* alloc block, and copy data to it */ *block = udf_new_block(inode->i_sb, inode, - UDF_I_LOCATION(inode).partitionReferenceNum, - UDF_I_LOCATION(inode).logicalBlockNum, err); + UDF_I_LOCATION(inode).partitionReferenceNum, + UDF_I_LOCATION(inode).logicalBlockNum, err); if (!(*block)) return NULL; newblock = udf_get_pblock(inode->i_sb, *block, - UDF_I_LOCATION(inode).partitionReferenceNum, 0); + UDF_I_LOCATION(inode).partitionReferenceNum, + 0); if (!newblock) return NULL; dbh = udf_tgetblk(inode->i_sb, newblock); @@ -247,16 +247,17 @@ struct buffer_head * udf_expand_dir_adinicb(struct inode *inode, int *block, int unlock_buffer(dbh); mark_buffer_dirty_inode(dbh, inode); - sfibh.soffset = sfibh.eoffset = (f_pos & ((inode->i_sb->s_blocksize - 1) >> 2)) << 2; + sfibh.soffset = sfibh.eoffset = + (f_pos & ((inode->i_sb->s_blocksize - 1) >> 2)) << 2; sfibh.sbh = sfibh.ebh = NULL; dfibh.soffset = dfibh.eoffset = 0; dfibh.sbh = dfibh.ebh = dbh; - while ( (f_pos < size) ) - { + while ((f_pos < size)) { UDF_I_ALLOCTYPE(inode) = ICBTAG_FLAG_AD_IN_ICB; - sfi = udf_fileident_read(inode, &f_pos, &sfibh, &cfi, NULL, NULL, NULL, NULL); - if (!sfi) - { + sfi = + udf_fileident_read(inode, &f_pos, &sfibh, &cfi, NULL, NULL, + NULL, NULL); + if (!sfi) { brelse(dbh); return NULL; } @@ -266,8 +267,8 @@ struct buffer_head * udf_expand_dir_adinicb(struct inode *inode, int *block, int dfibh.eoffset += (sfibh.eoffset - sfibh.soffset); dfi = (struct fileIdentDesc *)(dbh->b_data + dfibh.soffset); if (udf_write_fi(inode, sfi, dfi, &dfibh, sfi->impUse, - sfi->fileIdent + le16_to_cpu(sfi->lengthOfImpUse))) - { + sfi->fileIdent + + le16_to_cpu(sfi->lengthOfImpUse))) { UDF_I_ALLOCTYPE(inode) = ICBTAG_FLAG_AD_IN_ICB; brelse(dbh); return NULL; @@ -275,10 +276,12 @@ struct buffer_head * udf_expand_dir_adinicb(struct inode *inode, int *block, int } mark_buffer_dirty_inode(dbh, inode); - memset(UDF_I_DATA(inode) + UDF_I_LENEATTR(inode), 0, UDF_I_LENALLOC(inode)); + memset(UDF_I_DATA(inode) + UDF_I_LENEATTR(inode), 0, + UDF_I_LENALLOC(inode)); UDF_I_LENALLOC(inode) = 0; eloc.logicalBlockNum = *block; - eloc.partitionReferenceNum = UDF_I_LOCATION(inode).partitionReferenceNum; + eloc.partitionReferenceNum = + UDF_I_LOCATION(inode).partitionReferenceNum; elen = inode->i_size; UDF_I_LENEXTENTS(inode) = elen; epos.bh = NULL; @@ -292,14 +295,14 @@ struct buffer_head * udf_expand_dir_adinicb(struct inode *inode, int *block, int return dbh; } -static int udf_get_block(struct inode *inode, sector_t block, struct buffer_head *bh_result, int create) +static int udf_get_block(struct inode *inode, sector_t block, + struct buffer_head *bh_result, int create) { int err, new; struct buffer_head *bh; unsigned long phys; - if (!create) - { + if (!create) { phys = udf_block_map(inode, block); if (phys) map_bh(bh_result, inode->i_sb, phys); @@ -315,10 +318,9 @@ static int udf_get_block(struct inode *inode, sector_t block, struct buffer_head if (block < 0) goto abort_negative; - if (block == UDF_I_NEXT_ALLOC_BLOCK(inode) + 1) - { - UDF_I_NEXT_ALLOC_BLOCK(inode) ++; - UDF_I_NEXT_ALLOC_GOAL(inode) ++; + if (block == UDF_I_NEXT_ALLOC_BLOCK(inode) + 1) { + UDF_I_NEXT_ALLOC_BLOCK(inode)++; + UDF_I_NEXT_ALLOC_GOAL(inode)++; } err = 0; @@ -332,29 +334,27 @@ static int udf_get_block(struct inode *inode, sector_t block, struct buffer_head if (new) set_buffer_new(bh_result); map_bh(bh_result, inode->i_sb, phys); -abort: + abort: unlock_kernel(); return err; -abort_negative: + abort_negative: udf_warning(inode->i_sb, "udf_get_block", "block < 0"); goto abort; } -static struct buffer_head * -udf_getblk(struct inode *inode, long block, int create, int *err) +static struct buffer_head *udf_getblk(struct inode *inode, long block, + int create, int *err) { struct buffer_head dummy; dummy.b_state = 0; dummy.b_blocknr = -1000; *err = udf_get_block(inode, block, &dummy, create); - if (!*err && buffer_mapped(&dummy)) - { + if (!*err && buffer_mapped(&dummy)) { struct buffer_head *bh; bh = sb_getblk(inode->i_sb, dummy.b_blocknr); - if (buffer_new(&dummy)) - { + if (buffer_new(&dummy)) { lock_buffer(bh); memset(bh->b_data, 0x00, inode->i_sb->s_blocksize); set_buffer_uptodate(bh); @@ -368,12 +368,12 @@ udf_getblk(struct inode *inode, long block, int create, int *err) /* Extend the file by 'blocks' blocks, return the number of extents added */ int udf_extend_file(struct inode *inode, struct extent_position *last_pos, - kernel_long_ad *last_ext, sector_t blocks) + kernel_long_ad * last_ext, sector_t blocks) { sector_t add; int count = 0, fake = !(last_ext->extLength & UDF_EXTENT_LENGTH_MASK); struct super_block *sb = inode->i_sb; - kernel_lb_addr prealloc_loc = {0, 0}; + kernel_lb_addr prealloc_loc = { 0, 0 }; int prealloc_len = 0; /* The previous extent is fake and we should not extend by anything @@ -383,28 +383,32 @@ int udf_extend_file(struct inode *inode, struct extent_position *last_pos, /* Round the last extent up to a multiple of block size */ if (last_ext->extLength & (sb->s_blocksize - 1)) { last_ext->extLength = - (last_ext->extLength & UDF_EXTENT_FLAG_MASK) | - (((last_ext->extLength & UDF_EXTENT_LENGTH_MASK) + - sb->s_blocksize - 1) & ~(sb->s_blocksize - 1)); + (last_ext->extLength & UDF_EXTENT_FLAG_MASK) | + (((last_ext->extLength & UDF_EXTENT_LENGTH_MASK) + + sb->s_blocksize - 1) & ~(sb->s_blocksize - 1)); UDF_I_LENEXTENTS(inode) = - (UDF_I_LENEXTENTS(inode) + sb->s_blocksize - 1) & - ~(sb->s_blocksize - 1); + (UDF_I_LENEXTENTS(inode) + sb->s_blocksize - 1) & + ~(sb->s_blocksize - 1); } /* Last extent are just preallocated blocks? */ - if ((last_ext->extLength & UDF_EXTENT_FLAG_MASK) == EXT_NOT_RECORDED_ALLOCATED) { + if ((last_ext->extLength & UDF_EXTENT_FLAG_MASK) == + EXT_NOT_RECORDED_ALLOCATED) { /* Save the extent so that we can reattach it to the end */ prealloc_loc = last_ext->extLocation; prealloc_len = last_ext->extLength; /* Mark the extent as a hole */ last_ext->extLength = EXT_NOT_RECORDED_NOT_ALLOCATED | - (last_ext->extLength & UDF_EXTENT_LENGTH_MASK); + (last_ext->extLength & UDF_EXTENT_LENGTH_MASK); last_ext->extLocation.logicalBlockNum = 0; - last_ext->extLocation.partitionReferenceNum = 0; + last_ext->extLocation.partitionReferenceNum = 0; } /* Can we merge with the previous extent? */ - if ((last_ext->extLength & UDF_EXTENT_FLAG_MASK) == EXT_NOT_RECORDED_NOT_ALLOCATED) { - add = ((1<<30) - sb->s_blocksize - (last_ext->extLength & - UDF_EXTENT_LENGTH_MASK)) >> sb->s_blocksize_bits; + if ((last_ext->extLength & UDF_EXTENT_FLAG_MASK) == + EXT_NOT_RECORDED_NOT_ALLOCATED) { + add = + ((1 << 30) - sb->s_blocksize - + (last_ext->extLength & UDF_EXTENT_LENGTH_MASK)) >> sb-> + s_blocksize_bits; if (add > blocks) add = blocks; blocks -= add; @@ -413,40 +417,42 @@ int udf_extend_file(struct inode *inode, struct extent_position *last_pos, if (fake) { udf_add_aext(inode, last_pos, last_ext->extLocation, - last_ext->extLength, 1); + last_ext->extLength, 1); count++; - } - else - udf_write_aext(inode, last_pos, last_ext->extLocation, last_ext->extLength, 1); + } else + udf_write_aext(inode, last_pos, last_ext->extLocation, + last_ext->extLength, 1); /* Managed to do everything necessary? */ if (!blocks) goto out; /* All further extents will be NOT_RECORDED_NOT_ALLOCATED */ last_ext->extLocation.logicalBlockNum = 0; - last_ext->extLocation.partitionReferenceNum = 0; - add = (1 << (30-sb->s_blocksize_bits)) - 1; - last_ext->extLength = EXT_NOT_RECORDED_NOT_ALLOCATED | (add << sb->s_blocksize_bits); + last_ext->extLocation.partitionReferenceNum = 0; + add = (1 << (30 - sb->s_blocksize_bits)) - 1; + last_ext->extLength = + EXT_NOT_RECORDED_NOT_ALLOCATED | (add << sb->s_blocksize_bits); /* Create enough extents to cover the whole hole */ while (blocks > add) { blocks -= add; if (udf_add_aext(inode, last_pos, last_ext->extLocation, - last_ext->extLength, 1) == -1) + last_ext->extLength, 1) == -1) return -1; count++; } if (blocks) { last_ext->extLength = EXT_NOT_RECORDED_NOT_ALLOCATED | - (blocks << sb->s_blocksize_bits); + (blocks << sb->s_blocksize_bits); if (udf_add_aext(inode, last_pos, last_ext->extLocation, - last_ext->extLength, 1) == -1) + last_ext->extLength, 1) == -1) return -1; count++; } -out: + out: /* Do we have some preallocated blocks saved? */ if (prealloc_len) { - if (udf_add_aext(inode, last_pos, prealloc_loc, prealloc_len, 1) == -1) + if (udf_add_aext(inode, last_pos, prealloc_loc, prealloc_len, 1) + == -1) return -1; last_ext->extLocation = prealloc_loc; last_ext->extLength = prealloc_len; @@ -462,8 +468,8 @@ out: return count; } -static struct buffer_head * inode_getblk(struct inode * inode, sector_t block, - int *err, long *phys, int *new) +static struct buffer_head *inode_getblk(struct inode *inode, sector_t block, + int *err, long *phys, int *new) { static sector_t last_block; struct buffer_head *result = NULL; @@ -484,21 +490,18 @@ static struct buffer_head * inode_getblk(struct inode * inode, sector_t block, prev_epos.block = UDF_I_LOCATION(inode); prev_epos.bh = NULL; cur_epos = next_epos = prev_epos; - b_off = (loff_t)block << inode->i_sb->s_blocksize_bits; + b_off = (loff_t) block << inode->i_sb->s_blocksize_bits; /* find the extent which contains the block we are looking for. - alternate between laarr[0] and laarr[1] for locations of the - current extent, and the previous extent */ - do - { - if (prev_epos.bh != cur_epos.bh) - { + alternate between laarr[0] and laarr[1] for locations of the + current extent, and the previous extent */ + do { + if (prev_epos.bh != cur_epos.bh) { brelse(prev_epos.bh); get_bh(cur_epos.bh); prev_epos.bh = cur_epos.bh; } - if (cur_epos.bh != next_epos.bh) - { + if (cur_epos.bh != next_epos.bh) { brelse(cur_epos.bh); get_bh(next_epos.bh); cur_epos.bh = next_epos.bh; @@ -512,7 +515,8 @@ static struct buffer_head * inode_getblk(struct inode * inode, sector_t block, prev_epos.offset = cur_epos.offset; cur_epos.offset = next_epos.offset; - if ((etype = udf_next_aext(inode, &next_epos, &eloc, &elen, 1)) == -1) + if ((etype = + udf_next_aext(inode, &next_epos, &eloc, &elen, 1)) == -1) break; c = !c; @@ -522,10 +526,10 @@ static struct buffer_head * inode_getblk(struct inode * inode, sector_t block, if (etype != (EXT_NOT_RECORDED_NOT_ALLOCATED >> 30)) pgoal = eloc.logicalBlockNum + - ((elen + inode->i_sb->s_blocksize - 1) >> - inode->i_sb->s_blocksize_bits); + ((elen + inode->i_sb->s_blocksize - 1) >> + inode->i_sb->s_blocksize_bits); - count ++; + count++; } while (lbcount + elen <= b_off); b_off -= lbcount; @@ -538,15 +542,13 @@ static struct buffer_head * inode_getblk(struct inode * inode, sector_t block, udf_next_aext(inode, &cur_epos, &tmpeloc, &tmpelen, 0); /* if the extent is allocated and recorded, return the block - if the extent is not a multiple of the blocksize, round up */ + if the extent is not a multiple of the blocksize, round up */ - if (etype == (EXT_RECORDED_ALLOCATED >> 30)) - { - if (elen & (inode->i_sb->s_blocksize - 1)) - { + if (etype == (EXT_RECORDED_ALLOCATED >> 30)) { + if (elen & (inode->i_sb->s_blocksize - 1)) { elen = EXT_RECORDED_ALLOCATED | - ((elen + inode->i_sb->s_blocksize - 1) & - ~(inode->i_sb->s_blocksize - 1)); + ((elen + inode->i_sb->s_blocksize - 1) & + ~(inode->i_sb->s_blocksize - 1)); etype = udf_write_aext(inode, &cur_epos, eloc, elen, 1); } brelse(prev_epos.bh); @@ -559,18 +561,17 @@ static struct buffer_head * inode_getblk(struct inode * inode, sector_t block, last_block = block; /* Are we beyond EOF? */ - if (etype == -1) - { + if (etype == -1) { int ret; if (count) { if (c) laarr[0] = laarr[1]; startnum = 1; - } - else { + } else { /* Create a fake extent when there's not one */ - memset(&laarr[0].extLocation, 0x00, sizeof(kernel_lb_addr)); + memset(&laarr[0].extLocation, 0x00, + sizeof(kernel_lb_addr)); laarr[0].extLength = EXT_NOT_RECORDED_NOT_ALLOCATED; /* Will udf_extend_file() create real extent from a fake one? */ startnum = (offset > 0); @@ -590,26 +591,26 @@ static struct buffer_head * inode_getblk(struct inode * inode, sector_t block, offset = 0; count += ret; /* We are not covered by a preallocated extent? */ - if ((laarr[0].extLength & UDF_EXTENT_FLAG_MASK) != EXT_NOT_RECORDED_ALLOCATED) { + if ((laarr[0].extLength & UDF_EXTENT_FLAG_MASK) != + EXT_NOT_RECORDED_ALLOCATED) { /* Is there any real extent? - otherwise we overwrite * the fake one... */ if (count) c = !c; laarr[c].extLength = EXT_NOT_RECORDED_NOT_ALLOCATED | - inode->i_sb->s_blocksize; - memset(&laarr[c].extLocation, 0x00, sizeof(kernel_lb_addr)); - count ++; - endnum ++; + inode->i_sb->s_blocksize; + memset(&laarr[c].extLocation, 0x00, + sizeof(kernel_lb_addr)); + count++; + endnum++; } - endnum = c+1; + endnum = c + 1; lastblock = 1; - } - else { + } else { endnum = startnum = ((count > 2) ? 2 : count); /* if the current extent is in position 0, swap it with the previous */ - if (!c && count != 1) - { + if (!c && count != 1) { laarr[2] = laarr[0]; laarr[0] = laarr[1]; laarr[1] = laarr[2]; @@ -617,37 +618,37 @@ static struct buffer_head * inode_getblk(struct inode * inode, sector_t block, } /* if the current block is located in an extent, read the next extent */ - if ((etype = udf_next_aext(inode, &next_epos, &eloc, &elen, 0)) != -1) - { - laarr[c+1].extLength = (etype << 30) | elen; - laarr[c+1].extLocation = eloc; - count ++; - startnum ++; - endnum ++; - } - else { + if ((etype = + udf_next_aext(inode, &next_epos, &eloc, &elen, 0)) != -1) { + laarr[c + 1].extLength = (etype << 30) | elen; + laarr[c + 1].extLocation = eloc; + count++; + startnum++; + endnum++; + } else { lastblock = 1; } } /* if the current extent is not recorded but allocated, get the - block in the extent corresponding to the requested block */ + block in the extent corresponding to the requested block */ if ((laarr[c].extLength >> 30) == (EXT_NOT_RECORDED_ALLOCATED >> 30)) newblocknum = laarr[c].extLocation.logicalBlockNum + offset; - else /* otherwise, allocate a new block */ - { + else { /* otherwise, allocate a new block */ + if (UDF_I_NEXT_ALLOC_BLOCK(inode) == block) goal = UDF_I_NEXT_ALLOC_GOAL(inode); - if (!goal) - { + if (!goal) { if (!(goal = pgoal)) - goal = UDF_I_LOCATION(inode).logicalBlockNum + 1; + goal = + UDF_I_LOCATION(inode).logicalBlockNum + 1; } if (!(newblocknum = udf_new_block(inode->i_sb, inode, - UDF_I_LOCATION(inode).partitionReferenceNum, goal, err))) - { + UDF_I_LOCATION(inode). + partitionReferenceNum, goal, + err))) { brelse(prev_epos.bh); *err = -ENOSPC; return NULL; @@ -656,8 +657,8 @@ static struct buffer_head * inode_getblk(struct inode * inode, sector_t block, } /* if the extent the requsted block is located in contains multiple blocks, - split the extent into at most three extents. blocks prior to requested - block, requested block, and blocks after requested block */ + split the extent into at most three extents. blocks prior to requested + block, requested block, and blocks after requested block */ udf_split_extents(inode, &c, offset, newblocknum, laarr, &endnum); #ifdef UDF_PREALLOCATE @@ -669,15 +670,15 @@ static struct buffer_head * inode_getblk(struct inode * inode, sector_t block, udf_merge_extents(inode, laarr, &endnum); /* write back the new extents, inserting new extents if the new number - of extents is greater than the old number, and deleting extents if - the new number of extents is less than the old number */ + of extents is greater than the old number, and deleting extents if + the new number of extents is less than the old number */ udf_update_extents(inode, laarr, startnum, endnum, &prev_epos); brelse(prev_epos.bh); if (!(newblock = udf_get_pblock(inode->i_sb, newblocknum, - UDF_I_LOCATION(inode).partitionReferenceNum, 0))) - { + UDF_I_LOCATION(inode). + partitionReferenceNum, 0))) { return NULL; } *phys = newblock; @@ -694,283 +695,329 @@ static struct buffer_head * inode_getblk(struct inode * inode, sector_t block, return result; } -static void udf_split_extents(struct inode *inode, int *c, int offset, int newblocknum, - kernel_long_ad laarr[EXTENT_MERGE_SIZE], int *endnum) +static void udf_split_extents(struct inode *inode, int *c, int offset, + int newblocknum, + kernel_long_ad laarr[EXTENT_MERGE_SIZE], + int *endnum) { if ((laarr[*c].extLength >> 30) == (EXT_NOT_RECORDED_ALLOCATED >> 30) || - (laarr[*c].extLength >> 30) == (EXT_NOT_RECORDED_NOT_ALLOCATED >> 30)) - { + (laarr[*c].extLength >> 30) == + (EXT_NOT_RECORDED_NOT_ALLOCATED >> 30)) { int curr = *c; int blen = ((laarr[curr].extLength & UDF_EXTENT_LENGTH_MASK) + - inode->i_sb->s_blocksize - 1) >> inode->i_sb->s_blocksize_bits; + inode->i_sb->s_blocksize - + 1) >> inode->i_sb->s_blocksize_bits; int8_t etype = (laarr[curr].extLength >> 30); - if (blen == 1) - ; - else if (!offset || blen == offset + 1) - { - laarr[curr+2] = laarr[curr+1]; - laarr[curr+1] = laarr[curr]; - } - else - { - laarr[curr+3] = laarr[curr+1]; - laarr[curr+2] = laarr[curr+1] = laarr[curr]; - } - - if (offset) - { - if (etype == (EXT_NOT_RECORDED_ALLOCATED >> 30)) - { - udf_free_blocks(inode->i_sb, inode, laarr[curr].extLocation, 0, offset); - laarr[curr].extLength = EXT_NOT_RECORDED_NOT_ALLOCATED | - (offset << inode->i_sb->s_blocksize_bits); + if (blen == 1) ; + else if (!offset || blen == offset + 1) { + laarr[curr + 2] = laarr[curr + 1]; + laarr[curr + 1] = laarr[curr]; + } else { + laarr[curr + 3] = laarr[curr + 1]; + laarr[curr + 2] = laarr[curr + 1] = laarr[curr]; + } + + if (offset) { + if (etype == (EXT_NOT_RECORDED_ALLOCATED >> 30)) { + udf_free_blocks(inode->i_sb, inode, + laarr[curr].extLocation, 0, + offset); + laarr[curr].extLength = + EXT_NOT_RECORDED_NOT_ALLOCATED | (offset << + inode-> + i_sb-> + s_blocksize_bits); laarr[curr].extLocation.logicalBlockNum = 0; - laarr[curr].extLocation.partitionReferenceNum = 0; - } - else + laarr[curr].extLocation.partitionReferenceNum = + 0; + } else laarr[curr].extLength = (etype << 30) | - (offset << inode->i_sb->s_blocksize_bits); - curr ++; - (*c) ++; - (*endnum) ++; + (offset << inode->i_sb->s_blocksize_bits); + curr++; + (*c)++; + (*endnum)++; } laarr[curr].extLocation.logicalBlockNum = newblocknum; if (etype == (EXT_NOT_RECORDED_NOT_ALLOCATED >> 30)) laarr[curr].extLocation.partitionReferenceNum = - UDF_I_LOCATION(inode).partitionReferenceNum; + UDF_I_LOCATION(inode).partitionReferenceNum; laarr[curr].extLength = EXT_RECORDED_ALLOCATED | - inode->i_sb->s_blocksize; - curr ++; + inode->i_sb->s_blocksize; + curr++; - if (blen != offset + 1) - { + if (blen != offset + 1) { if (etype == (EXT_NOT_RECORDED_ALLOCATED >> 30)) - laarr[curr].extLocation.logicalBlockNum += (offset + 1); - laarr[curr].extLength = (etype << 30) | - ((blen - (offset + 1)) << inode->i_sb->s_blocksize_bits); - curr ++; - (*endnum) ++; + laarr[curr].extLocation.logicalBlockNum += + (offset + 1); + laarr[curr].extLength = + (etype << 30) | ((blen - (offset + 1)) << inode-> + i_sb->s_blocksize_bits); + curr++; + (*endnum)++; } } } static void udf_prealloc_extents(struct inode *inode, int c, int lastblock, - kernel_long_ad laarr[EXTENT_MERGE_SIZE], int *endnum) + kernel_long_ad laarr[EXTENT_MERGE_SIZE], + int *endnum) { int start, length = 0, currlength = 0, i; - if (*endnum >= (c+1)) - { + if (*endnum >= (c + 1)) { if (!lastblock) return; else start = c; - } - else - { - if ((laarr[c+1].extLength >> 30) == (EXT_NOT_RECORDED_ALLOCATED >> 30)) - { - start = c+1; - length = currlength = (((laarr[c+1].extLength & UDF_EXTENT_LENGTH_MASK) + - inode->i_sb->s_blocksize - 1) >> inode->i_sb->s_blocksize_bits); - } - else + } else { + if ((laarr[c + 1].extLength >> 30) == + (EXT_NOT_RECORDED_ALLOCATED >> 30)) { + start = c + 1; + length = currlength = + (((laarr[c + 1]. + extLength & UDF_EXTENT_LENGTH_MASK) + + inode->i_sb->s_blocksize - + 1) >> inode->i_sb->s_blocksize_bits); + } else start = c; } - for (i=start+1; i<=*endnum; i++) - { - if (i == *endnum) - { + for (i = start + 1; i <= *endnum; i++) { + if (i == *endnum) { if (lastblock) length += UDF_DEFAULT_PREALLOC_BLOCKS; - } - else if ((laarr[i].extLength >> 30) == (EXT_NOT_RECORDED_NOT_ALLOCATED >> 30)) - length += (((laarr[i].extLength & UDF_EXTENT_LENGTH_MASK) + - inode->i_sb->s_blocksize - 1) >> inode->i_sb->s_blocksize_bits); + } else if ((laarr[i].extLength >> 30) == + (EXT_NOT_RECORDED_NOT_ALLOCATED >> 30)) + length += + (((laarr[i].extLength & UDF_EXTENT_LENGTH_MASK) + + inode->i_sb->s_blocksize - + 1) >> inode->i_sb->s_blocksize_bits); else break; } - if (length) - { + if (length) { int next = laarr[start].extLocation.logicalBlockNum + - (((laarr[start].extLength & UDF_EXTENT_LENGTH_MASK) + - inode->i_sb->s_blocksize - 1) >> inode->i_sb->s_blocksize_bits); + (((laarr[start].extLength & UDF_EXTENT_LENGTH_MASK) + + inode->i_sb->s_blocksize - + 1) >> inode->i_sb->s_blocksize_bits); int numalloc = udf_prealloc_blocks(inode->i_sb, inode, - laarr[start].extLocation.partitionReferenceNum, - next, (UDF_DEFAULT_PREALLOC_BLOCKS > length ? length : - UDF_DEFAULT_PREALLOC_BLOCKS) - currlength); - - if (numalloc) - { - if (start == (c+1)) + laarr[start].extLocation. + partitionReferenceNum, + next, + (UDF_DEFAULT_PREALLOC_BLOCKS + > + length ? length : + UDF_DEFAULT_PREALLOC_BLOCKS) + - currlength); + + if (numalloc) { + if (start == (c + 1)) laarr[start].extLength += - (numalloc << inode->i_sb->s_blocksize_bits); - else - { - memmove(&laarr[c+2], &laarr[c+1], - sizeof(long_ad) * (*endnum - (c+1))); - (*endnum) ++; - laarr[c+1].extLocation.logicalBlockNum = next; - laarr[c+1].extLocation.partitionReferenceNum = - laarr[c].extLocation.partitionReferenceNum; - laarr[c+1].extLength = EXT_NOT_RECORDED_ALLOCATED | - (numalloc << inode->i_sb->s_blocksize_bits); - start = c+1; + (numalloc << inode->i_sb->s_blocksize_bits); + else { + memmove(&laarr[c + 2], &laarr[c + 1], + sizeof(long_ad) * (*endnum - (c + 1))); + (*endnum)++; + laarr[c + 1].extLocation.logicalBlockNum = next; + laarr[c + 1].extLocation.partitionReferenceNum = + laarr[c].extLocation.partitionReferenceNum; + laarr[c + 1].extLength = + EXT_NOT_RECORDED_ALLOCATED | (numalloc << + inode->i_sb-> + s_blocksize_bits); + start = c + 1; } - for (i=start+1; numalloc && i<*endnum; i++) - { - int elen = ((laarr[i].extLength & UDF_EXTENT_LENGTH_MASK) + - inode->i_sb->s_blocksize - 1) >> inode->i_sb->s_blocksize_bits; + for (i = start + 1; numalloc && i < *endnum; i++) { + int elen = + ((laarr[i]. + extLength & UDF_EXTENT_LENGTH_MASK) + + inode->i_sb->s_blocksize - + 1) >> inode->i_sb->s_blocksize_bits; - if (elen > numalloc) - { + if (elen > numalloc) { laarr[i].extLength -= - (numalloc << inode->i_sb->s_blocksize_bits); + (numalloc << inode->i_sb-> + s_blocksize_bits); numalloc = 0; - } - else - { + } else { numalloc -= elen; - if (*endnum > (i+1)) - memmove(&laarr[i], &laarr[i+1], - sizeof(long_ad) * (*endnum - (i+1))); - i --; - (*endnum) --; + if (*endnum > (i + 1)) + memmove(&laarr[i], + &laarr[i + 1], + sizeof(long_ad) * + (*endnum - (i + 1))); + i--; + (*endnum)--; } } - UDF_I_LENEXTENTS(inode) += numalloc << inode->i_sb->s_blocksize_bits; + UDF_I_LENEXTENTS(inode) += + numalloc << inode->i_sb->s_blocksize_bits; } } } static void udf_merge_extents(struct inode *inode, - kernel_long_ad laarr[EXTENT_MERGE_SIZE], int *endnum) + kernel_long_ad laarr[EXTENT_MERGE_SIZE], + int *endnum) { int i; - for (i=0; i<(*endnum-1); i++) - { - if ((laarr[i].extLength >> 30) == (laarr[i+1].extLength >> 30)) - { - if (((laarr[i].extLength >> 30) == (EXT_NOT_RECORDED_NOT_ALLOCATED >> 30)) || - ((laarr[i+1].extLocation.logicalBlockNum - laarr[i].extLocation.logicalBlockNum) == - (((laarr[i].extLength & UDF_EXTENT_LENGTH_MASK) + - inode->i_sb->s_blocksize - 1) >> inode->i_sb->s_blocksize_bits))) - { - if (((laarr[i].extLength & UDF_EXTENT_LENGTH_MASK) + - (laarr[i+1].extLength & UDF_EXTENT_LENGTH_MASK) + - inode->i_sb->s_blocksize - 1) & ~UDF_EXTENT_LENGTH_MASK) - { - laarr[i+1].extLength = (laarr[i+1].extLength - - (laarr[i].extLength & UDF_EXTENT_LENGTH_MASK) + - UDF_EXTENT_LENGTH_MASK) & ~(inode->i_sb->s_blocksize-1); - laarr[i].extLength = (laarr[i].extLength & UDF_EXTENT_FLAG_MASK) + - (UDF_EXTENT_LENGTH_MASK + 1) - inode->i_sb->s_blocksize; - laarr[i+1].extLocation.logicalBlockNum = - laarr[i].extLocation.logicalBlockNum + - ((laarr[i].extLength & UDF_EXTENT_LENGTH_MASK) >> - inode->i_sb->s_blocksize_bits); - } - else - { - laarr[i].extLength = laarr[i+1].extLength + - (((laarr[i].extLength & UDF_EXTENT_LENGTH_MASK) + - inode->i_sb->s_blocksize - 1) & ~(inode->i_sb->s_blocksize-1)); - if (*endnum > (i+2)) - memmove(&laarr[i+1], &laarr[i+2], - sizeof(long_ad) * (*endnum - (i+2))); - i --; - (*endnum) --; + for (i = 0; i < (*endnum - 1); i++) { + if ((laarr[i].extLength >> 30) == + (laarr[i + 1].extLength >> 30)) { + if (((laarr[i].extLength >> 30) == + (EXT_NOT_RECORDED_NOT_ALLOCATED >> 30)) + || + ((laarr[i + 1].extLocation.logicalBlockNum - + laarr[i].extLocation.logicalBlockNum) == + (((laarr[i].extLength & UDF_EXTENT_LENGTH_MASK) + + inode->i_sb->s_blocksize - + 1) >> inode->i_sb->s_blocksize_bits))) { + if (((laarr[i]. + extLength & UDF_EXTENT_LENGTH_MASK) + + (laarr[i + 1]. + extLength & UDF_EXTENT_LENGTH_MASK) + + inode->i_sb->s_blocksize - + 1) & ~UDF_EXTENT_LENGTH_MASK) { + laarr[i + 1].extLength = + (laarr[i + 1].extLength - + (laarr[i]. + extLength & + UDF_EXTENT_LENGTH_MASK) + + UDF_EXTENT_LENGTH_MASK) & ~(inode-> + i_sb-> + s_blocksize + - 1); + laarr[i].extLength = + (laarr[i]. + extLength & UDF_EXTENT_FLAG_MASK) + + (UDF_EXTENT_LENGTH_MASK + 1) - + inode->i_sb->s_blocksize; + laarr[i + + 1].extLocation.logicalBlockNum = + laarr[i].extLocation. + logicalBlockNum + + ((laarr[i]. + extLength & + UDF_EXTENT_LENGTH_MASK) >> inode-> + i_sb->s_blocksize_bits); + } else { + laarr[i].extLength = + laarr[i + 1].extLength + + (((laarr[i]. + extLength & + UDF_EXTENT_LENGTH_MASK) + + inode->i_sb->s_blocksize - + 1) & ~(inode->i_sb->s_blocksize - + 1)); + if (*endnum > (i + 2)) + memmove(&laarr[i + 1], + &laarr[i + 2], + sizeof(long_ad) * + (*endnum - (i + 2))); + i--; + (*endnum)--; } } - } - else if (((laarr[i].extLength >> 30) == (EXT_NOT_RECORDED_ALLOCATED >> 30)) && - ((laarr[i+1].extLength >> 30) == (EXT_NOT_RECORDED_NOT_ALLOCATED >> 30))) - { - udf_free_blocks(inode->i_sb, inode, laarr[i].extLocation, 0, - ((laarr[i].extLength & UDF_EXTENT_LENGTH_MASK) + - inode->i_sb->s_blocksize - 1) >> inode->i_sb->s_blocksize_bits); + } else + if (((laarr[i].extLength >> 30) == + (EXT_NOT_RECORDED_ALLOCATED >> 30)) + && ((laarr[i + 1].extLength >> 30) == + (EXT_NOT_RECORDED_NOT_ALLOCATED >> 30))) { + udf_free_blocks(inode->i_sb, inode, + laarr[i].extLocation, 0, + ((laarr[i]. + extLength & UDF_EXTENT_LENGTH_MASK) + + inode->i_sb->s_blocksize - + 1) >> inode->i_sb->s_blocksize_bits); laarr[i].extLocation.logicalBlockNum = 0; laarr[i].extLocation.partitionReferenceNum = 0; if (((laarr[i].extLength & UDF_EXTENT_LENGTH_MASK) + - (laarr[i+1].extLength & UDF_EXTENT_LENGTH_MASK) + - inode->i_sb->s_blocksize - 1) & ~UDF_EXTENT_LENGTH_MASK) - { - laarr[i+1].extLength = (laarr[i+1].extLength - - (laarr[i].extLength & UDF_EXTENT_LENGTH_MASK) + - UDF_EXTENT_LENGTH_MASK) & ~(inode->i_sb->s_blocksize-1); - laarr[i].extLength = (laarr[i].extLength & UDF_EXTENT_FLAG_MASK) + - (UDF_EXTENT_LENGTH_MASK + 1) - inode->i_sb->s_blocksize; + (laarr[i + 1].extLength & UDF_EXTENT_LENGTH_MASK) + + inode->i_sb->s_blocksize - + 1) & ~UDF_EXTENT_LENGTH_MASK) { + laarr[i + 1].extLength = + (laarr[i + 1].extLength - + (laarr[i]. + extLength & UDF_EXTENT_LENGTH_MASK) + + UDF_EXTENT_LENGTH_MASK) & ~(inode->i_sb-> + s_blocksize - + 1); + laarr[i].extLength = + (laarr[i]. + extLength & UDF_EXTENT_FLAG_MASK) + + (UDF_EXTENT_LENGTH_MASK + 1) - + inode->i_sb->s_blocksize; + } else { + laarr[i].extLength = laarr[i + 1].extLength + + (((laarr[i]. + extLength & UDF_EXTENT_LENGTH_MASK) + + inode->i_sb->s_blocksize - + 1) & ~(inode->i_sb->s_blocksize - 1)); + if (*endnum > (i + 2)) + memmove(&laarr[i + 1], &laarr[i + 2], + sizeof(long_ad) * (*endnum - + (i + 2))); + i--; + (*endnum)--; } - else - { - laarr[i].extLength = laarr[i+1].extLength + - (((laarr[i].extLength & UDF_EXTENT_LENGTH_MASK) + - inode->i_sb->s_blocksize - 1) & ~(inode->i_sb->s_blocksize-1)); - if (*endnum > (i+2)) - memmove(&laarr[i+1], &laarr[i+2], - sizeof(long_ad) * (*endnum - (i+2))); - i --; - (*endnum) --; - } - } - else if ((laarr[i].extLength >> 30) == (EXT_NOT_RECORDED_ALLOCATED >> 30)) - { - udf_free_blocks(inode->i_sb, inode, laarr[i].extLocation, 0, - ((laarr[i].extLength & UDF_EXTENT_LENGTH_MASK) + - inode->i_sb->s_blocksize - 1) >> inode->i_sb->s_blocksize_bits); + } else if ((laarr[i].extLength >> 30) == + (EXT_NOT_RECORDED_ALLOCATED >> 30)) { + udf_free_blocks(inode->i_sb, inode, + laarr[i].extLocation, 0, + ((laarr[i]. + extLength & UDF_EXTENT_LENGTH_MASK) + + inode->i_sb->s_blocksize - + 1) >> inode->i_sb->s_blocksize_bits); laarr[i].extLocation.logicalBlockNum = 0; laarr[i].extLocation.partitionReferenceNum = 0; - laarr[i].extLength = (laarr[i].extLength & UDF_EXTENT_LENGTH_MASK) | - EXT_NOT_RECORDED_NOT_ALLOCATED; + laarr[i].extLength = + (laarr[i]. + extLength & UDF_EXTENT_LENGTH_MASK) | + EXT_NOT_RECORDED_NOT_ALLOCATED; } } } static void udf_update_extents(struct inode *inode, - kernel_long_ad laarr[EXTENT_MERGE_SIZE], int startnum, int endnum, - struct extent_position *epos) + kernel_long_ad laarr[EXTENT_MERGE_SIZE], + int startnum, int endnum, + struct extent_position *epos) { int start = 0, i; kernel_lb_addr tmploc; uint32_t tmplen; - if (startnum > endnum) - { - for (i=0; i<(startnum-endnum); i++) + if (startnum > endnum) { + for (i = 0; i < (startnum - endnum); i++) udf_delete_aext(inode, *epos, laarr[i].extLocation, - laarr[i].extLength); - } - else if (startnum < endnum) - { - for (i=0; i<(endnum-startnum); i++) - { + laarr[i].extLength); + } else if (startnum < endnum) { + for (i = 0; i < (endnum - startnum); i++) { udf_insert_aext(inode, *epos, laarr[i].extLocation, - laarr[i].extLength); + laarr[i].extLength); udf_next_aext(inode, epos, &laarr[i].extLocation, - &laarr[i].extLength, 1); - start ++; + &laarr[i].extLength, 1); + start++; } } - for (i=start; i<endnum; i++) - { + for (i = start; i < endnum; i++) { udf_next_aext(inode, epos, &tmploc, &tmplen, 0); udf_write_aext(inode, epos, laarr[i].extLocation, - laarr[i].extLength, 1); + laarr[i].extLength, 1); } } -struct buffer_head * udf_bread(struct inode * inode, int block, - int create, int * err) +struct buffer_head *udf_bread(struct inode *inode, int block, + int create, int *err) { - struct buffer_head * bh = NULL; + struct buffer_head *bh = NULL; bh = udf_getblk(inode, block, create, err); if (!bh) @@ -987,56 +1034,51 @@ struct buffer_head * udf_bread(struct inode * inode, int block, return NULL; } -void udf_truncate(struct inode * inode) +void udf_truncate(struct inode *inode) { int offset; int err; if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || - S_ISLNK(inode->i_mode))) + S_ISLNK(inode->i_mode))) return; if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) return; lock_kernel(); - if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_IN_ICB) - { - if (inode->i_sb->s_blocksize < (udf_file_entry_alloc_offset(inode) + - inode->i_size)) - { + if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_IN_ICB) { + if (inode->i_sb->s_blocksize < + (udf_file_entry_alloc_offset(inode) + inode->i_size)) { udf_expand_file_adinicb(inode, inode->i_size, &err); - if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_IN_ICB) - { + if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_IN_ICB) { inode->i_size = UDF_I_LENALLOC(inode); unlock_kernel(); return; - } - else + } else udf_truncate_extents(inode); - } - else - { + } else { offset = inode->i_size & (inode->i_sb->s_blocksize - 1); - memset(UDF_I_DATA(inode) + UDF_I_LENEATTR(inode) + offset, 0x00, inode->i_sb->s_blocksize - offset - udf_file_entry_alloc_offset(inode)); + memset(UDF_I_DATA(inode) + UDF_I_LENEATTR(inode) + + offset, 0x00, + inode->i_sb->s_blocksize - offset - + udf_file_entry_alloc_offset(inode)); UDF_I_LENALLOC(inode) = inode->i_size; } - } - else - { - block_truncate_page(inode->i_mapping, inode->i_size, udf_get_block); + } else { + block_truncate_page(inode->i_mapping, inode->i_size, + udf_get_block); udf_truncate_extents(inode); } inode->i_mtime = inode->i_ctime = current_fs_time(inode->i_sb); if (IS_SYNC(inode)) - udf_sync_inode (inode); + udf_sync_inode(inode); else mark_inode_dirty(inode); unlock_kernel(); } -static void -__udf_read_inode(struct inode *inode) +static void __udf_read_inode(struct inode *inode) { struct buffer_head *bh = NULL; struct fileEntry *fe; @@ -1056,19 +1098,18 @@ __udf_read_inode(struct inode *inode) */ bh = udf_read_ptagged(inode->i_sb, UDF_I_LOCATION(inode), 0, &ident); - if (!bh) - { + if (!bh) { printk(KERN_ERR "udf: udf_read_inode(ino %ld) failed !bh\n", - inode->i_ino); + inode->i_ino); make_bad_inode(inode); return; } if (ident != TAG_IDENT_FE && ident != TAG_IDENT_EFE && - ident != TAG_IDENT_USE) - { - printk(KERN_ERR "udf: udf_read_inode(ino %ld) failed ident=%d\n", - inode->i_ino, ident); + ident != TAG_IDENT_USE) { + printk(KERN_ERR + "udf: udf_read_inode(ino %ld) failed ident=%d\n", + inode->i_ino, ident); brelse(bh); make_bad_inode(inode); return; @@ -1076,51 +1117,46 @@ __udf_read_inode(struct inode *inode) fe = (struct fileEntry *)bh->b_data; - if (le16_to_cpu(fe->icbTag.strategyType) == 4096) - { + if (le16_to_cpu(fe->icbTag.strategyType) == 4096) { struct buffer_head *ibh = NULL, *nbh = NULL; struct indirectEntry *ie; - ibh = udf_read_ptagged(inode->i_sb, UDF_I_LOCATION(inode), 1, &ident); - if (ident == TAG_IDENT_IE) - { - if (ibh) - { + ibh = + udf_read_ptagged(inode->i_sb, UDF_I_LOCATION(inode), 1, + &ident); + if (ident == TAG_IDENT_IE) { + if (ibh) { kernel_lb_addr loc; ie = (struct indirectEntry *)ibh->b_data; loc = lelb_to_cpu(ie->indirectICB.extLocation); if (ie->indirectICB.extLength && - (nbh = udf_read_ptagged(inode->i_sb, loc, 0, &ident))) - { - if (ident == TAG_IDENT_FE || - ident == TAG_IDENT_EFE) - { - memcpy(&UDF_I_LOCATION(inode), &loc, sizeof(kernel_lb_addr)); + (nbh = + udf_read_ptagged(inode->i_sb, loc, 0, + &ident))) { + if (ident == TAG_IDENT_FE + || ident == TAG_IDENT_EFE) { + memcpy(&UDF_I_LOCATION(inode), + &loc, + sizeof(kernel_lb_addr)); brelse(bh); brelse(ibh); brelse(nbh); __udf_read_inode(inode); return; - } - else - { + } else { brelse(nbh); brelse(ibh); } - } - else + } else brelse(ibh); } - } - else + } else brelse(ibh); - } - else if (le16_to_cpu(fe->icbTag.strategyType) != 4) - { + } else if (le16_to_cpu(fe->icbTag.strategyType) != 4) { printk(KERN_ERR "udf: unsupported strategy type: %d\n", - le16_to_cpu(fe->icbTag.strategyType)); + le16_to_cpu(fe->icbTag.strategyType)); brelse(bh); make_bad_inode(inode); return; @@ -1143,62 +1179,70 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) if (le16_to_cpu(fe->icbTag.strategyType) == 4) UDF_I_STRAT4096(inode) = 0; - else /* if (le16_to_cpu(fe->icbTag.strategyType) == 4096) */ + else /* if (le16_to_cpu(fe->icbTag.strategyType) == 4096) */ UDF_I_STRAT4096(inode) = 1; - UDF_I_ALLOCTYPE(inode) = le16_to_cpu(fe->icbTag.flags) & ICBTAG_FLAG_AD_MASK; + UDF_I_ALLOCTYPE(inode) = + le16_to_cpu(fe->icbTag.flags) & ICBTAG_FLAG_AD_MASK; UDF_I_UNIQUE(inode) = 0; UDF_I_LENEATTR(inode) = 0; UDF_I_LENEXTENTS(inode) = 0; UDF_I_LENALLOC(inode) = 0; UDF_I_NEXT_ALLOC_BLOCK(inode) = 0; UDF_I_NEXT_ALLOC_GOAL(inode) = 0; - if (le16_to_cpu(fe->descTag.tagIdent) == TAG_IDENT_EFE) - { + if (le16_to_cpu(fe->descTag.tagIdent) == TAG_IDENT_EFE) { UDF_I_EFE(inode) = 1; UDF_I_USE(inode) = 0; - if (udf_alloc_i_data(inode, inode->i_sb->s_blocksize - sizeof(struct extendedFileEntry))) - { + if (udf_alloc_i_data + (inode, + inode->i_sb->s_blocksize - + sizeof(struct extendedFileEntry))) { make_bad_inode(inode); return; } - memcpy(UDF_I_DATA(inode), bh->b_data + sizeof(struct extendedFileEntry), inode->i_sb->s_blocksize - sizeof(struct extendedFileEntry)); - } - else if (le16_to_cpu(fe->descTag.tagIdent) == TAG_IDENT_FE) - { + memcpy(UDF_I_DATA(inode), + bh->b_data + sizeof(struct extendedFileEntry), + inode->i_sb->s_blocksize - + sizeof(struct extendedFileEntry)); + } else if (le16_to_cpu(fe->descTag.tagIdent) == TAG_IDENT_FE) { UDF_I_EFE(inode) = 0; UDF_I_USE(inode) = 0; - if (udf_alloc_i_data(inode, inode->i_sb->s_blocksize - sizeof(struct fileEntry))) - { + if (udf_alloc_i_data + (inode, + inode->i_sb->s_blocksize - sizeof(struct fileEntry))) { make_bad_inode(inode); return; } - memcpy(UDF_I_DATA(inode), bh->b_data + sizeof(struct fileEntry), inode->i_sb->s_blocksize - sizeof(struct fileEntry)); - } - else if (le16_to_cpu(fe->descTag.tagIdent) == TAG_IDENT_USE) - { + memcpy(UDF_I_DATA(inode), bh->b_data + sizeof(struct fileEntry), + inode->i_sb->s_blocksize - sizeof(struct fileEntry)); + } else if (le16_to_cpu(fe->descTag.tagIdent) == TAG_IDENT_USE) { UDF_I_EFE(inode) = 0; UDF_I_USE(inode) = 1; UDF_I_LENALLOC(inode) = - le32_to_cpu( - ((struct unallocSpaceEntry *)bh->b_data)->lengthAllocDescs); - if (udf_alloc_i_data(inode, inode->i_sb->s_blocksize - sizeof(struct unallocSpaceEntry))) - { + le32_to_cpu(((struct unallocSpaceEntry *)bh->b_data)-> + lengthAllocDescs); + if (udf_alloc_i_data + (inode, + inode->i_sb->s_blocksize - + sizeof(struct unallocSpaceEntry))) { make_bad_inode(inode); return; } - memcpy(UDF_I_DATA(inode), bh->b_data + sizeof(struct unallocSpaceEntry), inode->i_sb->s_blocksize - sizeof(struct unallocSpaceEntry)); + memcpy(UDF_I_DATA(inode), + bh->b_data + sizeof(struct unallocSpaceEntry), + inode->i_sb->s_blocksize - + sizeof(struct unallocSpaceEntry)); return; } inode->i_uid = le32_to_cpu(fe->uid); if (inode->i_uid == -1 || UDF_QUERY_FLAG(inode->i_sb, - UDF_FLAG_UID_IGNORE)) + UDF_FLAG_UID_IGNORE)) inode->i_uid = UDF_SB(inode->i_sb)->s_uid; inode->i_gid = le32_to_cpu(fe->gid); if (inode->i_gid == -1 || UDF_QUERY_FLAG(inode->i_sb, - UDF_FLAG_GID_IGNORE)) + UDF_FLAG_GID_IGNORE)) inode->i_gid = UDF_SB(inode->i_sb)->s_gid; inode->i_nlink = le16_to_cpu(fe->fileLinkCount); @@ -1211,41 +1255,31 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) inode->i_mode = udf_convert_permissions(fe); inode->i_mode &= ~UDF_SB(inode->i_sb)->s_umask; - if (UDF_I_EFE(inode) == 0) - { + if (UDF_I_EFE(inode) == 0) { inode->i_blocks = le64_to_cpu(fe->logicalBlocksRecorded) << - (inode->i_sb->s_blocksize_bits - 9); + (inode->i_sb->s_blocksize_bits - 9); - if ( udf_stamp_to_time(&convtime, &convtime_usec, - lets_to_cpu(fe->accessTime)) ) - { + if (udf_stamp_to_time(&convtime, &convtime_usec, + lets_to_cpu(fe->accessTime))) { inode->i_atime.tv_sec = convtime; inode->i_atime.tv_nsec = convtime_usec * 1000; - } - else - { + } else { inode->i_atime = UDF_SB_RECORDTIME(inode->i_sb); } - if ( udf_stamp_to_time(&convtime, &convtime_usec, - lets_to_cpu(fe->modificationTime)) ) - { + if (udf_stamp_to_time(&convtime, &convtime_usec, + lets_to_cpu(fe->modificationTime))) { inode->i_mtime.tv_sec = convtime; inode->i_mtime.tv_nsec = convtime_usec * 1000; - } - else - { + } else { inode->i_mtime = UDF_SB_RECORDTIME(inode->i_sb); } - if ( udf_stamp_to_time(&convtime, &convtime_usec, - lets_to_cpu(fe->attrTime)) ) - { + if (udf_stamp_to_time(&convtime, &convtime_usec, + lets_to_cpu(fe->attrTime))) { inode->i_ctime.tv_sec = convtime; inode->i_ctime.tv_nsec = convtime_usec * 1000; - } - else - { + } else { inode->i_ctime = UDF_SB_RECORDTIME(inode->i_sb); } @@ -1253,65 +1287,51 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) UDF_I_LENEATTR(inode) = le32_to_cpu(fe->lengthExtendedAttr); UDF_I_LENALLOC(inode) = le32_to_cpu(fe->lengthAllocDescs); offset = sizeof(struct fileEntry) + UDF_I_LENEATTR(inode); - } - else - { + } else { inode->i_blocks = le64_to_cpu(efe->logicalBlocksRecorded) << - (inode->i_sb->s_blocksize_bits - 9); + (inode->i_sb->s_blocksize_bits - 9); - if ( udf_stamp_to_time(&convtime, &convtime_usec, - lets_to_cpu(efe->accessTime)) ) - { + if (udf_stamp_to_time(&convtime, &convtime_usec, + lets_to_cpu(efe->accessTime))) { inode->i_atime.tv_sec = convtime; inode->i_atime.tv_nsec = convtime_usec * 1000; - } - else - { + } else { inode->i_atime = UDF_SB_RECORDTIME(inode->i_sb); } - if ( udf_stamp_to_time(&convtime, &convtime_usec, - lets_to_cpu(efe->modificationTime)) ) - { + if (udf_stamp_to_time(&convtime, &convtime_usec, + lets_to_cpu(efe->modificationTime))) { inode->i_mtime.tv_sec = convtime; inode->i_mtime.tv_nsec = convtime_usec * 1000; - } - else - { + } else { inode->i_mtime = UDF_SB_RECORDTIME(inode->i_sb); } - if ( udf_stamp_to_time(&convtime, &convtime_usec, - lets_to_cpu(efe->createTime)) ) - { + if (udf_stamp_to_time(&convtime, &convtime_usec, + lets_to_cpu(efe->createTime))) { UDF_I_CRTIME(inode).tv_sec = convtime; UDF_I_CRTIME(inode).tv_nsec = convtime_usec * 1000; - } - else - { + } else { UDF_I_CRTIME(inode) = UDF_SB_RECORDTIME(inode->i_sb); } - if ( udf_stamp_to_time(&convtime, &convtime_usec, - lets_to_cpu(efe->attrTime)) ) - { + if (udf_stamp_to_time(&convtime, &convtime_usec, + lets_to_cpu(efe->attrTime))) { inode->i_ctime.tv_sec = convtime; inode->i_ctime.tv_nsec = convtime_usec * 1000; - } - else - { + } else { inode->i_ctime = UDF_SB_RECORDTIME(inode->i_sb); } UDF_I_UNIQUE(inode) = le64_to_cpu(efe->uniqueID); UDF_I_LENEATTR(inode) = le32_to_cpu(efe->lengthExtendedAttr); UDF_I_LENALLOC(inode) = le32_to_cpu(efe->lengthAllocDescs); - offset = sizeof(struct extendedFileEntry) + UDF_I_LENEATTR(inode); + offset = + sizeof(struct extendedFileEntry) + UDF_I_LENEATTR(inode); } - switch (fe->icbTag.fileType) - { - case ICBTAG_FILE_TYPE_DIRECTORY: + switch (fe->icbTag.fileType) { + case ICBTAG_FILE_TYPE_DIRECTORY: { inode->i_op = &udf_dir_inode_operations; inode->i_fop = &udf_dir_operations; @@ -1319,9 +1339,9 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) inc_nlink(inode); break; } - case ICBTAG_FILE_TYPE_REALTIME: - case ICBTAG_FILE_TYPE_REGULAR: - case ICBTAG_FILE_TYPE_UNDEF: + case ICBTAG_FILE_TYPE_REALTIME: + case ICBTAG_FILE_TYPE_REGULAR: + case ICBTAG_FILE_TYPE_UNDEF: { if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_IN_ICB) inode->i_data.a_ops = &udf_adinicb_aops; @@ -1332,56 +1352,54 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) inode->i_mode |= S_IFREG; break; } - case ICBTAG_FILE_TYPE_BLOCK: + case ICBTAG_FILE_TYPE_BLOCK: { inode->i_mode |= S_IFBLK; break; } - case ICBTAG_FILE_TYPE_CHAR: + case ICBTAG_FILE_TYPE_CHAR: { inode->i_mode |= S_IFCHR; break; } - case ICBTAG_FILE_TYPE_FIFO: + case ICBTAG_FILE_TYPE_FIFO: { init_special_inode(inode, inode->i_mode | S_IFIFO, 0); break; } - case ICBTAG_FILE_TYPE_SOCKET: + case ICBTAG_FILE_TYPE_SOCKET: { init_special_inode(inode, inode->i_mode | S_IFSOCK, 0); break; } - case ICBTAG_FILE_TYPE_SYMLINK: + case ICBTAG_FILE_TYPE_SYMLINK: { inode->i_data.a_ops = &udf_symlink_aops; inode->i_op = &page_symlink_inode_operations; - inode->i_mode = S_IFLNK|S_IRWXUGO; + inode->i_mode = S_IFLNK | S_IRWXUGO; break; } - default: + default: { - printk(KERN_ERR "udf: udf_fill_inode(ino %ld) failed unknown file type=%d\n", - inode->i_ino, fe->icbTag.fileType); + printk(KERN_ERR + "udf: udf_fill_inode(ino %ld) failed unknown file type=%d\n", + inode->i_ino, fe->icbTag.fileType); make_bad_inode(inode); return; } } - if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) - { - struct deviceSpec *dsea = - (struct deviceSpec *) - udf_get_extendedattr(inode, 12, 1); + if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { + struct deviceSpec *dsea = (struct deviceSpec *) + udf_get_extendedattr(inode, 12, 1); - if (dsea) - { - init_special_inode(inode, inode->i_mode, MKDEV( - le32_to_cpu(dsea->majorDeviceIdent), - le32_to_cpu(dsea->minorDeviceIdent))); + if (dsea) { + init_special_inode(inode, inode->i_mode, + MKDEV(le32_to_cpu + (dsea->majorDeviceIdent), + le32_to_cpu(dsea-> + minorDeviceIdent))); /* Developer ID ??? */ - } - else - { + } else { make_bad_inode(inode); } } @@ -1391,9 +1409,9 @@ static int udf_alloc_i_data(struct inode *inode, size_t size) { UDF_I_DATA(inode) = kmalloc(size, GFP_KERNEL); - if (!UDF_I_DATA(inode)) - { - printk(KERN_ERR "udf:udf_alloc_i_data (ino %ld) no free memory\n", + if (!UDF_I_DATA(inode)) { + printk(KERN_ERR + "udf:udf_alloc_i_data (ino %ld) no free memory\n", inode->i_ino); return -ENOMEM; } @@ -1401,8 +1419,7 @@ static int udf_alloc_i_data(struct inode *inode, size_t size) return 0; } -static mode_t -udf_convert_permissions(struct fileEntry *fe) +static mode_t udf_convert_permissions(struct fileEntry *fe) { mode_t mode; uint32_t permissions; @@ -1411,12 +1428,12 @@ udf_convert_permissions(struct fileEntry *fe) permissions = le32_to_cpu(fe->permissions); flags = le16_to_cpu(fe->icbTag.flags); - mode = (( permissions ) & S_IRWXO) | - (( permissions >> 2 ) & S_IRWXG) | - (( permissions >> 4 ) & S_IRWXU) | - (( flags & ICBTAG_FLAG_SETUID) ? S_ISUID : 0) | - (( flags & ICBTAG_FLAG_SETGID) ? S_ISGID : 0) | - (( flags & ICBTAG_FLAG_STICKY) ? S_ISVTX : 0); + mode = ((permissions) & S_IRWXO) | + ((permissions >> 2) & S_IRWXG) | + ((permissions >> 4) & S_IRWXU) | + ((flags & ICBTAG_FLAG_SETUID) ? S_ISUID : 0) | + ((flags & ICBTAG_FLAG_SETGID) ? S_ISGID : 0) | + ((flags & ICBTAG_FLAG_STICKY) ? S_ISVTX : 0); return mode; } @@ -1436,7 +1453,7 @@ udf_convert_permissions(struct fileEntry *fe) * Written, tested, and released. */ -int udf_write_inode(struct inode * inode, int sync) +int udf_write_inode(struct inode *inode, int sync) { int ret; lock_kernel(); @@ -1445,13 +1462,12 @@ int udf_write_inode(struct inode * inode, int sync) return ret; } -int udf_sync_inode(struct inode * inode) +int udf_sync_inode(struct inode *inode) { return udf_update_inode(inode, 1); } -static int -udf_update_inode(struct inode *inode, int do_sync) +static int udf_update_inode(struct inode *inode, int do_sync) { struct buffer_head *bh = NULL; struct fileEntry *fe; @@ -1464,10 +1480,10 @@ udf_update_inode(struct inode *inode, int do_sync) int err = 0; bh = udf_tread(inode->i_sb, - udf_get_lb_pblock(inode->i_sb, UDF_I_LOCATION(inode), 0)); + udf_get_lb_pblock(inode->i_sb, UDF_I_LOCATION(inode), + 0)); - if (!bh) - { + if (!bh) { udf_debug("bread failure\n"); return -EIO; } @@ -1477,23 +1493,29 @@ udf_update_inode(struct inode *inode, int do_sync) fe = (struct fileEntry *)bh->b_data; efe = (struct extendedFileEntry *)bh->b_data; - if (le16_to_cpu(fe->descTag.tagIdent) == TAG_IDENT_USE) - { + if (le16_to_cpu(fe->descTag.tagIdent) == TAG_IDENT_USE) { struct unallocSpaceEntry *use = - (struct unallocSpaceEntry *)bh->b_data; + (struct unallocSpaceEntry *)bh->b_data; use->lengthAllocDescs = cpu_to_le32(UDF_I_LENALLOC(inode)); - memcpy(bh->b_data + sizeof(struct unallocSpaceEntry), UDF_I_DATA(inode), inode->i_sb->s_blocksize - sizeof(struct unallocSpaceEntry)); - crclen = sizeof(struct unallocSpaceEntry) + UDF_I_LENALLOC(inode) - - sizeof(tag); - use->descTag.tagLocation = cpu_to_le32(UDF_I_LOCATION(inode).logicalBlockNum); + memcpy(bh->b_data + sizeof(struct unallocSpaceEntry), + UDF_I_DATA(inode), + inode->i_sb->s_blocksize - + sizeof(struct unallocSpaceEntry)); + crclen = + sizeof(struct unallocSpaceEntry) + UDF_I_LENALLOC(inode) - + sizeof(tag); + use->descTag.tagLocation = + cpu_to_le32(UDF_I_LOCATION(inode).logicalBlockNum); use->descTag.descCRCLength = cpu_to_le16(crclen); - use->descTag.descCRC = cpu_to_le16(udf_crc((char *)use + sizeof(tag), crclen, 0)); + use->descTag.descCRC = + cpu_to_le16(udf_crc((char *)use + sizeof(tag), crclen, 0)); use->descTag.tagChecksum = 0; - for (i=0; i<16; i++) + for (i = 0; i < 16; i++) if (i != 4) - use->descTag.tagChecksum += ((uint8_t *)&(use->descTag))[i]; + use->descTag.tagChecksum += + ((uint8_t *) & (use->descTag))[i]; mark_buffer_dirty(bh); brelse(bh); @@ -1502,20 +1524,21 @@ udf_update_inode(struct inode *inode, int do_sync) if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_UID_FORGET)) fe->uid = cpu_to_le32(-1); - else fe->uid = cpu_to_le32(inode->i_uid); + else + fe->uid = cpu_to_le32(inode->i_uid); if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_GID_FORGET)) fe->gid = cpu_to_le32(-1); - else fe->gid = cpu_to_le32(inode->i_gid); + else + fe->gid = cpu_to_le32(inode->i_gid); - udfperms = ((inode->i_mode & S_IRWXO) ) | - ((inode->i_mode & S_IRWXG) << 2) | - ((inode->i_mode & S_IRWXU) << 4); + udfperms = ((inode->i_mode & S_IRWXO)) | + ((inode->i_mode & S_IRWXG) << 2) | ((inode->i_mode & S_IRWXU) << 4); - udfperms |= (le32_to_cpu(fe->permissions) & - (FE_PERM_O_DELETE | FE_PERM_O_CHATTR | - FE_PERM_G_DELETE | FE_PERM_G_CHATTR | - FE_PERM_U_DELETE | FE_PERM_U_CHATTR)); + udfperms |= (le32_to_cpu(fe->permissions) & + (FE_PERM_O_DELETE | FE_PERM_O_CHATTR | + FE_PERM_G_DELETE | FE_PERM_G_CHATTR | + FE_PERM_U_DELETE | FE_PERM_U_CHATTR)); fe->permissions = cpu_to_le32(udfperms); if (S_ISDIR(inode->i_mode)) @@ -1525,26 +1548,24 @@ udf_update_inode(struct inode *inode, int do_sync) fe->informationLength = cpu_to_le64(inode->i_size); - if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) - { + if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { regid *eid; - struct deviceSpec *dsea = - (struct deviceSpec *) - udf_get_extendedattr(inode, 12, 1); + struct deviceSpec *dsea = (struct deviceSpec *) + udf_get_extendedattr(inode, 12, 1); - if (!dsea) - { + if (!dsea) { dsea = (struct deviceSpec *) - udf_add_extendedattr(inode, - sizeof(struct deviceSpec) + - sizeof(regid), 12, 0x3); + udf_add_extendedattr(inode, + sizeof(struct deviceSpec) + + sizeof(regid), 12, 0x3); dsea->attrType = cpu_to_le32(12); dsea->attrSubtype = 1; - dsea->attrLength = cpu_to_le32(sizeof(struct deviceSpec) + - sizeof(regid)); + dsea->attrLength = + cpu_to_le32(sizeof(struct deviceSpec) + + sizeof(regid)); dsea->impUseLength = cpu_to_le32(sizeof(regid)); } - eid = (regid *)dsea->impUse; + eid = (regid *) dsea->impUse; memset(eid, 0, sizeof(regid)); strcpy(eid->ident, UDF_ID_DEVELOPER); eid->identSuffix[0] = UDF_OS_CLASS_UNIX; @@ -1553,12 +1574,13 @@ udf_update_inode(struct inode *inode, int do_sync) dsea->minorDeviceIdent = cpu_to_le32(iminor(inode)); } - if (UDF_I_EFE(inode) == 0) - { - memcpy(bh->b_data + sizeof(struct fileEntry), UDF_I_DATA(inode), inode->i_sb->s_blocksize - sizeof(struct fileEntry)); - fe->logicalBlocksRecorded = cpu_to_le64( - (inode->i_blocks + (1 << (inode->i_sb->s_blocksize_bits - 9)) - 1) >> - (inode->i_sb->s_blocksize_bits - 9)); + if (UDF_I_EFE(inode) == 0) { + memcpy(bh->b_data + sizeof(struct fileEntry), UDF_I_DATA(inode), + inode->i_sb->s_blocksize - sizeof(struct fileEntry)); + fe->logicalBlocksRecorded = + cpu_to_le64((inode->i_blocks + + (1 << (inode->i_sb->s_blocksize_bits - 9)) - + 1) >> (inode->i_sb->s_blocksize_bits - 9)); if (udf_time_to_stamp(&cpu_time, inode->i_atime)) fe->accessTime = cpu_to_lets(cpu_time); @@ -1575,31 +1597,34 @@ udf_update_inode(struct inode *inode, int do_sync) fe->lengthAllocDescs = cpu_to_le32(UDF_I_LENALLOC(inode)); fe->descTag.tagIdent = cpu_to_le16(TAG_IDENT_FE); crclen = sizeof(struct fileEntry); - } - else - { - memcpy(bh->b_data + sizeof(struct extendedFileEntry), UDF_I_DATA(inode), inode->i_sb->s_blocksize - sizeof(struct extendedFileEntry)); + } else { + memcpy(bh->b_data + sizeof(struct extendedFileEntry), + UDF_I_DATA(inode), + inode->i_sb->s_blocksize - + sizeof(struct extendedFileEntry)); efe->objectSize = cpu_to_le64(inode->i_size); - efe->logicalBlocksRecorded = cpu_to_le64( - (inode->i_blocks + (1 << (inode->i_sb->s_blocksize_bits - 9)) - 1) >> - (inode->i_sb->s_blocksize_bits - 9)); + efe->logicalBlocksRecorded = cpu_to_le64((inode->i_blocks + + (1 << + (inode->i_sb-> + s_blocksize_bits - + 9)) - + 1) >> (inode->i_sb-> + s_blocksize_bits + - 9)); if (UDF_I_CRTIME(inode).tv_sec > inode->i_atime.tv_sec || - (UDF_I_CRTIME(inode).tv_sec == inode->i_atime.tv_sec && - UDF_I_CRTIME(inode).tv_nsec > inode->i_atime.tv_nsec)) - { + (UDF_I_CRTIME(inode).tv_sec == inode->i_atime.tv_sec && + UDF_I_CRTIME(inode).tv_nsec > inode->i_atime.tv_nsec)) { UDF_I_CRTIME(inode) = inode->i_atime; } if (UDF_I_CRTIME(inode).tv_sec > inode->i_mtime.tv_sec || - (UDF_I_CRTIME(inode).tv_sec == inode->i_mtime.tv_sec && - UDF_I_CRTIME(inode).tv_nsec > inode->i_mtime.tv_nsec)) - { + (UDF_I_CRTIME(inode).tv_sec == inode->i_mtime.tv_sec && + UDF_I_CRTIME(inode).tv_nsec > inode->i_mtime.tv_nsec)) { UDF_I_CRTIME(inode) = inode->i_mtime; } if (UDF_I_CRTIME(inode).tv_sec > inode->i_ctime.tv_sec || - (UDF_I_CRTIME(inode).tv_sec == inode->i_ctime.tv_sec && - UDF_I_CRTIME(inode).tv_nsec > inode->i_ctime.tv_nsec)) - { + (UDF_I_CRTIME(inode).tv_sec == inode->i_ctime.tv_sec && + UDF_I_CRTIME(inode).tv_nsec > inode->i_ctime.tv_nsec)) { UDF_I_CRTIME(inode) = inode->i_ctime; } @@ -1622,14 +1647,11 @@ udf_update_inode(struct inode *inode, int do_sync) efe->descTag.tagIdent = cpu_to_le16(TAG_IDENT_EFE); crclen = sizeof(struct extendedFileEntry); } - if (UDF_I_STRAT4096(inode)) - { + if (UDF_I_STRAT4096(inode)) { fe->icbTag.strategyType = cpu_to_le16(4096); fe->icbTag.strategyParameter = cpu_to_le16(1); fe->icbTag.numEntries = cpu_to_le16(2); - } - else - { + } else { fe->icbTag.strategyType = cpu_to_le16(4); fe->icbTag.numEntries = cpu_to_le16(1); } @@ -1649,13 +1671,13 @@ udf_update_inode(struct inode *inode, int do_sync) else if (S_ISSOCK(inode->i_mode)) fe->icbTag.fileType = ICBTAG_FILE_TYPE_SOCKET; - icbflags = UDF_I_ALLOCTYPE(inode) | - ((inode->i_mode & S_ISUID) ? ICBTAG_FLAG_SETUID : 0) | - ((inode->i_mode & S_ISGID) ? ICBTAG_FLAG_SETGID : 0) | - ((inode->i_mode & S_ISVTX) ? ICBTAG_FLAG_STICKY : 0) | - (le16_to_cpu(fe->icbTag.flags) & - ~(ICBTAG_FLAG_AD_MASK | ICBTAG_FLAG_SETUID | - ICBTAG_FLAG_SETGID | ICBTAG_FLAG_STICKY)); + icbflags = UDF_I_ALLOCTYPE(inode) | + ((inode->i_mode & S_ISUID) ? ICBTAG_FLAG_SETUID : 0) | + ((inode->i_mode & S_ISGID) ? ICBTAG_FLAG_SETGID : 0) | + ((inode->i_mode & S_ISVTX) ? ICBTAG_FLAG_STICKY : 0) | + (le16_to_cpu(fe->icbTag.flags) & + ~(ICBTAG_FLAG_AD_MASK | ICBTAG_FLAG_SETUID | + ICBTAG_FLAG_SETGID | ICBTAG_FLAG_STICKY)); fe->icbTag.flags = cpu_to_le16(icbflags); if (UDF_SB_UDFREV(inode->i_sb) >= 0x0200) @@ -1663,25 +1685,26 @@ udf_update_inode(struct inode *inode, int do_sync) else fe->descTag.descVersion = cpu_to_le16(2); fe->descTag.tagSerialNum = cpu_to_le16(UDF_SB_SERIALNUM(inode->i_sb)); - fe->descTag.tagLocation = cpu_to_le32(UDF_I_LOCATION(inode).logicalBlockNum); + fe->descTag.tagLocation = + cpu_to_le32(UDF_I_LOCATION(inode).logicalBlockNum); crclen += UDF_I_LENEATTR(inode) + UDF_I_LENALLOC(inode) - sizeof(tag); fe->descTag.descCRCLength = cpu_to_le16(crclen); - fe->descTag.descCRC = cpu_to_le16(udf_crc((char *)fe + sizeof(tag), crclen, 0)); + fe->descTag.descCRC = + cpu_to_le16(udf_crc((char *)fe + sizeof(tag), crclen, 0)); fe->descTag.tagChecksum = 0; - for (i=0; i<16; i++) + for (i = 0; i < 16; i++) if (i != 4) - fe->descTag.tagChecksum += ((uint8_t *)&(fe->descTag))[i]; + fe->descTag.tagChecksum += + ((uint8_t *) & (fe->descTag))[i]; /* write the data blocks */ mark_buffer_dirty(bh); - if (do_sync) - { + if (do_sync) { sync_dirty_buffer(bh); - if (buffer_req(bh) && !buffer_uptodate(bh)) - { + if (buffer_req(bh) && !buffer_uptodate(bh)) { printk("IO error syncing udf inode [%s:%08lx]\n", - inode->i_sb->s_id, inode->i_ino); + inode->i_sb->s_id, inode->i_ino); err = -EIO; } } @@ -1689,8 +1712,7 @@ udf_update_inode(struct inode *inode, int do_sync) return err; } -struct inode * -udf_iget(struct super_block *sb, kernel_lb_addr ino) +struct inode *udf_iget(struct super_block *sb, kernel_lb_addr ino) { unsigned long block = udf_get_lb_pblock(sb, ino, 0); struct inode *inode = iget_locked(sb, block); @@ -1707,22 +1729,23 @@ udf_iget(struct super_block *sb, kernel_lb_addr ino) if (is_bad_inode(inode)) goto out_iput; - if (ino.logicalBlockNum >= UDF_SB_PARTLEN(sb, ino.partitionReferenceNum)) { + if (ino.logicalBlockNum >= + UDF_SB_PARTLEN(sb, ino.partitionReferenceNum)) { udf_debug("block=%d, partition=%d out of range\n", - ino.logicalBlockNum, ino.partitionReferenceNum); + ino.logicalBlockNum, ino.partitionReferenceNum); make_bad_inode(inode); goto out_iput; } return inode; - out_iput: + out_iput: iput(inode); return NULL; } -int8_t udf_add_aext(struct inode *inode, struct extent_position *epos, - kernel_lb_addr eloc, uint32_t elen, int inc) +int8_t udf_add_aext(struct inode * inode, struct extent_position * epos, + kernel_lb_addr eloc, uint32_t elen, int inc) { int adsize; short_ad *sad = NULL; @@ -1732,7 +1755,9 @@ int8_t udf_add_aext(struct inode *inode, struct extent_position *epos, uint8_t *ptr; if (!epos->bh) - ptr = UDF_I_DATA(inode) + epos->offset - udf_file_entry_alloc_offset(inode) + UDF_I_LENEATTR(inode); + ptr = + UDF_I_DATA(inode) + epos->offset - + udf_file_entry_alloc_offset(inode) + UDF_I_LENEATTR(inode); else ptr = epos->bh->b_data + epos->offset; @@ -1743,21 +1768,24 @@ int8_t udf_add_aext(struct inode *inode, struct extent_position *epos, else return -1; - if (epos->offset + (2 * adsize) > inode->i_sb->s_blocksize) - { + if (epos->offset + (2 * adsize) > inode->i_sb->s_blocksize) { char *sptr, *dptr; struct buffer_head *nbh; int err, loffset; kernel_lb_addr obloc = epos->block; - if (!(epos->block.logicalBlockNum = udf_new_block(inode->i_sb, NULL, - obloc.partitionReferenceNum, obloc.logicalBlockNum, &err))) - { + if (! + (epos->block.logicalBlockNum = + udf_new_block(inode->i_sb, NULL, + obloc.partitionReferenceNum, + obloc.logicalBlockNum, &err))) { return -1; } - if (!(nbh = udf_tgetblk(inode->i_sb, udf_get_lb_pblock(inode->i_sb, - epos->block, 0)))) - { + if (! + (nbh = + udf_tgetblk(inode->i_sb, + udf_get_lb_pblock(inode->i_sb, epos->block, + 0)))) { return -1; } lock_buffer(nbh); @@ -1768,144 +1796,142 @@ int8_t udf_add_aext(struct inode *inode, struct extent_position *epos, aed = (struct allocExtDesc *)(nbh->b_data); if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT)) - aed->previousAllocExtLocation = cpu_to_le32(obloc.logicalBlockNum); - if (epos->offset + adsize > inode->i_sb->s_blocksize) - { + aed->previousAllocExtLocation = + cpu_to_le32(obloc.logicalBlockNum); + if (epos->offset + adsize > inode->i_sb->s_blocksize) { loffset = epos->offset; aed->lengthAllocDescs = cpu_to_le32(adsize); sptr = ptr - adsize; dptr = nbh->b_data + sizeof(struct allocExtDesc); memcpy(dptr, sptr, adsize); epos->offset = sizeof(struct allocExtDesc) + adsize; - } - else - { + } else { loffset = epos->offset + adsize; aed->lengthAllocDescs = cpu_to_le32(0); sptr = ptr; epos->offset = sizeof(struct allocExtDesc); - if (epos->bh) - { + if (epos->bh) { aed = (struct allocExtDesc *)epos->bh->b_data; aed->lengthAllocDescs = - cpu_to_le32(le32_to_cpu(aed->lengthAllocDescs) + adsize); - } - else - { + cpu_to_le32(le32_to_cpu + (aed->lengthAllocDescs) + + adsize); + } else { UDF_I_LENALLOC(inode) += adsize; mark_inode_dirty(inode); } } if (UDF_SB_UDFREV(inode->i_sb) >= 0x0200) udf_new_tag(nbh->b_data, TAG_IDENT_AED, 3, 1, - epos->block.logicalBlockNum, sizeof(tag)); + epos->block.logicalBlockNum, sizeof(tag)); else udf_new_tag(nbh->b_data, TAG_IDENT_AED, 2, 1, - epos->block.logicalBlockNum, sizeof(tag)); - switch (UDF_I_ALLOCTYPE(inode)) - { - case ICBTAG_FLAG_AD_SHORT: + epos->block.logicalBlockNum, sizeof(tag)); + switch (UDF_I_ALLOCTYPE(inode)) { + case ICBTAG_FLAG_AD_SHORT: { - sad = (short_ad *)sptr; - sad->extLength = cpu_to_le32( - EXT_NEXT_EXTENT_ALLOCDECS | - inode->i_sb->s_blocksize); - sad->extPosition = cpu_to_le32(epos->block.logicalBlockNum); + sad = (short_ad *) sptr; + sad->extLength = + cpu_to_le32(EXT_NEXT_EXTENT_ALLOCDECS | + inode->i_sb->s_blocksize); + sad->extPosition = + cpu_to_le32(epos->block.logicalBlockNum); break; } - case ICBTAG_FLAG_AD_LONG: + case ICBTAG_FLAG_AD_LONG: { - lad = (long_ad *)sptr; - lad->extLength = cpu_to_le32( - EXT_NEXT_EXTENT_ALLOCDECS | - inode->i_sb->s_blocksize); + lad = (long_ad *) sptr; + lad->extLength = + cpu_to_le32(EXT_NEXT_EXTENT_ALLOCDECS | + inode->i_sb->s_blocksize); lad->extLocation = cpu_to_lelb(epos->block); memset(lad->impUse, 0x00, sizeof(lad->impUse)); break; } } - if (epos->bh) - { - if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) || UDF_SB_UDFREV(inode->i_sb) >= 0x0201) + if (epos->bh) { + if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) + || UDF_SB_UDFREV(inode->i_sb) >= 0x0201) udf_update_tag(epos->bh->b_data, loffset); else - udf_update_tag(epos->bh->b_data, sizeof(struct allocExtDesc)); + udf_update_tag(epos->bh->b_data, + sizeof(struct allocExtDesc)); mark_buffer_dirty_inode(epos->bh, inode); brelse(epos->bh); - } - else + } else mark_inode_dirty(inode); epos->bh = nbh; } etype = udf_write_aext(inode, epos, eloc, elen, inc); - if (!epos->bh) - { + if (!epos->bh) { UDF_I_LENALLOC(inode) += adsize; mark_inode_dirty(inode); - } - else - { + } else { aed = (struct allocExtDesc *)epos->bh->b_data; aed->lengthAllocDescs = - cpu_to_le32(le32_to_cpu(aed->lengthAllocDescs) + adsize); - if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) || UDF_SB_UDFREV(inode->i_sb) >= 0x0201) - udf_update_tag(epos->bh->b_data, epos->offset + (inc ? 0 : adsize)); + cpu_to_le32(le32_to_cpu(aed->lengthAllocDescs) + adsize); + if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) + || UDF_SB_UDFREV(inode->i_sb) >= 0x0201) + udf_update_tag(epos->bh->b_data, + epos->offset + (inc ? 0 : adsize)); else - udf_update_tag(epos->bh->b_data, sizeof(struct allocExtDesc)); + udf_update_tag(epos->bh->b_data, + sizeof(struct allocExtDesc)); mark_buffer_dirty_inode(epos->bh, inode); } return etype; } -int8_t udf_write_aext(struct inode *inode, struct extent_position *epos, - kernel_lb_addr eloc, uint32_t elen, int inc) +int8_t udf_write_aext(struct inode * inode, struct extent_position * epos, + kernel_lb_addr eloc, uint32_t elen, int inc) { int adsize; uint8_t *ptr; if (!epos->bh) - ptr = UDF_I_DATA(inode) + epos->offset - udf_file_entry_alloc_offset(inode) + UDF_I_LENEATTR(inode); + ptr = + UDF_I_DATA(inode) + epos->offset - + udf_file_entry_alloc_offset(inode) + UDF_I_LENEATTR(inode); else ptr = epos->bh->b_data + epos->offset; - switch (UDF_I_ALLOCTYPE(inode)) - { - case ICBTAG_FLAG_AD_SHORT: + switch (UDF_I_ALLOCTYPE(inode)) { + case ICBTAG_FLAG_AD_SHORT: { - short_ad *sad = (short_ad *)ptr; + short_ad *sad = (short_ad *) ptr; sad->extLength = cpu_to_le32(elen); sad->extPosition = cpu_to_le32(eloc.logicalBlockNum); adsize = sizeof(short_ad); break; } - case ICBTAG_FLAG_AD_LONG: + case ICBTAG_FLAG_AD_LONG: { - long_ad *lad = (long_ad *)ptr; + long_ad *lad = (long_ad *) ptr; lad->extLength = cpu_to_le32(elen); lad->extLocation = cpu_to_lelb(eloc); memset(lad->impUse, 0x00, sizeof(lad->impUse)); adsize = sizeof(long_ad); break; } - default: - return -1; + default: + return -1; } - if (epos->bh) - { - if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) || UDF_SB_UDFREV(inode->i_sb) >= 0x0201) - { - struct allocExtDesc *aed = (struct allocExtDesc *)epos->bh->b_data; + if (epos->bh) { + if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) + || UDF_SB_UDFREV(inode->i_sb) >= 0x0201) { + struct allocExtDesc *aed = + (struct allocExtDesc *)epos->bh->b_data; udf_update_tag(epos->bh->b_data, - le32_to_cpu(aed->lengthAllocDescs) + sizeof(struct allocExtDesc)); + le32_to_cpu(aed->lengthAllocDescs) + + sizeof(struct allocExtDesc)); } mark_buffer_dirty_inode(epos->bh, inode); - } - else + } else mark_inode_dirty(inode); if (inc) @@ -1913,21 +1939,24 @@ int8_t udf_write_aext(struct inode *inode, struct extent_position *epos, return (elen >> 30); } -int8_t udf_next_aext(struct inode *inode, struct extent_position *epos, - kernel_lb_addr *eloc, uint32_t *elen, int inc) +int8_t udf_next_aext(struct inode * inode, struct extent_position * epos, + kernel_lb_addr * eloc, uint32_t * elen, int inc) { int8_t etype; while ((etype = udf_current_aext(inode, epos, eloc, elen, inc)) == - (EXT_NEXT_EXTENT_ALLOCDECS >> 30)) - { + (EXT_NEXT_EXTENT_ALLOCDECS >> 30)) { epos->block = *eloc; epos->offset = sizeof(struct allocExtDesc); brelse(epos->bh); - if (!(epos->bh = udf_tread(inode->i_sb, udf_get_lb_pblock(inode->i_sb, epos->block, 0)))) - { + if (! + (epos->bh = + udf_tread(inode->i_sb, + udf_get_lb_pblock(inode->i_sb, epos->block, + 0)))) { udf_debug("reading block %d failed!\n", - udf_get_lb_pblock(inode->i_sb, epos->block, 0)); + udf_get_lb_pblock(inode->i_sb, epos->block, + 0)); return -1; } } @@ -1935,58 +1964,71 @@ int8_t udf_next_aext(struct inode *inode, struct extent_position *epos, return etype; } -int8_t udf_current_aext(struct inode *inode, struct extent_position *epos, - kernel_lb_addr *eloc, uint32_t *elen, int inc) +int8_t udf_current_aext(struct inode * inode, struct extent_position * epos, + kernel_lb_addr * eloc, uint32_t * elen, int inc) { int alen; int8_t etype; uint8_t *ptr; - if (!epos->bh) - { + if (!epos->bh) { if (!epos->offset) epos->offset = udf_file_entry_alloc_offset(inode); - ptr = UDF_I_DATA(inode) + epos->offset - udf_file_entry_alloc_offset(inode) + UDF_I_LENEATTR(inode); - alen = udf_file_entry_alloc_offset(inode) + UDF_I_LENALLOC(inode); - } - else - { + ptr = + UDF_I_DATA(inode) + epos->offset - + udf_file_entry_alloc_offset(inode) + UDF_I_LENEATTR(inode); + alen = + udf_file_entry_alloc_offset(inode) + UDF_I_LENALLOC(inode); + } else { if (!epos->offset) epos->offset = sizeof(struct allocExtDesc); ptr = epos->bh->b_data + epos->offset; - alen = sizeof(struct allocExtDesc) + le32_to_cpu(((struct allocExtDesc *)epos->bh->b_data)->lengthAllocDescs); + alen = + sizeof(struct allocExtDesc) + + le32_to_cpu(((struct allocExtDesc *)epos->bh->b_data)-> + lengthAllocDescs); } - switch (UDF_I_ALLOCTYPE(inode)) - { - case ICBTAG_FLAG_AD_SHORT: + switch (UDF_I_ALLOCTYPE(inode)) { + case ICBTAG_FLAG_AD_SHORT: { short_ad *sad; - if (!(sad = udf_get_fileshortad(ptr, alen, &epos->offset, inc))) + if (! + (sad = + udf_get_fileshortad(ptr, alen, &epos->offset, + inc))) return -1; etype = le32_to_cpu(sad->extLength) >> 30; eloc->logicalBlockNum = le32_to_cpu(sad->extPosition); - eloc->partitionReferenceNum = UDF_I_LOCATION(inode).partitionReferenceNum; - *elen = le32_to_cpu(sad->extLength) & UDF_EXTENT_LENGTH_MASK; + eloc->partitionReferenceNum = + UDF_I_LOCATION(inode).partitionReferenceNum; + *elen = + le32_to_cpu(sad-> + extLength) & UDF_EXTENT_LENGTH_MASK; break; } - case ICBTAG_FLAG_AD_LONG: + case ICBTAG_FLAG_AD_LONG: { long_ad *lad; - if (!(lad = udf_get_filelongad(ptr, alen, &epos->offset, inc))) + if (! + (lad = + udf_get_filelongad(ptr, alen, &epos->offset, inc))) return -1; etype = le32_to_cpu(lad->extLength) >> 30; *eloc = lelb_to_cpu(lad->extLocation); - *elen = le32_to_cpu(lad->extLength) & UDF_EXTENT_LENGTH_MASK; + *elen = + le32_to_cpu(lad-> + extLength) & UDF_EXTENT_LENGTH_MASK; break; } - default: + default: { - udf_debug("alloc_type = %d unsupported\n", UDF_I_ALLOCTYPE(inode)); + udf_debug("alloc_type = %d unsupported\n", + UDF_I_ALLOCTYPE(inode)); return -1; } } @@ -2005,8 +2047,7 @@ udf_insert_aext(struct inode *inode, struct extent_position epos, if (epos.bh) get_bh(epos.bh); - while ((etype = udf_next_aext(inode, &epos, &oeloc, &oelen, 0)) != -1) - { + while ((etype = udf_next_aext(inode, &epos, &oeloc, &oelen, 0)) != -1) { udf_write_aext(inode, &epos, neloc, nelen, 1); neloc = oeloc; @@ -2017,16 +2058,15 @@ udf_insert_aext(struct inode *inode, struct extent_position epos, return (nelen >> 30); } -int8_t udf_delete_aext(struct inode *inode, struct extent_position epos, - kernel_lb_addr eloc, uint32_t elen) +int8_t udf_delete_aext(struct inode * inode, struct extent_position epos, + kernel_lb_addr eloc, uint32_t elen) { struct extent_position oepos; int adsize; int8_t etype; struct allocExtDesc *aed; - if (epos.bh) - { + if (epos.bh) { get_bh(epos.bh); get_bh(epos.bh); } @@ -2042,11 +2082,9 @@ int8_t udf_delete_aext(struct inode *inode, struct extent_position epos, if (udf_next_aext(inode, &epos, &eloc, &elen, 1) == -1) return -1; - while ((etype = udf_next_aext(inode, &epos, &eloc, &elen, 1)) != -1) - { + while ((etype = udf_next_aext(inode, &epos, &eloc, &elen, 1)) != -1) { udf_write_aext(inode, &oepos, eloc, (etype << 30) | elen, 1); - if (oepos.bh != epos.bh) - { + if (oepos.bh != epos.bh) { oepos.block = epos.block; brelse(oepos.bh); get_bh(epos.bh); @@ -2057,45 +2095,44 @@ int8_t udf_delete_aext(struct inode *inode, struct extent_position epos, memset(&eloc, 0x00, sizeof(kernel_lb_addr)); elen = 0; - if (epos.bh != oepos.bh) - { + if (epos.bh != oepos.bh) { udf_free_blocks(inode->i_sb, inode, epos.block, 0, 1); udf_write_aext(inode, &oepos, eloc, elen, 1); udf_write_aext(inode, &oepos, eloc, elen, 1); - if (!oepos.bh) - { + if (!oepos.bh) { UDF_I_LENALLOC(inode) -= (adsize * 2); mark_inode_dirty(inode); - } - else - { + } else { aed = (struct allocExtDesc *)oepos.bh->b_data; aed->lengthAllocDescs = - cpu_to_le32(le32_to_cpu(aed->lengthAllocDescs) - (2*adsize)); - if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) || UDF_SB_UDFREV(inode->i_sb) >= 0x0201) - udf_update_tag(oepos.bh->b_data, oepos.offset - (2*adsize)); + cpu_to_le32(le32_to_cpu(aed->lengthAllocDescs) - + (2 * adsize)); + if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) + || UDF_SB_UDFREV(inode->i_sb) >= 0x0201) + udf_update_tag(oepos.bh->b_data, + oepos.offset - (2 * adsize)); else - udf_update_tag(oepos.bh->b_data, sizeof(struct allocExtDesc)); + udf_update_tag(oepos.bh->b_data, + sizeof(struct allocExtDesc)); mark_buffer_dirty_inode(oepos.bh, inode); } - } - else - { + } else { udf_write_aext(inode, &oepos, eloc, elen, 1); - if (!oepos.bh) - { + if (!oepos.bh) { UDF_I_LENALLOC(inode) -= adsize; mark_inode_dirty(inode); - } - else - { + } else { aed = (struct allocExtDesc *)oepos.bh->b_data; aed->lengthAllocDescs = - cpu_to_le32(le32_to_cpu(aed->lengthAllocDescs) - adsize); - if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) || UDF_SB_UDFREV(inode->i_sb) >= 0x0201) - udf_update_tag(oepos.bh->b_data, epos.offset - adsize); + cpu_to_le32(le32_to_cpu(aed->lengthAllocDescs) - + adsize); + if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) + || UDF_SB_UDFREV(inode->i_sb) >= 0x0201) + udf_update_tag(oepos.bh->b_data, + epos.offset - adsize); else - udf_update_tag(oepos.bh->b_data, sizeof(struct allocExtDesc)); + udf_update_tag(oepos.bh->b_data, + sizeof(struct allocExtDesc)); mark_buffer_dirty_inode(oepos.bh, inode); } } @@ -2105,14 +2142,15 @@ int8_t udf_delete_aext(struct inode *inode, struct extent_position epos, return (elen >> 30); } -int8_t inode_bmap(struct inode *inode, sector_t block, struct extent_position *pos, - kernel_lb_addr *eloc, uint32_t *elen, sector_t *offset) +int8_t inode_bmap(struct inode * inode, sector_t block, + struct extent_position * pos, kernel_lb_addr * eloc, + uint32_t * elen, sector_t * offset) { - loff_t lbcount = 0, bcount = (loff_t)block << inode->i_sb->s_blocksize_bits; + loff_t lbcount = 0, bcount = + (loff_t) block << inode->i_sb->s_blocksize_bits; int8_t etype; - if (block < 0) - { + if (block < 0) { printk(KERN_ERR "udf: inode_bmap: block < 0\n"); return -1; } @@ -2122,11 +2160,10 @@ int8_t inode_bmap(struct inode *inode, sector_t block, struct extent_position *p pos->bh = NULL; *elen = 0; - do - { - if ((etype = udf_next_aext(inode, pos, eloc, elen, 1)) == -1) - { - *offset = (bcount - lbcount) >> inode->i_sb->s_blocksize_bits; + do { + if ((etype = udf_next_aext(inode, pos, eloc, elen, 1)) == -1) { + *offset = + (bcount - lbcount) >> inode->i_sb->s_blocksize_bits; UDF_I_LENEXTENTS(inode) = lbcount; return -1; } @@ -2143,12 +2180,13 @@ long udf_block_map(struct inode *inode, sector_t block) kernel_lb_addr eloc; uint32_t elen; sector_t offset; - struct extent_position epos = { NULL, 0, { 0, 0}}; + struct extent_position epos = { NULL, 0, {0, 0} }; int ret; lock_kernel(); - if (inode_bmap(inode, block, &epos, &eloc, &elen, &offset) == (EXT_RECORDED_ALLOCATED >> 30)) + if (inode_bmap(inode, block, &epos, &eloc, &elen, &offset) == + (EXT_RECORDED_ALLOCATED >> 30)) ret = udf_get_lb_pblock(inode->i_sb, eloc, offset); else ret = 0; diff --git a/fs/udf/lowlevel.c b/fs/udf/lowlevel.c index 084216107667..4826c3616eef 100644 --- a/fs/udf/lowlevel.c +++ b/fs/udf/lowlevel.c @@ -26,43 +26,38 @@ #include <linux/udf_fs.h> #include "udf_sb.h" -unsigned int -udf_get_last_session(struct super_block *sb) +unsigned int udf_get_last_session(struct super_block *sb) { struct cdrom_multisession ms_info; unsigned int vol_desc_start; struct block_device *bdev = sb->s_bdev; int i; - vol_desc_start=0; - ms_info.addr_format=CDROM_LBA; - i = ioctl_by_bdev(bdev, CDROMMULTISESSION, (unsigned long) &ms_info); + vol_desc_start = 0; + ms_info.addr_format = CDROM_LBA; + i = ioctl_by_bdev(bdev, CDROMMULTISESSION, (unsigned long)&ms_info); #define WE_OBEY_THE_WRITTEN_STANDARDS 1 - if (i == 0) - { + if (i == 0) { udf_debug("XA disk: %s, vol_desc_start=%d\n", - (ms_info.xa_flag ? "yes" : "no"), ms_info.addr.lba); + (ms_info.xa_flag ? "yes" : "no"), ms_info.addr.lba); #if WE_OBEY_THE_WRITTEN_STANDARDS - if (ms_info.xa_flag) /* necessary for a valid ms_info.addr */ + if (ms_info.xa_flag) /* necessary for a valid ms_info.addr */ #endif vol_desc_start = ms_info.addr.lba; - } - else - { + } else { udf_debug("CDROMMULTISESSION not supported: rc=%d\n", i); } return vol_desc_start; } -unsigned long -udf_get_last_block(struct super_block *sb) +unsigned long udf_get_last_block(struct super_block *sb) { struct block_device *bdev = sb->s_bdev; unsigned long lblock = 0; - if (ioctl_by_bdev(bdev, CDROM_LAST_WRITTEN, (unsigned long) &lblock)) + if (ioctl_by_bdev(bdev, CDROM_LAST_WRITTEN, (unsigned long)&lblock)) lblock = bdev->bd_inode->i_size >> sb->s_blocksize_bits; if (lblock) diff --git a/fs/udf/misc.c b/fs/udf/misc.c index a2b2a98ce78a..a7f57277a96e 100644 --- a/fs/udf/misc.c +++ b/fs/udf/misc.c @@ -29,8 +29,7 @@ #include "udf_i.h" #include "udf_sb.h" -struct buffer_head * -udf_tgetblk(struct super_block *sb, int block) +struct buffer_head *udf_tgetblk(struct super_block *sb, int block) { if (UDF_QUERY_FLAG(sb, UDF_FLAG_VARCONV)) return sb_getblk(sb, udf_fixed_to_variable(block)); @@ -38,8 +37,7 @@ udf_tgetblk(struct super_block *sb, int block) return sb_getblk(sb, block); } -struct buffer_head * -udf_tread(struct super_block *sb, int block) +struct buffer_head *udf_tread(struct super_block *sb, int block) { if (UDF_QUERY_FLAG(sb, UDF_FLAG_VARCONV)) return sb_bread(sb, udf_fixed_to_variable(block)); @@ -47,9 +45,8 @@ udf_tread(struct super_block *sb, int block) return sb_bread(sb, block); } -struct genericFormat * -udf_add_extendedattr(struct inode * inode, uint32_t size, uint32_t type, - uint8_t loc) +struct genericFormat *udf_add_extendedattr(struct inode *inode, uint32_t size, + uint32_t type, uint8_t loc) { uint8_t *ea = NULL, *ad = NULL; int offset; @@ -59,78 +56,76 @@ udf_add_extendedattr(struct inode * inode, uint32_t size, uint32_t type, ea = UDF_I_DATA(inode); if (UDF_I_LENEATTR(inode)) ad = UDF_I_DATA(inode) + UDF_I_LENEATTR(inode); - else - { + else { ad = ea; size += sizeof(struct extendedAttrHeaderDesc); } offset = inode->i_sb->s_blocksize - udf_file_entry_alloc_offset(inode) - - UDF_I_LENALLOC(inode); + UDF_I_LENALLOC(inode); /* TODO - Check for FreeEASpace */ - if (loc & 0x01 && offset >= size) - { + if (loc & 0x01 && offset >= size) { struct extendedAttrHeaderDesc *eahd; eahd = (struct extendedAttrHeaderDesc *)ea; - if (UDF_I_LENALLOC(inode)) - { + if (UDF_I_LENALLOC(inode)) { memmove(&ad[size], ad, UDF_I_LENALLOC(inode)); } - if (UDF_I_LENEATTR(inode)) - { + if (UDF_I_LENEATTR(inode)) { /* check checksum/crc */ - if (le16_to_cpu(eahd->descTag.tagIdent) != TAG_IDENT_EAHD || - le32_to_cpu(eahd->descTag.tagLocation) != UDF_I_LOCATION(inode).logicalBlockNum) - { + if (le16_to_cpu(eahd->descTag.tagIdent) != + TAG_IDENT_EAHD + || le32_to_cpu(eahd->descTag.tagLocation) != + UDF_I_LOCATION(inode).logicalBlockNum) { return NULL; } - } - else - { + } else { size -= sizeof(struct extendedAttrHeaderDesc); - UDF_I_LENEATTR(inode) += sizeof(struct extendedAttrHeaderDesc); + UDF_I_LENEATTR(inode) += + sizeof(struct extendedAttrHeaderDesc); eahd->descTag.tagIdent = cpu_to_le16(TAG_IDENT_EAHD); if (UDF_SB_UDFREV(inode->i_sb) >= 0x0200) eahd->descTag.descVersion = cpu_to_le16(3); else eahd->descTag.descVersion = cpu_to_le16(2); - eahd->descTag.tagSerialNum = cpu_to_le16(UDF_SB_SERIALNUM(inode->i_sb)); - eahd->descTag.tagLocation = cpu_to_le32(UDF_I_LOCATION(inode).logicalBlockNum); + eahd->descTag.tagSerialNum = + cpu_to_le16(UDF_SB_SERIALNUM(inode->i_sb)); + eahd->descTag.tagLocation = + cpu_to_le32(UDF_I_LOCATION(inode).logicalBlockNum); eahd->impAttrLocation = cpu_to_le32(0xFFFFFFFF); eahd->appAttrLocation = cpu_to_le32(0xFFFFFFFF); } offset = UDF_I_LENEATTR(inode); - if (type < 2048) - { - if (le32_to_cpu(eahd->appAttrLocation) < UDF_I_LENEATTR(inode)) - { - uint32_t aal = le32_to_cpu(eahd->appAttrLocation); - memmove(&ea[offset - aal + size], - &ea[aal], offset - aal); + if (type < 2048) { + if (le32_to_cpu(eahd->appAttrLocation) < + UDF_I_LENEATTR(inode)) { + uint32_t aal = + le32_to_cpu(eahd->appAttrLocation); + memmove(&ea[offset - aal + size], &ea[aal], + offset - aal); offset -= aal; eahd->appAttrLocation = cpu_to_le32(aal + size); } - if (le32_to_cpu(eahd->impAttrLocation) < UDF_I_LENEATTR(inode)) - { - uint32_t ial = le32_to_cpu(eahd->impAttrLocation); - memmove(&ea[offset - ial + size], - &ea[ial], offset - ial); + if (le32_to_cpu(eahd->impAttrLocation) < + UDF_I_LENEATTR(inode)) { + uint32_t ial = + le32_to_cpu(eahd->impAttrLocation); + memmove(&ea[offset - ial + size], &ea[ial], + offset - ial); offset -= ial; eahd->impAttrLocation = cpu_to_le32(ial + size); } - } - else if (type < 65536) - { - if (le32_to_cpu(eahd->appAttrLocation) < UDF_I_LENEATTR(inode)) - { - uint32_t aal = le32_to_cpu(eahd->appAttrLocation); - memmove(&ea[offset - aal + size], - &ea[aal], offset - aal); + } else if (type < 65536) { + if (le32_to_cpu(eahd->appAttrLocation) < + UDF_I_LENEATTR(inode)) { + uint32_t aal = + le32_to_cpu(eahd->appAttrLocation); + memmove(&ea[offset - aal + size], &ea[aal], + offset - aal); offset -= aal; eahd->appAttrLocation = cpu_to_le32(aal + size); } @@ -138,22 +133,23 @@ udf_add_extendedattr(struct inode * inode, uint32_t size, uint32_t type, /* rewrite CRC + checksum of eahd */ crclen = sizeof(struct extendedAttrHeaderDesc) - sizeof(tag); eahd->descTag.descCRCLength = cpu_to_le16(crclen); - eahd->descTag.descCRC = cpu_to_le16(udf_crc((char *)eahd + sizeof(tag), crclen, 0)); + eahd->descTag.descCRC = + cpu_to_le16(udf_crc((char *)eahd + sizeof(tag), crclen, 0)); eahd->descTag.tagChecksum = 0; - for (i=0; i<16; i++) + for (i = 0; i < 16; i++) if (i != 4) - eahd->descTag.tagChecksum += ((uint8_t *)&(eahd->descTag))[i]; + eahd->descTag.tagChecksum += + ((uint8_t *) & (eahd->descTag))[i]; UDF_I_LENEATTR(inode) += size; return (struct genericFormat *)&ea[offset]; } - if (loc & 0x02) - { + if (loc & 0x02) { } return NULL; } -struct genericFormat * -udf_get_extendedattr(struct inode *inode, uint32_t type, uint8_t subtype) +struct genericFormat *udf_get_extendedattr(struct inode *inode, uint32_t type, + uint8_t subtype) { struct genericFormat *gaf; uint8_t *ea = NULL; @@ -161,18 +157,17 @@ udf_get_extendedattr(struct inode *inode, uint32_t type, uint8_t subtype) ea = UDF_I_DATA(inode); - if (UDF_I_LENEATTR(inode)) - { + if (UDF_I_LENEATTR(inode)) { struct extendedAttrHeaderDesc *eahd; eahd = (struct extendedAttrHeaderDesc *)ea; /* check checksum/crc */ if (le16_to_cpu(eahd->descTag.tagIdent) != TAG_IDENT_EAHD || - le32_to_cpu(eahd->descTag.tagLocation) != UDF_I_LOCATION(inode).logicalBlockNum) - { + le32_to_cpu(eahd->descTag.tagLocation) != + UDF_I_LOCATION(inode).logicalBlockNum) { return NULL; } - + if (type < 2048) offset = sizeof(struct extendedAttrHeaderDesc); else if (type < 65536) @@ -180,10 +175,10 @@ udf_get_extendedattr(struct inode *inode, uint32_t type, uint8_t subtype) else offset = le32_to_cpu(eahd->appAttrLocation); - while (offset < UDF_I_LENEATTR(inode)) - { + while (offset < UDF_I_LENEATTR(inode)) { gaf = (struct genericFormat *)&ea[offset]; - if (le32_to_cpu(gaf->attrType) == type && gaf->attrSubtype == subtype) + if (le32_to_cpu(gaf->attrType) == type + && gaf->attrSubtype == subtype) return gaf; else offset += le32_to_cpu(gaf->attrLength); @@ -202,8 +197,8 @@ udf_get_extendedattr(struct inode *inode, uint32_t type, uint8_t subtype) * July 1, 1997 - Andrew E. Mileski * Written, tested, and released. */ -struct buffer_head * -udf_read_tagged(struct super_block *sb, uint32_t block, uint32_t location, uint16_t *ident) +struct buffer_head *udf_read_tagged(struct super_block *sb, uint32_t block, + uint32_t location, uint16_t * ident) { tag *tag_p; struct buffer_head *bh = NULL; @@ -215,29 +210,29 @@ udf_read_tagged(struct super_block *sb, uint32_t block, uint32_t location, uint1 return NULL; bh = udf_tread(sb, block + UDF_SB_SESSION(sb)); - if (!bh) - { - udf_debug("block=%d, location=%d: read failed\n", block + UDF_SB_SESSION(sb), location); + if (!bh) { + udf_debug("block=%d, location=%d: read failed\n", + block + UDF_SB_SESSION(sb), location); return NULL; } - tag_p = (tag *)(bh->b_data); + tag_p = (tag *) (bh->b_data); *ident = le16_to_cpu(tag_p->tagIdent); - if ( location != le32_to_cpu(tag_p->tagLocation) ) - { + if (location != le32_to_cpu(tag_p->tagLocation)) { udf_debug("location mismatch block %u, tag %u != %u\n", - block + UDF_SB_SESSION(sb), le32_to_cpu(tag_p->tagLocation), location); + block + UDF_SB_SESSION(sb), + le32_to_cpu(tag_p->tagLocation), location); goto error_out; } - + /* Verify the tag checksum */ checksum = 0U; for (i = 0; i < 4; i++) - checksum += (uint8_t)(bh->b_data[i]); + checksum += (uint8_t) (bh->b_data[i]); for (i = 5; i < 16; i++) - checksum += (uint8_t)(bh->b_data[i]); + checksum += (uint8_t) (bh->b_data[i]); if (checksum != tag_p->tagChecksum) { printk(KERN_ERR "udf: tag checksum failed block %d\n", block); goto error_out; @@ -245,38 +240,39 @@ udf_read_tagged(struct super_block *sb, uint32_t block, uint32_t location, uint1 /* Verify the tag version */ if (le16_to_cpu(tag_p->descVersion) != 0x0002U && - le16_to_cpu(tag_p->descVersion) != 0x0003U) - { + le16_to_cpu(tag_p->descVersion) != 0x0003U) { udf_debug("tag version 0x%04x != 0x0002 || 0x0003 block %d\n", - le16_to_cpu(tag_p->descVersion), block); + le16_to_cpu(tag_p->descVersion), block); goto error_out; } /* Verify the descriptor CRC */ if (le16_to_cpu(tag_p->descCRCLength) + sizeof(tag) > sb->s_blocksize || - le16_to_cpu(tag_p->descCRC) == udf_crc(bh->b_data + sizeof(tag), - le16_to_cpu(tag_p->descCRCLength), 0)) - { + le16_to_cpu(tag_p->descCRC) == udf_crc(bh->b_data + sizeof(tag), + le16_to_cpu(tag_p-> + descCRCLength), + 0)) { return bh; } udf_debug("Crc failure block %d: crc = %d, crclen = %d\n", - block + UDF_SB_SESSION(sb), le16_to_cpu(tag_p->descCRC), le16_to_cpu(tag_p->descCRCLength)); + block + UDF_SB_SESSION(sb), le16_to_cpu(tag_p->descCRC), + le16_to_cpu(tag_p->descCRCLength)); -error_out: + error_out: brelse(bh); return NULL; } -struct buffer_head * -udf_read_ptagged(struct super_block *sb, kernel_lb_addr loc, uint32_t offset, uint16_t *ident) +struct buffer_head *udf_read_ptagged(struct super_block *sb, kernel_lb_addr loc, + uint32_t offset, uint16_t * ident) { return udf_read_tagged(sb, udf_get_lb_pblock(sb, loc, offset), - loc.logicalBlockNum + offset, ident); + loc.logicalBlockNum + offset, ident); } void udf_update_tag(char *data, int length) { - tag *tptr = (tag *)data; + tag *tptr = (tag *) data; int i; length -= sizeof(tag); @@ -285,15 +281,15 @@ void udf_update_tag(char *data, int length) tptr->descCRCLength = cpu_to_le16(length); tptr->descCRC = cpu_to_le16(udf_crc(data + sizeof(tag), length, 0)); - for (i=0; i<16; i++) + for (i = 0; i < 16; i++) if (i != 4) - tptr->tagChecksum += (uint8_t)(data[i]); + tptr->tagChecksum += (uint8_t) (data[i]); } void udf_new_tag(char *data, uint16_t ident, uint16_t version, uint16_t snum, - uint32_t loc, int length) + uint32_t loc, int length) { - tag *tptr = (tag *)data; + tag *tptr = (tag *) data; tptr->tagIdent = cpu_to_le16(ident); tptr->descVersion = cpu_to_le16(version); tptr->tagSerialNum = cpu_to_le16(snum); diff --git a/fs/udf/namei.c b/fs/udf/namei.c index 51fe307dc0ec..334d363a0903 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -32,7 +32,8 @@ #include <linux/buffer_head.h> #include <linux/sched.h> -static inline int udf_match(int len1, const char *name1, int len2, const char *name2) +static inline int udf_match(int len1, const char *name1, int len2, + const char *name2) { if (len1 != len2) return 0; @@ -40,8 +41,8 @@ static inline int udf_match(int len1, const char *name1, int len2, const char *n } int udf_write_fi(struct inode *inode, struct fileIdentDesc *cfi, - struct fileIdentDesc *sfi, struct udf_fileident_bh *fibh, - uint8_t *impuse, uint8_t *fileident) + struct fileIdentDesc *sfi, struct udf_fileident_bh *fibh, + uint8_t * impuse, uint8_t * fileident) { uint16_t crclen = fibh->eoffset - fibh->soffset - sizeof(tag); uint16_t crc; @@ -51,7 +52,7 @@ int udf_write_fi(struct inode *inode, struct fileIdentDesc *cfi, uint16_t liu = le16_to_cpu(cfi->lengthOfImpUse); uint8_t lfi = cfi->lengthFileIdent; int padlen = fibh->eoffset - fibh->soffset - liu - lfi - - sizeof(struct fileIdentDesc); + sizeof(struct fileIdentDesc); int adinicb = 0; if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_IN_ICB) @@ -59,83 +60,86 @@ int udf_write_fi(struct inode *inode, struct fileIdentDesc *cfi, offset = fibh->soffset + sizeof(struct fileIdentDesc); - if (impuse) - { + if (impuse) { if (adinicb || (offset + liu < 0)) - memcpy((uint8_t *)sfi->impUse, impuse, liu); + memcpy((uint8_t *) sfi->impUse, impuse, liu); else if (offset >= 0) memcpy(fibh->ebh->b_data + offset, impuse, liu); - else - { - memcpy((uint8_t *)sfi->impUse, impuse, -offset); - memcpy(fibh->ebh->b_data, impuse - offset, liu + offset); + else { + memcpy((uint8_t *) sfi->impUse, impuse, -offset); + memcpy(fibh->ebh->b_data, impuse - offset, + liu + offset); } } offset += liu; - if (fileident) - { + if (fileident) { if (adinicb || (offset + lfi < 0)) - memcpy((uint8_t *)sfi->fileIdent + liu, fileident, lfi); + memcpy((uint8_t *) sfi->fileIdent + liu, fileident, + lfi); else if (offset >= 0) memcpy(fibh->ebh->b_data + offset, fileident, lfi); - else - { - memcpy((uint8_t *)sfi->fileIdent + liu, fileident, -offset); - memcpy(fibh->ebh->b_data, fileident - offset, lfi + offset); + else { + memcpy((uint8_t *) sfi->fileIdent + liu, fileident, + -offset); + memcpy(fibh->ebh->b_data, fileident - offset, + lfi + offset); } } offset += lfi; if (adinicb || (offset + padlen < 0)) - memset((uint8_t *)sfi->padding + liu + lfi, 0x00, padlen); + memset((uint8_t *) sfi->padding + liu + lfi, 0x00, padlen); else if (offset >= 0) memset(fibh->ebh->b_data + offset, 0x00, padlen); - else - { - memset((uint8_t *)sfi->padding + liu + lfi, 0x00, -offset); + else { + memset((uint8_t *) sfi->padding + liu + lfi, 0x00, -offset); memset(fibh->ebh->b_data, 0x00, padlen + offset); } - crc = udf_crc((uint8_t *)cfi + sizeof(tag), sizeof(struct fileIdentDesc) - - sizeof(tag), 0); + crc = + udf_crc((uint8_t *) cfi + sizeof(tag), + sizeof(struct fileIdentDesc) - sizeof(tag), 0); if (fibh->sbh == fibh->ebh) - crc = udf_crc((uint8_t *)sfi->impUse, - crclen + sizeof(tag) - sizeof(struct fileIdentDesc), crc); + crc = udf_crc((uint8_t *) sfi->impUse, + crclen + sizeof(tag) - + sizeof(struct fileIdentDesc), crc); else if (sizeof(struct fileIdentDesc) >= -fibh->soffset) - crc = udf_crc(fibh->ebh->b_data + sizeof(struct fileIdentDesc) + fibh->soffset, - crclen + sizeof(tag) - sizeof(struct fileIdentDesc), crc); - else - { - crc = udf_crc((uint8_t *)sfi->impUse, - -fibh->soffset - sizeof(struct fileIdentDesc), crc); + crc = + udf_crc(fibh->ebh->b_data + sizeof(struct fileIdentDesc) + + fibh->soffset, + crclen + sizeof(tag) - sizeof(struct fileIdentDesc), + crc); + else { + crc = udf_crc((uint8_t *) sfi->impUse, + -fibh->soffset - sizeof(struct fileIdentDesc), + crc); crc = udf_crc(fibh->ebh->b_data, fibh->eoffset, crc); } cfi->descTag.descCRC = cpu_to_le16(crc); cfi->descTag.descCRCLength = cpu_to_le16(crclen); - for (i=0; i<16; i++) + for (i = 0; i < 16; i++) if (i != 4) - checksum += ((uint8_t *)&cfi->descTag)[i]; + checksum += ((uint8_t *) & cfi->descTag)[i]; cfi->descTag.tagChecksum = checksum; if (adinicb || (sizeof(struct fileIdentDesc) <= -fibh->soffset)) - memcpy((uint8_t *)sfi, (uint8_t *)cfi, sizeof(struct fileIdentDesc)); - else - { - memcpy((uint8_t *)sfi, (uint8_t *)cfi, -fibh->soffset); - memcpy(fibh->ebh->b_data, (uint8_t *)cfi - fibh->soffset, - sizeof(struct fileIdentDesc) + fibh->soffset); + memcpy((uint8_t *) sfi, (uint8_t *) cfi, + sizeof(struct fileIdentDesc)); + else { + memcpy((uint8_t *) sfi, (uint8_t *) cfi, -fibh->soffset); + memcpy(fibh->ebh->b_data, (uint8_t *) cfi - fibh->soffset, + sizeof(struct fileIdentDesc) + fibh->soffset); } if (adinicb) mark_inode_dirty(inode); - else - { + else { if (fibh->sbh != fibh->ebh) mark_buffer_dirty_inode(fibh->ebh, inode); mark_buffer_dirty_inode(fibh->sbh, inode); @@ -143,12 +147,12 @@ int udf_write_fi(struct inode *inode, struct fileIdentDesc *cfi, return 0; } -static struct fileIdentDesc * -udf_find_entry(struct inode *dir, struct dentry *dentry, - struct udf_fileident_bh *fibh, - struct fileIdentDesc *cfi) +static struct fileIdentDesc *udf_find_entry(struct inode *dir, + struct dentry *dentry, + struct udf_fileident_bh *fibh, + struct fileIdentDesc *cfi) { - struct fileIdentDesc *fi=NULL; + struct fileIdentDesc *fi = NULL; loff_t f_pos; int block, flen; char fname[UDF_NAME_LEN]; @@ -159,46 +163,41 @@ udf_find_entry(struct inode *dir, struct dentry *dentry, kernel_lb_addr eloc; uint32_t elen; sector_t offset; - struct extent_position epos = { NULL, 0, { 0, 0}}; + struct extent_position epos = { NULL, 0, {0, 0} }; size = (udf_ext0_offset(dir) + dir->i_size) >> 2; f_pos = (udf_ext0_offset(dir) >> 2); - fibh->soffset = fibh->eoffset = (f_pos & ((dir->i_sb->s_blocksize - 1) >> 2)) << 2; + fibh->soffset = fibh->eoffset = + (f_pos & ((dir->i_sb->s_blocksize - 1) >> 2)) << 2; if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_IN_ICB) fibh->sbh = fibh->ebh = NULL; else if (inode_bmap(dir, f_pos >> (dir->i_sb->s_blocksize_bits - 2), - &epos, &eloc, &elen, &offset) == (EXT_RECORDED_ALLOCATED >> 30)) - { + &epos, &eloc, &elen, + &offset) == (EXT_RECORDED_ALLOCATED >> 30)) { block = udf_get_lb_pblock(dir->i_sb, eloc, offset); - if ((++offset << dir->i_sb->s_blocksize_bits) < elen) - { + if ((++offset << dir->i_sb->s_blocksize_bits) < elen) { if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_SHORT) epos.offset -= sizeof(short_ad); else if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_LONG) epos.offset -= sizeof(long_ad); - } - else + } else offset = 0; - if (!(fibh->sbh = fibh->ebh = udf_tread(dir->i_sb, block))) - { + if (!(fibh->sbh = fibh->ebh = udf_tread(dir->i_sb, block))) { brelse(epos.bh); return NULL; } - } - else - { + } else { brelse(epos.bh); return NULL; } - while ( (f_pos < size) ) - { - fi = udf_fileident_read(dir, &f_pos, fibh, cfi, &epos, &eloc, &elen, &offset); + while ((f_pos < size)) { + fi = udf_fileident_read(dir, &f_pos, fibh, cfi, &epos, &eloc, + &elen, &offset); - if (!fi) - { + if (!fi) { if (fibh->sbh != fibh->ebh) brelse(fibh->ebh); brelse(fibh->sbh); @@ -209,45 +208,45 @@ udf_find_entry(struct inode *dir, struct dentry *dentry, liu = le16_to_cpu(cfi->lengthOfImpUse); lfi = cfi->lengthFileIdent; - if (fibh->sbh == fibh->ebh) - { + if (fibh->sbh == fibh->ebh) { nameptr = fi->fileIdent + liu; - } - else - { + } else { int poffset; /* Unpaded ending offset */ - poffset = fibh->soffset + sizeof(struct fileIdentDesc) + liu + lfi; + poffset = + fibh->soffset + sizeof(struct fileIdentDesc) + liu + + lfi; if (poffset >= lfi) - nameptr = (uint8_t *)(fibh->ebh->b_data + poffset - lfi); - else - { + nameptr = + (uint8_t *) (fibh->ebh->b_data + poffset - + lfi); + else { nameptr = fname; - memcpy(nameptr, fi->fileIdent + liu, lfi - poffset); - memcpy(nameptr + lfi - poffset, fibh->ebh->b_data, poffset); + memcpy(nameptr, fi->fileIdent + liu, + lfi - poffset); + memcpy(nameptr + lfi - poffset, + fibh->ebh->b_data, poffset); } } - if ( (cfi->fileCharacteristics & FID_FILE_CHAR_DELETED) != 0 ) - { - if ( !UDF_QUERY_FLAG(dir->i_sb, UDF_FLAG_UNDELETE) ) + if ((cfi->fileCharacteristics & FID_FILE_CHAR_DELETED) != 0) { + if (!UDF_QUERY_FLAG(dir->i_sb, UDF_FLAG_UNDELETE)) continue; } - - if ( (cfi->fileCharacteristics & FID_FILE_CHAR_HIDDEN) != 0 ) - { - if ( !UDF_QUERY_FLAG(dir->i_sb, UDF_FLAG_UNHIDE) ) + + if ((cfi->fileCharacteristics & FID_FILE_CHAR_HIDDEN) != 0) { + if (!UDF_QUERY_FLAG(dir->i_sb, UDF_FLAG_UNHIDE)) continue; } if (!lfi) continue; - if ((flen = udf_get_filename(dir->i_sb, nameptr, fname, lfi))) - { - if (udf_match(flen, fname, dentry->d_name.len, dentry->d_name.name)) - { + if ((flen = udf_get_filename(dir->i_sb, nameptr, fname, lfi))) { + if (udf_match + (flen, fname, dentry->d_name.len, + dentry->d_name.name)) { brelse(epos.bh); return fi; } @@ -293,41 +292,37 @@ udf_find_entry(struct inode *dir, struct dentry *dentry, * Written, tested, and released. */ -static struct dentry * -udf_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) +static struct dentry *udf_lookup(struct inode *dir, struct dentry *dentry, + struct nameidata *nd) { struct inode *inode = NULL; struct fileIdentDesc cfi; struct udf_fileident_bh fibh; - if (dentry->d_name.len > UDF_NAME_LEN-2) + if (dentry->d_name.len > UDF_NAME_LEN - 2) return ERR_PTR(-ENAMETOOLONG); lock_kernel(); #ifdef UDF_RECOVERY /* temporary shorthand for specifying files by inode number */ - if (!strncmp(dentry->d_name.name, ".B=", 3) ) - { - kernel_lb_addr lb = { 0, simple_strtoul(dentry->d_name.name+3, NULL, 0) }; + if (!strncmp(dentry->d_name.name, ".B=", 3)) { + kernel_lb_addr lb = + { 0, simple_strtoul(dentry->d_name.name + 3, NULL, 0) }; inode = udf_iget(dir->i_sb, lb); - if (!inode) - { + if (!inode) { unlock_kernel(); return ERR_PTR(-EACCES); } - } - else -#endif /* UDF_RECOVERY */ + } else +#endif /* UDF_RECOVERY */ - if (udf_find_entry(dir, dentry, &fibh, &cfi)) - { + if (udf_find_entry(dir, dentry, &fibh, &cfi)) { if (fibh.sbh != fibh.ebh) brelse(fibh.ebh); brelse(fibh.sbh); inode = udf_iget(dir->i_sb, lelb_to_cpu(cfi.icb.extLocation)); - if ( !inode ) - { + if (!inode) { unlock_kernel(); return ERR_PTR(-EACCES); } @@ -337,13 +332,13 @@ udf_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) return NULL; } -static struct fileIdentDesc * -udf_add_entry(struct inode *dir, struct dentry *dentry, - struct udf_fileident_bh *fibh, - struct fileIdentDesc *cfi, int *err) +static struct fileIdentDesc *udf_add_entry(struct inode *dir, + struct dentry *dentry, + struct udf_fileident_bh *fibh, + struct fileIdentDesc *cfi, int *err) { struct super_block *sb; - struct fileIdentDesc *fi=NULL; + struct fileIdentDesc *fi = NULL; char name[UDF_NAME_LEN], fname[UDF_NAME_LEN]; int namelen; loff_t f_pos; @@ -357,50 +352,47 @@ udf_add_entry(struct inode *dir, struct dentry *dentry, kernel_lb_addr eloc; uint32_t elen; sector_t offset; - struct extent_position epos = { NULL, 0, { 0, 0 }}; + struct extent_position epos = { NULL, 0, {0, 0} }; sb = dir->i_sb; - if (dentry) - { - if (!dentry->d_name.len) - { + if (dentry) { + if (!dentry->d_name.len) { *err = -EINVAL; return NULL; } - if ( !(namelen = udf_put_filename(sb, dentry->d_name.name, name, dentry->d_name.len))) - { + if (! + (namelen = + udf_put_filename(sb, dentry->d_name.name, name, + dentry->d_name.len))) { *err = -ENAMETOOLONG; return NULL; } - } - else + } else namelen = 0; nfidlen = (sizeof(struct fileIdentDesc) + namelen + 3) & ~3; f_pos = (udf_ext0_offset(dir) >> 2); - fibh->soffset = fibh->eoffset = (f_pos & ((dir->i_sb->s_blocksize - 1) >> 2)) << 2; + fibh->soffset = fibh->eoffset = + (f_pos & ((dir->i_sb->s_blocksize - 1) >> 2)) << 2; if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_IN_ICB) fibh->sbh = fibh->ebh = NULL; else if (inode_bmap(dir, f_pos >> (dir->i_sb->s_blocksize_bits - 2), - &epos, &eloc, &elen, &offset) == (EXT_RECORDED_ALLOCATED >> 30)) - { + &epos, &eloc, &elen, + &offset) == (EXT_RECORDED_ALLOCATED >> 30)) { block = udf_get_lb_pblock(dir->i_sb, eloc, offset); - if ((++offset << dir->i_sb->s_blocksize_bits) < elen) - { + if ((++offset << dir->i_sb->s_blocksize_bits) < elen) { if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_SHORT) epos.offset -= sizeof(short_ad); else if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_LONG) epos.offset -= sizeof(long_ad); - } - else + } else offset = 0; - if (!(fibh->sbh = fibh->ebh = udf_tread(dir->i_sb, block))) - { + if (!(fibh->sbh = fibh->ebh = udf_tread(dir->i_sb, block))) { brelse(epos.bh); *err = -EIO; return NULL; @@ -408,21 +400,18 @@ udf_add_entry(struct inode *dir, struct dentry *dentry, block = UDF_I_LOCATION(dir).logicalBlockNum; - } - else - { + } else { block = udf_get_lb_pblock(dir->i_sb, UDF_I_LOCATION(dir), 0); fibh->sbh = fibh->ebh = NULL; fibh->soffset = fibh->eoffset = sb->s_blocksize; goto add; } - while ( (f_pos < size) ) - { - fi = udf_fileident_read(dir, &f_pos, fibh, cfi, &epos, &eloc, &elen, &offset); + while ((f_pos < size)) { + fi = udf_fileident_read(dir, &f_pos, fibh, cfi, &epos, &eloc, + &elen, &offset); - if (!fi) - { + if (!fi) { if (fibh->sbh != fibh->ebh) brelse(fibh->ebh); brelse(fibh->sbh); @@ -436,36 +425,38 @@ udf_add_entry(struct inode *dir, struct dentry *dentry, if (fibh->sbh == fibh->ebh) nameptr = fi->fileIdent + liu; - else - { + else { int poffset; /* Unpaded ending offset */ - poffset = fibh->soffset + sizeof(struct fileIdentDesc) + liu + lfi; + poffset = + fibh->soffset + sizeof(struct fileIdentDesc) + liu + + lfi; if (poffset >= lfi) - nameptr = (char *)(fibh->ebh->b_data + poffset - lfi); - else - { + nameptr = + (char *)(fibh->ebh->b_data + poffset - lfi); + else { nameptr = fname; - memcpy(nameptr, fi->fileIdent + liu, lfi - poffset); - memcpy(nameptr + lfi - poffset, fibh->ebh->b_data, poffset); + memcpy(nameptr, fi->fileIdent + liu, + lfi - poffset); + memcpy(nameptr + lfi - poffset, + fibh->ebh->b_data, poffset); } } - if ( (cfi->fileCharacteristics & FID_FILE_CHAR_DELETED) != 0 ) - { - if (((sizeof(struct fileIdentDesc) + liu + lfi + 3) & ~3) == nfidlen) - { + if ((cfi->fileCharacteristics & FID_FILE_CHAR_DELETED) != 0) { + if (((sizeof(struct fileIdentDesc) + liu + lfi + + 3) & ~3) == nfidlen) { brelse(epos.bh); cfi->descTag.tagSerialNum = cpu_to_le16(1); cfi->fileVersionNum = cpu_to_le16(1); cfi->fileCharacteristics = 0; cfi->lengthFileIdent = namelen; cfi->lengthOfImpUse = cpu_to_le16(0); - if (!udf_write_fi(dir, cfi, fi, fibh, NULL, name)) + if (!udf_write_fi + (dir, cfi, fi, fibh, NULL, name)) return fi; - else - { + else { *err = -EIO; return NULL; } @@ -476,8 +467,8 @@ udf_add_entry(struct inode *dir, struct dentry *dentry, continue; if ((flen = udf_get_filename(dir->i_sb, nameptr, fname, lfi)) && - udf_match(flen, fname, dentry->d_name.len, dentry->d_name.name)) - { + udf_match(flen, fname, dentry->d_name.len, + dentry->d_name.name)) { if (fibh->sbh != fibh->ebh) brelse(fibh->ebh); brelse(fibh->sbh); @@ -487,12 +478,11 @@ udf_add_entry(struct inode *dir, struct dentry *dentry, } } -add: + add: f_pos += nfidlen; if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_IN_ICB && - sb->s_blocksize - fibh->eoffset < nfidlen) - { + sb->s_blocksize - fibh->eoffset < nfidlen) { brelse(epos.bh); epos.bh = NULL; fibh->soffset -= udf_ext0_offset(dir); @@ -501,11 +491,14 @@ add: if (fibh->sbh != fibh->ebh) brelse(fibh->ebh); brelse(fibh->sbh); - if (!(fibh->sbh = fibh->ebh = udf_expand_dir_adinicb(dir, &block, err))) + if (! + (fibh->sbh = fibh->ebh = + udf_expand_dir_adinicb(dir, &block, err))) return NULL; epos.block = UDF_I_LOCATION(dir); eloc.logicalBlockNum = block; - eloc.partitionReferenceNum = UDF_I_LOCATION(dir).partitionReferenceNum; + eloc.partitionReferenceNum = + UDF_I_LOCATION(dir).partitionReferenceNum; elen = dir->i_sb->s_blocksize; epos.offset = udf_file_entry_alloc_offset(dir); if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_SHORT) @@ -514,89 +507,84 @@ add: epos.offset += sizeof(long_ad); } - if (sb->s_blocksize - fibh->eoffset >= nfidlen) - { + if (sb->s_blocksize - fibh->eoffset >= nfidlen) { fibh->soffset = fibh->eoffset; fibh->eoffset += nfidlen; - if (fibh->sbh != fibh->ebh) - { + if (fibh->sbh != fibh->ebh) { brelse(fibh->sbh); fibh->sbh = fibh->ebh; } - if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_IN_ICB) - { + if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_IN_ICB) { block = UDF_I_LOCATION(dir).logicalBlockNum; - fi = (struct fileIdentDesc *)(UDF_I_DATA(dir) + fibh->soffset - udf_ext0_offset(dir) + UDF_I_LENEATTR(dir)); - } - else - { + fi = (struct fileIdentDesc *)(UDF_I_DATA(dir) + + fibh->soffset - + udf_ext0_offset(dir) + + UDF_I_LENEATTR(dir)); + } else { block = eloc.logicalBlockNum + ((elen - 1) >> - dir->i_sb->s_blocksize_bits); - fi = (struct fileIdentDesc *)(fibh->sbh->b_data + fibh->soffset); + dir->i_sb-> + s_blocksize_bits); + fi = (struct fileIdentDesc *)(fibh->sbh->b_data + + fibh->soffset); } - } - else - { + } else { fibh->soffset = fibh->eoffset - sb->s_blocksize; fibh->eoffset += nfidlen - sb->s_blocksize; - if (fibh->sbh != fibh->ebh) - { + if (fibh->sbh != fibh->ebh) { brelse(fibh->sbh); fibh->sbh = fibh->ebh; } block = eloc.logicalBlockNum + ((elen - 1) >> - dir->i_sb->s_blocksize_bits); + dir->i_sb->s_blocksize_bits); - if (!(fibh->ebh = udf_bread(dir, f_pos >> (dir->i_sb->s_blocksize_bits - 2), 1, err))) - { + if (! + (fibh->ebh = + udf_bread(dir, f_pos >> (dir->i_sb->s_blocksize_bits - 2), + 1, err))) { brelse(epos.bh); brelse(fibh->sbh); return NULL; } - if (!(fibh->soffset)) - { + if (!(fibh->soffset)) { if (udf_next_aext(dir, &epos, &eloc, &elen, 1) == - (EXT_RECORDED_ALLOCATED >> 30)) - { + (EXT_RECORDED_ALLOCATED >> 30)) { block = eloc.logicalBlockNum + ((elen - 1) >> - dir->i_sb->s_blocksize_bits); - } - else - block ++; + dir->i_sb-> + s_blocksize_bits); + } else + block++; brelse(fibh->sbh); fibh->sbh = fibh->ebh; fi = (struct fileIdentDesc *)(fibh->sbh->b_data); - } - else - { + } else { fi = (struct fileIdentDesc *) - (fibh->sbh->b_data + sb->s_blocksize + fibh->soffset); + (fibh->sbh->b_data + sb->s_blocksize + + fibh->soffset); } } memset(cfi, 0, sizeof(struct fileIdentDesc)); if (UDF_SB_UDFREV(sb) >= 0x0200) - udf_new_tag((char *)cfi, TAG_IDENT_FID, 3, 1, block, sizeof(tag)); + udf_new_tag((char *)cfi, TAG_IDENT_FID, 3, 1, block, + sizeof(tag)); else - udf_new_tag((char *)cfi, TAG_IDENT_FID, 2, 1, block, sizeof(tag)); + udf_new_tag((char *)cfi, TAG_IDENT_FID, 2, 1, block, + sizeof(tag)); cfi->fileVersionNum = cpu_to_le16(1); cfi->lengthFileIdent = namelen; cfi->lengthOfImpUse = cpu_to_le16(0); - if (!udf_write_fi(dir, cfi, fi, fibh, NULL, name)) - { + if (!udf_write_fi(dir, cfi, fi, fibh, NULL, name)) { brelse(epos.bh); dir->i_size += nfidlen; if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_IN_ICB) UDF_I_LENALLOC(dir) += nfidlen; mark_inode_dirty(dir); return fi; - } - else - { + } else { brelse(epos.bh); if (fibh->sbh != fibh->ebh) brelse(fibh->ebh); @@ -607,7 +595,8 @@ add: } static int udf_delete_entry(struct inode *inode, struct fileIdentDesc *fi, - struct udf_fileident_bh *fibh, struct fileIdentDesc *cfi) + struct udf_fileident_bh *fibh, + struct fileIdentDesc *cfi) { cfi->fileCharacteristics |= FID_FILE_CHAR_DELETED; if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT)) @@ -615,7 +604,8 @@ static int udf_delete_entry(struct inode *inode, struct fileIdentDesc *fi, return udf_write_fi(inode, cfi, fi, fibh, NULL, NULL); } -static int udf_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd) +static int udf_create(struct inode *dir, struct dentry *dentry, int mode, + struct nameidata *nd) { struct udf_fileident_bh fibh; struct inode *inode; @@ -624,8 +614,7 @@ static int udf_create(struct inode *dir, struct dentry *dentry, int mode, struct lock_kernel(); inode = udf_new_inode(dir, mode, &err); - if (!inode) - { + if (!inode) { unlock_kernel(); return err; } @@ -639,9 +628,8 @@ static int udf_create(struct inode *dir, struct dentry *dentry, int mode, struct inode->i_mode = mode; mark_inode_dirty(inode); - if (!(fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err))) - { - inode->i_nlink --; + if (!(fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err))) { + inode->i_nlink--; mark_inode_dirty(inode); iput(inode); unlock_kernel(); @@ -649,11 +637,10 @@ static int udf_create(struct inode *dir, struct dentry *dentry, int mode, struct } cfi.icb.extLength = cpu_to_le32(inode->i_sb->s_blocksize); cfi.icb.extLocation = cpu_to_lelb(UDF_I_LOCATION(inode)); - *(__le32 *)((struct allocDescImpUse *)cfi.icb.impUse)->impUse = - cpu_to_le32(UDF_I_UNIQUE(inode) & 0x00000000FFFFFFFFUL); + *(__le32 *) ((struct allocDescImpUse *)cfi.icb.impUse)->impUse = + cpu_to_le32(UDF_I_UNIQUE(inode) & 0x00000000FFFFFFFFUL); udf_write_fi(dir, &cfi, fi, &fibh, NULL, NULL); - if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_IN_ICB) - { + if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_IN_ICB) { mark_inode_dirty(dir); } if (fibh.sbh != fibh.ebh) @@ -664,9 +651,10 @@ static int udf_create(struct inode *dir, struct dentry *dentry, int mode, struct return 0; } -static int udf_mknod(struct inode * dir, struct dentry * dentry, int mode, dev_t rdev) +static int udf_mknod(struct inode *dir, struct dentry *dentry, int mode, + dev_t rdev) { - struct inode * inode; + struct inode *inode; struct udf_fileident_bh fibh; struct fileIdentDesc cfi, *fi; int err; @@ -682,9 +670,8 @@ static int udf_mknod(struct inode * dir, struct dentry * dentry, int mode, dev_t inode->i_uid = current->fsuid; init_special_inode(inode, mode, rdev); - if (!(fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err))) - { - inode->i_nlink --; + if (!(fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err))) { + inode->i_nlink--; mark_inode_dirty(inode); iput(inode); unlock_kernel(); @@ -692,11 +679,10 @@ static int udf_mknod(struct inode * dir, struct dentry * dentry, int mode, dev_t } cfi.icb.extLength = cpu_to_le32(inode->i_sb->s_blocksize); cfi.icb.extLocation = cpu_to_lelb(UDF_I_LOCATION(inode)); - *(__le32 *)((struct allocDescImpUse *)cfi.icb.impUse)->impUse = - cpu_to_le32(UDF_I_UNIQUE(inode) & 0x00000000FFFFFFFFUL); + *(__le32 *) ((struct allocDescImpUse *)cfi.icb.impUse)->impUse = + cpu_to_le32(UDF_I_UNIQUE(inode) & 0x00000000FFFFFFFFUL); udf_write_fi(dir, &cfi, fi, &fibh, NULL, NULL); - if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_IN_ICB) - { + if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_IN_ICB) { mark_inode_dirty(dir); } mark_inode_dirty(inode); @@ -706,21 +692,21 @@ static int udf_mknod(struct inode * dir, struct dentry * dentry, int mode, dev_t brelse(fibh.sbh); d_instantiate(dentry, inode); err = 0; -out: + out: unlock_kernel(); return err; } -static int udf_mkdir(struct inode * dir, struct dentry * dentry, int mode) +static int udf_mkdir(struct inode *dir, struct dentry *dentry, int mode) { - struct inode * inode; + struct inode *inode; struct udf_fileident_bh fibh; struct fileIdentDesc cfi, *fi; int err; lock_kernel(); err = -EMLINK; - if (dir->i_nlink >= (256<<sizeof(dir->i_nlink))-1) + if (dir->i_nlink >= (256 << sizeof(dir->i_nlink)) - 1) goto out; err = -EIO; @@ -730,8 +716,7 @@ static int udf_mkdir(struct inode * dir, struct dentry * dentry, int mode) inode->i_op = &udf_dir_inode_operations; inode->i_fop = &udf_dir_operations; - if (!(fi = udf_add_entry(inode, NULL, &fibh, &cfi, &err))) - { + if (!(fi = udf_add_entry(inode, NULL, &fibh, &cfi, &err))) { inode->i_nlink--; mark_inode_dirty(inode); iput(inode); @@ -740,9 +725,10 @@ static int udf_mkdir(struct inode * dir, struct dentry * dentry, int mode) inode->i_nlink = 2; cfi.icb.extLength = cpu_to_le32(inode->i_sb->s_blocksize); cfi.icb.extLocation = cpu_to_lelb(UDF_I_LOCATION(dir)); - *(__le32 *)((struct allocDescImpUse *)cfi.icb.impUse)->impUse = - cpu_to_le32(UDF_I_UNIQUE(dir) & 0x00000000FFFFFFFFUL); - cfi.fileCharacteristics = FID_FILE_CHAR_DIRECTORY | FID_FILE_CHAR_PARENT; + *(__le32 *) ((struct allocDescImpUse *)cfi.icb.impUse)->impUse = + cpu_to_le32(UDF_I_UNIQUE(dir) & 0x00000000FFFFFFFFUL); + cfi.fileCharacteristics = + FID_FILE_CHAR_DIRECTORY | FID_FILE_CHAR_PARENT; udf_write_fi(inode, &cfi, fi, &fibh, NULL, NULL); brelse(fibh.sbh); inode->i_mode = S_IFDIR | mode; @@ -750,8 +736,7 @@ static int udf_mkdir(struct inode * dir, struct dentry * dentry, int mode) inode->i_mode |= S_ISGID; mark_inode_dirty(inode); - if (!(fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err))) - { + if (!(fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err))) { inode->i_nlink = 0; mark_inode_dirty(inode); iput(inode); @@ -759,8 +744,8 @@ static int udf_mkdir(struct inode * dir, struct dentry * dentry, int mode) } cfi.icb.extLength = cpu_to_le32(inode->i_sb->s_blocksize); cfi.icb.extLocation = cpu_to_lelb(UDF_I_LOCATION(inode)); - *(__le32 *)((struct allocDescImpUse *)cfi.icb.impUse)->impUse = - cpu_to_le32(UDF_I_UNIQUE(inode) & 0x00000000FFFFFFFFUL); + *(__le32 *) ((struct allocDescImpUse *)cfi.icb.impUse)->impUse = + cpu_to_le32(UDF_I_UNIQUE(inode) & 0x00000000FFFFFFFFUL); cfi.fileCharacteristics |= FID_FILE_CHAR_DIRECTORY; udf_write_fi(dir, &cfi, fi, &fibh, NULL, NULL); inc_nlink(dir); @@ -770,7 +755,7 @@ static int udf_mkdir(struct inode * dir, struct dentry * dentry, int mode) brelse(fibh.ebh); brelse(fibh.sbh); err = 0; -out: + out: unlock_kernel(); return err; } @@ -785,47 +770,41 @@ static int empty_dir(struct inode *dir) kernel_lb_addr eloc; uint32_t elen; sector_t offset; - struct extent_position epos = { NULL, 0, { 0, 0}}; + struct extent_position epos = { NULL, 0, {0, 0} }; f_pos = (udf_ext0_offset(dir) >> 2); - fibh.soffset = fibh.eoffset = (f_pos & ((dir->i_sb->s_blocksize - 1) >> 2)) << 2; + fibh.soffset = fibh.eoffset = + (f_pos & ((dir->i_sb->s_blocksize - 1) >> 2)) << 2; if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_IN_ICB) fibh.sbh = fibh.ebh = NULL; else if (inode_bmap(dir, f_pos >> (dir->i_sb->s_blocksize_bits - 2), - &epos, &eloc, &elen, &offset) == (EXT_RECORDED_ALLOCATED >> 30)) - { + &epos, &eloc, &elen, + &offset) == (EXT_RECORDED_ALLOCATED >> 30)) { block = udf_get_lb_pblock(dir->i_sb, eloc, offset); - if ((++offset << dir->i_sb->s_blocksize_bits) < elen) - { + if ((++offset << dir->i_sb->s_blocksize_bits) < elen) { if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_SHORT) epos.offset -= sizeof(short_ad); else if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_LONG) epos.offset -= sizeof(long_ad); - } - else + } else offset = 0; - if (!(fibh.sbh = fibh.ebh = udf_tread(dir->i_sb, block))) - { + if (!(fibh.sbh = fibh.ebh = udf_tread(dir->i_sb, block))) { brelse(epos.bh); return 0; } - } - else - { + } else { brelse(epos.bh); return 0; } + while ((f_pos < size)) { + fi = udf_fileident_read(dir, &f_pos, &fibh, &cfi, &epos, &eloc, + &elen, &offset); - while ( (f_pos < size) ) - { - fi = udf_fileident_read(dir, &f_pos, &fibh, &cfi, &epos, &eloc, &elen, &offset); - - if (!fi) - { + if (!fi) { if (fibh.sbh != fibh.ebh) brelse(fibh.ebh); brelse(fibh.sbh); @@ -833,8 +812,8 @@ static int empty_dir(struct inode *dir) return 0; } - if (cfi.lengthFileIdent && (cfi.fileCharacteristics & FID_FILE_CHAR_DELETED) == 0) - { + if (cfi.lengthFileIdent + && (cfi.fileCharacteristics & FID_FILE_CHAR_DELETED) == 0) { if (fibh.sbh != fibh.ebh) brelse(fibh.ebh); brelse(fibh.sbh); @@ -849,10 +828,10 @@ static int empty_dir(struct inode *dir) return 1; } -static int udf_rmdir(struct inode * dir, struct dentry * dentry) +static int udf_rmdir(struct inode *dir, struct dentry *dentry) { int retval; - struct inode * inode = dentry->d_inode; + struct inode *inode = dentry->d_inode; struct udf_fileident_bh fibh; struct fileIdentDesc *fi, cfi; kernel_lb_addr tloc; @@ -875,27 +854,28 @@ static int udf_rmdir(struct inode * dir, struct dentry * dentry) goto end_rmdir; if (inode->i_nlink != 2) udf_warning(inode->i_sb, "udf_rmdir", - "empty directory has nlink != 2 (%d)", - inode->i_nlink); + "empty directory has nlink != 2 (%d)", + inode->i_nlink); clear_nlink(inode); inode->i_size = 0; inode_dec_link_count(dir); - inode->i_ctime = dir->i_ctime = dir->i_mtime = current_fs_time(dir->i_sb); + inode->i_ctime = dir->i_ctime = dir->i_mtime = + current_fs_time(dir->i_sb); mark_inode_dirty(dir); -end_rmdir: + end_rmdir: if (fibh.sbh != fibh.ebh) brelse(fibh.ebh); brelse(fibh.sbh); -out: + out: unlock_kernel(); return retval; } -static int udf_unlink(struct inode * dir, struct dentry * dentry) +static int udf_unlink(struct inode *dir, struct dentry *dentry) { int retval; - struct inode * inode = dentry->d_inode; + struct inode *inode = dentry->d_inode; struct udf_fileident_bh fibh; struct fileIdentDesc *fi; struct fileIdentDesc cfi; @@ -912,10 +892,9 @@ static int udf_unlink(struct inode * dir, struct dentry * dentry) if (udf_get_lb_pblock(dir->i_sb, tloc, 0) != inode->i_ino) goto end_unlink; - if (!inode->i_nlink) - { + if (!inode->i_nlink) { udf_debug("Deleting nonexistent file (%lu), %d\n", - inode->i_ino, inode->i_nlink); + inode->i_ino, inode->i_nlink); inode->i_nlink = 1; } retval = udf_delete_entry(dir, fi, &fibh, &cfi); @@ -927,22 +906,23 @@ static int udf_unlink(struct inode * dir, struct dentry * dentry) inode->i_ctime = dir->i_ctime; retval = 0; -end_unlink: + end_unlink: if (fibh.sbh != fibh.ebh) brelse(fibh.ebh); brelse(fibh.sbh); -out: + out: unlock_kernel(); return retval; } -static int udf_symlink(struct inode * dir, struct dentry * dentry, const char * symname) +static int udf_symlink(struct inode *dir, struct dentry *dentry, + const char *symname) { - struct inode * inode; + struct inode *inode; struct pathComponent *pc; char *compstart; struct udf_fileident_bh fibh; - struct extent_position epos = { NULL, 0, {0, 0}}; + struct extent_position epos = { NULL, 0, {0, 0} }; int eoffset, elen = 0; struct fileIdentDesc *fi; struct fileIdentDesc cfi; @@ -960,28 +940,31 @@ static int udf_symlink(struct inode * dir, struct dentry * dentry, const char * inode->i_data.a_ops = &udf_symlink_aops; inode->i_op = &page_symlink_inode_operations; - if (UDF_I_ALLOCTYPE(inode) != ICBTAG_FLAG_AD_IN_ICB) - { + if (UDF_I_ALLOCTYPE(inode) != ICBTAG_FLAG_AD_IN_ICB) { kernel_lb_addr eloc; uint32_t elen; block = udf_new_block(inode->i_sb, inode, - UDF_I_LOCATION(inode).partitionReferenceNum, - UDF_I_LOCATION(inode).logicalBlockNum, &err); + UDF_I_LOCATION(inode). + partitionReferenceNum, + UDF_I_LOCATION(inode).logicalBlockNum, + &err); if (!block) goto out_no_entry; epos.block = UDF_I_LOCATION(inode); epos.offset = udf_file_entry_alloc_offset(inode); epos.bh = NULL; eloc.logicalBlockNum = block; - eloc.partitionReferenceNum = UDF_I_LOCATION(inode).partitionReferenceNum; + eloc.partitionReferenceNum = + UDF_I_LOCATION(inode).partitionReferenceNum; elen = inode->i_sb->s_blocksize; UDF_I_LENEXTENTS(inode) = elen; udf_add_aext(inode, &epos, eloc, elen, 0); brelse(epos.bh); block = udf_get_pblock(inode->i_sb, block, - UDF_I_LOCATION(inode).partitionReferenceNum, 0); + UDF_I_LOCATION(inode). + partitionReferenceNum, 0); epos.bh = udf_tread(inode->i_sb, block); lock_buffer(epos.bh); memset(epos.bh->b_data, 0x00, inode->i_sb->s_blocksize); @@ -989,17 +972,14 @@ static int udf_symlink(struct inode * dir, struct dentry * dentry, const char * unlock_buffer(epos.bh); mark_buffer_dirty_inode(epos.bh, inode); ea = epos.bh->b_data + udf_ext0_offset(inode); - } - else + } else ea = UDF_I_DATA(inode) + UDF_I_LENEATTR(inode); eoffset = inode->i_sb->s_blocksize - udf_ext0_offset(inode); pc = (struct pathComponent *)ea; - if (*symname == '/') - { - do - { + if (*symname == '/') { + do { symname++; } while (*symname == '/'); @@ -1012,8 +992,7 @@ static int udf_symlink(struct inode * dir, struct dentry * dentry, const char * err = -ENAMETOOLONG; - while (*symname) - { + while (*symname) { if (elen + sizeof(struct pathComponent) > eoffset) goto out_no_entry; @@ -1021,28 +1000,30 @@ static int udf_symlink(struct inode * dir, struct dentry * dentry, const char * compstart = (char *)symname; - do - { + do { symname++; } while (*symname && *symname != '/'); pc->componentType = 5; pc->lengthComponentIdent = 0; pc->componentFileVersionNum = 0; - if (compstart[0] == '.') - { - if ((symname-compstart) == 1) + if (compstart[0] == '.') { + if ((symname - compstart) == 1) pc->componentType = 4; - else if ((symname-compstart) == 2 && compstart[1] == '.') + else if ((symname - compstart) == 2 + && compstart[1] == '.') pc->componentType = 3; } - if (pc->componentType == 5) - { - if ( !(namelen = udf_put_filename(inode->i_sb, compstart, name, symname-compstart))) + if (pc->componentType == 5) { + if (! + (namelen = + udf_put_filename(inode->i_sb, compstart, name, + symname - compstart))) goto out_no_entry; - if (elen + sizeof(struct pathComponent) + namelen > eoffset) + if (elen + sizeof(struct pathComponent) + namelen > + eoffset) goto out_no_entry; else pc->lengthComponentIdent = namelen; @@ -1052,10 +1033,8 @@ static int udf_symlink(struct inode * dir, struct dentry * dentry, const char * elen += sizeof(struct pathComponent) + pc->lengthComponentIdent; - if (*symname) - { - do - { + if (*symname) { + do { symname++; } while (*symname == '/'); } @@ -1071,22 +1050,22 @@ static int udf_symlink(struct inode * dir, struct dentry * dentry, const char * goto out_no_entry; cfi.icb.extLength = cpu_to_le32(inode->i_sb->s_blocksize); cfi.icb.extLocation = cpu_to_lelb(UDF_I_LOCATION(inode)); - if (UDF_SB_LVIDBH(inode->i_sb)) - { + if (UDF_SB_LVIDBH(inode->i_sb)) { struct logicalVolHeaderDesc *lvhd; uint64_t uniqueID; - lvhd = (struct logicalVolHeaderDesc *)(UDF_SB_LVID(inode->i_sb)->logicalVolContentsUse); + lvhd = + (struct logicalVolHeaderDesc *)(UDF_SB_LVID(inode->i_sb)-> + logicalVolContentsUse); uniqueID = le64_to_cpu(lvhd->uniqueID); - *(__le32 *)((struct allocDescImpUse *)cfi.icb.impUse)->impUse = - cpu_to_le32(uniqueID & 0x00000000FFFFFFFFUL); + *(__le32 *) ((struct allocDescImpUse *)cfi.icb.impUse)->impUse = + cpu_to_le32(uniqueID & 0x00000000FFFFFFFFUL); if (!(++uniqueID & 0x00000000FFFFFFFFUL)) uniqueID += 16; lvhd->uniqueID = cpu_to_le64(uniqueID); mark_buffer_dirty(UDF_SB_LVIDBH(inode->i_sb)); } udf_write_fi(dir, &cfi, fi, &fibh, NULL, NULL); - if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_IN_ICB) - { + if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_IN_ICB) { mark_inode_dirty(dir); } if (fibh.sbh != fibh.ebh) @@ -1095,18 +1074,18 @@ static int udf_symlink(struct inode * dir, struct dentry * dentry, const char * d_instantiate(dentry, inode); err = 0; -out: + out: unlock_kernel(); return err; -out_no_entry: + out_no_entry: inode_dec_link_count(inode); iput(inode); goto out; } -static int udf_link(struct dentry * old_dentry, struct inode * dir, - struct dentry *dentry) +static int udf_link(struct dentry *old_dentry, struct inode *dir, + struct dentry *dentry) { struct inode *inode = old_dentry->d_inode; struct udf_fileident_bh fibh; @@ -1114,35 +1093,33 @@ static int udf_link(struct dentry * old_dentry, struct inode * dir, int err; lock_kernel(); - if (inode->i_nlink >= (256<<sizeof(inode->i_nlink))-1) - { + if (inode->i_nlink >= (256 << sizeof(inode->i_nlink)) - 1) { unlock_kernel(); return -EMLINK; } - if (!(fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err))) - { + if (!(fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err))) { unlock_kernel(); return err; } cfi.icb.extLength = cpu_to_le32(inode->i_sb->s_blocksize); cfi.icb.extLocation = cpu_to_lelb(UDF_I_LOCATION(inode)); - if (UDF_SB_LVIDBH(inode->i_sb)) - { + if (UDF_SB_LVIDBH(inode->i_sb)) { struct logicalVolHeaderDesc *lvhd; uint64_t uniqueID; - lvhd = (struct logicalVolHeaderDesc *)(UDF_SB_LVID(inode->i_sb)->logicalVolContentsUse); + lvhd = + (struct logicalVolHeaderDesc *)(UDF_SB_LVID(inode->i_sb)-> + logicalVolContentsUse); uniqueID = le64_to_cpu(lvhd->uniqueID); - *(__le32 *)((struct allocDescImpUse *)cfi.icb.impUse)->impUse = - cpu_to_le32(uniqueID & 0x00000000FFFFFFFFUL); + *(__le32 *) ((struct allocDescImpUse *)cfi.icb.impUse)->impUse = + cpu_to_le32(uniqueID & 0x00000000FFFFFFFFUL); if (!(++uniqueID & 0x00000000FFFFFFFFUL)) uniqueID += 16; lvhd->uniqueID = cpu_to_le64(uniqueID); mark_buffer_dirty(UDF_SB_LVIDBH(inode->i_sb)); } udf_write_fi(dir, &cfi, fi, &fibh, NULL, NULL); - if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_IN_ICB) - { + if (UDF_I_ALLOCTYPE(dir) == ICBTAG_FLAG_AD_IN_ICB) { mark_inode_dirty(dir); } if (fibh.sbh != fibh.ebh) @@ -1160,80 +1137,80 @@ static int udf_link(struct dentry * old_dentry, struct inode * dir, /* Anybody can rename anything with this: the permission checks are left to the * higher-level routines. */ -static int udf_rename (struct inode * old_dir, struct dentry * old_dentry, - struct inode * new_dir, struct dentry * new_dentry) +static int udf_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry) { - struct inode * old_inode = old_dentry->d_inode; - struct inode * new_inode = new_dentry->d_inode; + struct inode *old_inode = old_dentry->d_inode; + struct inode *new_inode = new_dentry->d_inode; struct udf_fileident_bh ofibh, nfibh; - struct fileIdentDesc *ofi = NULL, *nfi = NULL, *dir_fi = NULL, ocfi, ncfi; + struct fileIdentDesc *ofi = NULL, *nfi = NULL, *dir_fi = + NULL, ocfi, ncfi; struct buffer_head *dir_bh = NULL; int retval = -ENOENT; kernel_lb_addr tloc; lock_kernel(); - if ((ofi = udf_find_entry(old_dir, old_dentry, &ofibh, &ocfi))) - { + if ((ofi = udf_find_entry(old_dir, old_dentry, &ofibh, &ocfi))) { if (ofibh.sbh != ofibh.ebh) brelse(ofibh.ebh); brelse(ofibh.sbh); } tloc = lelb_to_cpu(ocfi.icb.extLocation); if (!ofi || udf_get_lb_pblock(old_dir->i_sb, tloc, 0) - != old_inode->i_ino) + != old_inode->i_ino) goto end_rename; nfi = udf_find_entry(new_dir, new_dentry, &nfibh, &ncfi); - if (nfi) - { - if (!new_inode) - { + if (nfi) { + if (!new_inode) { if (nfibh.sbh != nfibh.ebh) brelse(nfibh.ebh); brelse(nfibh.sbh); nfi = NULL; } } - if (S_ISDIR(old_inode->i_mode)) - { + if (S_ISDIR(old_inode->i_mode)) { uint32_t offset = udf_ext0_offset(old_inode); - if (new_inode) - { + if (new_inode) { retval = -ENOTEMPTY; if (!empty_dir(new_inode)) goto end_rename; } retval = -EIO; - if (UDF_I_ALLOCTYPE(old_inode) == ICBTAG_FLAG_AD_IN_ICB) - { + if (UDF_I_ALLOCTYPE(old_inode) == ICBTAG_FLAG_AD_IN_ICB) { dir_fi = udf_get_fileident(UDF_I_DATA(old_inode) - - (UDF_I_EFE(old_inode) ? - sizeof(struct extendedFileEntry) : - sizeof(struct fileEntry)), - old_inode->i_sb->s_blocksize, &offset); - } - else - { + (UDF_I_EFE(old_inode) ? + sizeof(struct + extendedFileEntry) : + sizeof(struct fileEntry)), + old_inode->i_sb->s_blocksize, + &offset); + } else { dir_bh = udf_bread(old_inode, 0, 0, &retval); if (!dir_bh) goto end_rename; - dir_fi = udf_get_fileident(dir_bh->b_data, old_inode->i_sb->s_blocksize, &offset); + dir_fi = + udf_get_fileident(dir_bh->b_data, + old_inode->i_sb->s_blocksize, + &offset); } if (!dir_fi) goto end_rename; tloc = lelb_to_cpu(dir_fi->icb.extLocation); if (udf_get_lb_pblock(old_inode->i_sb, tloc, 0) - != old_dir->i_ino) + != old_dir->i_ino) goto end_rename; retval = -EMLINK; - if (!new_inode && new_dir->i_nlink >= (256<<sizeof(new_dir->i_nlink))-1) + if (!new_inode + && new_dir->i_nlink >= + (256 << sizeof(new_dir->i_nlink)) - 1) goto end_rename; } - if (!nfi) - { - nfi = udf_add_entry(new_dir, new_dentry, &nfibh, &ncfi, &retval); + if (!nfi) { + nfi = + udf_add_entry(new_dir, new_dentry, &nfibh, &ncfi, &retval); if (!nfi) goto end_rename; } @@ -1257,39 +1234,33 @@ static int udf_rename (struct inode * old_dir, struct dentry * old_dentry, ofi = udf_find_entry(old_dir, old_dentry, &ofibh, &ocfi); udf_delete_entry(old_dir, ofi, &ofibh, &ocfi); - if (new_inode) - { + if (new_inode) { new_inode->i_ctime = current_fs_time(new_inode->i_sb); inode_dec_link_count(new_inode); } old_dir->i_ctime = old_dir->i_mtime = current_fs_time(old_dir->i_sb); mark_inode_dirty(old_dir); - if (dir_fi) - { + if (dir_fi) { dir_fi->icb.extLocation = cpu_to_lelb(UDF_I_LOCATION(new_dir)); udf_update_tag((char *)dir_fi, (sizeof(struct fileIdentDesc) + - le16_to_cpu(dir_fi->lengthOfImpUse) + 3) & ~3); - if (UDF_I_ALLOCTYPE(old_inode) == ICBTAG_FLAG_AD_IN_ICB) - { + le16_to_cpu(dir_fi-> + lengthOfImpUse) + + 3) & ~3); + if (UDF_I_ALLOCTYPE(old_inode) == ICBTAG_FLAG_AD_IN_ICB) { mark_inode_dirty(old_inode); - } - else + } else mark_buffer_dirty_inode(dir_bh, old_inode); inode_dec_link_count(old_dir); - if (new_inode) - { + if (new_inode) { inode_dec_link_count(new_inode); - } - else - { + } else { inc_nlink(new_dir); mark_inode_dirty(new_dir); } } - if (ofi) - { + if (ofi) { if (ofibh.sbh != ofibh.ebh) brelse(ofibh.ebh); brelse(ofibh.sbh); @@ -1297,10 +1268,9 @@ static int udf_rename (struct inode * old_dir, struct dentry * old_dentry, retval = 0; -end_rename: + end_rename: brelse(dir_bh); - if (nfi) - { + if (nfi) { if (nfibh.sbh != nfibh.ebh) brelse(nfibh.ebh); brelse(nfibh.sbh); @@ -1310,13 +1280,13 @@ end_rename: } const struct inode_operations udf_dir_inode_operations = { - .lookup = udf_lookup, - .create = udf_create, - .link = udf_link, - .unlink = udf_unlink, - .symlink = udf_symlink, - .mkdir = udf_mkdir, - .rmdir = udf_rmdir, - .mknod = udf_mknod, - .rename = udf_rename, + .lookup = udf_lookup, + .create = udf_create, + .link = udf_link, + .unlink = udf_unlink, + .symlink = udf_symlink, + .mkdir = udf_mkdir, + .rmdir = udf_rmdir, + .mknod = udf_mknod, + .rename = udf_rename, }; diff --git a/fs/udf/osta_udf.h b/fs/udf/osta_udf.h index e82aae652697..bec5d340d8c5 100644 --- a/fs/udf/osta_udf.h +++ b/fs/udf/osta_udf.h @@ -65,153 +65,140 @@ #define IS_DF_HARD_WRITE_PROTECT 0x01 #define IS_DF_SOFT_WRITE_PROTECT 0x02 -struct UDFIdentSuffix -{ - __le16 UDFRevision; - uint8_t OSClass; - uint8_t OSIdentifier; - uint8_t reserved[4]; +struct UDFIdentSuffix { + __le16 UDFRevision; + uint8_t OSClass; + uint8_t OSIdentifier; + uint8_t reserved[4]; } __attribute__ ((packed)); -struct impIdentSuffix -{ - uint8_t OSClass; - uint8_t OSIdentifier; - uint8_t reserved[6]; +struct impIdentSuffix { + uint8_t OSClass; + uint8_t OSIdentifier; + uint8_t reserved[6]; } __attribute__ ((packed)); -struct appIdentSuffix -{ - uint8_t impUse[8]; +struct appIdentSuffix { + uint8_t impUse[8]; } __attribute__ ((packed)); /* Logical Volume Integrity Descriptor (UDF 2.50 2.2.6) */ /* Implementation Use (UDF 2.50 2.2.6.4) */ -struct logicalVolIntegrityDescImpUse -{ - regid impIdent; - __le32 numFiles; - __le32 numDirs; - __le16 minUDFReadRev; - __le16 minUDFWriteRev; - __le16 maxUDFWriteRev; - uint8_t impUse[0]; +struct logicalVolIntegrityDescImpUse { + regid impIdent; + __le32 numFiles; + __le32 numDirs; + __le16 minUDFReadRev; + __le16 minUDFWriteRev; + __le16 maxUDFWriteRev; + uint8_t impUse[0]; } __attribute__ ((packed)); /* Implementation Use Volume Descriptor (UDF 2.50 2.2.7) */ /* Implementation Use (UDF 2.50 2.2.7.2) */ -struct impUseVolDescImpUse -{ - charspec LVICharset; - dstring logicalVolIdent[128]; - dstring LVInfo1[36]; - dstring LVInfo2[36]; - dstring LVInfo3[36]; - regid impIdent; - uint8_t impUse[128]; +struct impUseVolDescImpUse { + charspec LVICharset; + dstring logicalVolIdent[128]; + dstring LVInfo1[36]; + dstring LVInfo2[36]; + dstring LVInfo3[36]; + regid impIdent; + uint8_t impUse[128]; } __attribute__ ((packed)); -struct udfPartitionMap2 -{ - uint8_t partitionMapType; - uint8_t partitionMapLength; - uint8_t reserved1[2]; - regid partIdent; - __le16 volSeqNum; - __le16 partitionNum; +struct udfPartitionMap2 { + uint8_t partitionMapType; + uint8_t partitionMapLength; + uint8_t reserved1[2]; + regid partIdent; + __le16 volSeqNum; + __le16 partitionNum; } __attribute__ ((packed)); /* Virtual Partition Map (UDF 2.50 2.2.8) */ -struct virtualPartitionMap -{ - uint8_t partitionMapType; - uint8_t partitionMapLength; - uint8_t reserved1[2]; - regid partIdent; - __le16 volSeqNum; - __le16 partitionNum; - uint8_t reserved2[24]; +struct virtualPartitionMap { + uint8_t partitionMapType; + uint8_t partitionMapLength; + uint8_t reserved1[2]; + regid partIdent; + __le16 volSeqNum; + __le16 partitionNum; + uint8_t reserved2[24]; } __attribute__ ((packed)); /* Sparable Partition Map (UDF 2.50 2.2.9) */ -struct sparablePartitionMap -{ - uint8_t partitionMapType; - uint8_t partitionMapLength; - uint8_t reserved1[2]; - regid partIdent; - __le16 volSeqNum; - __le16 partitionNum; - __le16 packetLength; - uint8_t numSparingTables; - uint8_t reserved2[1]; - __le32 sizeSparingTable; - __le32 locSparingTable[4]; +struct sparablePartitionMap { + uint8_t partitionMapType; + uint8_t partitionMapLength; + uint8_t reserved1[2]; + regid partIdent; + __le16 volSeqNum; + __le16 partitionNum; + __le16 packetLength; + uint8_t numSparingTables; + uint8_t reserved2[1]; + __le32 sizeSparingTable; + __le32 locSparingTable[4]; } __attribute__ ((packed)); /* Metadata Partition Map (UDF 2.4.0 2.2.10) */ -struct metadataPartitionMap -{ - uint8_t partitionMapType; - uint8_t partitionMapLength; - uint8_t reserved1[2]; - regid partIdent; - __le16 volSeqNum; - __le16 partitionNum; - __le32 metadataFileLoc; - __le32 metadataMirrorFileLoc; - __le32 metadataBitmapFileLoc; - __le32 allocUnitSize; - __le16 alignUnitSize; - uint8_t flags; - uint8_t reserved2[5]; +struct metadataPartitionMap { + uint8_t partitionMapType; + uint8_t partitionMapLength; + uint8_t reserved1[2]; + regid partIdent; + __le16 volSeqNum; + __le16 partitionNum; + __le32 metadataFileLoc; + __le32 metadataMirrorFileLoc; + __le32 metadataBitmapFileLoc; + __le32 allocUnitSize; + __le16 alignUnitSize; + uint8_t flags; + uint8_t reserved2[5]; } __attribute__ ((packed)); /* Virtual Allocation Table (UDF 1.5 2.2.10) */ -struct virtualAllocationTable15 -{ - __le32 VirtualSector[0]; - regid vatIdent; - __le32 previousVATICBLoc; -} __attribute__ ((packed)); +struct virtualAllocationTable15 { + __le32 VirtualSector[0]; + regid vatIdent; + __le32 previousVATICBLoc; +} __attribute__ ((packed)); #define ICBTAG_FILE_TYPE_VAT15 0x00U /* Virtual Allocation Table (UDF 2.50 2.2.11) */ -struct virtualAllocationTable20 -{ - __le16 lengthHeader; - __le16 lengthImpUse; - dstring logicalVolIdent[128]; - __le32 previousVATICBLoc; - __le32 numFiles; - __le32 numDirs; - __le16 minReadRevision; - __le16 minWriteRevision; - __le16 maxWriteRevision; - __le16 reserved; - uint8_t impUse[0]; - __le32 vatEntry[0]; +struct virtualAllocationTable20 { + __le16 lengthHeader; + __le16 lengthImpUse; + dstring logicalVolIdent[128]; + __le32 previousVATICBLoc; + __le32 numFiles; + __le32 numDirs; + __le16 minReadRevision; + __le16 minWriteRevision; + __le16 maxWriteRevision; + __le16 reserved; + uint8_t impUse[0]; + __le32 vatEntry[0]; } __attribute__ ((packed)); #define ICBTAG_FILE_TYPE_VAT20 0xF8U /* Sparing Table (UDF 2.50 2.2.12) */ -struct sparingEntry -{ - __le32 origLocation; - __le32 mappedLocation; +struct sparingEntry { + __le32 origLocation; + __le32 mappedLocation; } __attribute__ ((packed)); -struct sparingTable -{ - tag descTag; - regid sparingIdent; - __le16 reallocationTableLen; - __le16 reserved; - __le32 sequenceNum; +struct sparingTable { + tag descTag; + regid sparingIdent; + __le16 reallocationTableLen; + __le16 reserved; + __le32 sequenceNum; struct sparingEntry - mapEntry[0]; + mapEntry[0]; } __attribute__ ((packed)); /* Metadata File (and Metadata Mirror File) (UDF 2.50 2.2.13.1) */ @@ -220,10 +207,9 @@ struct sparingTable #define ICBTAG_FILE_TYPE_BITMAP 0xFC /* struct long_ad ICB - ADImpUse (UDF 2.50 2.2.4.3) */ -struct allocDescImpUse -{ - __le16 flags; - uint8_t impUse[4]; +struct allocDescImpUse { + __le16 flags; + uint8_t impUse[4]; } __attribute__ ((packed)); #define AD_IU_EXT_ERASED 0x0001 @@ -233,27 +219,24 @@ struct allocDescImpUse /* Implementation Use Extended Attribute (UDF 2.50 3.3.4.5) */ /* FreeEASpace (UDF 2.50 3.3.4.5.1.1) */ -struct freeEaSpace -{ - __le16 headerChecksum; - uint8_t freeEASpace[0]; +struct freeEaSpace { + __le16 headerChecksum; + uint8_t freeEASpace[0]; } __attribute__ ((packed)); /* DVD Copyright Management Information (UDF 2.50 3.3.4.5.1.2) */ -struct DVDCopyrightImpUse -{ - __le16 headerChecksum; - uint8_t CGMSInfo; - uint8_t dataType; - uint8_t protectionSystemInfo[4]; +struct DVDCopyrightImpUse { + __le16 headerChecksum; + uint8_t CGMSInfo; + uint8_t dataType; + uint8_t protectionSystemInfo[4]; } __attribute__ ((packed)); /* Application Use Extended Attribute (UDF 2.50 3.3.4.6) */ /* FreeAppEASpace (UDF 2.50 3.3.4.6.1) */ -struct freeAppEASpace -{ - __le16 headerChecksum; - uint8_t freeEASpace[0]; +struct freeAppEASpace { + __le16 headerChecksum; + uint8_t freeEASpace[0]; } __attribute__ ((packed)); /* UDF Defined System Stream (UDF 2.50 3.3.7) */ @@ -293,4 +276,4 @@ struct freeAppEASpace #define UDF_OS_ID_BEOS 0x00U #define UDF_OS_ID_WINCE 0x00U -#endif /* _OSTA_UDF_H */ +#endif /* _OSTA_UDF_H */ diff --git a/fs/udf/partition.c b/fs/udf/partition.c index 467a26171cd9..a95d830a674d 100644 --- a/fs/udf/partition.c +++ b/fs/udf/partition.c @@ -28,106 +28,120 @@ #include <linux/slab.h> #include <linux/buffer_head.h> -inline uint32_t udf_get_pblock(struct super_block *sb, uint32_t block, uint16_t partition, uint32_t offset) +inline uint32_t udf_get_pblock(struct super_block *sb, uint32_t block, + uint16_t partition, uint32_t offset) { - if (partition >= UDF_SB_NUMPARTS(sb)) - { - udf_debug("block=%d, partition=%d, offset=%d: invalid partition\n", - block, partition, offset); + if (partition >= UDF_SB_NUMPARTS(sb)) { + udf_debug + ("block=%d, partition=%d, offset=%d: invalid partition\n", + block, partition, offset); return 0xFFFFFFFF; } if (UDF_SB_PARTFUNC(sb, partition)) - return UDF_SB_PARTFUNC(sb, partition)(sb, block, partition, offset); + return UDF_SB_PARTFUNC(sb, partition) (sb, block, partition, + offset); else return UDF_SB_PARTROOT(sb, partition) + block + offset; } -uint32_t udf_get_pblock_virt15(struct super_block *sb, uint32_t block, uint16_t partition, uint32_t offset) +uint32_t udf_get_pblock_virt15(struct super_block * sb, uint32_t block, + uint16_t partition, uint32_t offset) { struct buffer_head *bh = NULL; uint32_t newblock; uint32_t index; uint32_t loc; - index = (sb->s_blocksize - UDF_SB_TYPEVIRT(sb,partition).s_start_offset) / sizeof(uint32_t); + index = + (sb->s_blocksize - + UDF_SB_TYPEVIRT(sb, partition).s_start_offset) / sizeof(uint32_t); - if (block > UDF_SB_TYPEVIRT(sb,partition).s_num_entries) - { - udf_debug("Trying to access block beyond end of VAT (%d max %d)\n", - block, UDF_SB_TYPEVIRT(sb,partition).s_num_entries); + if (block > UDF_SB_TYPEVIRT(sb, partition).s_num_entries) { + udf_debug + ("Trying to access block beyond end of VAT (%d max %d)\n", + block, UDF_SB_TYPEVIRT(sb, partition).s_num_entries); return 0xFFFFFFFF; } - if (block >= index) - { + if (block >= index) { block -= index; newblock = 1 + (block / (sb->s_blocksize / sizeof(uint32_t))); index = block % (sb->s_blocksize / sizeof(uint32_t)); - } - else - { + } else { newblock = 0; - index = UDF_SB_TYPEVIRT(sb,partition).s_start_offset / sizeof(uint32_t) + block; + index = + UDF_SB_TYPEVIRT(sb, + partition).s_start_offset / + sizeof(uint32_t) + block; } loc = udf_block_map(UDF_SB_VAT(sb), newblock); - if (!(bh = sb_bread(sb, loc))) - { + if (!(bh = sb_bread(sb, loc))) { udf_debug("get_pblock(UDF_VIRTUAL_MAP:%p,%d,%d) VAT: %d[%d]\n", - sb, block, partition, loc, index); + sb, block, partition, loc, index); return 0xFFFFFFFF; } - loc = le32_to_cpu(((__le32 *)bh->b_data)[index]); + loc = le32_to_cpu(((__le32 *) bh->b_data)[index]); brelse(bh); - if (UDF_I_LOCATION(UDF_SB_VAT(sb)).partitionReferenceNum == partition) - { + if (UDF_I_LOCATION(UDF_SB_VAT(sb)).partitionReferenceNum == partition) { udf_debug("recursive call to udf_get_pblock!\n"); return 0xFFFFFFFF; } - return udf_get_pblock(sb, loc, UDF_I_LOCATION(UDF_SB_VAT(sb)).partitionReferenceNum, offset); + return udf_get_pblock(sb, loc, + UDF_I_LOCATION(UDF_SB_VAT(sb)). + partitionReferenceNum, offset); } -inline uint32_t udf_get_pblock_virt20(struct super_block *sb, uint32_t block, uint16_t partition, uint32_t offset) +inline uint32_t udf_get_pblock_virt20(struct super_block * sb, uint32_t block, + uint16_t partition, uint32_t offset) { return udf_get_pblock_virt15(sb, block, partition, offset); } -uint32_t udf_get_pblock_spar15(struct super_block *sb, uint32_t block, uint16_t partition, uint32_t offset) +uint32_t udf_get_pblock_spar15(struct super_block * sb, uint32_t block, + uint16_t partition, uint32_t offset) { int i; struct sparingTable *st = NULL; - uint32_t packet = (block + offset) & ~(UDF_SB_TYPESPAR(sb,partition).s_packet_len - 1); + uint32_t packet = + (block + offset) & ~(UDF_SB_TYPESPAR(sb, partition).s_packet_len - + 1); - for (i=0; i<4; i++) - { - if (UDF_SB_TYPESPAR(sb,partition).s_spar_map[i] != NULL) - { - st = (struct sparingTable *)UDF_SB_TYPESPAR(sb,partition).s_spar_map[i]->b_data; + for (i = 0; i < 4; i++) { + if (UDF_SB_TYPESPAR(sb, partition).s_spar_map[i] != NULL) { + st = (struct sparingTable *)UDF_SB_TYPESPAR(sb, + partition). + s_spar_map[i]->b_data; break; } } - if (st) - { - for (i=0; i<le16_to_cpu(st->reallocationTableLen); i++) - { - if (le32_to_cpu(st->mapEntry[i].origLocation) >= 0xFFFFFFF0) + if (st) { + for (i = 0; i < le16_to_cpu(st->reallocationTableLen); i++) { + if (le32_to_cpu(st->mapEntry[i].origLocation) >= + 0xFFFFFFF0) break; - else if (le32_to_cpu(st->mapEntry[i].origLocation) == packet) - { - return le32_to_cpu(st->mapEntry[i].mappedLocation) + - ((block + offset) & (UDF_SB_TYPESPAR(sb,partition).s_packet_len - 1)); - } - else if (le32_to_cpu(st->mapEntry[i].origLocation) > packet) + else if (le32_to_cpu(st->mapEntry[i].origLocation) == + packet) { + return le32_to_cpu(st->mapEntry[i]. + mappedLocation) + ((block + + offset) & + (UDF_SB_TYPESPAR + (sb, + partition). + s_packet_len + - 1)); + } else if (le32_to_cpu(st->mapEntry[i].origLocation) > + packet) break; } } - return UDF_SB_PARTROOT(sb,partition) + block + offset; + return UDF_SB_PARTROOT(sb, partition) + block + offset; } int udf_relocate_blocks(struct super_block *sb, long old_block, long *new_block) @@ -138,19 +152,21 @@ int udf_relocate_blocks(struct super_block *sb, long old_block, long *new_block) uint32_t packet; int i, j, k, l; - for (i=0; i<UDF_SB_NUMPARTS(sb); i++) - { - if (old_block > UDF_SB_PARTROOT(sb,i) && - old_block < UDF_SB_PARTROOT(sb,i) + UDF_SB_PARTLEN(sb,i)) + for (i = 0; i < UDF_SB_NUMPARTS(sb); i++) { + if (old_block > UDF_SB_PARTROOT(sb, i) && + old_block < UDF_SB_PARTROOT(sb, i) + UDF_SB_PARTLEN(sb, i)) { - sdata = &UDF_SB_TYPESPAR(sb,i); - packet = (old_block - UDF_SB_PARTROOT(sb,i)) & ~(sdata->s_packet_len - 1); - - for (j=0; j<4; j++) - { - if (UDF_SB_TYPESPAR(sb,i).s_spar_map[j] != NULL) - { - st = (struct sparingTable *)sdata->s_spar_map[j]->b_data; + sdata = &UDF_SB_TYPESPAR(sb, i); + packet = + (old_block - + UDF_SB_PARTROOT(sb, + i)) & ~(sdata->s_packet_len - 1); + + for (j = 0; j < 4; j++) { + if (UDF_SB_TYPESPAR(sb, i).s_spar_map[j] != + NULL) { + st = (struct sparingTable *)sdata-> + s_spar_map[j]->b_data; break; } } @@ -158,60 +174,123 @@ int udf_relocate_blocks(struct super_block *sb, long old_block, long *new_block) if (!st) return 1; - for (k=0; k<le16_to_cpu(st->reallocationTableLen); k++) - { - if (le32_to_cpu(st->mapEntry[k].origLocation) == 0xFFFFFFFF) - { - for (; j<4; j++) - { - if (sdata->s_spar_map[j]) - { - st = (struct sparingTable *)sdata->s_spar_map[j]->b_data; - st->mapEntry[k].origLocation = cpu_to_le32(packet); - udf_update_tag((char *)st, sizeof(struct sparingTable) + le16_to_cpu(st->reallocationTableLen) * sizeof(struct sparingEntry)); - mark_buffer_dirty(sdata->s_spar_map[j]); + for (k = 0; k < le16_to_cpu(st->reallocationTableLen); + k++) { + if (le32_to_cpu(st->mapEntry[k].origLocation) == + 0xFFFFFFFF) { + for (; j < 4; j++) { + if (sdata->s_spar_map[j]) { + st = (struct + sparingTable *) + sdata-> + s_spar_map[j]-> + b_data; + st->mapEntry[k]. + origLocation = + cpu_to_le32(packet); + udf_update_tag((char *) + st, + sizeof + (struct + sparingTable) + + + le16_to_cpu + (st-> + reallocationTableLen) + * + sizeof + (struct + sparingEntry)); + mark_buffer_dirty + (sdata-> + s_spar_map[j]); } } - *new_block = le32_to_cpu(st->mapEntry[k].mappedLocation) + - ((old_block - UDF_SB_PARTROOT(sb,i)) & (sdata->s_packet_len - 1)); + *new_block = + le32_to_cpu(st->mapEntry[k]. + mappedLocation) + + ((old_block - + UDF_SB_PARTROOT(sb, + i)) & (sdata-> + s_packet_len + - 1)); return 0; - } - else if (le32_to_cpu(st->mapEntry[k].origLocation) == packet) - { - *new_block = le32_to_cpu(st->mapEntry[k].mappedLocation) + - ((old_block - UDF_SB_PARTROOT(sb,i)) & (sdata->s_packet_len - 1)); + } else + if (le32_to_cpu + (st->mapEntry[k].origLocation) == + packet) { + *new_block = + le32_to_cpu(st->mapEntry[k]. + mappedLocation) + + ((old_block - + UDF_SB_PARTROOT(sb, + i)) & (sdata-> + s_packet_len + - 1)); return 0; - } - else if (le32_to_cpu(st->mapEntry[k].origLocation) > packet) + } else + if (le32_to_cpu + (st->mapEntry[k].origLocation) > packet) break; } - for (l=k; l<le16_to_cpu(st->reallocationTableLen); l++) - { - if (le32_to_cpu(st->mapEntry[l].origLocation) == 0xFFFFFFFF) - { - for (; j<4; j++) - { - if (sdata->s_spar_map[j]) - { - st = (struct sparingTable *)sdata->s_spar_map[j]->b_data; - mapEntry = st->mapEntry[l]; - mapEntry.origLocation = cpu_to_le32(packet); - memmove(&st->mapEntry[k+1], &st->mapEntry[k], (l-k)*sizeof(struct sparingEntry)); - st->mapEntry[k] = mapEntry; - udf_update_tag((char *)st, sizeof(struct sparingTable) + le16_to_cpu(st->reallocationTableLen) * sizeof(struct sparingEntry)); - mark_buffer_dirty(sdata->s_spar_map[j]); + for (l = k; l < le16_to_cpu(st->reallocationTableLen); + l++) { + if (le32_to_cpu(st->mapEntry[l].origLocation) == + 0xFFFFFFFF) { + for (; j < 4; j++) { + if (sdata->s_spar_map[j]) { + st = (struct + sparingTable *) + sdata-> + s_spar_map[j]-> + b_data; + mapEntry = + st->mapEntry[l]; + mapEntry.origLocation = + cpu_to_le32(packet); + memmove(&st-> + mapEntry[k + 1], + &st-> + mapEntry[k], + (l - + k) * + sizeof(struct + sparingEntry)); + st->mapEntry[k] = + mapEntry; + udf_update_tag((char *) + st, + sizeof + (struct + sparingTable) + + + le16_to_cpu + (st-> + reallocationTableLen) + * + sizeof + (struct + sparingEntry)); + mark_buffer_dirty + (sdata-> + s_spar_map[j]); } } - *new_block = le32_to_cpu(st->mapEntry[k].mappedLocation) + - ((old_block - UDF_SB_PARTROOT(sb,i)) & (sdata->s_packet_len - 1)); + *new_block = + le32_to_cpu(st->mapEntry[k]. + mappedLocation) + + ((old_block - + UDF_SB_PARTROOT(sb, + i)) & (sdata-> + s_packet_len + - 1)); return 0; } } return 1; } } - if (i == UDF_SB_NUMPARTS(sb)) - { + if (i == UDF_SB_NUMPARTS(sb)) { /* outside of partitions */ /* for now, fail =) */ return 1; diff --git a/fs/udf/super.c b/fs/udf/super.c index d6a504f5d758..911387aa1810 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -38,7 +38,7 @@ * 12/20/98 find the free space bitmap (if it exists) */ -#include "udfdecl.h" +#include "udfdecl.h" #include <linux/blkdev.h> #include <linux/slab.h> @@ -80,12 +80,15 @@ static int udf_remount_fs(struct super_block *, int *, char *); static int udf_check_valid(struct super_block *, int, int); static int udf_vrs(struct super_block *sb, int silent); static int udf_load_partition(struct super_block *, kernel_lb_addr *); -static int udf_load_logicalvol(struct super_block *, struct buffer_head *, kernel_lb_addr *); +static int udf_load_logicalvol(struct super_block *, struct buffer_head *, + kernel_lb_addr *); static void udf_load_logicalvolint(struct super_block *, kernel_extent_ad); static void udf_find_anchor(struct super_block *); -static int udf_find_fileset(struct super_block *, kernel_lb_addr *, kernel_lb_addr *); +static int udf_find_fileset(struct super_block *, kernel_lb_addr *, + kernel_lb_addr *); static void udf_load_pvoldesc(struct super_block *, struct buffer_head *); -static void udf_load_fileset(struct super_block *, struct buffer_head *, kernel_lb_addr *); +static void udf_load_fileset(struct super_block *, struct buffer_head *, + kernel_lb_addr *); static void udf_load_partdesc(struct super_block *, struct buffer_head *); static void udf_open_lvid(struct super_block *); static void udf_close_lvid(struct super_block *); @@ -94,25 +97,27 @@ static int udf_statfs(struct dentry *, struct kstatfs *); /* UDF filesystem type */ static int udf_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) + int flags, const char *dev_name, void *data, + struct vfsmount *mnt) { return get_sb_bdev(fs_type, flags, dev_name, data, udf_fill_super, mnt); } static struct file_system_type udf_fstype = { - .owner = THIS_MODULE, - .name = "udf", - .get_sb = udf_get_sb, - .kill_sb = kill_block_super, - .fs_flags = FS_REQUIRES_DEV, + .owner = THIS_MODULE, + .name = "udf", + .get_sb = udf_get_sb, + .kill_sb = kill_block_super, + .fs_flags = FS_REQUIRES_DEV, }; -static struct kmem_cache * udf_inode_cachep; +static struct kmem_cache *udf_inode_cachep; static struct inode *udf_alloc_inode(struct super_block *sb) { struct udf_inode_info *ei; - ei = (struct udf_inode_info *)kmem_cache_alloc(udf_inode_cachep, GFP_KERNEL); + ei = (struct udf_inode_info *)kmem_cache_alloc(udf_inode_cachep, + GFP_KERNEL); if (!ei) return NULL; @@ -130,9 +135,9 @@ static void udf_destroy_inode(struct inode *inode) kmem_cache_free(udf_inode_cachep, UDF_I(inode)); } -static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) +static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags) { - struct udf_inode_info *ei = (struct udf_inode_info *) foo; + struct udf_inode_info *ei = (struct udf_inode_info *)foo; ei->i_ext.i_data = NULL; inode_init_once(&ei->vfs_inode); @@ -142,8 +147,8 @@ static int init_inodecache(void) { udf_inode_cachep = kmem_cache_create("udf_inode_cache", sizeof(struct udf_inode_info), - 0, (SLAB_RECLAIM_ACCOUNT| - SLAB_MEM_SPREAD), + 0, (SLAB_RECLAIM_ACCOUNT | + SLAB_MEM_SPREAD), init_once, NULL); if (udf_inode_cachep == NULL) return -ENOMEM; @@ -157,19 +162,18 @@ static void destroy_inodecache(void) /* Superblock operations */ static const struct super_operations udf_sb_ops = { - .alloc_inode = udf_alloc_inode, - .destroy_inode = udf_destroy_inode, - .write_inode = udf_write_inode, - .delete_inode = udf_delete_inode, - .clear_inode = udf_clear_inode, - .put_super = udf_put_super, - .write_super = udf_write_super, - .statfs = udf_statfs, - .remount_fs = udf_remount_fs, + .alloc_inode = udf_alloc_inode, + .destroy_inode = udf_destroy_inode, + .write_inode = udf_write_inode, + .delete_inode = udf_delete_inode, + .clear_inode = udf_clear_inode, + .put_super = udf_put_super, + .write_super = udf_write_super, + .statfs = udf_statfs, + .remount_fs = udf_remount_fs, }; -struct udf_options -{ +struct udf_options { unsigned char novrs; unsigned int blocksize; unsigned int session; @@ -196,9 +200,9 @@ static int __init init_udf_fs(void) if (err) goto out; return 0; -out: + out: destroy_inodecache(); -out1: + out1: return err; } @@ -209,7 +213,7 @@ static void __exit exit_udf_fs(void) } module_init(init_udf_fs) -module_exit(exit_udf_fs) + module_exit(exit_udf_fs) /* * udf_parse_options @@ -264,7 +268,6 @@ module_exit(exit_udf_fs) * July 1, 1997 - Andrew E. Mileski * Written, tested, and released. */ - enum { Opt_novrs, Opt_nostrict, Opt_bs, Opt_unhide, Opt_undelete, Opt_noadinicb, Opt_adinicb, Opt_shortad, Opt_longad, @@ -303,8 +306,7 @@ static match_table_t tokens = { {Opt_err, NULL} }; -static int -udf_parse_options(char *options, struct udf_options *uopt) +static int udf_parse_options(char *options, struct udf_options *uopt) { char *p; int option; @@ -323,126 +325,123 @@ udf_parse_options(char *options, struct udf_options *uopt) if (!options) return 1; - while ((p = strsep(&options, ",")) != NULL) - { + while ((p = strsep(&options, ",")) != NULL) { substring_t args[MAX_OPT_ARGS]; int token; if (!*p) continue; token = match_token(p, tokens, args); - switch (token) - { - case Opt_novrs: - uopt->novrs = 1; - case Opt_bs: - if (match_int(&args[0], &option)) - return 0; - uopt->blocksize = option; - break; - case Opt_unhide: - uopt->flags |= (1 << UDF_FLAG_UNHIDE); - break; - case Opt_undelete: - uopt->flags |= (1 << UDF_FLAG_UNDELETE); - break; - case Opt_noadinicb: - uopt->flags &= ~(1 << UDF_FLAG_USE_AD_IN_ICB); - break; - case Opt_adinicb: - uopt->flags |= (1 << UDF_FLAG_USE_AD_IN_ICB); - break; - case Opt_shortad: - uopt->flags |= (1 << UDF_FLAG_USE_SHORT_AD); - break; - case Opt_longad: - uopt->flags &= ~(1 << UDF_FLAG_USE_SHORT_AD); - break; - case Opt_gid: - if (match_int(args, &option)) - return 0; - uopt->gid = option; - break; - case Opt_uid: - if (match_int(args, &option)) - return 0; - uopt->uid = option; - break; - case Opt_umask: - if (match_octal(args, &option)) - return 0; - uopt->umask = option; - break; - case Opt_nostrict: - uopt->flags &= ~(1 << UDF_FLAG_STRICT); - break; - case Opt_session: - if (match_int(args, &option)) - return 0; - uopt->session = option; - break; - case Opt_lastblock: - if (match_int(args, &option)) - return 0; - uopt->lastblock = option; - break; - case Opt_anchor: - if (match_int(args, &option)) - return 0; - uopt->anchor = option; - break; - case Opt_volume: - if (match_int(args, &option)) - return 0; - uopt->volume = option; - break; - case Opt_partition: - if (match_int(args, &option)) - return 0; - uopt->partition = option; - break; - case Opt_fileset: - if (match_int(args, &option)) - return 0; - uopt->fileset = option; - break; - case Opt_rootdir: - if (match_int(args, &option)) - return 0; - uopt->rootdir = option; - break; - case Opt_utf8: - uopt->flags |= (1 << UDF_FLAG_UTF8); - break; + switch (token) { + case Opt_novrs: + uopt->novrs = 1; + case Opt_bs: + if (match_int(&args[0], &option)) + return 0; + uopt->blocksize = option; + break; + case Opt_unhide: + uopt->flags |= (1 << UDF_FLAG_UNHIDE); + break; + case Opt_undelete: + uopt->flags |= (1 << UDF_FLAG_UNDELETE); + break; + case Opt_noadinicb: + uopt->flags &= ~(1 << UDF_FLAG_USE_AD_IN_ICB); + break; + case Opt_adinicb: + uopt->flags |= (1 << UDF_FLAG_USE_AD_IN_ICB); + break; + case Opt_shortad: + uopt->flags |= (1 << UDF_FLAG_USE_SHORT_AD); + break; + case Opt_longad: + uopt->flags &= ~(1 << UDF_FLAG_USE_SHORT_AD); + break; + case Opt_gid: + if (match_int(args, &option)) + return 0; + uopt->gid = option; + break; + case Opt_uid: + if (match_int(args, &option)) + return 0; + uopt->uid = option; + break; + case Opt_umask: + if (match_octal(args, &option)) + return 0; + uopt->umask = option; + break; + case Opt_nostrict: + uopt->flags &= ~(1 << UDF_FLAG_STRICT); + break; + case Opt_session: + if (match_int(args, &option)) + return 0; + uopt->session = option; + break; + case Opt_lastblock: + if (match_int(args, &option)) + return 0; + uopt->lastblock = option; + break; + case Opt_anchor: + if (match_int(args, &option)) + return 0; + uopt->anchor = option; + break; + case Opt_volume: + if (match_int(args, &option)) + return 0; + uopt->volume = option; + break; + case Opt_partition: + if (match_int(args, &option)) + return 0; + uopt->partition = option; + break; + case Opt_fileset: + if (match_int(args, &option)) + return 0; + uopt->fileset = option; + break; + case Opt_rootdir: + if (match_int(args, &option)) + return 0; + uopt->rootdir = option; + break; + case Opt_utf8: + uopt->flags |= (1 << UDF_FLAG_UTF8); + break; #ifdef CONFIG_UDF_NLS - case Opt_iocharset: - uopt->nls_map = load_nls(args[0].from); - uopt->flags |= (1 << UDF_FLAG_NLS_MAP); - break; + case Opt_iocharset: + uopt->nls_map = load_nls(args[0].from); + uopt->flags |= (1 << UDF_FLAG_NLS_MAP); + break; #endif - case Opt_uignore: - uopt->flags |= (1 << UDF_FLAG_UID_IGNORE); - break; - case Opt_uforget: - uopt->flags |= (1 << UDF_FLAG_UID_FORGET); - break; - case Opt_gignore: - uopt->flags |= (1 << UDF_FLAG_GID_IGNORE); - break; - case Opt_gforget: - uopt->flags |= (1 << UDF_FLAG_GID_FORGET); - break; - default: - printk(KERN_ERR "udf: bad mount option \"%s\" " - "or missing value\n", p); + case Opt_uignore: + uopt->flags |= (1 << UDF_FLAG_UID_IGNORE); + break; + case Opt_uforget: + uopt->flags |= (1 << UDF_FLAG_UID_FORGET); + break; + case Opt_gignore: + uopt->flags |= (1 << UDF_FLAG_GID_IGNORE); + break; + case Opt_gforget: + uopt->flags |= (1 << UDF_FLAG_GID_FORGET); + break; + default: + printk(KERN_ERR "udf: bad mount option \"%s\" " + "or missing value\n", p); return 0; } } return 1; } -void -udf_write_super(struct super_block *sb) +void udf_write_super(struct super_block *sb) { lock_kernel(); if (!(sb->s_flags & MS_RDONLY)) @@ -451,22 +450,21 @@ udf_write_super(struct super_block *sb) unlock_kernel(); } -static int -udf_remount_fs(struct super_block *sb, int *flags, char *options) +static int udf_remount_fs(struct super_block *sb, int *flags, char *options) { struct udf_options uopt; - uopt.flags = UDF_SB(sb)->s_flags ; - uopt.uid = UDF_SB(sb)->s_uid ; - uopt.gid = UDF_SB(sb)->s_gid ; - uopt.umask = UDF_SB(sb)->s_umask ; + uopt.flags = UDF_SB(sb)->s_flags; + uopt.uid = UDF_SB(sb)->s_uid; + uopt.gid = UDF_SB(sb)->s_gid; + uopt.umask = UDF_SB(sb)->s_umask; - if ( !udf_parse_options(options, &uopt) ) + if (!udf_parse_options(options, &uopt)) return -EINVAL; UDF_SB(sb)->s_flags = uopt.flags; - UDF_SB(sb)->s_uid = uopt.uid; - UDF_SB(sb)->s_gid = uopt.gid; + UDF_SB(sb)->s_uid = uopt.uid; + UDF_SB(sb)->s_gid = uopt.gid; UDF_SB(sb)->s_umask = uopt.umask; if (UDF_SB_LVIDBH(sb)) { @@ -512,8 +510,7 @@ udf_remount_fs(struct super_block *sb, int *flags, char *options) * July 1, 1997 - Andrew E. Mileski * Written, tested, and released. */ -static int -udf_set_blocksize(struct super_block *sb, int bsize) +static int udf_set_blocksize(struct super_block *sb, int bsize) { if (!sb_min_blocksize(sb, bsize)) { udf_debug("Bad block size (%d)\n", bsize); @@ -523,16 +520,15 @@ udf_set_blocksize(struct super_block *sb, int bsize) return sb->s_blocksize; } -static int -udf_vrs(struct super_block *sb, int silent) +static int udf_vrs(struct super_block *sb, int silent) { struct volStructDesc *vsd = NULL; int sector = 32768; int sectorsize; struct buffer_head *bh = NULL; - int iso9660=0; - int nsr02=0; - int nsr03=0; + int iso9660 = 0; + int nsr02 = 0; + int nsr03 = 0; /* Block size must be a multiple of 512 */ if (sb->s_blocksize & 511) @@ -546,10 +542,9 @@ udf_vrs(struct super_block *sb, int silent) sector += (UDF_SB_SESSION(sb) << sb->s_blocksize_bits); udf_debug("Starting at sector %u (%ld byte sectors)\n", - (sector >> sb->s_blocksize_bits), sb->s_blocksize); + (sector >> sb->s_blocksize_bits), sb->s_blocksize); /* Process the sequence (if applicable) */ - for (;!nsr02 && !nsr03; sector += sectorsize) - { + for (; !nsr02 && !nsr03; sector += sectorsize) { /* Read a block */ bh = udf_tread(sb, sector >> sb->s_blocksize_bits); if (!bh) @@ -557,52 +552,56 @@ udf_vrs(struct super_block *sb, int silent) /* Look for ISO descriptors */ vsd = (struct volStructDesc *)(bh->b_data + - (sector & (sb->s_blocksize - 1))); + (sector & + (sb->s_blocksize - 1))); - if (vsd->stdIdent[0] == 0) - { + if (vsd->stdIdent[0] == 0) { brelse(bh); break; - } - else if (!strncmp(vsd->stdIdent, VSD_STD_ID_CD001, VSD_STD_ID_LEN)) - { + } else + if (!strncmp + (vsd->stdIdent, VSD_STD_ID_CD001, VSD_STD_ID_LEN)) { iso9660 = sector; - switch (vsd->structType) - { - case 0: - udf_debug("ISO9660 Boot Record found\n"); - break; - case 1: - udf_debug("ISO9660 Primary Volume Descriptor found\n"); - break; - case 2: - udf_debug("ISO9660 Supplementary Volume Descriptor found\n"); - break; - case 3: - udf_debug("ISO9660 Volume Partition Descriptor found\n"); - break; - case 255: - udf_debug("ISO9660 Volume Descriptor Set Terminator found\n"); - break; - default: - udf_debug("ISO9660 VRS (%u) found\n", vsd->structType); - break; + switch (vsd->structType) { + case 0: + udf_debug("ISO9660 Boot Record found\n"); + break; + case 1: + udf_debug + ("ISO9660 Primary Volume Descriptor found\n"); + break; + case 2: + udf_debug + ("ISO9660 Supplementary Volume Descriptor found\n"); + break; + case 3: + udf_debug + ("ISO9660 Volume Partition Descriptor found\n"); + break; + case 255: + udf_debug + ("ISO9660 Volume Descriptor Set Terminator found\n"); + break; + default: + udf_debug("ISO9660 VRS (%u) found\n", + vsd->structType); + break; } - } - else if (!strncmp(vsd->stdIdent, VSD_STD_ID_BEA01, VSD_STD_ID_LEN)) - { - } - else if (!strncmp(vsd->stdIdent, VSD_STD_ID_TEA01, VSD_STD_ID_LEN)) - { + } else + if (!strncmp + (vsd->stdIdent, VSD_STD_ID_BEA01, VSD_STD_ID_LEN)) { + } else + if (!strncmp + (vsd->stdIdent, VSD_STD_ID_TEA01, VSD_STD_ID_LEN)) { brelse(bh); break; - } - else if (!strncmp(vsd->stdIdent, VSD_STD_ID_NSR02, VSD_STD_ID_LEN)) - { + } else + if (!strncmp + (vsd->stdIdent, VSD_STD_ID_NSR02, VSD_STD_ID_LEN)) { nsr02 = sector; - } - else if (!strncmp(vsd->stdIdent, VSD_STD_ID_NSR03, VSD_STD_ID_LEN)) - { + } else + if (!strncmp + (vsd->stdIdent, VSD_STD_ID_NSR03, VSD_STD_ID_LEN)) { nsr03 = sector; } brelse(bh); @@ -635,8 +634,7 @@ udf_vrs(struct super_block *sb, int silent) * July 1, 1997 - Andrew E. Mileski * Written, tested, and released. */ -static void -udf_find_anchor(struct super_block *sb) +static void udf_find_anchor(struct super_block *sb) { int lastblock = UDF_SB_LASTBLOCK(sb); struct buffer_head *bh = NULL; @@ -644,13 +642,13 @@ udf_find_anchor(struct super_block *sb) uint32_t location; int i; - if (lastblock) - { + if (lastblock) { int varlastblock = udf_variable_to_fixed(lastblock); - int last[] = { lastblock, lastblock - 2, - lastblock - 150, lastblock - 152, - varlastblock, varlastblock - 2, - varlastblock - 150, varlastblock - 152 }; + int last[] = { lastblock, lastblock - 2, + lastblock - 150, lastblock - 152, + varlastblock, varlastblock - 2, + varlastblock - 150, varlastblock - 152 + }; lastblock = 0; @@ -663,90 +661,103 @@ udf_find_anchor(struct super_block *sb) * however, if the disc isn't closed, it could be 512 */ for (i = 0; !lastblock && i < ARRAY_SIZE(last); i++) { - if (last[i] < 0 || !(bh = sb_bread(sb, last[i]))) - { + if (last[i] < 0 || !(bh = sb_bread(sb, last[i]))) { ident = location = 0; - } - else - { - ident = le16_to_cpu(((tag *)bh->b_data)->tagIdent); - location = le32_to_cpu(((tag *)bh->b_data)->tagLocation); + } else { + ident = + le16_to_cpu(((tag *) bh->b_data)->tagIdent); + location = + le32_to_cpu(((tag *) bh->b_data)-> + tagLocation); brelse(bh); } - if (ident == TAG_IDENT_AVDP) - { - if (location == last[i] - UDF_SB_SESSION(sb)) - { - lastblock = UDF_SB_ANCHOR(sb)[0] = last[i] - UDF_SB_SESSION(sb); - UDF_SB_ANCHOR(sb)[1] = last[i] - 256 - UDF_SB_SESSION(sb); - } - else if (location == udf_variable_to_fixed(last[i]) - UDF_SB_SESSION(sb)) - { + if (ident == TAG_IDENT_AVDP) { + if (location == last[i] - UDF_SB_SESSION(sb)) { + lastblock = UDF_SB_ANCHOR(sb)[0] = + last[i] - UDF_SB_SESSION(sb); + UDF_SB_ANCHOR(sb)[1] = + last[i] - 256 - UDF_SB_SESSION(sb); + } else if (location == + udf_variable_to_fixed(last[i]) - + UDF_SB_SESSION(sb)) { UDF_SET_FLAG(sb, UDF_FLAG_VARCONV); - lastblock = UDF_SB_ANCHOR(sb)[0] = udf_variable_to_fixed(last[i]) - UDF_SB_SESSION(sb); - UDF_SB_ANCHOR(sb)[1] = lastblock - 256 - UDF_SB_SESSION(sb); - } - else - udf_debug("Anchor found at block %d, location mismatch %d.\n", - last[i], location); - } - else if (ident == TAG_IDENT_FE || ident == TAG_IDENT_EFE) - { + lastblock = UDF_SB_ANCHOR(sb)[0] = + udf_variable_to_fixed(last[i]) - + UDF_SB_SESSION(sb); + UDF_SB_ANCHOR(sb)[1] = + lastblock - 256 - + UDF_SB_SESSION(sb); + } else + udf_debug + ("Anchor found at block %d, location mismatch %d.\n", + last[i], location); + } else if (ident == TAG_IDENT_FE + || ident == TAG_IDENT_EFE) { lastblock = last[i]; UDF_SB_ANCHOR(sb)[3] = 512; - } - else - { - if (last[i] < 256 || !(bh = sb_bread(sb, last[i] - 256))) - { + } else { + if (last[i] < 256 + || !(bh = sb_bread(sb, last[i] - 256))) { ident = location = 0; - } - else - { - ident = le16_to_cpu(((tag *)bh->b_data)->tagIdent); - location = le32_to_cpu(((tag *)bh->b_data)->tagLocation); + } else { + ident = + le16_to_cpu(((tag *) bh->b_data)-> + tagIdent); + location = + le32_to_cpu(((tag *) bh->b_data)-> + tagLocation); brelse(bh); } - + if (ident == TAG_IDENT_AVDP && - location == last[i] - 256 - UDF_SB_SESSION(sb)) - { + location == + last[i] - 256 - UDF_SB_SESSION(sb)) { lastblock = last[i]; UDF_SB_ANCHOR(sb)[1] = last[i] - 256; - } - else - { - if (last[i] < 312 + UDF_SB_SESSION(sb) || !(bh = sb_bread(sb, last[i] - 312 - UDF_SB_SESSION(sb)))) + } else { + if (last[i] < 312 + UDF_SB_SESSION(sb) + || !(bh = + sb_bread(sb, + last[i] - 312 - + UDF_SB_SESSION(sb)))) { ident = location = 0; - } - else - { - ident = le16_to_cpu(((tag *)bh->b_data)->tagIdent); - location = le32_to_cpu(((tag *)bh->b_data)->tagLocation); + } else { + ident = + le16_to_cpu(((tag *) bh-> + b_data)-> + tagIdent); + location = + le32_to_cpu(((tag *) bh-> + b_data)-> + tagLocation); brelse(bh); } - + if (ident == TAG_IDENT_AVDP && - location == udf_variable_to_fixed(last[i]) - 256) - { - UDF_SET_FLAG(sb, UDF_FLAG_VARCONV); - lastblock = udf_variable_to_fixed(last[i]); - UDF_SB_ANCHOR(sb)[1] = lastblock - 256; + location == + udf_variable_to_fixed(last[i]) - + 256) { + UDF_SET_FLAG(sb, + UDF_FLAG_VARCONV); + lastblock = + udf_variable_to_fixed(last + [i]); + UDF_SB_ANCHOR(sb)[1] = + lastblock - 256; } } } } } - if (!lastblock) - { + if (!lastblock) { /* We havn't found the lastblock. check 312 */ - if ((bh = sb_bread(sb, 312 + UDF_SB_SESSION(sb)))) - { - ident = le16_to_cpu(((tag *)bh->b_data)->tagIdent); - location = le32_to_cpu(((tag *)bh->b_data)->tagLocation); + if ((bh = sb_bread(sb, 312 + UDF_SB_SESSION(sb)))) { + ident = le16_to_cpu(((tag *) bh->b_data)->tagIdent); + location = + le32_to_cpu(((tag *) bh->b_data)->tagLocation); brelse(bh); if (ident == TAG_IDENT_AVDP && location == 256) @@ -755,18 +766,19 @@ udf_find_anchor(struct super_block *sb) } for (i = 0; i < ARRAY_SIZE(UDF_SB_ANCHOR(sb)); i++) { - if (UDF_SB_ANCHOR(sb)[i]) - { + if (UDF_SB_ANCHOR(sb)[i]) { if (!(bh = udf_read_tagged(sb, - UDF_SB_ANCHOR(sb)[i], UDF_SB_ANCHOR(sb)[i], &ident))) - { + UDF_SB_ANCHOR(sb)[i], + UDF_SB_ANCHOR(sb)[i], + &ident))) { UDF_SB_ANCHOR(sb)[i] = 0; - } - else - { + } else { brelse(bh); if ((ident != TAG_IDENT_AVDP) && (i || - (ident != TAG_IDENT_FE && ident != TAG_IDENT_EFE))) + (ident != + TAG_IDENT_FE + && ident != + TAG_IDENT_EFE))) { UDF_SB_ANCHOR(sb)[i] = 0; } @@ -777,72 +789,75 @@ udf_find_anchor(struct super_block *sb) UDF_SB_LASTBLOCK(sb) = lastblock; } -static int -udf_find_fileset(struct super_block *sb, kernel_lb_addr *fileset, kernel_lb_addr *root) +static int +udf_find_fileset(struct super_block *sb, kernel_lb_addr * fileset, + kernel_lb_addr * root) { struct buffer_head *bh = NULL; long lastblock; uint16_t ident; if (fileset->logicalBlockNum != 0xFFFFFFFF || - fileset->partitionReferenceNum != 0xFFFF) - { + fileset->partitionReferenceNum != 0xFFFF) { bh = udf_read_ptagged(sb, *fileset, 0, &ident); if (!bh) return 1; - else if (ident != TAG_IDENT_FSD) - { + else if (ident != TAG_IDENT_FSD) { brelse(bh); return 1; } - + } - if (!bh) /* Search backwards through the partitions */ - { + if (!bh) { /* Search backwards through the partitions */ kernel_lb_addr newfileset; return 1; - - for (newfileset.partitionReferenceNum=UDF_SB_NUMPARTS(sb)-1; - (newfileset.partitionReferenceNum != 0xFFFF && - fileset->logicalBlockNum == 0xFFFFFFFF && - fileset->partitionReferenceNum == 0xFFFF); - newfileset.partitionReferenceNum--) - { - lastblock = UDF_SB_PARTLEN(sb, newfileset.partitionReferenceNum); + + for (newfileset.partitionReferenceNum = UDF_SB_NUMPARTS(sb) - 1; + (newfileset.partitionReferenceNum != 0xFFFF && + fileset->logicalBlockNum == 0xFFFFFFFF && + fileset->partitionReferenceNum == 0xFFFF); + newfileset.partitionReferenceNum--) { + lastblock = + UDF_SB_PARTLEN(sb, + newfileset.partitionReferenceNum); newfileset.logicalBlockNum = 0; - do - { - bh = udf_read_ptagged(sb, newfileset, 0, &ident); - if (!bh) - { - newfileset.logicalBlockNum ++; + do { + bh = udf_read_ptagged(sb, newfileset, 0, + &ident); + if (!bh) { + newfileset.logicalBlockNum++; continue; } - switch (ident) - { - case TAG_IDENT_SBD: + switch (ident) { + case TAG_IDENT_SBD: { struct spaceBitmapDesc *sp; - sp = (struct spaceBitmapDesc *)bh->b_data; - newfileset.logicalBlockNum += 1 + - ((le32_to_cpu(sp->numOfBytes) + sizeof(struct spaceBitmapDesc) - 1) - >> sb->s_blocksize_bits); + sp = (struct spaceBitmapDesc *) + bh->b_data; + newfileset.logicalBlockNum += + 1 + + ((le32_to_cpu + (sp->numOfBytes) + + sizeof(struct + spaceBitmapDesc) - + 1) + >> sb->s_blocksize_bits); brelse(bh); break; } - case TAG_IDENT_FSD: + case TAG_IDENT_FSD: { *fileset = newfileset; break; } - default: + default: { - newfileset.logicalBlockNum ++; + newfileset.logicalBlockNum++; brelse(bh); bh = NULL; break; @@ -850,16 +865,16 @@ udf_find_fileset(struct super_block *sb, kernel_lb_addr *fileset, kernel_lb_addr } } while (newfileset.logicalBlockNum < lastblock && - fileset->logicalBlockNum == 0xFFFFFFFF && - fileset->partitionReferenceNum == 0xFFFF); + fileset->logicalBlockNum == 0xFFFFFFFF && + fileset->partitionReferenceNum == 0xFFFF); } } if ((fileset->logicalBlockNum != 0xFFFFFFFF || - fileset->partitionReferenceNum != 0xFFFF) && bh) - { + fileset->partitionReferenceNum != 0xFFFF) && bh) { udf_debug("Fileset at block=%d, partition=%d\n", - fileset->logicalBlockNum, fileset->partitionReferenceNum); + fileset->logicalBlockNum, + fileset->partitionReferenceNum); UDF_SB_PARTITION(sb) = fileset->partitionReferenceNum; udf_load_fileset(sb, bh, root); @@ -869,8 +884,7 @@ udf_find_fileset(struct super_block *sb, kernel_lb_addr *fileset, kernel_lb_addr return 1; } -static void -udf_load_pvoldesc(struct super_block *sb, struct buffer_head *bh) +static void udf_load_pvoldesc(struct super_block *sb, struct buffer_head *bh) { struct primaryVolDesc *pvoldesc; time_t recording; @@ -880,37 +894,35 @@ udf_load_pvoldesc(struct super_block *sb, struct buffer_head *bh) pvoldesc = (struct primaryVolDesc *)bh->b_data; - if ( udf_stamp_to_time(&recording, &recording_usec, - lets_to_cpu(pvoldesc->recordingDateAndTime)) ) - { + if (udf_stamp_to_time(&recording, &recording_usec, + lets_to_cpu(pvoldesc->recordingDateAndTime))) { kernel_timestamp ts; ts = lets_to_cpu(pvoldesc->recordingDateAndTime); - udf_debug("recording time %ld/%ld, %04u/%02u/%02u %02u:%02u (%x)\n", - recording, recording_usec, - ts.year, ts.month, ts.day, ts.hour, ts.minute, ts.typeAndTimezone); + udf_debug + ("recording time %ld/%ld, %04u/%02u/%02u %02u:%02u (%x)\n", + recording, recording_usec, ts.year, ts.month, ts.day, + ts.hour, ts.minute, ts.typeAndTimezone); UDF_SB_RECORDTIME(sb).tv_sec = recording; UDF_SB_RECORDTIME(sb).tv_nsec = recording_usec * 1000; } - if ( !udf_build_ustr(&instr, pvoldesc->volIdent, 32) ) - { - if (udf_CS0toUTF8(&outstr, &instr)) - { - strncpy( UDF_SB_VOLIDENT(sb), outstr.u_name, + if (!udf_build_ustr(&instr, pvoldesc->volIdent, 32)) { + if (udf_CS0toUTF8(&outstr, &instr)) { + strncpy(UDF_SB_VOLIDENT(sb), outstr.u_name, outstr.u_len > 31 ? 31 : outstr.u_len); udf_debug("volIdent[] = '%s'\n", UDF_SB_VOLIDENT(sb)); } } - if ( !udf_build_ustr(&instr, pvoldesc->volSetIdent, 128) ) - { + if (!udf_build_ustr(&instr, pvoldesc->volSetIdent, 128)) { if (udf_CS0toUTF8(&outstr, &instr)) udf_debug("volSetIdent[] = '%s'\n", outstr.u_name); } } -static void -udf_load_fileset(struct super_block *sb, struct buffer_head *bh, kernel_lb_addr *root) +static void +udf_load_fileset(struct super_block *sb, struct buffer_head *bh, + kernel_lb_addr * root) { struct fileSetDesc *fset; @@ -920,109 +932,154 @@ udf_load_fileset(struct super_block *sb, struct buffer_head *bh, kernel_lb_addr UDF_SB_SERIALNUM(sb) = le16_to_cpu(fset->descTag.tagSerialNum); - udf_debug("Rootdir at block=%d, partition=%d\n", - root->logicalBlockNum, root->partitionReferenceNum); + udf_debug("Rootdir at block=%d, partition=%d\n", + root->logicalBlockNum, root->partitionReferenceNum); } -static void -udf_load_partdesc(struct super_block *sb, struct buffer_head *bh) +static void udf_load_partdesc(struct super_block *sb, struct buffer_head *bh) { struct partitionDesc *p; int i; p = (struct partitionDesc *)bh->b_data; - for (i=0; i<UDF_SB_NUMPARTS(sb); i++) - { - udf_debug("Searching map: (%d == %d)\n", - UDF_SB_PARTMAPS(sb)[i].s_partition_num, le16_to_cpu(p->partitionNumber)); - if (UDF_SB_PARTMAPS(sb)[i].s_partition_num == le16_to_cpu(p->partitionNumber)) - { - UDF_SB_PARTLEN(sb,i) = le32_to_cpu(p->partitionLength); /* blocks */ - UDF_SB_PARTROOT(sb,i) = le32_to_cpu(p->partitionStartingLocation); - if (le32_to_cpu(p->accessType) == PD_ACCESS_TYPE_READ_ONLY) - UDF_SB_PARTFLAGS(sb,i) |= UDF_PART_FLAG_READ_ONLY; - if (le32_to_cpu(p->accessType) == PD_ACCESS_TYPE_WRITE_ONCE) - UDF_SB_PARTFLAGS(sb,i) |= UDF_PART_FLAG_WRITE_ONCE; - if (le32_to_cpu(p->accessType) == PD_ACCESS_TYPE_REWRITABLE) - UDF_SB_PARTFLAGS(sb,i) |= UDF_PART_FLAG_REWRITABLE; - if (le32_to_cpu(p->accessType) == PD_ACCESS_TYPE_OVERWRITABLE) - UDF_SB_PARTFLAGS(sb,i) |= UDF_PART_FLAG_OVERWRITABLE; - - if (!strcmp(p->partitionContents.ident, PD_PARTITION_CONTENTS_NSR02) || - !strcmp(p->partitionContents.ident, PD_PARTITION_CONTENTS_NSR03)) - { + for (i = 0; i < UDF_SB_NUMPARTS(sb); i++) { + udf_debug("Searching map: (%d == %d)\n", + UDF_SB_PARTMAPS(sb)[i].s_partition_num, + le16_to_cpu(p->partitionNumber)); + if (UDF_SB_PARTMAPS(sb)[i].s_partition_num == + le16_to_cpu(p->partitionNumber)) { + UDF_SB_PARTLEN(sb, i) = le32_to_cpu(p->partitionLength); /* blocks */ + UDF_SB_PARTROOT(sb, i) = + le32_to_cpu(p->partitionStartingLocation); + if (le32_to_cpu(p->accessType) == + PD_ACCESS_TYPE_READ_ONLY) + UDF_SB_PARTFLAGS(sb, i) |= + UDF_PART_FLAG_READ_ONLY; + if (le32_to_cpu(p->accessType) == + PD_ACCESS_TYPE_WRITE_ONCE) + UDF_SB_PARTFLAGS(sb, i) |= + UDF_PART_FLAG_WRITE_ONCE; + if (le32_to_cpu(p->accessType) == + PD_ACCESS_TYPE_REWRITABLE) + UDF_SB_PARTFLAGS(sb, i) |= + UDF_PART_FLAG_REWRITABLE; + if (le32_to_cpu(p->accessType) == + PD_ACCESS_TYPE_OVERWRITABLE) + UDF_SB_PARTFLAGS(sb, i) |= + UDF_PART_FLAG_OVERWRITABLE; + + if (!strcmp + (p->partitionContents.ident, + PD_PARTITION_CONTENTS_NSR02) + || !strcmp(p->partitionContents.ident, + PD_PARTITION_CONTENTS_NSR03)) { struct partitionHeaderDesc *phd; - phd = (struct partitionHeaderDesc *)(p->partitionContentsUse); - if (phd->unallocSpaceTable.extLength) - { - kernel_lb_addr loc = { le32_to_cpu(phd->unallocSpaceTable.extPosition), i }; - - UDF_SB_PARTMAPS(sb)[i].s_uspace.s_table = - udf_iget(sb, loc); - UDF_SB_PARTFLAGS(sb,i) |= UDF_PART_FLAG_UNALLOC_TABLE; - udf_debug("unallocSpaceTable (part %d) @ %ld\n", - i, UDF_SB_PARTMAPS(sb)[i].s_uspace.s_table->i_ino); + phd = + (struct partitionHeaderDesc *)(p-> + partitionContentsUse); + if (phd->unallocSpaceTable.extLength) { + kernel_lb_addr loc = + { le32_to_cpu(phd-> + unallocSpaceTable. + extPosition), i }; + + UDF_SB_PARTMAPS(sb)[i].s_uspace. + s_table = udf_iget(sb, loc); + UDF_SB_PARTFLAGS(sb, i) |= + UDF_PART_FLAG_UNALLOC_TABLE; + udf_debug + ("unallocSpaceTable (part %d) @ %ld\n", + i, + UDF_SB_PARTMAPS(sb)[i].s_uspace. + s_table->i_ino); } - if (phd->unallocSpaceBitmap.extLength) - { + if (phd->unallocSpaceBitmap.extLength) { UDF_SB_ALLOC_BITMAP(sb, i, s_uspace); - if (UDF_SB_PARTMAPS(sb)[i].s_uspace.s_bitmap != NULL) - { - UDF_SB_PARTMAPS(sb)[i].s_uspace.s_bitmap->s_extLength = - le32_to_cpu(phd->unallocSpaceBitmap.extLength); - UDF_SB_PARTMAPS(sb)[i].s_uspace.s_bitmap->s_extPosition = - le32_to_cpu(phd->unallocSpaceBitmap.extPosition); - UDF_SB_PARTFLAGS(sb,i) |= UDF_PART_FLAG_UNALLOC_BITMAP; - udf_debug("unallocSpaceBitmap (part %d) @ %d\n", - i, UDF_SB_PARTMAPS(sb)[i].s_uspace.s_bitmap->s_extPosition); + if (UDF_SB_PARTMAPS(sb)[i].s_uspace. + s_bitmap != NULL) { + UDF_SB_PARTMAPS(sb)[i].s_uspace. + s_bitmap->s_extLength = + le32_to_cpu(phd-> + unallocSpaceBitmap. + extLength); + UDF_SB_PARTMAPS(sb)[i].s_uspace. + s_bitmap->s_extPosition = + le32_to_cpu(phd-> + unallocSpaceBitmap. + extPosition); + UDF_SB_PARTFLAGS(sb, i) |= + UDF_PART_FLAG_UNALLOC_BITMAP; + udf_debug + ("unallocSpaceBitmap (part %d) @ %d\n", + i, + UDF_SB_PARTMAPS(sb)[i]. + s_uspace.s_bitmap-> + s_extPosition); } } if (phd->partitionIntegrityTable.extLength) - udf_debug("partitionIntegrityTable (part %d)\n", i); - if (phd->freedSpaceTable.extLength) - { - kernel_lb_addr loc = { le32_to_cpu(phd->freedSpaceTable.extPosition), i }; - - UDF_SB_PARTMAPS(sb)[i].s_fspace.s_table = - udf_iget(sb, loc); - UDF_SB_PARTFLAGS(sb,i) |= UDF_PART_FLAG_FREED_TABLE; - udf_debug("freedSpaceTable (part %d) @ %ld\n", - i, UDF_SB_PARTMAPS(sb)[i].s_fspace.s_table->i_ino); + udf_debug + ("partitionIntegrityTable (part %d)\n", + i); + if (phd->freedSpaceTable.extLength) { + kernel_lb_addr loc = + { le32_to_cpu(phd->freedSpaceTable. + extPosition), i }; + + UDF_SB_PARTMAPS(sb)[i].s_fspace. + s_table = udf_iget(sb, loc); + UDF_SB_PARTFLAGS(sb, i) |= + UDF_PART_FLAG_FREED_TABLE; + udf_debug + ("freedSpaceTable (part %d) @ %ld\n", + i, + UDF_SB_PARTMAPS(sb)[i].s_fspace. + s_table->i_ino); } - if (phd->freedSpaceBitmap.extLength) - { + if (phd->freedSpaceBitmap.extLength) { UDF_SB_ALLOC_BITMAP(sb, i, s_fspace); - if (UDF_SB_PARTMAPS(sb)[i].s_fspace.s_bitmap != NULL) - { - UDF_SB_PARTMAPS(sb)[i].s_fspace.s_bitmap->s_extLength = - le32_to_cpu(phd->freedSpaceBitmap.extLength); - UDF_SB_PARTMAPS(sb)[i].s_fspace.s_bitmap->s_extPosition = - le32_to_cpu(phd->freedSpaceBitmap.extPosition); - UDF_SB_PARTFLAGS(sb,i) |= UDF_PART_FLAG_FREED_BITMAP; - udf_debug("freedSpaceBitmap (part %d) @ %d\n", - i, UDF_SB_PARTMAPS(sb)[i].s_fspace.s_bitmap->s_extPosition); + if (UDF_SB_PARTMAPS(sb)[i].s_fspace. + s_bitmap != NULL) { + UDF_SB_PARTMAPS(sb)[i].s_fspace. + s_bitmap->s_extLength = + le32_to_cpu(phd-> + freedSpaceBitmap. + extLength); + UDF_SB_PARTMAPS(sb)[i].s_fspace. + s_bitmap->s_extPosition = + le32_to_cpu(phd-> + freedSpaceBitmap. + extPosition); + UDF_SB_PARTFLAGS(sb, i) |= + UDF_PART_FLAG_FREED_BITMAP; + udf_debug + ("freedSpaceBitmap (part %d) @ %d\n", + i, + UDF_SB_PARTMAPS(sb)[i]. + s_fspace.s_bitmap-> + s_extPosition); } } } break; } } - if (i == UDF_SB_NUMPARTS(sb)) - { - udf_debug("Partition (%d) not found in partition map\n", le16_to_cpu(p->partitionNumber)); - } - else - { - udf_debug("Partition (%d:%d type %x) starts at physical %d, block length %d\n", - le16_to_cpu(p->partitionNumber), i, UDF_SB_PARTTYPE(sb,i), - UDF_SB_PARTROOT(sb,i), UDF_SB_PARTLEN(sb,i)); + if (i == UDF_SB_NUMPARTS(sb)) { + udf_debug("Partition (%d) not found in partition map\n", + le16_to_cpu(p->partitionNumber)); + } else { + udf_debug + ("Partition (%d:%d type %x) starts at physical %d, block length %d\n", + le16_to_cpu(p->partitionNumber), i, UDF_SB_PARTTYPE(sb, i), + UDF_SB_PARTROOT(sb, i), UDF_SB_PARTLEN(sb, i)); } } -static int -udf_load_logicalvol(struct super_block *sb, struct buffer_head * bh, kernel_lb_addr *fileset) +static int +udf_load_logicalvol(struct super_block *sb, struct buffer_head *bh, + kernel_lb_addr * fileset) { struct logicalVolDesc *lvd; int i, j, offset; @@ -1032,82 +1089,114 @@ udf_load_logicalvol(struct super_block *sb, struct buffer_head * bh, kernel_lb_a UDF_SB_ALLOC_PARTMAPS(sb, le32_to_cpu(lvd->numPartitionMaps)); - for (i=0,offset=0; - i<UDF_SB_NUMPARTS(sb) && offset<le32_to_cpu(lvd->mapTableLength); - i++,offset+=((struct genericPartitionMap *)&(lvd->partitionMaps[offset]))->partitionMapLength) - { - type = ((struct genericPartitionMap *)&(lvd->partitionMaps[offset]))->partitionMapType; - if (type == 1) - { - struct genericPartitionMap1 *gpm1 = (struct genericPartitionMap1 *)&(lvd->partitionMaps[offset]); - UDF_SB_PARTTYPE(sb,i) = UDF_TYPE1_MAP15; - UDF_SB_PARTVSN(sb,i) = le16_to_cpu(gpm1->volSeqNum); - UDF_SB_PARTNUM(sb,i) = le16_to_cpu(gpm1->partitionNum); - UDF_SB_PARTFUNC(sb,i) = NULL; - } - else if (type == 2) - { - struct udfPartitionMap2 *upm2 = (struct udfPartitionMap2 *)&(lvd->partitionMaps[offset]); - if (!strncmp(upm2->partIdent.ident, UDF_ID_VIRTUAL, strlen(UDF_ID_VIRTUAL))) - { - if (le16_to_cpu(((__le16 *)upm2->partIdent.identSuffix)[0]) == 0x0150) - { - UDF_SB_PARTTYPE(sb,i) = UDF_VIRTUAL_MAP15; - UDF_SB_PARTFUNC(sb,i) = udf_get_pblock_virt15; - } - else if (le16_to_cpu(((__le16 *)upm2->partIdent.identSuffix)[0]) == 0x0200) - { - UDF_SB_PARTTYPE(sb,i) = UDF_VIRTUAL_MAP20; - UDF_SB_PARTFUNC(sb,i) = udf_get_pblock_virt20; + for (i = 0, offset = 0; + i < UDF_SB_NUMPARTS(sb) + && offset < le32_to_cpu(lvd->mapTableLength); + i++, offset += + ((struct genericPartitionMap *)&(lvd->partitionMaps[offset]))-> + partitionMapLength) { + type = + ((struct genericPartitionMap *) + &(lvd->partitionMaps[offset]))->partitionMapType; + if (type == 1) { + struct genericPartitionMap1 *gpm1 = + (struct genericPartitionMap1 *)&(lvd-> + partitionMaps + [offset]); + UDF_SB_PARTTYPE(sb, i) = UDF_TYPE1_MAP15; + UDF_SB_PARTVSN(sb, i) = le16_to_cpu(gpm1->volSeqNum); + UDF_SB_PARTNUM(sb, i) = le16_to_cpu(gpm1->partitionNum); + UDF_SB_PARTFUNC(sb, i) = NULL; + } else if (type == 2) { + struct udfPartitionMap2 *upm2 = + (struct udfPartitionMap2 *)&(lvd-> + partitionMaps[offset]); + if (!strncmp + (upm2->partIdent.ident, UDF_ID_VIRTUAL, + strlen(UDF_ID_VIRTUAL))) { + if (le16_to_cpu + (((__le16 *) upm2->partIdent. + identSuffix)[0]) == 0x0150) { + UDF_SB_PARTTYPE(sb, i) = + UDF_VIRTUAL_MAP15; + UDF_SB_PARTFUNC(sb, i) = + udf_get_pblock_virt15; + } else + if (le16_to_cpu + (((__le16 *) upm2->partIdent. + identSuffix)[0]) == 0x0200) { + UDF_SB_PARTTYPE(sb, i) = + UDF_VIRTUAL_MAP20; + UDF_SB_PARTFUNC(sb, i) = + udf_get_pblock_virt20; } - } - else if (!strncmp(upm2->partIdent.ident, UDF_ID_SPARABLE, strlen(UDF_ID_SPARABLE))) - { + } else + if (!strncmp + (upm2->partIdent.ident, UDF_ID_SPARABLE, + strlen(UDF_ID_SPARABLE))) { uint32_t loc; uint16_t ident; struct sparingTable *st; - struct sparablePartitionMap *spm = (struct sparablePartitionMap *)&(lvd->partitionMaps[offset]); - - UDF_SB_PARTTYPE(sb,i) = UDF_SPARABLE_MAP15; - UDF_SB_TYPESPAR(sb,i).s_packet_len = le16_to_cpu(spm->packetLength); - for (j=0; j<spm->numSparingTables; j++) - { - loc = le32_to_cpu(spm->locSparingTable[j]); - UDF_SB_TYPESPAR(sb,i).s_spar_map[j] = - udf_read_tagged(sb, loc, loc, &ident); - if (UDF_SB_TYPESPAR(sb,i).s_spar_map[j] != NULL) - { - st = (struct sparingTable *)UDF_SB_TYPESPAR(sb,i).s_spar_map[j]->b_data; - if (ident != 0 || - strncmp(st->sparingIdent.ident, UDF_ID_SPARING, strlen(UDF_ID_SPARING))) + struct sparablePartitionMap *spm = + (struct sparablePartitionMap *)&(lvd-> + partitionMaps + [offset]); + + UDF_SB_PARTTYPE(sb, i) = UDF_SPARABLE_MAP15; + UDF_SB_TYPESPAR(sb, i).s_packet_len = + le16_to_cpu(spm->packetLength); + for (j = 0; j < spm->numSparingTables; j++) { + loc = + le32_to_cpu(spm-> + locSparingTable[j]); + UDF_SB_TYPESPAR(sb, i).s_spar_map[j] = + udf_read_tagged(sb, loc, loc, + &ident); + if (UDF_SB_TYPESPAR(sb, i). + s_spar_map[j] != NULL) { + st = (struct sparingTable *) + UDF_SB_TYPESPAR(sb, + i). + s_spar_map[j]->b_data; + if (ident != 0 + || strncmp(st->sparingIdent. + ident, + UDF_ID_SPARING, + strlen + (UDF_ID_SPARING))) { - brelse(UDF_SB_TYPESPAR(sb,i).s_spar_map[j]); - UDF_SB_TYPESPAR(sb,i).s_spar_map[j] = NULL; + brelse(UDF_SB_TYPESPAR + (sb, + i). + s_spar_map[j]); + UDF_SB_TYPESPAR(sb, + i). + s_spar_map[j] = + NULL; } } } - UDF_SB_PARTFUNC(sb,i) = udf_get_pblock_spar15; - } - else - { - udf_debug("Unknown ident: %s\n", upm2->partIdent.ident); + UDF_SB_PARTFUNC(sb, i) = udf_get_pblock_spar15; + } else { + udf_debug("Unknown ident: %s\n", + upm2->partIdent.ident); continue; } - UDF_SB_PARTVSN(sb,i) = le16_to_cpu(upm2->volSeqNum); - UDF_SB_PARTNUM(sb,i) = le16_to_cpu(upm2->partitionNum); + UDF_SB_PARTVSN(sb, i) = le16_to_cpu(upm2->volSeqNum); + UDF_SB_PARTNUM(sb, i) = le16_to_cpu(upm2->partitionNum); } udf_debug("Partition (%d:%d) type %d on volume %d\n", - i, UDF_SB_PARTNUM(sb,i), type, UDF_SB_PARTVSN(sb,i)); + i, UDF_SB_PARTNUM(sb, i), type, UDF_SB_PARTVSN(sb, + i)); } - if (fileset) - { - long_ad *la = (long_ad *)&(lvd->logicalVolContentsUse[0]); + if (fileset) { + long_ad *la = (long_ad *) & (lvd->logicalVolContentsUse[0]); *fileset = lelb_to_cpu(la->extLocation); - udf_debug("FileSet found in LogicalVolDesc at block=%d, partition=%d\n", - fileset->logicalBlockNum, - fileset->partitionReferenceNum); + udf_debug + ("FileSet found in LogicalVolDesc at block=%d, partition=%d\n", + fileset->logicalBlockNum, fileset->partitionReferenceNum); } if (lvd->integritySeqExt.extLength) udf_load_logicalvolint(sb, leea_to_cpu(lvd->integritySeqExt)); @@ -1118,26 +1207,26 @@ udf_load_logicalvol(struct super_block *sb, struct buffer_head * bh, kernel_lb_a * udf_load_logicalvolint * */ -static void -udf_load_logicalvolint(struct super_block *sb, kernel_extent_ad loc) +static void udf_load_logicalvolint(struct super_block *sb, kernel_extent_ad loc) { struct buffer_head *bh = NULL; uint16_t ident; while (loc.extLength > 0 && - (bh = udf_read_tagged(sb, loc.extLocation, - loc.extLocation, &ident)) && - ident == TAG_IDENT_LVID) - { + (bh = udf_read_tagged(sb, loc.extLocation, + loc.extLocation, &ident)) && + ident == TAG_IDENT_LVID) { UDF_SB_LVIDBH(sb) = bh; - + if (UDF_SB_LVID(sb)->nextIntegrityExt.extLength) - udf_load_logicalvolint(sb, leea_to_cpu(UDF_SB_LVID(sb)->nextIntegrityExt)); - + udf_load_logicalvolint(sb, + leea_to_cpu(UDF_SB_LVID(sb)-> + nextIntegrityExt)); + if (UDF_SB_LVIDBH(sb) != bh) brelse(bh); loc.extLength -= sb->s_blocksize; - loc.extLocation ++; + loc.extLocation++; } if (UDF_SB_LVIDBH(sb) != bh) brelse(bh); @@ -1158,15 +1247,16 @@ udf_load_logicalvolint(struct super_block *sb, kernel_extent_ad loc) * July 1, 1997 - Andrew E. Mileski * Written, tested, and released. */ -static int -udf_process_sequence(struct super_block *sb, long block, long lastblock, kernel_lb_addr *fileset) +static int +udf_process_sequence(struct super_block *sb, long block, long lastblock, + kernel_lb_addr * fileset) { struct buffer_head *bh = NULL; struct udf_vds_record vds[VDS_POS_LENGTH]; struct generic_desc *gd; struct volDescPtr *vdp; - int done=0; - int i,j; + int done = 0; + int i, j; uint32_t vdsn; uint16_t ident; long next_s = 0, next_e = 0; @@ -1174,93 +1264,92 @@ udf_process_sequence(struct super_block *sb, long block, long lastblock, kernel_ memset(vds, 0, sizeof(struct udf_vds_record) * VDS_POS_LENGTH); /* Read the main descriptor sequence */ - for (;(!done && block <= lastblock); block++) - { + for (; (!done && block <= lastblock); block++) { bh = udf_read_tagged(sb, block, block, &ident); - if (!bh) + if (!bh) break; /* Process each descriptor (ISO 13346 3/8.3-8.4) */ gd = (struct generic_desc *)bh->b_data; vdsn = le32_to_cpu(gd->volDescSeqNum); - switch (ident) - { - case TAG_IDENT_PVD: /* ISO 13346 3/10.1 */ - if (vdsn >= vds[VDS_POS_PRIMARY_VOL_DESC].volDescSeqNum) - { - vds[VDS_POS_PRIMARY_VOL_DESC].volDescSeqNum = vdsn; - vds[VDS_POS_PRIMARY_VOL_DESC].block = block; - } - break; - case TAG_IDENT_VDP: /* ISO 13346 3/10.3 */ - if (vdsn >= vds[VDS_POS_VOL_DESC_PTR].volDescSeqNum) - { - vds[VDS_POS_VOL_DESC_PTR].volDescSeqNum = vdsn; - vds[VDS_POS_VOL_DESC_PTR].block = block; - - vdp = (struct volDescPtr *)bh->b_data; - next_s = le32_to_cpu(vdp->nextVolDescSeqExt.extLocation); - next_e = le32_to_cpu(vdp->nextVolDescSeqExt.extLength); - next_e = next_e >> sb->s_blocksize_bits; - next_e += next_s; - } - break; - case TAG_IDENT_IUVD: /* ISO 13346 3/10.4 */ - if (vdsn >= vds[VDS_POS_IMP_USE_VOL_DESC].volDescSeqNum) - { - vds[VDS_POS_IMP_USE_VOL_DESC].volDescSeqNum = vdsn; - vds[VDS_POS_IMP_USE_VOL_DESC].block = block; - } - break; - case TAG_IDENT_PD: /* ISO 13346 3/10.5 */ - if (!vds[VDS_POS_PARTITION_DESC].block) - vds[VDS_POS_PARTITION_DESC].block = block; - break; - case TAG_IDENT_LVD: /* ISO 13346 3/10.6 */ - if (vdsn >= vds[VDS_POS_LOGICAL_VOL_DESC].volDescSeqNum) - { - vds[VDS_POS_LOGICAL_VOL_DESC].volDescSeqNum = vdsn; - vds[VDS_POS_LOGICAL_VOL_DESC].block = block; - } - break; - case TAG_IDENT_USD: /* ISO 13346 3/10.8 */ - if (vdsn >= vds[VDS_POS_UNALLOC_SPACE_DESC].volDescSeqNum) - { - vds[VDS_POS_UNALLOC_SPACE_DESC].volDescSeqNum = vdsn; - vds[VDS_POS_UNALLOC_SPACE_DESC].block = block; - } - break; - case TAG_IDENT_TD: /* ISO 13346 3/10.9 */ - vds[VDS_POS_TERMINATING_DESC].block = block; - if (next_e) - { - block = next_s; - lastblock = next_e; - next_s = next_e = 0; - } - else - done = 1; - break; + switch (ident) { + case TAG_IDENT_PVD: /* ISO 13346 3/10.1 */ + if (vdsn >= vds[VDS_POS_PRIMARY_VOL_DESC].volDescSeqNum) { + vds[VDS_POS_PRIMARY_VOL_DESC].volDescSeqNum = + vdsn; + vds[VDS_POS_PRIMARY_VOL_DESC].block = block; + } + break; + case TAG_IDENT_VDP: /* ISO 13346 3/10.3 */ + if (vdsn >= vds[VDS_POS_VOL_DESC_PTR].volDescSeqNum) { + vds[VDS_POS_VOL_DESC_PTR].volDescSeqNum = vdsn; + vds[VDS_POS_VOL_DESC_PTR].block = block; + + vdp = (struct volDescPtr *)bh->b_data; + next_s = + le32_to_cpu(vdp->nextVolDescSeqExt. + extLocation); + next_e = + le32_to_cpu(vdp->nextVolDescSeqExt. + extLength); + next_e = next_e >> sb->s_blocksize_bits; + next_e += next_s; + } + break; + case TAG_IDENT_IUVD: /* ISO 13346 3/10.4 */ + if (vdsn >= vds[VDS_POS_IMP_USE_VOL_DESC].volDescSeqNum) { + vds[VDS_POS_IMP_USE_VOL_DESC].volDescSeqNum = + vdsn; + vds[VDS_POS_IMP_USE_VOL_DESC].block = block; + } + break; + case TAG_IDENT_PD: /* ISO 13346 3/10.5 */ + if (!vds[VDS_POS_PARTITION_DESC].block) + vds[VDS_POS_PARTITION_DESC].block = block; + break; + case TAG_IDENT_LVD: /* ISO 13346 3/10.6 */ + if (vdsn >= vds[VDS_POS_LOGICAL_VOL_DESC].volDescSeqNum) { + vds[VDS_POS_LOGICAL_VOL_DESC].volDescSeqNum = + vdsn; + vds[VDS_POS_LOGICAL_VOL_DESC].block = block; + } + break; + case TAG_IDENT_USD: /* ISO 13346 3/10.8 */ + if (vdsn >= + vds[VDS_POS_UNALLOC_SPACE_DESC].volDescSeqNum) { + vds[VDS_POS_UNALLOC_SPACE_DESC].volDescSeqNum = + vdsn; + vds[VDS_POS_UNALLOC_SPACE_DESC].block = block; + } + break; + case TAG_IDENT_TD: /* ISO 13346 3/10.9 */ + vds[VDS_POS_TERMINATING_DESC].block = block; + if (next_e) { + block = next_s; + lastblock = next_e; + next_s = next_e = 0; + } else + done = 1; + break; } brelse(bh); } - for (i=0; i<VDS_POS_LENGTH; i++) - { - if (vds[i].block) - { - bh = udf_read_tagged(sb, vds[i].block, vds[i].block, &ident); + for (i = 0; i < VDS_POS_LENGTH; i++) { + if (vds[i].block) { + bh = udf_read_tagged(sb, vds[i].block, vds[i].block, + &ident); if (i == VDS_POS_PRIMARY_VOL_DESC) udf_load_pvoldesc(sb, bh); else if (i == VDS_POS_LOGICAL_VOL_DESC) udf_load_logicalvol(sb, bh, fileset); - else if (i == VDS_POS_PARTITION_DESC) - { + else if (i == VDS_POS_PARTITION_DESC) { struct buffer_head *bh2 = NULL; udf_load_partdesc(sb, bh); - for (j=vds[i].block+1; j<vds[VDS_POS_TERMINATING_DESC].block; j++) - { + for (j = vds[i].block + 1; + j < vds[VDS_POS_TERMINATING_DESC].block; + j++) { bh2 = udf_read_tagged(sb, j, j, &ident); gd = (struct generic_desc *)bh2->b_data; if (ident == TAG_IDENT_PD) @@ -1278,31 +1367,27 @@ udf_process_sequence(struct super_block *sb, long block, long lastblock, kernel_ /* * udf_check_valid() */ -static int -udf_check_valid(struct super_block *sb, int novrs, int silent) +static int udf_check_valid(struct super_block *sb, int novrs, int silent) { long block; - if (novrs) - { + if (novrs) { udf_debug("Validity check skipped because of novrs option\n"); return 0; } /* Check that it is NSR02 compliant */ /* Process any "CD-ROM Volume Descriptor Set" (ECMA 167 2/8.3.1) */ - else if ((block = udf_vrs(sb, silent)) == -1) - { - udf_debug("Failed to read byte 32768. Assuming open disc. Skipping validity check\n"); + else if ((block = udf_vrs(sb, silent)) == -1) { + udf_debug + ("Failed to read byte 32768. Assuming open disc. Skipping validity check\n"); if (!UDF_SB_LASTBLOCK(sb)) UDF_SB_LASTBLOCK(sb) = udf_get_last_block(sb); return 0; - } - else + } else return !block; } -static int -udf_load_partition(struct super_block *sb, kernel_lb_addr *fileset) +static int udf_load_partition(struct super_block *sb, kernel_lb_addr * fileset) { struct anchorVolDescPtr *anchor; uint16_t ident; @@ -1315,19 +1400,27 @@ udf_load_partition(struct super_block *sb, kernel_lb_addr *fileset) for (i = 0; i < ARRAY_SIZE(UDF_SB_ANCHOR(sb)); i++) { if (UDF_SB_ANCHOR(sb)[i] && (bh = udf_read_tagged(sb, - UDF_SB_ANCHOR(sb)[i], UDF_SB_ANCHOR(sb)[i], &ident))) - { + UDF_SB_ANCHOR + (sb)[i], + UDF_SB_ANCHOR + (sb)[i], + &ident))) { anchor = (struct anchorVolDescPtr *)bh->b_data; /* Locate the main sequence */ - main_s = le32_to_cpu( anchor->mainVolDescSeqExt.extLocation ); - main_e = le32_to_cpu( anchor->mainVolDescSeqExt.extLength ); + main_s = + le32_to_cpu(anchor->mainVolDescSeqExt.extLocation); + main_e = + le32_to_cpu(anchor->mainVolDescSeqExt.extLength); main_e = main_e >> sb->s_blocksize_bits; main_e += main_s; /* Locate the reserve sequence */ - reserve_s = le32_to_cpu(anchor->reserveVolDescSeqExt.extLocation); - reserve_e = le32_to_cpu(anchor->reserveVolDescSeqExt.extLength); + reserve_s = + le32_to_cpu(anchor->reserveVolDescSeqExt. + extLocation); + reserve_e = + le32_to_cpu(anchor->reserveVolDescSeqExt.extLength); reserve_e = reserve_e >> sb->s_blocksize_bits; reserve_e += reserve_s; @@ -1335,9 +1428,10 @@ udf_load_partition(struct super_block *sb, kernel_lb_addr *fileset) /* Process the main & reserve sequences */ /* responsible for finding the PartitionDesc(s) */ - if (!(udf_process_sequence(sb, main_s, main_e, fileset) && - udf_process_sequence(sb, reserve_s, reserve_e, fileset))) - { + if (! + (udf_process_sequence(sb, main_s, main_e, fileset) + && udf_process_sequence(sb, reserve_s, reserve_e, + fileset))) { break; } } @@ -1349,36 +1443,37 @@ udf_load_partition(struct super_block *sb, kernel_lb_addr *fileset) } else udf_debug("Using anchor in block %d\n", UDF_SB_ANCHOR(sb)[i]); - for (i=0; i<UDF_SB_NUMPARTS(sb); i++) - { - switch (UDF_SB_PARTTYPE(sb, i)) - { - case UDF_VIRTUAL_MAP15: - case UDF_VIRTUAL_MAP20: + for (i = 0; i < UDF_SB_NUMPARTS(sb); i++) { + switch (UDF_SB_PARTTYPE(sb, i)) { + case UDF_VIRTUAL_MAP15: + case UDF_VIRTUAL_MAP20: { kernel_lb_addr uninitialized_var(ino); - if (!UDF_SB_LASTBLOCK(sb)) - { - UDF_SB_LASTBLOCK(sb) = udf_get_last_block(sb); + if (!UDF_SB_LASTBLOCK(sb)) { + UDF_SB_LASTBLOCK(sb) = + udf_get_last_block(sb); udf_find_anchor(sb); } - if (!UDF_SB_LASTBLOCK(sb)) - { - udf_debug("Unable to determine Lastblock (For Virtual Partition)\n"); + if (!UDF_SB_LASTBLOCK(sb)) { + udf_debug + ("Unable to determine Lastblock (For Virtual Partition)\n"); return 1; } - for (j=0; j<UDF_SB_NUMPARTS(sb); j++) - { + for (j = 0; j < UDF_SB_NUMPARTS(sb); j++) { if (j != i && - UDF_SB_PARTVSN(sb,i) == UDF_SB_PARTVSN(sb,j) && - UDF_SB_PARTNUM(sb,i) == UDF_SB_PARTNUM(sb,j)) - { + UDF_SB_PARTVSN(sb, + i) == + UDF_SB_PARTVSN(sb, j) + && UDF_SB_PARTNUM(sb, + i) == + UDF_SB_PARTNUM(sb, j)) { ino.partitionReferenceNum = j; - ino.logicalBlockNum = UDF_SB_LASTBLOCK(sb) - - UDF_SB_PARTROOT(sb,j); + ino.logicalBlockNum = + UDF_SB_LASTBLOCK(sb) - + UDF_SB_PARTROOT(sb, j); break; } } @@ -1389,13 +1484,13 @@ udf_load_partition(struct super_block *sb, kernel_lb_addr *fileset) if (!(UDF_SB_VAT(sb) = udf_iget(sb, ino))) return 1; - if (UDF_SB_PARTTYPE(sb,i) == UDF_VIRTUAL_MAP15) - { - UDF_SB_TYPEVIRT(sb,i).s_start_offset = udf_ext0_offset(UDF_SB_VAT(sb)); - UDF_SB_TYPEVIRT(sb,i).s_num_entries = (UDF_SB_VAT(sb)->i_size - 36) >> 2; - } - else if (UDF_SB_PARTTYPE(sb,i) == UDF_VIRTUAL_MAP20) - { + if (UDF_SB_PARTTYPE(sb, i) == UDF_VIRTUAL_MAP15) { + UDF_SB_TYPEVIRT(sb, i).s_start_offset = + udf_ext0_offset(UDF_SB_VAT(sb)); + UDF_SB_TYPEVIRT(sb, i).s_num_entries = + (UDF_SB_VAT(sb)->i_size - 36) >> 2; + } else if (UDF_SB_PARTTYPE(sb, i) == + UDF_VIRTUAL_MAP20) { struct buffer_head *bh = NULL; uint32_t pos; @@ -1403,15 +1498,26 @@ udf_load_partition(struct super_block *sb, kernel_lb_addr *fileset) bh = sb_bread(sb, pos); if (!bh) return 1; - UDF_SB_TYPEVIRT(sb,i).s_start_offset = - le16_to_cpu(((struct virtualAllocationTable20 *)bh->b_data + udf_ext0_offset(UDF_SB_VAT(sb)))->lengthHeader) + - udf_ext0_offset(UDF_SB_VAT(sb)); - UDF_SB_TYPEVIRT(sb,i).s_num_entries = (UDF_SB_VAT(sb)->i_size - - UDF_SB_TYPEVIRT(sb,i).s_start_offset) >> 2; + UDF_SB_TYPEVIRT(sb, i).s_start_offset = + le16_to_cpu(((struct + virtualAllocationTable20 + *)bh->b_data + + udf_ext0_offset + (UDF_SB_VAT(sb)))-> + lengthHeader) + + udf_ext0_offset(UDF_SB_VAT(sb)); + UDF_SB_TYPEVIRT(sb, i).s_num_entries = + (UDF_SB_VAT(sb)->i_size - + UDF_SB_TYPEVIRT(sb, + i). + s_start_offset) >> 2; brelse(bh); } - UDF_SB_PARTROOT(sb,i) = udf_get_pblock(sb, 0, i, 0); - UDF_SB_PARTLEN(sb,i) = UDF_SB_PARTLEN(sb,ino.partitionReferenceNum); + UDF_SB_PARTROOT(sb, i) = + udf_get_pblock(sb, 0, i, 0); + UDF_SB_PARTLEN(sb, i) = + UDF_SB_PARTLEN(sb, + ino.partitionReferenceNum); } } } @@ -1420,26 +1526,28 @@ udf_load_partition(struct super_block *sb, kernel_lb_addr *fileset) static void udf_open_lvid(struct super_block *sb) { - if (UDF_SB_LVIDBH(sb)) - { + if (UDF_SB_LVIDBH(sb)) { int i; kernel_timestamp cpu_time; UDF_SB_LVIDIU(sb)->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX; UDF_SB_LVIDIU(sb)->impIdent.identSuffix[1] = UDF_OS_ID_LINUX; if (udf_time_to_stamp(&cpu_time, CURRENT_TIME)) - UDF_SB_LVID(sb)->recordingDateAndTime = cpu_to_lets(cpu_time); + UDF_SB_LVID(sb)->recordingDateAndTime = + cpu_to_lets(cpu_time); UDF_SB_LVID(sb)->integrityType = LVID_INTEGRITY_TYPE_OPEN; UDF_SB_LVID(sb)->descTag.descCRC = - cpu_to_le16(udf_crc((char *)UDF_SB_LVID(sb) + sizeof(tag), - le16_to_cpu(UDF_SB_LVID(sb)->descTag.descCRCLength), 0)); + cpu_to_le16(udf_crc((char *)UDF_SB_LVID(sb) + sizeof(tag), + le16_to_cpu(UDF_SB_LVID(sb)->descTag. + descCRCLength), 0)); UDF_SB_LVID(sb)->descTag.tagChecksum = 0; - for (i=0; i<16; i++) + for (i = 0; i < 16; i++) if (i != 4) UDF_SB_LVID(sb)->descTag.tagChecksum += - ((uint8_t *)&(UDF_SB_LVID(sb)->descTag))[i]; + ((uint8_t *) & + (UDF_SB_LVID(sb)->descTag))[i]; mark_buffer_dirty(UDF_SB_LVIDBH(sb)); } @@ -1448,32 +1556,41 @@ static void udf_open_lvid(struct super_block *sb) static void udf_close_lvid(struct super_block *sb) { if (UDF_SB_LVIDBH(sb) && - UDF_SB_LVID(sb)->integrityType == LVID_INTEGRITY_TYPE_OPEN) - { + UDF_SB_LVID(sb)->integrityType == LVID_INTEGRITY_TYPE_OPEN) { int i; kernel_timestamp cpu_time; UDF_SB_LVIDIU(sb)->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX; UDF_SB_LVIDIU(sb)->impIdent.identSuffix[1] = UDF_OS_ID_LINUX; if (udf_time_to_stamp(&cpu_time, CURRENT_TIME)) - UDF_SB_LVID(sb)->recordingDateAndTime = cpu_to_lets(cpu_time); - if (UDF_MAX_WRITE_VERSION > le16_to_cpu(UDF_SB_LVIDIU(sb)->maxUDFWriteRev)) - UDF_SB_LVIDIU(sb)->maxUDFWriteRev = cpu_to_le16(UDF_MAX_WRITE_VERSION); - if (UDF_SB_UDFREV(sb) > le16_to_cpu(UDF_SB_LVIDIU(sb)->minUDFReadRev)) - UDF_SB_LVIDIU(sb)->minUDFReadRev = cpu_to_le16(UDF_SB_UDFREV(sb)); - if (UDF_SB_UDFREV(sb) > le16_to_cpu(UDF_SB_LVIDIU(sb)->minUDFWriteRev)) - UDF_SB_LVIDIU(sb)->minUDFWriteRev = cpu_to_le16(UDF_SB_UDFREV(sb)); - UDF_SB_LVID(sb)->integrityType = cpu_to_le32(LVID_INTEGRITY_TYPE_CLOSE); + UDF_SB_LVID(sb)->recordingDateAndTime = + cpu_to_lets(cpu_time); + if (UDF_MAX_WRITE_VERSION > + le16_to_cpu(UDF_SB_LVIDIU(sb)->maxUDFWriteRev)) + UDF_SB_LVIDIU(sb)->maxUDFWriteRev = + cpu_to_le16(UDF_MAX_WRITE_VERSION); + if (UDF_SB_UDFREV(sb) > + le16_to_cpu(UDF_SB_LVIDIU(sb)->minUDFReadRev)) + UDF_SB_LVIDIU(sb)->minUDFReadRev = + cpu_to_le16(UDF_SB_UDFREV(sb)); + if (UDF_SB_UDFREV(sb) > + le16_to_cpu(UDF_SB_LVIDIU(sb)->minUDFWriteRev)) + UDF_SB_LVIDIU(sb)->minUDFWriteRev = + cpu_to_le16(UDF_SB_UDFREV(sb)); + UDF_SB_LVID(sb)->integrityType = + cpu_to_le32(LVID_INTEGRITY_TYPE_CLOSE); UDF_SB_LVID(sb)->descTag.descCRC = - cpu_to_le16(udf_crc((char *)UDF_SB_LVID(sb) + sizeof(tag), - le16_to_cpu(UDF_SB_LVID(sb)->descTag.descCRCLength), 0)); + cpu_to_le16(udf_crc((char *)UDF_SB_LVID(sb) + sizeof(tag), + le16_to_cpu(UDF_SB_LVID(sb)->descTag. + descCRCLength), 0)); UDF_SB_LVID(sb)->descTag.tagChecksum = 0; - for (i=0; i<16; i++) + for (i = 0; i < 16; i++) if (i != 4) UDF_SB_LVID(sb)->descTag.tagChecksum += - ((uint8_t *)&(UDF_SB_LVID(sb)->descTag))[i]; + ((uint8_t *) & + (UDF_SB_LVID(sb)->descTag))[i]; mark_buffer_dirty(UDF_SB_LVIDBH(sb)); } @@ -1498,7 +1615,7 @@ static void udf_close_lvid(struct super_block *sb) static int udf_fill_super(struct super_block *sb, void *options, int silent) { int i; - struct inode *inode=NULL; + struct inode *inode = NULL; struct udf_options uopt; kernel_lb_addr rootdir, fileset; struct udf_sb_info *sbi; @@ -1520,15 +1637,13 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) goto error_out; if (uopt.flags & (1 << UDF_FLAG_UTF8) && - uopt.flags & (1 << UDF_FLAG_NLS_MAP)) - { + uopt.flags & (1 << UDF_FLAG_NLS_MAP)) { udf_error(sb, "udf_read_super", - "utf8 cannot be combined with iocharset\n"); + "utf8 cannot be combined with iocharset\n"); goto error_out; } #ifdef CONFIG_UDF_NLS - if ((uopt.flags & (1 << UDF_FLAG_NLS_MAP)) && !uopt.nls_map) - { + if ((uopt.flags & (1 << UDF_FLAG_NLS_MAP)) && !uopt.nls_map) { uopt.nls_map = load_nls_default(); if (!uopt.nls_map) uopt.flags &= ~(1 << UDF_FLAG_NLS_MAP); @@ -1552,7 +1667,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) if (!udf_set_blocksize(sb, uopt.blocksize)) goto error_out; - if ( uopt.session == 0xFFFFFFFF ) + if (uopt.session == 0xFFFFFFFF) UDF_SB_SESSION(sb) = udf_get_last_session(sb); else UDF_SB_SESSION(sb) = uopt.session; @@ -1564,10 +1679,9 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) UDF_SB_ANCHOR(sb)[2] = uopt.anchor; UDF_SB_ANCHOR(sb)[3] = 256; - if (udf_check_valid(sb, uopt.novrs, silent)) /* read volume recognition sequences */ - { + if (udf_check_valid(sb, uopt.novrs, silent)) { /* read volume recognition sequences */ printk("UDF-fs: No VRS found\n"); - goto error_out; + goto error_out; } udf_find_anchor(sb); @@ -1579,29 +1693,26 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) sb->s_magic = UDF_SUPER_MAGIC; sb->s_time_gran = 1000; - if (udf_load_partition(sb, &fileset)) - { + if (udf_load_partition(sb, &fileset)) { printk("UDF-fs: No partition found (1)\n"); goto error_out; } udf_debug("Lastblock=%d\n", UDF_SB_LASTBLOCK(sb)); - if ( UDF_SB_LVIDBH(sb) ) - { - uint16_t minUDFReadRev = le16_to_cpu(UDF_SB_LVIDIU(sb)->minUDFReadRev); - uint16_t minUDFWriteRev = le16_to_cpu(UDF_SB_LVIDIU(sb)->minUDFWriteRev); + if (UDF_SB_LVIDBH(sb)) { + uint16_t minUDFReadRev = + le16_to_cpu(UDF_SB_LVIDIU(sb)->minUDFReadRev); + uint16_t minUDFWriteRev = + le16_to_cpu(UDF_SB_LVIDIU(sb)->minUDFWriteRev); /* uint16_t maxUDFWriteRev = le16_to_cpu(UDF_SB_LVIDIU(sb)->maxUDFWriteRev); */ - if (minUDFReadRev > UDF_MAX_READ_VERSION) - { + if (minUDFReadRev > UDF_MAX_READ_VERSION) { printk("UDF-fs: minUDFReadRev=%x (max is %x)\n", - le16_to_cpu(UDF_SB_LVIDIU(sb)->minUDFReadRev), - UDF_MAX_READ_VERSION); + le16_to_cpu(UDF_SB_LVIDIU(sb)->minUDFReadRev), + UDF_MAX_READ_VERSION); goto error_out; - } - else if (minUDFWriteRev > UDF_MAX_WRITE_VERSION) - { + } else if (minUDFWriteRev > UDF_MAX_WRITE_VERSION) { sb->s_flags |= MS_RDONLY; } @@ -1613,31 +1724,30 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) UDF_SET_FLAG(sb, UDF_FLAG_USE_STREAMS); } - if ( !UDF_SB_NUMPARTS(sb) ) - { + if (!UDF_SB_NUMPARTS(sb)) { printk("UDF-fs: No partition found (2)\n"); goto error_out; } - if (UDF_SB_PARTFLAGS(sb, UDF_SB_PARTITION(sb)) & UDF_PART_FLAG_READ_ONLY) { - printk("UDF-fs: Partition marked readonly; forcing readonly mount\n"); + if (UDF_SB_PARTFLAGS(sb, UDF_SB_PARTITION(sb)) & + UDF_PART_FLAG_READ_ONLY) { + printk + ("UDF-fs: Partition marked readonly; forcing readonly mount\n"); sb->s_flags |= MS_RDONLY; } - if ( udf_find_fileset(sb, &fileset, &rootdir) ) - { + if (udf_find_fileset(sb, &fileset, &rootdir)) { printk("UDF-fs: No fileset found\n"); goto error_out; } - if (!silent) - { + if (!silent) { kernel_timestamp ts; udf_time_to_stamp(&ts, UDF_SB_RECORDTIME(sb)); - udf_info("UDF %s (%s) Mounting volume '%s', timestamp %04u/%02u/%02u %02u:%02u (%x)\n", - UDFFS_VERSION, UDFFS_DATE, - UDF_SB_VOLIDENT(sb), ts.year, ts.month, ts.day, ts.hour, ts.minute, - ts.typeAndTimezone); + udf_info + ("UDF %s (%s) Mounting volume '%s', timestamp %04u/%02u/%02u %02u:%02u (%x)\n", + UDFFS_VERSION, UDFFS_DATE, UDF_SB_VOLIDENT(sb), ts.year, + ts.month, ts.day, ts.hour, ts.minute, ts.typeAndTimezone); } if (!(sb->s_flags & MS_RDONLY)) udf_open_lvid(sb); @@ -1645,18 +1755,16 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) /* Assign the root inode */ /* assign inodes by physical block number */ /* perhaps it's not extensible enough, but for now ... */ - inode = udf_iget(sb, rootdir); - if (!inode) - { + inode = udf_iget(sb, rootdir); + if (!inode) { printk("UDF-fs: Error in udf_iget, block=%d, partition=%d\n", - rootdir.logicalBlockNum, rootdir.partitionReferenceNum); + rootdir.logicalBlockNum, rootdir.partitionReferenceNum); goto error_out; } /* Allocate a dentry for the root inode */ sb->s_root = d_alloc_root(inode); - if (!sb->s_root) - { + if (!sb->s_root) { printk("UDF-fs: Couldn't allocate root dentry\n"); iput(inode); goto error_out; @@ -1664,23 +1772,30 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) sb->s_maxbytes = MAX_LFS_FILESIZE; return 0; -error_out: + error_out: if (UDF_SB_VAT(sb)) iput(UDF_SB_VAT(sb)); - if (UDF_SB_NUMPARTS(sb)) - { - if (UDF_SB_PARTFLAGS(sb, UDF_SB_PARTITION(sb)) & UDF_PART_FLAG_UNALLOC_TABLE) - iput(UDF_SB_PARTMAPS(sb)[UDF_SB_PARTITION(sb)].s_uspace.s_table); - if (UDF_SB_PARTFLAGS(sb, UDF_SB_PARTITION(sb)) & UDF_PART_FLAG_FREED_TABLE) - iput(UDF_SB_PARTMAPS(sb)[UDF_SB_PARTITION(sb)].s_fspace.s_table); - if (UDF_SB_PARTFLAGS(sb, UDF_SB_PARTITION(sb)) & UDF_PART_FLAG_UNALLOC_BITMAP) - UDF_SB_FREE_BITMAP(sb,UDF_SB_PARTITION(sb),s_uspace); - if (UDF_SB_PARTFLAGS(sb, UDF_SB_PARTITION(sb)) & UDF_PART_FLAG_FREED_BITMAP) - UDF_SB_FREE_BITMAP(sb,UDF_SB_PARTITION(sb),s_fspace); - if (UDF_SB_PARTTYPE(sb, UDF_SB_PARTITION(sb)) == UDF_SPARABLE_MAP15) - { - for (i=0; i<4; i++) - brelse(UDF_SB_TYPESPAR(sb, UDF_SB_PARTITION(sb)).s_spar_map[i]); + if (UDF_SB_NUMPARTS(sb)) { + if (UDF_SB_PARTFLAGS(sb, UDF_SB_PARTITION(sb)) & + UDF_PART_FLAG_UNALLOC_TABLE) + iput(UDF_SB_PARTMAPS(sb)[UDF_SB_PARTITION(sb)].s_uspace. + s_table); + if (UDF_SB_PARTFLAGS(sb, UDF_SB_PARTITION(sb)) & + UDF_PART_FLAG_FREED_TABLE) + iput(UDF_SB_PARTMAPS(sb)[UDF_SB_PARTITION(sb)].s_fspace. + s_table); + if (UDF_SB_PARTFLAGS(sb, UDF_SB_PARTITION(sb)) & + UDF_PART_FLAG_UNALLOC_BITMAP) + UDF_SB_FREE_BITMAP(sb, UDF_SB_PARTITION(sb), s_uspace); + if (UDF_SB_PARTFLAGS(sb, UDF_SB_PARTITION(sb)) & + UDF_PART_FLAG_FREED_BITMAP) + UDF_SB_FREE_BITMAP(sb, UDF_SB_PARTITION(sb), s_fspace); + if (UDF_SB_PARTTYPE(sb, UDF_SB_PARTITION(sb)) == + UDF_SPARABLE_MAP15) { + for (i = 0; i < 4; i++) + brelse(UDF_SB_TYPESPAR + (sb, + UDF_SB_PARTITION(sb)).s_spar_map[i]); } } #ifdef CONFIG_UDF_NLS @@ -1697,32 +1812,31 @@ error_out: } void udf_error(struct super_block *sb, const char *function, - const char *fmt, ...) + const char *fmt, ...) { va_list args; - if (!(sb->s_flags & MS_RDONLY)) - { + if (!(sb->s_flags & MS_RDONLY)) { /* mark sb error */ sb->s_dirt = 1; } va_start(args, fmt); vsnprintf(error_buf, sizeof(error_buf), fmt, args); va_end(args); - printk (KERN_CRIT "UDF-fs error (device %s): %s: %s\n", - sb->s_id, function, error_buf); + printk(KERN_CRIT "UDF-fs error (device %s): %s: %s\n", + sb->s_id, function, error_buf); } void udf_warning(struct super_block *sb, const char *function, - const char *fmt, ...) + const char *fmt, ...) { va_list args; - va_start (args, fmt); + va_start(args, fmt); vsnprintf(error_buf, sizeof(error_buf), fmt, args); va_end(args); printk(KERN_WARNING "UDF-fs warning (device %s): %s: %s\n", - sb->s_id, function, error_buf); + sb->s_id, function, error_buf); } /* @@ -1738,27 +1852,33 @@ void udf_warning(struct super_block *sb, const char *function, * July 1, 1997 - Andrew E. Mileski * Written, tested, and released. */ -static void -udf_put_super(struct super_block *sb) +static void udf_put_super(struct super_block *sb) { int i; if (UDF_SB_VAT(sb)) iput(UDF_SB_VAT(sb)); - if (UDF_SB_NUMPARTS(sb)) - { - if (UDF_SB_PARTFLAGS(sb, UDF_SB_PARTITION(sb)) & UDF_PART_FLAG_UNALLOC_TABLE) - iput(UDF_SB_PARTMAPS(sb)[UDF_SB_PARTITION(sb)].s_uspace.s_table); - if (UDF_SB_PARTFLAGS(sb, UDF_SB_PARTITION(sb)) & UDF_PART_FLAG_FREED_TABLE) - iput(UDF_SB_PARTMAPS(sb)[UDF_SB_PARTITION(sb)].s_fspace.s_table); - if (UDF_SB_PARTFLAGS(sb, UDF_SB_PARTITION(sb)) & UDF_PART_FLAG_UNALLOC_BITMAP) - UDF_SB_FREE_BITMAP(sb,UDF_SB_PARTITION(sb),s_uspace); - if (UDF_SB_PARTFLAGS(sb, UDF_SB_PARTITION(sb)) & UDF_PART_FLAG_FREED_BITMAP) - UDF_SB_FREE_BITMAP(sb,UDF_SB_PARTITION(sb),s_fspace); - if (UDF_SB_PARTTYPE(sb, UDF_SB_PARTITION(sb)) == UDF_SPARABLE_MAP15) - { - for (i=0; i<4; i++) - brelse(UDF_SB_TYPESPAR(sb, UDF_SB_PARTITION(sb)).s_spar_map[i]); + if (UDF_SB_NUMPARTS(sb)) { + if (UDF_SB_PARTFLAGS(sb, UDF_SB_PARTITION(sb)) & + UDF_PART_FLAG_UNALLOC_TABLE) + iput(UDF_SB_PARTMAPS(sb)[UDF_SB_PARTITION(sb)].s_uspace. + s_table); + if (UDF_SB_PARTFLAGS(sb, UDF_SB_PARTITION(sb)) & + UDF_PART_FLAG_FREED_TABLE) + iput(UDF_SB_PARTMAPS(sb)[UDF_SB_PARTITION(sb)].s_fspace. + s_table); + if (UDF_SB_PARTFLAGS(sb, UDF_SB_PARTITION(sb)) & + UDF_PART_FLAG_UNALLOC_BITMAP) + UDF_SB_FREE_BITMAP(sb, UDF_SB_PARTITION(sb), s_uspace); + if (UDF_SB_PARTFLAGS(sb, UDF_SB_PARTITION(sb)) & + UDF_PART_FLAG_FREED_BITMAP) + UDF_SB_FREE_BITMAP(sb, UDF_SB_PARTITION(sb), s_fspace); + if (UDF_SB_PARTTYPE(sb, UDF_SB_PARTITION(sb)) == + UDF_SPARABLE_MAP15) { + for (i = 0; i < 4; i++) + brelse(UDF_SB_TYPESPAR + (sb, + UDF_SB_PARTITION(sb)).s_spar_map[i]); } } #ifdef CONFIG_UDF_NLS @@ -1786,8 +1906,7 @@ udf_put_super(struct super_block *sb) * July 1, 1997 - Andrew E. Mileski * Written, tested, and released. */ -static int -udf_statfs(struct dentry *dentry, struct kstatfs *buf) +static int udf_statfs(struct dentry *dentry, struct kstatfs *buf) { struct super_block *sb = dentry->d_sb; @@ -1797,11 +1916,12 @@ udf_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_bfree = udf_count_free(sb); buf->f_bavail = buf->f_bfree; buf->f_files = (UDF_SB_LVIDBH(sb) ? - (le32_to_cpu(UDF_SB_LVIDIU(sb)->numFiles) + - le32_to_cpu(UDF_SB_LVIDIU(sb)->numDirs)) : 0) + buf->f_bfree; + (le32_to_cpu(UDF_SB_LVIDIU(sb)->numFiles) + + le32_to_cpu(UDF_SB_LVIDIU(sb)->numDirs)) : 0) + + buf->f_bfree; buf->f_ffree = buf->f_bfree; /* __kernel_fsid_t f_fsid */ - buf->f_namelen = UDF_NAME_LEN-2; + buf->f_namelen = UDF_NAME_LEN - 2; return 0; } @@ -1830,13 +1950,10 @@ udf_count_free_bitmap(struct super_block *sb, struct udf_bitmap *bitmap) loc.partitionReferenceNum = UDF_SB_PARTITION(sb); bh = udf_read_ptagged(sb, loc, 0, &ident); - if (!bh) - { + if (!bh) { printk(KERN_ERR "udf: udf_count_free failed\n"); goto out; - } - else if (ident != TAG_IDENT_SBD) - { + } else if (ident != TAG_IDENT_SBD) { brelse(bh); printk(KERN_ERR "udf: udf_count_free failed\n"); goto out; @@ -1844,43 +1961,39 @@ udf_count_free_bitmap(struct super_block *sb, struct udf_bitmap *bitmap) bm = (struct spaceBitmapDesc *)bh->b_data; bytes = le32_to_cpu(bm->numOfBytes); - index = sizeof(struct spaceBitmapDesc); /* offset in first block only */ - ptr = (uint8_t *)bh->b_data; + index = sizeof(struct spaceBitmapDesc); /* offset in first block only */ + ptr = (uint8_t *) bh->b_data; - while ( bytes > 0 ) - { - while ((bytes > 0) && (index < sb->s_blocksize)) - { + while (bytes > 0) { + while ((bytes > 0) && (index < sb->s_blocksize)) { value = ptr[index]; - accum += udf_bitmap_lookup[ value & 0x0f ]; - accum += udf_bitmap_lookup[ value >> 4 ]; + accum += udf_bitmap_lookup[value & 0x0f]; + accum += udf_bitmap_lookup[value >> 4]; index++; bytes--; } - if ( bytes ) - { + if (bytes) { brelse(bh); newblock = udf_get_lb_pblock(sb, loc, ++block); bh = udf_tread(sb, newblock); - if (!bh) - { + if (!bh) { udf_debug("read failed\n"); goto out; } index = 0; - ptr = (uint8_t *)bh->b_data; + ptr = (uint8_t *) bh->b_data; } } brelse(bh); -out: + out: unlock_kernel(); return accum; } static unsigned int -udf_count_free_table(struct super_block *sb, struct inode * table) +udf_count_free_table(struct super_block *sb, struct inode *table) { unsigned int accum = 0; uint32_t elen; @@ -1902,17 +2015,17 @@ udf_count_free_table(struct super_block *sb, struct inode * table) return accum; } - -static unsigned int -udf_count_free(struct super_block *sb) + +static unsigned int udf_count_free(struct super_block *sb) { unsigned int accum = 0; - if (UDF_SB_LVIDBH(sb)) - { - if (le32_to_cpu(UDF_SB_LVID(sb)->numOfPartitions) > UDF_SB_PARTITION(sb)) - { - accum = le32_to_cpu(UDF_SB_LVID(sb)->freeSpaceTable[UDF_SB_PARTITION(sb)]); + if (UDF_SB_LVIDBH(sb)) { + if (le32_to_cpu(UDF_SB_LVID(sb)->numOfPartitions) > + UDF_SB_PARTITION(sb)) { + accum = + le32_to_cpu(UDF_SB_LVID(sb)-> + freeSpaceTable[UDF_SB_PARTITION(sb)]); if (accum == 0xFFFFFFFF) accum = 0; @@ -1922,28 +2035,40 @@ udf_count_free(struct super_block *sb) if (accum) return accum; - if (UDF_SB_PARTFLAGS(sb,UDF_SB_PARTITION(sb)) & UDF_PART_FLAG_UNALLOC_BITMAP) - { - accum += udf_count_free_bitmap(sb, - UDF_SB_PARTMAPS(sb)[UDF_SB_PARTITION(sb)].s_uspace.s_bitmap); + if (UDF_SB_PARTFLAGS(sb, UDF_SB_PARTITION(sb)) & + UDF_PART_FLAG_UNALLOC_BITMAP) { + accum += + udf_count_free_bitmap(sb, + UDF_SB_PARTMAPS(sb)[UDF_SB_PARTITION + (sb)].s_uspace. + s_bitmap); } - if (UDF_SB_PARTFLAGS(sb,UDF_SB_PARTITION(sb)) & UDF_PART_FLAG_FREED_BITMAP) - { - accum += udf_count_free_bitmap(sb, - UDF_SB_PARTMAPS(sb)[UDF_SB_PARTITION(sb)].s_fspace.s_bitmap); + if (UDF_SB_PARTFLAGS(sb, UDF_SB_PARTITION(sb)) & + UDF_PART_FLAG_FREED_BITMAP) { + accum += + udf_count_free_bitmap(sb, + UDF_SB_PARTMAPS(sb)[UDF_SB_PARTITION + (sb)].s_fspace. + s_bitmap); } if (accum) return accum; - if (UDF_SB_PARTFLAGS(sb,UDF_SB_PARTITION(sb)) & UDF_PART_FLAG_UNALLOC_TABLE) - { - accum += udf_count_free_table(sb, - UDF_SB_PARTMAPS(sb)[UDF_SB_PARTITION(sb)].s_uspace.s_table); + if (UDF_SB_PARTFLAGS(sb, UDF_SB_PARTITION(sb)) & + UDF_PART_FLAG_UNALLOC_TABLE) { + accum += + udf_count_free_table(sb, + UDF_SB_PARTMAPS(sb)[UDF_SB_PARTITION + (sb)].s_uspace. + s_table); } - if (UDF_SB_PARTFLAGS(sb,UDF_SB_PARTITION(sb)) & UDF_PART_FLAG_FREED_TABLE) - { - accum += udf_count_free_table(sb, - UDF_SB_PARTMAPS(sb)[UDF_SB_PARTITION(sb)].s_fspace.s_table); + if (UDF_SB_PARTFLAGS(sb, UDF_SB_PARTITION(sb)) & + UDF_PART_FLAG_FREED_TABLE) { + accum += + udf_count_free_table(sb, + UDF_SB_PARTMAPS(sb)[UDF_SB_PARTITION + (sb)].s_fspace. + s_table); } return accum; diff --git a/fs/udf/symlink.c b/fs/udf/symlink.c index 12613b680cc4..c4b82a920082 100644 --- a/fs/udf/symlink.c +++ b/fs/udf/symlink.c @@ -33,41 +33,40 @@ #include <linux/buffer_head.h> #include "udf_i.h" -static void udf_pc_to_char(struct super_block *sb, char *from, int fromlen, char *to) +static void udf_pc_to_char(struct super_block *sb, char *from, int fromlen, + char *to) { struct pathComponent *pc; int elen = 0; char *p = to; - while (elen < fromlen) - { + while (elen < fromlen) { pc = (struct pathComponent *)(from + elen); - switch (pc->componentType) - { - case 1: - if (pc->lengthComponentIdent == 0) - { - p = to; - *p++ = '/'; - } - break; - case 3: - memcpy(p, "../", 3); - p += 3; - break; - case 4: - memcpy(p, "./", 2); - p += 2; - /* that would be . - just ignore */ - break; - case 5: - p += udf_get_filename(sb, pc->componentIdent, p, pc->lengthComponentIdent); + switch (pc->componentType) { + case 1: + if (pc->lengthComponentIdent == 0) { + p = to; *p++ = '/'; - break; + } + break; + case 3: + memcpy(p, "../", 3); + p += 3; + break; + case 4: + memcpy(p, "./", 2); + p += 2; + /* that would be . - just ignore */ + break; + case 5: + p += udf_get_filename(sb, pc->componentIdent, p, + pc->lengthComponentIdent); + *p++ = '/'; + break; } elen += sizeof(struct pathComponent) + pc->lengthComponentIdent; } - if (p > to+1) + if (p > to + 1) p[-1] = '\0'; else p[0] = '\0'; @@ -84,8 +83,7 @@ static int udf_symlink_filler(struct file *file, struct page *page) lock_kernel(); if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_IN_ICB) symlink = UDF_I_DATA(inode) + UDF_I_LENEATTR(inode); - else - { + else { bh = sb_bread(inode->i_sb, udf_block_map(inode, 0)); if (!bh) @@ -102,7 +100,7 @@ static int udf_symlink_filler(struct file *file, struct page *page) kunmap(page); unlock_page(page); return 0; -out: + out: unlock_kernel(); SetPageError(page); kunmap(page); @@ -114,5 +112,5 @@ out: * symlinks can't do much... */ const struct address_space_operations udf_symlink_aops = { - .readpage = udf_symlink_filler, + .readpage = udf_symlink_filler, }; diff --git a/fs/udf/truncate.c b/fs/udf/truncate.c index 60d277644248..b2002da0a5c0 100644 --- a/fs/udf/truncate.c +++ b/fs/udf/truncate.c @@ -28,35 +28,38 @@ #include "udf_i.h" #include "udf_sb.h" -static void extent_trunc(struct inode * inode, struct extent_position *epos, - kernel_lb_addr eloc, int8_t etype, uint32_t elen, uint32_t nelen) +static void extent_trunc(struct inode *inode, struct extent_position *epos, + kernel_lb_addr eloc, int8_t etype, uint32_t elen, + uint32_t nelen) { kernel_lb_addr neloc = { 0, 0 }; - int last_block = (elen + inode->i_sb->s_blocksize - 1) >> inode->i_sb->s_blocksize_bits; - int first_block = (nelen + inode->i_sb->s_blocksize - 1) >> inode->i_sb->s_blocksize_bits; + int last_block = + (elen + inode->i_sb->s_blocksize - + 1) >> inode->i_sb->s_blocksize_bits; + int first_block = + (nelen + inode->i_sb->s_blocksize - + 1) >> inode->i_sb->s_blocksize_bits; - if (nelen) - { - if (etype == (EXT_NOT_RECORDED_ALLOCATED >> 30)) - { - udf_free_blocks(inode->i_sb, inode, eloc, 0, last_block); + if (nelen) { + if (etype == (EXT_NOT_RECORDED_ALLOCATED >> 30)) { + udf_free_blocks(inode->i_sb, inode, eloc, 0, + last_block); etype = (EXT_NOT_RECORDED_NOT_ALLOCATED >> 30); - } - else + } else neloc = eloc; nelen = (etype << 30) | nelen; } - if (elen != nelen) - { + if (elen != nelen) { udf_write_aext(inode, epos, neloc, nelen, 0); - if (last_block - first_block > 0) - { + if (last_block - first_block > 0) { if (etype == (EXT_RECORDED_ALLOCATED >> 30)) mark_inode_dirty(inode); if (etype != (EXT_NOT_RECORDED_NOT_ALLOCATED >> 30)) - udf_free_blocks(inode->i_sb, inode, eloc, first_block, last_block - first_block); + udf_free_blocks(inode->i_sb, inode, eloc, + first_block, + last_block - first_block); } } } @@ -67,7 +70,7 @@ static void extent_trunc(struct inode * inode, struct extent_position *epos, */ void udf_truncate_tail_extent(struct inode *inode) { - struct extent_position epos = { NULL, 0, {0, 0}}; + struct extent_position epos = { NULL, 0, {0, 0} }; kernel_lb_addr eloc; uint32_t elen, nelen; uint64_t lbcount = 0; @@ -89,8 +92,7 @@ void udf_truncate_tail_extent(struct inode *inode) BUG(); /* Find the last extent in the file */ - while ((netype = udf_next_aext(inode, &epos, &eloc, &elen, 1)) != -1) - { + while ((netype = udf_next_aext(inode, &epos, &eloc, &elen, 1)) != -1) { etype = netype; lbcount += elen; if (lbcount > inode->i_size) { @@ -123,7 +125,7 @@ void udf_truncate_tail_extent(struct inode *inode) void udf_discard_prealloc(struct inode *inode) { - struct extent_position epos = { NULL, 0, {0, 0}}; + struct extent_position epos = { NULL, 0, {0, 0} }; kernel_lb_addr eloc; uint32_t elen; uint64_t lbcount = 0; @@ -131,7 +133,7 @@ void udf_discard_prealloc(struct inode *inode) int adsize; if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_IN_ICB || - inode->i_size == UDF_I_LENEXTENTS(inode)) + inode->i_size == UDF_I_LENEXTENTS(inode)) return; if (UDF_I_ALLOCTYPE(inode) == ICBTAG_FLAG_AD_SHORT) @@ -153,15 +155,21 @@ void udf_discard_prealloc(struct inode *inode) lbcount -= elen; extent_trunc(inode, &epos, eloc, etype, elen, 0); if (!epos.bh) { - UDF_I_LENALLOC(inode) = epos.offset - udf_file_entry_alloc_offset(inode); + UDF_I_LENALLOC(inode) = + epos.offset - udf_file_entry_alloc_offset(inode); mark_inode_dirty(inode); } else { - struct allocExtDesc *aed = (struct allocExtDesc *)(epos.bh->b_data); - aed->lengthAllocDescs = cpu_to_le32(epos.offset - sizeof(struct allocExtDesc)); - if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) || UDF_SB_UDFREV(inode->i_sb) >= 0x0201) + struct allocExtDesc *aed = + (struct allocExtDesc *)(epos.bh->b_data); + aed->lengthAllocDescs = + cpu_to_le32(epos.offset - + sizeof(struct allocExtDesc)); + if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) + || UDF_SB_UDFREV(inode->i_sb) >= 0x0201) udf_update_tag(epos.bh->b_data, epos.offset); else - udf_update_tag(epos.bh->b_data, sizeof(struct allocExtDesc)); + udf_update_tag(epos.bh->b_data, + sizeof(struct allocExtDesc)); mark_buffer_dirty_inode(epos.bh, inode); } } @@ -171,7 +179,7 @@ void udf_discard_prealloc(struct inode *inode) brelse(epos.bh); } -void udf_truncate_extents(struct inode * inode) +void udf_truncate_extents(struct inode *inode) { struct extent_position epos; kernel_lb_addr eloc, neloc = { 0, 0 }; @@ -190,9 +198,10 @@ void udf_truncate_extents(struct inode * inode) BUG(); etype = inode_bmap(inode, first_block, &epos, &eloc, &elen, &offset); - byte_offset = (offset << sb->s_blocksize_bits) + (inode->i_size & (sb->s_blocksize-1)); - if (etype != -1) - { + byte_offset = + (offset << sb->s_blocksize_bits) + + (inode->i_size & (sb->s_blocksize - 1)); + if (etype != -1) { epos.offset -= adsize; extent_trunc(inode, &epos, eloc, etype, elen, byte_offset); epos.offset += adsize; @@ -206,86 +215,98 @@ void udf_truncate_extents(struct inode * inode) else lenalloc -= sizeof(struct allocExtDesc); - while ((etype = udf_current_aext(inode, &epos, &eloc, &elen, 0)) != -1) - { - if (etype == (EXT_NEXT_EXTENT_ALLOCDECS >> 30)) - { + while ((etype = + udf_current_aext(inode, &epos, &eloc, &elen, + 0)) != -1) { + if (etype == (EXT_NEXT_EXTENT_ALLOCDECS >> 30)) { udf_write_aext(inode, &epos, neloc, nelen, 0); - if (indirect_ext_len) - { + if (indirect_ext_len) { /* We managed to free all extents in the * indirect extent - free it too */ if (!epos.bh) BUG(); - udf_free_blocks(sb, inode, epos.block, 0, indirect_ext_len); - } - else - { - if (!epos.bh) - { - UDF_I_LENALLOC(inode) = lenalloc; + udf_free_blocks(sb, inode, epos.block, + 0, indirect_ext_len); + } else { + if (!epos.bh) { + UDF_I_LENALLOC(inode) = + lenalloc; mark_inode_dirty(inode); - } - else - { - struct allocExtDesc *aed = (struct allocExtDesc *)(epos.bh->b_data); - aed->lengthAllocDescs = cpu_to_le32(lenalloc); - if (!UDF_QUERY_FLAG(sb, UDF_FLAG_STRICT) || UDF_SB_UDFREV(sb) >= 0x0201) - udf_update_tag(epos.bh->b_data, lenalloc + - sizeof(struct allocExtDesc)); + } else { + struct allocExtDesc *aed = + (struct allocExtDesc + *)(epos.bh->b_data); + aed->lengthAllocDescs = + cpu_to_le32(lenalloc); + if (!UDF_QUERY_FLAG + (sb, UDF_FLAG_STRICT) + || UDF_SB_UDFREV(sb) >= + 0x0201) + udf_update_tag(epos.bh-> + b_data, + lenalloc + + + sizeof + (struct + allocExtDesc)); else - udf_update_tag(epos.bh->b_data, sizeof(struct allocExtDesc)); - mark_buffer_dirty_inode(epos.bh, inode); + udf_update_tag(epos.bh-> + b_data, + sizeof + (struct + allocExtDesc)); + mark_buffer_dirty_inode(epos.bh, + inode); } } brelse(epos.bh); epos.offset = sizeof(struct allocExtDesc); epos.block = eloc; - epos.bh = udf_tread(sb, udf_get_lb_pblock(sb, eloc, 0)); + epos.bh = + udf_tread(sb, + udf_get_lb_pblock(sb, eloc, 0)); if (elen) indirect_ext_len = (elen + - sb->s_blocksize - 1) >> - sb->s_blocksize_bits; + sb->s_blocksize - + 1) >> sb-> + s_blocksize_bits; else indirect_ext_len = 1; - } - else - { - extent_trunc(inode, &epos, eloc, etype, elen, 0); + } else { + extent_trunc(inode, &epos, eloc, etype, elen, + 0); epos.offset += adsize; } } - if (indirect_ext_len) - { + if (indirect_ext_len) { if (!epos.bh) BUG(); - udf_free_blocks(sb, inode, epos.block, 0, indirect_ext_len); - } - else - { - if (!epos.bh) - { + udf_free_blocks(sb, inode, epos.block, 0, + indirect_ext_len); + } else { + if (!epos.bh) { UDF_I_LENALLOC(inode) = lenalloc; mark_inode_dirty(inode); - } - else - { - struct allocExtDesc *aed = (struct allocExtDesc *)(epos.bh->b_data); + } else { + struct allocExtDesc *aed = + (struct allocExtDesc *)(epos.bh->b_data); aed->lengthAllocDescs = cpu_to_le32(lenalloc); - if (!UDF_QUERY_FLAG(sb, UDF_FLAG_STRICT) || UDF_SB_UDFREV(sb) >= 0x0201) - udf_update_tag(epos.bh->b_data, lenalloc + - sizeof(struct allocExtDesc)); + if (!UDF_QUERY_FLAG(sb, UDF_FLAG_STRICT) + || UDF_SB_UDFREV(sb) >= 0x0201) + udf_update_tag(epos.bh->b_data, + lenalloc + + sizeof(struct + allocExtDesc)); else - udf_update_tag(epos.bh->b_data, sizeof(struct allocExtDesc)); + udf_update_tag(epos.bh->b_data, + sizeof(struct + allocExtDesc)); mark_buffer_dirty_inode(epos.bh, inode); } } - } - else if (inode->i_size) - { - if (byte_offset) - { + } else if (inode->i_size) { + if (byte_offset) { kernel_long_ad extent; /* @@ -293,21 +314,33 @@ void udf_truncate_extents(struct inode * inode) * no extent above inode->i_size => truncate is * extending the file by 'offset' blocks. */ - if ((!epos.bh && epos.offset == udf_file_entry_alloc_offset(inode)) || - (epos.bh && epos.offset == sizeof(struct allocExtDesc))) { + if ((!epos.bh + && epos.offset == + udf_file_entry_alloc_offset(inode)) || (epos.bh + && epos. + offset == + sizeof + (struct + allocExtDesc))) + { /* File has no extents at all or has empty last * indirect extent! Create a fake extent... */ extent.extLocation.logicalBlockNum = 0; extent.extLocation.partitionReferenceNum = 0; - extent.extLength = EXT_NOT_RECORDED_NOT_ALLOCATED; - } - else { + extent.extLength = + EXT_NOT_RECORDED_NOT_ALLOCATED; + } else { epos.offset -= adsize; etype = udf_next_aext(inode, &epos, - &extent.extLocation, &extent.extLength, 0); + &extent.extLocation, + &extent.extLength, 0); extent.extLength |= etype << 30; } - udf_extend_file(inode, &epos, &extent, offset+((inode->i_size & (sb->s_blocksize-1)) != 0)); + udf_extend_file(inode, &epos, &extent, + offset + + ((inode-> + i_size & (sb->s_blocksize - 1)) != + 0)); } } UDF_I_LENEXTENTS(inode) = inode->i_size; diff --git a/fs/udf/udf_i.h b/fs/udf/udf_i.h index d7dbe6f3ba0c..bee4308a8113 100644 --- a/fs/udf/udf_i.h +++ b/fs/udf/udf_i.h @@ -23,4 +23,4 @@ static inline struct udf_inode_info *UDF_I(struct inode *inode) #define UDF_I_LAD(X) ( UDF_I(X)->i_ext.i_lad ) #define UDF_I_DATA(X) ( UDF_I(X)->i_ext.i_data ) -#endif /* !defined(_LINUX_UDF_I_H) */ +#endif /* !defined(_LINUX_UDF_I_H) */ diff --git a/fs/udf/udf_sb.h b/fs/udf/udf_sb.h index 3b2e6c8cb151..60f31d8cebee 100644 --- a/fs/udf/udf_sb.h +++ b/fs/udf/udf_sb.h @@ -20,8 +20,8 @@ #define UDF_FLAG_VARCONV 8 #define UDF_FLAG_NLS_MAP 9 #define UDF_FLAG_UTF8 10 -#define UDF_FLAG_UID_FORGET 11 /* save -1 for uid to disk */ -#define UDF_FLAG_UID_IGNORE 12 /* use sb uid instead of on disk uid */ +#define UDF_FLAG_UID_FORGET 11 /* save -1 for uid to disk */ +#define UDF_FLAG_UID_IGNORE 12 /* use sb uid instead of on disk uid */ #define UDF_FLAG_GID_FORGET 13 #define UDF_FLAG_GID_IGNORE 14 @@ -139,4 +139,4 @@ static inline struct udf_sb_info *UDF_SB(struct super_block *sb) #define UDF_SB_FLAGS(X) ( UDF_SB(X)->s_flags ) #define UDF_SB_VAT(X) ( UDF_SB(X)->s_vat ) -#endif /* __LINUX_UDF_SB_H */ +#endif /* __LINUX_UDF_SB_H */ diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h index f581f2f69c0f..76f2b82a39dc 100644 --- a/fs/udf/udfdecl.h +++ b/fs/udf/udfdecl.h @@ -50,30 +50,26 @@ extern const struct address_space_operations udf_aops; extern const struct address_space_operations udf_adinicb_aops; extern const struct address_space_operations udf_symlink_aops; -struct udf_fileident_bh -{ +struct udf_fileident_bh { struct buffer_head *sbh; struct buffer_head *ebh; int soffset; int eoffset; }; -struct udf_vds_record -{ +struct udf_vds_record { uint32_t block; uint32_t volDescSeqNum; }; -struct generic_desc -{ - tag descTag; - __le32 volDescSeqNum; +struct generic_desc { + tag descTag; + __le32 volDescSeqNum; }; -struct ustr -{ +struct ustr { uint8_t u_cmpID; - uint8_t u_name[UDF_NAME_LEN-2]; + uint8_t u_name[UDF_NAME_LEN - 2]; uint8_t u_len; }; @@ -83,44 +79,58 @@ struct extent_position { kernel_lb_addr block; }; - /* super.c */ extern void udf_error(struct super_block *, const char *, const char *, ...); extern void udf_warning(struct super_block *, const char *, const char *, ...); /* namei.c */ -extern int udf_write_fi(struct inode *inode, struct fileIdentDesc *, struct fileIdentDesc *, struct udf_fileident_bh *, uint8_t *, uint8_t *); +extern int udf_write_fi(struct inode *inode, struct fileIdentDesc *, + struct fileIdentDesc *, struct udf_fileident_bh *, + uint8_t *, uint8_t *); /* file.c */ -extern int udf_ioctl(struct inode *, struct file *, unsigned int, unsigned long); +extern int udf_ioctl(struct inode *, struct file *, unsigned int, + unsigned long); /* inode.c */ extern struct inode *udf_iget(struct super_block *, kernel_lb_addr); extern int udf_sync_inode(struct inode *); extern void udf_expand_file_adinicb(struct inode *, int, int *); -extern struct buffer_head * udf_expand_dir_adinicb(struct inode *, int *, int *); -extern struct buffer_head * udf_bread(struct inode *, int, int, int *); +extern struct buffer_head *udf_expand_dir_adinicb(struct inode *, int *, int *); +extern struct buffer_head *udf_bread(struct inode *, int, int, int *); extern void udf_truncate(struct inode *); extern void udf_read_inode(struct inode *); extern void udf_delete_inode(struct inode *); extern void udf_clear_inode(struct inode *); extern int udf_write_inode(struct inode *, int); extern long udf_block_map(struct inode *, sector_t); -extern int udf_extend_file(struct inode *, struct extent_position *, kernel_long_ad *, sector_t); -extern int8_t inode_bmap(struct inode *, sector_t, struct extent_position *, kernel_lb_addr *, uint32_t *, sector_t *); -extern int8_t udf_add_aext(struct inode *, struct extent_position *, kernel_lb_addr, uint32_t, int); -extern int8_t udf_write_aext(struct inode *, struct extent_position *, kernel_lb_addr, uint32_t, int); -extern int8_t udf_delete_aext(struct inode *, struct extent_position, kernel_lb_addr, uint32_t); -extern int8_t udf_next_aext(struct inode *, struct extent_position *, kernel_lb_addr *, uint32_t *, int); -extern int8_t udf_current_aext(struct inode *, struct extent_position *, kernel_lb_addr *, uint32_t *, int); +extern int udf_extend_file(struct inode *, struct extent_position *, + kernel_long_ad *, sector_t); +extern int8_t inode_bmap(struct inode *, sector_t, struct extent_position *, + kernel_lb_addr *, uint32_t *, sector_t *); +extern int8_t udf_add_aext(struct inode *, struct extent_position *, + kernel_lb_addr, uint32_t, int); +extern int8_t udf_write_aext(struct inode *, struct extent_position *, + kernel_lb_addr, uint32_t, int); +extern int8_t udf_delete_aext(struct inode *, struct extent_position, + kernel_lb_addr, uint32_t); +extern int8_t udf_next_aext(struct inode *, struct extent_position *, + kernel_lb_addr *, uint32_t *, int); +extern int8_t udf_current_aext(struct inode *, struct extent_position *, + kernel_lb_addr *, uint32_t *, int); /* misc.c */ extern struct buffer_head *udf_tgetblk(struct super_block *, int); extern struct buffer_head *udf_tread(struct super_block *, int); -extern struct genericFormat *udf_add_extendedattr(struct inode *, uint32_t, uint32_t, uint8_t); -extern struct genericFormat *udf_get_extendedattr(struct inode *, uint32_t, uint8_t); -extern struct buffer_head *udf_read_tagged(struct super_block *, uint32_t, uint32_t, uint16_t *); -extern struct buffer_head *udf_read_ptagged(struct super_block *, kernel_lb_addr, uint32_t, uint16_t *); +extern struct genericFormat *udf_add_extendedattr(struct inode *, uint32_t, + uint32_t, uint8_t); +extern struct genericFormat *udf_get_extendedattr(struct inode *, uint32_t, + uint8_t); +extern struct buffer_head *udf_read_tagged(struct super_block *, uint32_t, + uint32_t, uint16_t *); +extern struct buffer_head *udf_read_ptagged(struct super_block *, + kernel_lb_addr, uint32_t, + uint16_t *); extern void udf_update_tag(char *, int); extern void udf_new_tag(char *, uint16_t, uint16_t, uint16_t, uint32_t, int); @@ -129,21 +139,26 @@ extern unsigned int udf_get_last_session(struct super_block *); extern unsigned long udf_get_last_block(struct super_block *); /* partition.c */ -extern uint32_t udf_get_pblock(struct super_block *, uint32_t, uint16_t, uint32_t); -extern uint32_t udf_get_pblock_virt15(struct super_block *, uint32_t, uint16_t, uint32_t); -extern uint32_t udf_get_pblock_virt20(struct super_block *, uint32_t, uint16_t, uint32_t); -extern uint32_t udf_get_pblock_spar15(struct super_block *, uint32_t, uint16_t, uint32_t); +extern uint32_t udf_get_pblock(struct super_block *, uint32_t, uint16_t, + uint32_t); +extern uint32_t udf_get_pblock_virt15(struct super_block *, uint32_t, uint16_t, + uint32_t); +extern uint32_t udf_get_pblock_virt20(struct super_block *, uint32_t, uint16_t, + uint32_t); +extern uint32_t udf_get_pblock_spar15(struct super_block *, uint32_t, uint16_t, + uint32_t); extern int udf_relocate_blocks(struct super_block *, long, long *); /* unicode.c */ extern int udf_get_filename(struct super_block *, uint8_t *, uint8_t *, int); -extern int udf_put_filename(struct super_block *, const uint8_t *, uint8_t *, int); +extern int udf_put_filename(struct super_block *, const uint8_t *, uint8_t *, + int); extern int udf_build_ustr(struct ustr *, dstring *, int); extern int udf_CS0toUTF8(struct ustr *, struct ustr *); /* ialloc.c */ extern void udf_free_inode(struct inode *); -extern struct inode * udf_new_inode (struct inode *, int, int *); +extern struct inode *udf_new_inode(struct inode *, int, int *); /* truncate.c */ extern void udf_truncate_tail_extent(struct inode *); @@ -151,18 +166,27 @@ extern void udf_discard_prealloc(struct inode *); extern void udf_truncate_extents(struct inode *); /* balloc.c */ -extern void udf_free_blocks(struct super_block *, struct inode *, kernel_lb_addr, uint32_t, uint32_t); -extern int udf_prealloc_blocks(struct super_block *, struct inode *, uint16_t, uint32_t, uint32_t); -extern int udf_new_block(struct super_block *, struct inode *, uint16_t, uint32_t, int *); +extern void udf_free_blocks(struct super_block *, struct inode *, + kernel_lb_addr, uint32_t, uint32_t); +extern int udf_prealloc_blocks(struct super_block *, struct inode *, uint16_t, + uint32_t, uint32_t); +extern int udf_new_block(struct super_block *, struct inode *, uint16_t, + uint32_t, int *); /* fsync.c */ extern int udf_fsync_file(struct file *, struct dentry *, int); /* directory.c */ -extern struct fileIdentDesc * udf_fileident_read(struct inode *, loff_t *, struct udf_fileident_bh *, struct fileIdentDesc *, struct extent_position *, kernel_lb_addr *, uint32_t *, sector_t *); -extern struct fileIdentDesc * udf_get_fileident(void * buffer, int bufsize, int * offset); -extern long_ad * udf_get_filelongad(uint8_t *, int, int *, int); -extern short_ad * udf_get_fileshortad(uint8_t *, int, int *, int); +extern struct fileIdentDesc *udf_fileident_read(struct inode *, loff_t *, + struct udf_fileident_bh *, + struct fileIdentDesc *, + struct extent_position *, + kernel_lb_addr *, uint32_t *, + sector_t *); +extern struct fileIdentDesc *udf_get_fileident(void *buffer, int bufsize, + int *offset); +extern long_ad *udf_get_filelongad(uint8_t *, int, int *, int); +extern short_ad *udf_get_fileshortad(uint8_t *, int, int *, int); /* crc.c */ extern uint16_t udf_crc(uint8_t *, uint32_t, uint16_t); @@ -171,4 +195,4 @@ extern uint16_t udf_crc(uint8_t *, uint32_t, uint16_t); extern time_t *udf_stamp_to_time(time_t *, long *, kernel_timestamp); extern kernel_timestamp *udf_time_to_stamp(kernel_timestamp *, struct timespec); -#endif /* __UDF_DECL_H */ +#endif /* __UDF_DECL_H */ diff --git a/fs/udf/udfend.h b/fs/udf/udfend.h index 17d378879561..450daab35a13 100644 --- a/fs/udf/udfend.h +++ b/fs/udf/udfend.h @@ -78,4 +78,4 @@ static inline timestamp cpu_to_lets(kernel_timestamp in) return out; } -#endif /* __UDF_ENDIAN_H */ +#endif /* __UDF_ENDIAN_H */ diff --git a/fs/udf/udftime.c b/fs/udf/udftime.c index 85d8dbe843f1..b9f3198080e9 100644 --- a/fs/udf/udftime.c +++ b/fs/udf/udftime.c @@ -46,37 +46,36 @@ #endif /* How many days come before each month (0-12). */ -static const unsigned short int __mon_yday[2][13] = -{ +static const unsigned short int __mon_yday[2][13] = { /* Normal years. */ - { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365 }, + {0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365}, /* Leap years. */ - { 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366 } + {0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366} }; #define MAX_YEAR_SECONDS 69 -#define SPD 0x15180 /*3600*24*/ +#define SPD 0x15180 /*3600*24 */ #define SPY(y,l,s) (SPD * (365*y+l)+s) -static time_t year_seconds[MAX_YEAR_SECONDS]= { -/*1970*/ SPY( 0, 0,0), SPY( 1, 0,0), SPY( 2, 0,0), SPY( 3, 1,0), -/*1974*/ SPY( 4, 1,0), SPY( 5, 1,0), SPY( 6, 1,0), SPY( 7, 2,0), -/*1978*/ SPY( 8, 2,0), SPY( 9, 2,0), SPY(10, 2,0), SPY(11, 3,0), -/*1982*/ SPY(12, 3,0), SPY(13, 3,0), SPY(14, 3,0), SPY(15, 4,0), -/*1986*/ SPY(16, 4,0), SPY(17, 4,0), SPY(18, 4,0), SPY(19, 5,0), -/*1990*/ SPY(20, 5,0), SPY(21, 5,0), SPY(22, 5,0), SPY(23, 6,0), -/*1994*/ SPY(24, 6,0), SPY(25, 6,0), SPY(26, 6,0), SPY(27, 7,0), -/*1998*/ SPY(28, 7,0), SPY(29, 7,0), SPY(30, 7,0), SPY(31, 8,0), -/*2002*/ SPY(32, 8,0), SPY(33, 8,0), SPY(34, 8,0), SPY(35, 9,0), -/*2006*/ SPY(36, 9,0), SPY(37, 9,0), SPY(38, 9,0), SPY(39,10,0), -/*2010*/ SPY(40,10,0), SPY(41,10,0), SPY(42,10,0), SPY(43,11,0), -/*2014*/ SPY(44,11,0), SPY(45,11,0), SPY(46,11,0), SPY(47,12,0), -/*2018*/ SPY(48,12,0), SPY(49,12,0), SPY(50,12,0), SPY(51,13,0), -/*2022*/ SPY(52,13,0), SPY(53,13,0), SPY(54,13,0), SPY(55,14,0), -/*2026*/ SPY(56,14,0), SPY(57,14,0), SPY(58,14,0), SPY(59,15,0), -/*2030*/ SPY(60,15,0), SPY(61,15,0), SPY(62,15,0), SPY(63,16,0), -/*2034*/ SPY(64,16,0), SPY(65,16,0), SPY(66,16,0), SPY(67,17,0), -/*2038*/ SPY(68,17,0) +static time_t year_seconds[MAX_YEAR_SECONDS] = { +/*1970*/ SPY(0, 0, 0), SPY(1, 0, 0), SPY(2, 0, 0), SPY(3, 1, 0), +/*1974*/ SPY(4, 1, 0), SPY(5, 1, 0), SPY(6, 1, 0), SPY(7, 2, 0), +/*1978*/ SPY(8, 2, 0), SPY(9, 2, 0), SPY(10, 2, 0), SPY(11, 3, 0), +/*1982*/ SPY(12, 3, 0), SPY(13, 3, 0), SPY(14, 3, 0), SPY(15, 4, 0), +/*1986*/ SPY(16, 4, 0), SPY(17, 4, 0), SPY(18, 4, 0), SPY(19, 5, 0), +/*1990*/ SPY(20, 5, 0), SPY(21, 5, 0), SPY(22, 5, 0), SPY(23, 6, 0), +/*1994*/ SPY(24, 6, 0), SPY(25, 6, 0), SPY(26, 6, 0), SPY(27, 7, 0), +/*1998*/ SPY(28, 7, 0), SPY(29, 7, 0), SPY(30, 7, 0), SPY(31, 8, 0), +/*2002*/ SPY(32, 8, 0), SPY(33, 8, 0), SPY(34, 8, 0), SPY(35, 9, 0), +/*2006*/ SPY(36, 9, 0), SPY(37, 9, 0), SPY(38, 9, 0), SPY(39, 10, 0), +/*2010*/ SPY(40, 10, 0), SPY(41, 10, 0), SPY(42, 10, 0), SPY(43, 11, 0), +/*2014*/ SPY(44, 11, 0), SPY(45, 11, 0), SPY(46, 11, 0), SPY(47, 12, 0), +/*2018*/ SPY(48, 12, 0), SPY(49, 12, 0), SPY(50, 12, 0), SPY(51, 13, 0), +/*2022*/ SPY(52, 13, 0), SPY(53, 13, 0), SPY(54, 13, 0), SPY(55, 14, 0), +/*2026*/ SPY(56, 14, 0), SPY(57, 14, 0), SPY(58, 14, 0), SPY(59, 15, 0), +/*2030*/ SPY(60, 15, 0), SPY(61, 15, 0), SPY(62, 15, 0), SPY(63, 16, 0), +/*2034*/ SPY(64, 16, 0), SPY(65, 16, 0), SPY(66, 16, 0), SPY(67, 17, 0), +/*2038*/ SPY(68, 17, 0) }; extern struct timezone sys_tz; @@ -84,27 +83,23 @@ extern struct timezone sys_tz; #define SECS_PER_HOUR (60 * 60) #define SECS_PER_DAY (SECS_PER_HOUR * 24) -time_t * -udf_stamp_to_time(time_t *dest, long *dest_usec, kernel_timestamp src) +time_t *udf_stamp_to_time(time_t * dest, long *dest_usec, kernel_timestamp src) { int yday; uint8_t type = src.typeAndTimezone >> 12; int16_t offset; - if (type == 1) - { + if (type == 1) { offset = src.typeAndTimezone << 4; /* sign extent offset */ offset = (offset >> 4); - if (offset == -2047) /* unspecified offset */ + if (offset == -2047) /* unspecified offset */ offset = 0; - } - else + } else offset = 0; if ((src.year < EPOCH_YEAR) || - (src.year >= EPOCH_YEAR+MAX_YEAR_SECONDS)) - { + (src.year >= EPOCH_YEAR + MAX_YEAR_SECONDS)) { *dest = -1; *dest_usec = -1; return NULL; @@ -112,16 +107,16 @@ udf_stamp_to_time(time_t *dest, long *dest_usec, kernel_timestamp src) *dest = year_seconds[src.year - EPOCH_YEAR]; *dest -= offset * 60; - yday = ((__mon_yday[__isleap (src.year)] - [src.month-1]) + (src.day-1)); - *dest += ( ( (yday* 24) + src.hour ) * 60 + src.minute ) * 60 + src.second; - *dest_usec = src.centiseconds * 10000 + src.hundredsOfMicroseconds * 100 + src.microseconds; + yday = ((__mon_yday[__isleap(src.year)] + [src.month - 1]) + (src.day - 1)); + *dest += (((yday * 24) + src.hour) * 60 + src.minute) * 60 + src.second; + *dest_usec = + src.centiseconds * 10000 + src.hundredsOfMicroseconds * 100 + + src.microseconds; return dest; } - -kernel_timestamp * -udf_time_to_stamp(kernel_timestamp *dest, struct timespec ts) +kernel_timestamp *udf_time_to_stamp(kernel_timestamp * dest, struct timespec ts) { long int days, rem, y; const unsigned short int *ip; @@ -146,28 +141,28 @@ udf_time_to_stamp(kernel_timestamp *dest, struct timespec ts) #define DIV(a,b) ((a) / (b) - ((a) % (b) < 0)) #define LEAPS_THRU_END_OF(y) (DIV (y, 4) - DIV (y, 100) + DIV (y, 400)) - while (days < 0 || days >= (__isleap(y) ? 366 : 365)) - { + while (days < 0 || days >= (__isleap(y) ? 366 : 365)) { long int yg = y + days / 365 - (days % 365 < 0); /* Adjust DAYS and Y to match the guessed year. */ - days -= ((yg - y) * 365 - + LEAPS_THRU_END_OF (yg - 1) - - LEAPS_THRU_END_OF (y - 1)); + days -= ((yg - y) * 365 + LEAPS_THRU_END_OF(yg - 1) + - LEAPS_THRU_END_OF(y - 1)); y = yg; } dest->year = y; ip = __mon_yday[__isleap(y)]; - for (y = 11; days < (long int) ip[y]; --y) + for (y = 11; days < (long int)ip[y]; --y) continue; days -= ip[y]; dest->month = y + 1; dest->day = days + 1; dest->centiseconds = ts.tv_nsec / 10000000; - dest->hundredsOfMicroseconds = (ts.tv_nsec / 1000 - dest->centiseconds * 10000) / 100; - dest->microseconds = (ts.tv_nsec / 1000 - dest->centiseconds * 10000 - - dest->hundredsOfMicroseconds * 100); + dest->hundredsOfMicroseconds = + (ts.tv_nsec / 1000 - dest->centiseconds * 10000) / 100; + dest->microseconds = + (ts.tv_nsec / 1000 - dest->centiseconds * 10000 - + dest->hundredsOfMicroseconds * 100); return dest; } diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c index 706c92e1dcc9..46835240275c 100644 --- a/fs/udf/unicode.c +++ b/fs/udf/unicode.c @@ -29,9 +29,9 @@ static int udf_translate_to_linux(uint8_t *, uint8_t *, int, uint8_t *, int); -static int udf_char_to_ustr(struct ustr *dest, const uint8_t *src, int strlen) +static int udf_char_to_ustr(struct ustr *dest, const uint8_t * src, int strlen) { - if ( (!dest) || (!src) || (!strlen) || (strlen > UDF_NAME_LEN-2) ) + if ((!dest) || (!src) || (!strlen) || (strlen > UDF_NAME_LEN - 2)) return 0; memset(dest, 0, sizeof(struct ustr)); memcpy(dest->u_name, src, strlen); @@ -43,33 +43,33 @@ static int udf_char_to_ustr(struct ustr *dest, const uint8_t *src, int strlen) /* * udf_build_ustr */ -int udf_build_ustr(struct ustr *dest, dstring *ptr, int size) +int udf_build_ustr(struct ustr *dest, dstring * ptr, int size) { int usesize; - if ( (!dest) || (!ptr) || (!size) ) + if ((!dest) || (!ptr) || (!size)) return -1; memset(dest, 0, sizeof(struct ustr)); - usesize= (size > UDF_NAME_LEN) ? UDF_NAME_LEN : size; - dest->u_cmpID=ptr[0]; - dest->u_len=ptr[size-1]; - memcpy(dest->u_name, ptr+1, usesize-1); + usesize = (size > UDF_NAME_LEN) ? UDF_NAME_LEN : size; + dest->u_cmpID = ptr[0]; + dest->u_len = ptr[size - 1]; + memcpy(dest->u_name, ptr + 1, usesize - 1); return 0; } /* * udf_build_ustr_exact */ -static int udf_build_ustr_exact(struct ustr *dest, dstring *ptr, int exactsize) +static int udf_build_ustr_exact(struct ustr *dest, dstring * ptr, int exactsize) { - if ( (!dest) || (!ptr) || (!exactsize) ) + if ((!dest) || (!ptr) || (!exactsize)) return -1; memset(dest, 0, sizeof(struct ustr)); - dest->u_cmpID=ptr[0]; - dest->u_len=exactsize-1; - memcpy(dest->u_name, ptr+1, exactsize-1); + dest->u_cmpID = ptr[0]; + dest->u_len = exactsize - 1; + memcpy(dest->u_name, ptr + 1, exactsize - 1); return 0; } @@ -108,22 +108,20 @@ int udf_CS0toUTF8(struct ustr *utf_o, struct ustr *ocu_i) cmp_id = ocu_i->u_cmpID; utf_o->u_len = 0; - if (ocu_len == 0) - { + if (ocu_len == 0) { memset(utf_o, 0, sizeof(struct ustr)); utf_o->u_cmpID = 0; utf_o->u_len = 0; return 0; } - if ((cmp_id != 8) && (cmp_id != 16)) - { - printk(KERN_ERR "udf: unknown compression code (%d) stri=%s\n", cmp_id, ocu_i->u_name); + if ((cmp_id != 8) && (cmp_id != 16)) { + printk(KERN_ERR "udf: unknown compression code (%d) stri=%s\n", + cmp_id, ocu_i->u_name); return 0; } - for (i = 0; (i < ocu_len) && (utf_o->u_len <= (UDF_NAME_LEN-3)) ;) - { + for (i = 0; (i < ocu_len) && (utf_o->u_len <= (UDF_NAME_LEN - 3));) { /* Expand OSTA compressed Unicode to Unicode */ c = ocu[i++]; @@ -132,20 +130,22 @@ int udf_CS0toUTF8(struct ustr *utf_o, struct ustr *ocu_i) /* Compress Unicode to UTF-8 */ if (c < 0x80U) - utf_o->u_name[utf_o->u_len++] = (uint8_t)c; - else if (c < 0x800U) - { - utf_o->u_name[utf_o->u_len++] = (uint8_t)(0xc0 | (c >> 6)); - utf_o->u_name[utf_o->u_len++] = (uint8_t)(0x80 | (c & 0x3f)); - } - else - { - utf_o->u_name[utf_o->u_len++] = (uint8_t)(0xe0 | (c >> 12)); - utf_o->u_name[utf_o->u_len++] = (uint8_t)(0x80 | ((c >> 6) & 0x3f)); - utf_o->u_name[utf_o->u_len++] = (uint8_t)(0x80 | (c & 0x3f)); + utf_o->u_name[utf_o->u_len++] = (uint8_t) c; + else if (c < 0x800U) { + utf_o->u_name[utf_o->u_len++] = + (uint8_t) (0xc0 | (c >> 6)); + utf_o->u_name[utf_o->u_len++] = + (uint8_t) (0x80 | (c & 0x3f)); + } else { + utf_o->u_name[utf_o->u_len++] = + (uint8_t) (0xe0 | (c >> 12)); + utf_o->u_name[utf_o->u_len++] = + (uint8_t) (0x80 | ((c >> 6) & 0x3f)); + utf_o->u_name[utf_o->u_len++] = + (uint8_t) (0x80 | (c & 0x3f)); } } - utf_o->u_cmpID=8; + utf_o->u_cmpID = 8; return utf_o->u_len; } @@ -173,7 +173,7 @@ int udf_CS0toUTF8(struct ustr *utf_o, struct ustr *ocu_i) * November 12, 1997 - Andrew E. Mileski * Written, tested, and released. */ -static int udf_UTF8toCS0(dstring *ocu, struct ustr *utf, int length) +static int udf_UTF8toCS0(dstring * ocu, struct ustr *utf, int length) { unsigned c, i, max_val, utf_char; int utf_cnt, u_len; @@ -182,53 +182,38 @@ static int udf_UTF8toCS0(dstring *ocu, struct ustr *utf, int length) ocu[0] = 8; max_val = 0xffU; -try_again: + try_again: u_len = 0U; utf_char = 0U; utf_cnt = 0U; - for (i = 0U; i < utf->u_len; i++) - { - c = (uint8_t)utf->u_name[i]; + for (i = 0U; i < utf->u_len; i++) { + c = (uint8_t) utf->u_name[i]; /* Complete a multi-byte UTF-8 character */ - if (utf_cnt) - { + if (utf_cnt) { utf_char = (utf_char << 6) | (c & 0x3fU); if (--utf_cnt) continue; - } - else - { + } else { /* Check for a multi-byte UTF-8 character */ - if (c & 0x80U) - { + if (c & 0x80U) { /* Start a multi-byte UTF-8 character */ - if ((c & 0xe0U) == 0xc0U) - { + if ((c & 0xe0U) == 0xc0U) { utf_char = c & 0x1fU; utf_cnt = 1; - } - else if ((c & 0xf0U) == 0xe0U) - { + } else if ((c & 0xf0U) == 0xe0U) { utf_char = c & 0x0fU; utf_cnt = 2; - } - else if ((c & 0xf8U) == 0xf0U) - { + } else if ((c & 0xf8U) == 0xf0U) { utf_char = c & 0x07U; utf_cnt = 3; - } - else if ((c & 0xfcU) == 0xf8U) - { + } else if ((c & 0xfcU) == 0xf8U) { utf_char = c & 0x03U; utf_cnt = 4; - } - else if ((c & 0xfeU) == 0xfcU) - { + } else if ((c & 0xfeU) == 0xfcU) { utf_char = c & 0x01U; utf_cnt = 5; - } - else + } else goto error_out; continue; } else @@ -237,37 +222,33 @@ try_again: } /* Choose no compression if necessary */ - if (utf_char > max_val) - { - if ( 0xffU == max_val ) - { + if (utf_char > max_val) { + if (0xffU == max_val) { max_val = 0xffffU; - ocu[0] = (uint8_t)0x10U; + ocu[0] = (uint8_t) 0x10U; goto try_again; } goto error_out; } - if (max_val == 0xffffU) - { - ocu[++u_len] = (uint8_t)(utf_char >> 8); + if (max_val == 0xffffU) { + ocu[++u_len] = (uint8_t) (utf_char >> 8); } - ocu[++u_len] = (uint8_t)(utf_char & 0xffU); + ocu[++u_len] = (uint8_t) (utf_char & 0xffU); } - - if (utf_cnt) - { -error_out: + if (utf_cnt) { + error_out: ocu[++u_len] = '?'; printk(KERN_DEBUG "udf: bad UTF-8 character\n"); } - ocu[length - 1] = (uint8_t)u_len + 1; + ocu[length - 1] = (uint8_t) u_len + 1; return u_len + 1; } -static int udf_CS0toNLS(struct nls_table *nls, struct ustr *utf_o, struct ustr *ocu_i) +static int udf_CS0toNLS(struct nls_table *nls, struct ustr *utf_o, + struct ustr *ocu_i) { uint8_t *ocu; uint32_t c; @@ -280,36 +261,35 @@ static int udf_CS0toNLS(struct nls_table *nls, struct ustr *utf_o, struct ustr * cmp_id = ocu_i->u_cmpID; utf_o->u_len = 0; - if (ocu_len == 0) - { + if (ocu_len == 0) { memset(utf_o, 0, sizeof(struct ustr)); utf_o->u_cmpID = 0; utf_o->u_len = 0; return 0; } - if ((cmp_id != 8) && (cmp_id != 16)) - { - printk(KERN_ERR "udf: unknown compression code (%d) stri=%s\n", cmp_id, ocu_i->u_name); + if ((cmp_id != 8) && (cmp_id != 16)) { + printk(KERN_ERR "udf: unknown compression code (%d) stri=%s\n", + cmp_id, ocu_i->u_name); return 0; } - for (i = 0; (i < ocu_len) && (utf_o->u_len <= (UDF_NAME_LEN-3)) ;) - { + for (i = 0; (i < ocu_len) && (utf_o->u_len <= (UDF_NAME_LEN - 3));) { /* Expand OSTA compressed Unicode to Unicode */ c = ocu[i++]; if (cmp_id == 16) c = (c << 8) | ocu[i++]; - utf_o->u_len += nls->uni2char(c, &utf_o->u_name[utf_o->u_len], - UDF_NAME_LEN - utf_o->u_len); + utf_o->u_len += nls->uni2char(c, &utf_o->u_name[utf_o->u_len], + UDF_NAME_LEN - utf_o->u_len); } - utf_o->u_cmpID=8; + utf_o->u_cmpID = 8; return utf_o->u_len; } -static int udf_NLStoCS0(struct nls_table *nls, dstring *ocu, struct ustr *uni, int length) +static int udf_NLStoCS0(struct nls_table *nls, dstring * ocu, struct ustr *uni, + int length) { unsigned len, i, max_val; uint16_t uni_char; @@ -319,93 +299,87 @@ static int udf_NLStoCS0(struct nls_table *nls, dstring *ocu, struct ustr *uni, i ocu[0] = 8; max_val = 0xffU; -try_again: + try_again: u_len = 0U; - for (i = 0U; i < uni->u_len; i++) - { - len = nls->char2uni(&uni->u_name[i], uni->u_len-i, &uni_char); + for (i = 0U; i < uni->u_len; i++) { + len = nls->char2uni(&uni->u_name[i], uni->u_len - i, &uni_char); if (len <= 0) continue; - if (uni_char > max_val) - { + if (uni_char > max_val) { max_val = 0xffffU; - ocu[0] = (uint8_t)0x10U; + ocu[0] = (uint8_t) 0x10U; goto try_again; } - + if (max_val == 0xffffU) - ocu[++u_len] = (uint8_t)(uni_char >> 8); - ocu[++u_len] = (uint8_t)(uni_char & 0xffU); + ocu[++u_len] = (uint8_t) (uni_char >> 8); + ocu[++u_len] = (uint8_t) (uni_char & 0xffU); i += len - 1; } - ocu[length - 1] = (uint8_t)u_len + 1; + ocu[length - 1] = (uint8_t) u_len + 1; return u_len + 1; } -int udf_get_filename(struct super_block *sb, uint8_t *sname, uint8_t *dname, int flen) +int udf_get_filename(struct super_block *sb, uint8_t * sname, uint8_t * dname, + int flen) { struct ustr filename, unifilename; int len; - if (udf_build_ustr_exact(&unifilename, sname, flen)) - { + if (udf_build_ustr_exact(&unifilename, sname, flen)) { return 0; } - if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) - { - if (!udf_CS0toUTF8(&filename, &unifilename) ) - { - udf_debug("Failed in udf_get_filename: sname = %s\n", sname); + if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) { + if (!udf_CS0toUTF8(&filename, &unifilename)) { + udf_debug("Failed in udf_get_filename: sname = %s\n", + sname); return 0; } - } - else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) - { - if (!udf_CS0toNLS(UDF_SB(sb)->s_nls_map, &filename, &unifilename) ) - { - udf_debug("Failed in udf_get_filename: sname = %s\n", sname); + } else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) { + if (!udf_CS0toNLS + (UDF_SB(sb)->s_nls_map, &filename, &unifilename)) { + udf_debug("Failed in udf_get_filename: sname = %s\n", + sname); return 0; } - } - else + } else return 0; - if ((len = udf_translate_to_linux(dname, filename.u_name, filename.u_len, - unifilename.u_name, unifilename.u_len))) - { + if ((len = + udf_translate_to_linux(dname, filename.u_name, filename.u_len, + unifilename.u_name, unifilename.u_len))) { return len; } return 0; } -int udf_put_filename(struct super_block *sb, const uint8_t *sname, uint8_t *dname, int flen) +int udf_put_filename(struct super_block *sb, const uint8_t * sname, + uint8_t * dname, int flen) { struct ustr unifilename; int namelen; - if ( !(udf_char_to_ustr(&unifilename, sname, flen)) ) - { + if (!(udf_char_to_ustr(&unifilename, sname, flen))) { return 0; } - if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) - { - if ( !(namelen = udf_UTF8toCS0(dname, &unifilename, UDF_NAME_LEN)) ) - { + if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) { + if (! + (namelen = + udf_UTF8toCS0(dname, &unifilename, UDF_NAME_LEN))) { return 0; } - } - else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) - { - if ( !(namelen = udf_NLStoCS0(UDF_SB(sb)->s_nls_map, dname, &unifilename, UDF_NAME_LEN)) ) - { + } else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) { + if (! + (namelen = + udf_NLStoCS0(UDF_SB(sb)->s_nls_map, dname, &unifilename, + UDF_NAME_LEN))) { return 0; } - } - else + } else return 0; return namelen; @@ -416,40 +390,36 @@ int udf_put_filename(struct super_block *sb, const uint8_t *sname, uint8_t *dnam #define CRC_MARK '#' #define EXT_SIZE 5 -static int udf_translate_to_linux(uint8_t *newName, uint8_t *udfName, int udfLen, uint8_t *fidName, int fidNameLen) +static int udf_translate_to_linux(uint8_t * newName, uint8_t * udfName, + int udfLen, uint8_t * fidName, int fidNameLen) { - int index, newIndex = 0, needsCRC = 0; + int index, newIndex = 0, needsCRC = 0; int extIndex = 0, newExtIndex = 0, hasExt = 0; unsigned short valueCRC; uint8_t curr; const uint8_t hexChar[] = "0123456789ABCDEF"; if (udfName[0] == '.' && (udfLen == 1 || - (udfLen == 2 && udfName[1] == '.'))) - { + (udfLen == 2 && udfName[1] == '.'))) { needsCRC = 1; newIndex = udfLen; memcpy(newName, udfName, udfLen); - } - else - { - for (index = 0; index < udfLen; index++) - { + } else { + for (index = 0; index < udfLen; index++) { curr = udfName[index]; - if (curr == '/' || curr == 0) - { + if (curr == '/' || curr == 0) { needsCRC = 1; curr = ILLEGAL_CHAR_MARK; - while (index+1 < udfLen && (udfName[index+1] == '/' || - udfName[index+1] == 0)) + while (index + 1 < udfLen + && (udfName[index + 1] == '/' + || udfName[index + 1] == 0)) index++; } - if (curr == EXT_MARK && (udfLen - index - 1) <= EXT_SIZE) - { + if (curr == EXT_MARK + && (udfLen - index - 1) <= EXT_SIZE) { if (udfLen == index + 1) hasExt = 0; - else - { + else { hasExt = 1; extIndex = index; newExtIndex = newIndex; @@ -461,26 +431,29 @@ static int udf_translate_to_linux(uint8_t *newName, uint8_t *udfName, int udfLen needsCRC = 1; } } - if (needsCRC) - { + if (needsCRC) { uint8_t ext[EXT_SIZE]; int localExtIndex = 0; - if (hasExt) - { + if (hasExt) { int maxFilenameLen; - for(index = 0; index<EXT_SIZE && extIndex + index +1 < udfLen; - index++ ) - { + for (index = 0; + index < EXT_SIZE && extIndex + index + 1 < udfLen; + index++) { curr = udfName[extIndex + index + 1]; - if (curr == '/' || curr == 0) - { + if (curr == '/' || curr == 0) { needsCRC = 1; curr = ILLEGAL_CHAR_MARK; - while(extIndex + index + 2 < udfLen && (index + 1 < EXT_SIZE - && (udfName[extIndex + index + 2] == '/' || - udfName[extIndex + index + 2] == 0))) + while (extIndex + index + 2 < udfLen + && (index + 1 < EXT_SIZE + && + (udfName + [extIndex + index + 2] == + '/' + || udfName[extIndex + + index + 2] == + 0))) index++; } ext[localExtIndex++] = curr; @@ -490,8 +463,7 @@ static int udf_translate_to_linux(uint8_t *newName, uint8_t *udfName, int udfLen newIndex = maxFilenameLen; else newIndex = newExtIndex; - } - else if (newIndex > 250) + } else if (newIndex > 250) newIndex = 250; newName[newIndex++] = CRC_MARK; valueCRC = udf_crc(fidName, fidNameLen, 0); @@ -500,10 +472,9 @@ static int udf_translate_to_linux(uint8_t *newName, uint8_t *udfName, int udfLen newName[newIndex++] = hexChar[(valueCRC & 0x00f0) >> 4]; newName[newIndex++] = hexChar[(valueCRC & 0x000f)]; - if (hasExt) - { + if (hasExt) { newName[newIndex++] = EXT_MARK; - for (index = 0;index < localExtIndex ;index++ ) + for (index = 0; index < localExtIndex; index++) newName[newIndex++] = ext[index]; } } diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index cbcd40c8c2a0..2d4be2f247b2 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -212,19 +212,18 @@ xfs_file_fsync( } #ifdef CONFIG_XFS_DMAPI -STATIC struct page * -xfs_vm_nopage( - struct vm_area_struct *area, - unsigned long address, - int *type) +STATIC int +xfs_vm_fault( + struct vm_area_struct *vma, + struct vm_fault *vmf) { - struct inode *inode = area->vm_file->f_path.dentry->d_inode; + struct inode *inode = vma->vm_file->f_path.dentry->d_inode; bhv_vnode_t *vp = vn_from_inode(inode); ASSERT_ALWAYS(vp->v_vfsp->vfs_flag & VFS_DMI); - if (XFS_SEND_MMAP(XFS_VFSTOM(vp->v_vfsp), area, 0)) - return NULL; - return filemap_nopage(area, address, type); + if (XFS_SEND_MMAP(XFS_VFSTOM(vp->v_vfsp), vma, 0)) + return VM_FAULT_SIGBUS; + return filemap_fault(vma, vmf); } #endif /* CONFIG_XFS_DMAPI */ @@ -310,6 +309,7 @@ xfs_file_mmap( struct vm_area_struct *vma) { vma->vm_ops = &xfs_file_vm_ops; + vma->vm_flags |= VM_CAN_NONLINEAR; #ifdef CONFIG_XFS_DMAPI if (vn_from_inode(filp->f_path.dentry->d_inode)->v_vfsp->vfs_flag & VFS_DMI) @@ -464,14 +464,12 @@ const struct file_operations xfs_dir_file_operations = { }; static struct vm_operations_struct xfs_file_vm_ops = { - .nopage = filemap_nopage, - .populate = filemap_populate, + .fault = filemap_fault, }; #ifdef CONFIG_XFS_DMAPI static struct vm_operations_struct xfs_dmapi_file_vm_ops = { - .nopage = xfs_vm_nopage, - .populate = filemap_populate, + .fault = xfs_vm_fault, #ifdef HAVE_VMOP_MPROTECT .mprotect = xfs_vm_mprotect, #endif diff --git a/include/asm-alpha/a.out.h b/include/asm-alpha/a.out.h index d97daf42753d..e43cf61649a9 100644 --- a/include/asm-alpha/a.out.h +++ b/include/asm-alpha/a.out.h @@ -101,6 +101,8 @@ struct exec #define STACK_TOP \ (current->personality & ADDR_LIMIT_32BIT ? 0x80000000 : 0x00120000000UL) +#define STACK_TOP_MAX 0x00120000000UL + #endif #endif /* __A_OUT_GNU_H__ */ diff --git a/include/asm-arm/a.out.h b/include/asm-arm/a.out.h index 3e5fe64c4394..d7165e86df25 100644 --- a/include/asm-arm/a.out.h +++ b/include/asm-arm/a.out.h @@ -30,6 +30,7 @@ struct exec #ifdef __KERNEL__ #define STACK_TOP ((current->personality == PER_LINUX_32BIT) ? \ TASK_SIZE : TASK_SIZE_26) +#define STACK_TOP_MAX TASK_SIZE #endif #ifndef LIBRARY_START_TEXT diff --git a/include/asm-arm26/a.out.h b/include/asm-arm26/a.out.h index 9b2702c42c87..7167f54ae3fc 100644 --- a/include/asm-arm26/a.out.h +++ b/include/asm-arm26/a.out.h @@ -29,6 +29,7 @@ struct exec #ifdef __KERNEL__ #define STACK_TOP TASK_SIZE +#define STACK_TOP_MAX STACK_TOP #endif #ifndef LIBRARY_START_TEXT diff --git a/include/asm-avr32/a.out.h b/include/asm-avr32/a.out.h index 50bf6e31a143..9f398ab28ed0 100644 --- a/include/asm-avr32/a.out.h +++ b/include/asm-avr32/a.out.h @@ -20,6 +20,7 @@ struct exec #ifdef __KERNEL__ #define STACK_TOP TASK_SIZE +#define STACK_TOP_MAX STACK_TOP #endif diff --git a/include/asm-cris/a.out.h b/include/asm-cris/a.out.h index 770734ce54a6..919b34a084f8 100644 --- a/include/asm-cris/a.out.h +++ b/include/asm-cris/a.out.h @@ -8,6 +8,7 @@ /* grabbed from the intel stuff */ #define STACK_TOP TASK_SIZE +#define STACK_TOP_MAX STACK_TOP struct exec diff --git a/include/asm-frv/mem-layout.h b/include/asm-frv/mem-layout.h index a025dd4514e7..aaf2a773d9d3 100644 --- a/include/asm-frv/mem-layout.h +++ b/include/asm-frv/mem-layout.h @@ -60,6 +60,7 @@ */ #define BRK_BASE __UL(2 * 1024 * 1024 + PAGE_SIZE) #define STACK_TOP __UL(2 * 1024 * 1024) +#define STACK_TOP_MAX STACK_TOP /* userspace process size */ #ifdef CONFIG_MMU diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index d984a9041436..d85172e9ed45 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -14,6 +14,11 @@ extern unsigned long __per_cpu_offset[NR_CPUS]; #define DEFINE_PER_CPU(type, name) \ __attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name +#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \ + __attribute__((__section__(".data.percpu.shared_aligned"))) \ + __typeof__(type) per_cpu__##name \ + ____cacheline_aligned_in_smp + /* var is in discarded region: offset to particular copy we want */ #define per_cpu(var, cpu) (*({ \ extern int simple_identifier_##var(void); \ @@ -34,6 +39,9 @@ do { \ #define DEFINE_PER_CPU(type, name) \ __typeof__(type) per_cpu__##name +#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \ + DEFINE_PER_CPU(type, name) + #define per_cpu(var, cpu) (*((void)(cpu), &per_cpu__##var)) #define __get_cpu_var(var) per_cpu__##var #define __raw_get_cpu_var(var) per_cpu__##var diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 84155eb67f1d..0240e0506a07 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -224,7 +224,11 @@ } #define NOTES \ - .notes : { *(.note.*) } :note + .notes : AT(ADDR(.notes) - LOAD_OFFSET) { \ + VMLINUX_SYMBOL(__start_notes) = .; \ + *(.note.*) \ + VMLINUX_SYMBOL(__stop_notes) = .; \ + } #define INITCALLS \ *(.initcall0.init) \ @@ -245,3 +249,11 @@ *(.initcall7.init) \ *(.initcall7s.init) +#define PERCPU(align) \ + . = ALIGN(align); \ + __per_cpu_start = .; \ + .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { \ + *(.data.percpu) \ + *(.data.percpu.shared_aligned) \ + } \ + __per_cpu_end = .; diff --git a/include/asm-h8300/a.out.h b/include/asm-h8300/a.out.h index 3c70939f9f00..aa5d22778235 100644 --- a/include/asm-h8300/a.out.h +++ b/include/asm-h8300/a.out.h @@ -20,6 +20,7 @@ struct exec #ifdef __KERNEL__ #define STACK_TOP TASK_SIZE +#define STACK_TOP_MAX STACK_TOP #endif diff --git a/include/asm-i386/a.out.h b/include/asm-i386/a.out.h index ab17bb8e5465..851a60f8258c 100644 --- a/include/asm-i386/a.out.h +++ b/include/asm-i386/a.out.h @@ -20,6 +20,7 @@ struct exec #ifdef __KERNEL__ #define STACK_TOP TASK_SIZE +#define STACK_TOP_MAX STACK_TOP #endif diff --git a/include/asm-i386/kprobes.h b/include/asm-i386/kprobes.h index 8774d06689da..06f7303c30ca 100644 --- a/include/asm-i386/kprobes.h +++ b/include/asm-i386/kprobes.h @@ -42,7 +42,6 @@ typedef u8 kprobe_opcode_t; ? (MAX_STACK_SIZE) \ : (((unsigned long)current_thread_info()) + THREAD_SIZE - (ADDR))) -#define JPROBE_ENTRY(pentry) (kprobe_opcode_t *)pentry #define ARCH_SUPPORTS_KRETPROBES #define ARCH_INACTIVE_KPROBE_COUNT 0 #define flush_insn_slot(p) do { } while (0) diff --git a/include/asm-i386/percpu.h b/include/asm-i386/percpu.h index f54830b5d5ac..a7ebd436f3cc 100644 --- a/include/asm-i386/percpu.h +++ b/include/asm-i386/percpu.h @@ -54,6 +54,11 @@ extern unsigned long __per_cpu_offset[]; #define DEFINE_PER_CPU(type, name) \ __attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name +#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \ + __attribute__((__section__(".data.percpu.shared_aligned"))) \ + __typeof__(type) per_cpu__##name \ + ____cacheline_aligned_in_smp + /* We can use this directly for local CPU (faster). */ DECLARE_PER_CPU(unsigned long, this_cpu_off); diff --git a/include/asm-i386/tsc.h b/include/asm-i386/tsc.h index 62c091ffcccc..a4d806610b7f 100644 --- a/include/asm-i386/tsc.h +++ b/include/asm-i386/tsc.h @@ -63,6 +63,7 @@ extern void tsc_init(void); extern void mark_tsc_unstable(char *reason); extern int unsynchronized_tsc(void); extern void init_tsc_clocksource(void); +int check_tsc_unstable(void); /* * Boot-time check whether the TSCs are synchronized across diff --git a/include/asm-ia64/hw_irq.h b/include/asm-ia64/hw_irq.h index c054d7a9aaa7..efa1b8f7251d 100644 --- a/include/asm-ia64/hw_irq.h +++ b/include/asm-ia64/hw_irq.h @@ -90,13 +90,27 @@ enum { extern __u8 isa_irq_to_vector_map[16]; #define isa_irq_to_vector(x) isa_irq_to_vector_map[(x)] +struct irq_cfg { + ia64_vector vector; + cpumask_t domain; +}; +extern spinlock_t vector_lock; +extern struct irq_cfg irq_cfg[NR_IRQS]; +#define irq_to_domain(x) irq_cfg[(x)].domain +DECLARE_PER_CPU(int[IA64_NUM_VECTORS], vector_irq); + extern struct hw_interrupt_type irq_type_ia64_lsapic; /* CPU-internal interrupt controller */ +extern int bind_irq_vector(int irq, int vector, cpumask_t domain); extern int assign_irq_vector (int irq); /* allocate a free vector */ extern void free_irq_vector (int vector); extern int reserve_irq_vector (int vector); +extern void __setup_vector_irq(int cpu); +extern int reassign_irq_vector(int irq, int cpu); extern void ia64_send_ipi (int cpu, int vector, int delivery_mode, int redirect); extern void register_percpu_irq (ia64_vector vec, struct irqaction *action); +extern int check_irq_used (int irq); +extern void destroy_and_reserve_irq (unsigned int irq); static inline void ia64_resend_irq(unsigned int vector) { @@ -113,7 +127,7 @@ extern irq_desc_t irq_desc[NR_IRQS]; static inline unsigned int __ia64_local_vector_to_irq (ia64_vector vec) { - return (unsigned int) vec; + return __get_cpu_var(vector_irq)[vec]; } #endif @@ -131,7 +145,7 @@ __ia64_local_vector_to_irq (ia64_vector vec) static inline ia64_vector irq_to_vector (int irq) { - return (ia64_vector) irq; + return irq_cfg[irq].vector; } /* diff --git a/include/asm-ia64/iosapic.h b/include/asm-ia64/iosapic.h index 421cb6b62a7c..b8f712859140 100644 --- a/include/asm-ia64/iosapic.h +++ b/include/asm-ia64/iosapic.h @@ -47,19 +47,21 @@ #define IOSAPIC_MASK_SHIFT 16 #define IOSAPIC_MASK (1<<IOSAPIC_MASK_SHIFT) +#define IOSAPIC_VECTOR_MASK 0xffffff00 + #ifndef __ASSEMBLY__ #ifdef CONFIG_IOSAPIC #define NR_IOSAPICS 256 -static inline unsigned int iosapic_read(char __iomem *iosapic, unsigned int reg) +static inline unsigned int __iosapic_read(char __iomem *iosapic, unsigned int reg) { writel(reg, iosapic + IOSAPIC_REG_SELECT); return readl(iosapic + IOSAPIC_WINDOW); } -static inline void iosapic_write(char __iomem *iosapic, unsigned int reg, u32 val) +static inline void __iosapic_write(char __iomem *iosapic, unsigned int reg, u32 val) { writel(reg, iosapic + IOSAPIC_REG_SELECT); writel(val, iosapic + IOSAPIC_WINDOW); diff --git a/include/asm-ia64/irq.h b/include/asm-ia64/irq.h index 67221615e317..35b360b82e43 100644 --- a/include/asm-ia64/irq.h +++ b/include/asm-ia64/irq.h @@ -14,8 +14,13 @@ #include <linux/types.h> #include <linux/cpumask.h> -#define NR_IRQS 256 -#define NR_IRQ_VECTORS NR_IRQS +#define NR_VECTORS 256 + +#if (NR_VECTORS + 32 * NR_CPUS) < 1024 +#define NR_IRQS (NR_VECTORS + 32 * NR_CPUS) +#else +#define NR_IRQS 1024 +#endif static __inline__ int irq_canonicalize (int irq) diff --git a/include/asm-ia64/kprobes.h b/include/asm-ia64/kprobes.h index 6382e52ec227..067d9dea68f9 100644 --- a/include/asm-ia64/kprobes.h +++ b/include/asm-ia64/kprobes.h @@ -82,8 +82,6 @@ struct kprobe_ctlblk { struct prev_kprobe prev_kprobe[ARCH_PREV_KPROBE_SZ]; }; -#define JPROBE_ENTRY(pentry) (kprobe_opcode_t *)pentry - #define ARCH_SUPPORTS_KRETPROBES #define ARCH_INACTIVE_KPROBE_COUNT 1 diff --git a/include/asm-ia64/percpu.h b/include/asm-ia64/percpu.h index fbe5cf3ab8dc..43a7aac414e0 100644 --- a/include/asm-ia64/percpu.h +++ b/include/asm-ia64/percpu.h @@ -29,6 +29,16 @@ __attribute__((__section__(".data.percpu"))) \ __SMALL_ADDR_AREA __typeof__(type) per_cpu__##name +#ifdef CONFIG_SMP +#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \ + __attribute__((__section__(".data.percpu.shared_aligned"))) \ + __SMALL_ADDR_AREA __typeof__(type) per_cpu__##name \ + ____cacheline_aligned_in_smp +#else +#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \ + DEFINE_PER_CPU(type, name) +#endif + /* * Pretty much a literal copy of asm-generic/percpu.h, except that percpu_modcopy() is an * external routine, to avoid include-hell. diff --git a/include/asm-ia64/processor.h b/include/asm-ia64/processor.h index db81ba406cef..6251c76437d2 100644 --- a/include/asm-ia64/processor.h +++ b/include/asm-ia64/processor.h @@ -295,9 +295,9 @@ struct thread_struct { regs->ar_bspstore = current->thread.rbs_bot; \ regs->ar_fpsr = FPSR_DEFAULT; \ regs->loadrs = 0; \ - regs->r8 = current->mm->dumpable; /* set "don't zap registers" flag */ \ + regs->r8 = get_dumpable(current->mm); /* set "don't zap registers" flag */ \ regs->r12 = new_sp - 16; /* allocate 16 byte scratch area */ \ - if (unlikely(!current->mm->dumpable)) { \ + if (unlikely(!get_dumpable(current->mm))) { \ /* \ * Zap scratch regs to avoid leaking bits between processes with different \ * uid/privileges. \ diff --git a/include/asm-ia64/unistd.h b/include/asm-ia64/unistd.h index 441c9e001776..315f8de950a2 100644 --- a/include/asm-ia64/unistd.h +++ b/include/asm-ia64/unistd.h @@ -292,7 +292,7 @@ #define __NR_sync_file_range 1300 #define __NR_tee 1301 #define __NR_vmsplice 1302 -/* 1303 reserved for move_pages */ +#define __NR_fallocate 1303 #define __NR_getcpu 1304 #define __NR_epoll_pwait 1305 #define __NR_utimensat 1306 diff --git a/include/asm-ia64/ustack.h b/include/asm-ia64/ustack.h index a349467913ea..504167c35b8b 100644 --- a/include/asm-ia64/ustack.h +++ b/include/asm-ia64/ustack.h @@ -11,6 +11,7 @@ /* The absolute hard limit for stack size is 1/2 of the mappable space in the region */ #define MAX_USER_STACK_SIZE (RGN_MAP_LIMIT/2) #define STACK_TOP (0x6000000000000000UL + RGN_MAP_LIMIT) +#define STACK_TOP_MAX STACK_TOP #endif /* Make a default stack size of 2GiB */ diff --git a/include/asm-m32r/a.out.h b/include/asm-m32r/a.out.h index 9a4a5d20160a..6a1b5d42f328 100644 --- a/include/asm-m32r/a.out.h +++ b/include/asm-m32r/a.out.h @@ -20,6 +20,7 @@ struct exec #ifdef __KERNEL__ #define STACK_TOP TASK_SIZE +#define STACK_TOP_MAX STACK_TOP #endif diff --git a/include/asm-m68k/a.out.h b/include/asm-m68k/a.out.h index eda1662773b8..6fc86a221a94 100644 --- a/include/asm-m68k/a.out.h +++ b/include/asm-m68k/a.out.h @@ -20,6 +20,7 @@ struct exec #ifdef __KERNEL__ #define STACK_TOP TASK_SIZE +#define STACK_TOP_MAX STACK_TOP #endif diff --git a/include/asm-m68knommu/irq.h b/include/asm-m68knommu/irq.h index 7b8f874f8429..9373c31ac87d 100644 --- a/include/asm-m68knommu/irq.h +++ b/include/asm-m68knommu/irq.h @@ -1,7 +1,5 @@ -#ifndef _M68K_IRQ_H_ -#define _M68K_IRQ_H_ - -#include <asm/ptrace.h> +#ifndef _M68KNOMMU_IRQ_H_ +#define _M68KNOMMU_IRQ_H_ #ifdef CONFIG_COLDFIRE /* @@ -17,75 +15,12 @@ /* * # of m68k interrupts */ -#define SYS_IRQS 8 -#define NR_IRQS (24+SYS_IRQS) +#define SYS_IRQS 8 +#define NR_IRQS (24 + SYS_IRQS) #endif /* CONFIG_COLDFIRE */ -/* - * Interrupt source definitions - * General interrupt sources are the level 1-7. - * Adding an interrupt service routine for one of these sources - * results in the addition of that routine to a chain of routines. - * Each one is called in succession. Each individual interrupt - * service routine should determine if the device associated with - * that routine requires service. - */ -#define IRQ1 (1) /* level 1 interrupt */ -#define IRQ2 (2) /* level 2 interrupt */ -#define IRQ3 (3) /* level 3 interrupt */ -#define IRQ4 (4) /* level 4 interrupt */ -#define IRQ5 (5) /* level 5 interrupt */ -#define IRQ6 (6) /* level 6 interrupt */ -#define IRQ7 (7) /* level 7 interrupt (non-maskable) */ - -/* - * Machine specific interrupt sources. - * - * Adding an interrupt service routine for a source with this bit - * set indicates a special machine specific interrupt source. - * The machine specific files define these sources. - * - * The IRQ_MACHSPEC bit is now gone - the only thing it did was to - * introduce unnecessary overhead. - * - * All interrupt handling is actually machine specific so it is better - * to use function pointers, as used by the Sparc port, and select the - * interrupt handling functions when initializing the kernel. This way - * we save some unnecessary overhead at run-time. - * 01/11/97 - Jes - */ - -extern void (*mach_enable_irq)(unsigned int); -extern void (*mach_disable_irq)(unsigned int); - -/* - * various flags for request_irq() - the Amiga now uses the standard - * mechanism like all other architectures - IRQF_DISABLED and - * IRQF_SHARED are your friends. - */ -#define IRQ_FLG_LOCK (0x0001) /* handler is not replaceable */ -#define IRQ_FLG_REPLACE (0x0002) /* replace existing handler */ -#define IRQ_FLG_FAST (0x0004) -#define IRQ_FLG_SLOW (0x0008) -#define IRQ_FLG_STD (0x8000) /* internally used */ - -#ifdef CONFIG_M68360 - -#define CPM_INTERRUPT IRQ4 - -/* see MC68360 User's Manual, p. 7-377 */ -#define CPM_VECTOR_BASE 0x04 /* 3 MSbits of CPM vector */ - -#endif /* CONFIG_M68360 */ - -/* - * Some drivers want these entry points - */ -#define enable_irq(x) do { } while (0) -#define disable_irq(x) do { } while (0) -#define disable_irq_nosync(x) disable_irq(x) #define irq_canonicalize(irq) (irq) -#endif /* _M68K_IRQ_H_ */ +#endif /* _M68KNOMMU_IRQ_H_ */ diff --git a/include/asm-m68knommu/irqnode.h b/include/asm-m68knommu/irqnode.h deleted file mode 100644 index 6132a9858b52..000000000000 --- a/include/asm-m68knommu/irqnode.h +++ /dev/null @@ -1,36 +0,0 @@ -#ifndef _M68K_IRQNODE_H_ -#define _M68K_IRQNODE_H_ - -#include <linux/interrupt.h> - -/* - * This structure is used to chain together the ISRs for a particular - * interrupt source (if it supports chaining). - */ -typedef struct irq_node { - irq_handler_t handler; - unsigned long flags; - void *dev_id; - const char *devname; - struct irq_node *next; -} irq_node_t; - -/* - * This structure has only 4 elements for speed reasons - */ -struct irq_entry { - irq_handler_t handler; - unsigned long flags; - void *dev_id; - const char *devname; -}; - -/* count of spurious interrupts */ -extern volatile unsigned int num_spurious; - -/* - * This function returns a new irq_node_t - */ -extern irq_node_t *new_irq_node(void); - -#endif /* _M68K_IRQNODE_H_ */ diff --git a/include/asm-m68knommu/m68360.h b/include/asm-m68knommu/m68360.h index dd11b070884b..eb7d39ef2855 100644 --- a/include/asm-m68knommu/m68360.h +++ b/include/asm-m68knommu/m68360.h @@ -3,3 +3,11 @@ #include "m68360_quicc.h" #include "m68360_enet.h" +#ifdef CONFIG_M68360 + +#define CPM_INTERRUPT 4 + +/* see MC68360 User's Manual, p. 7-377 */ +#define CPM_VECTOR_BASE 0x04 /* 3 MSbits of CPM vector */ + +#endif /* CONFIG_M68360 */ diff --git a/include/asm-m68knommu/pgtable.h b/include/asm-m68knommu/pgtable.h index 9dfbbc24aa71..e1e6a1d2333a 100644 --- a/include/asm-m68knommu/pgtable.h +++ b/include/asm-m68knommu/pgtable.h @@ -49,7 +49,6 @@ static inline int pte_file(pte_t pte) { return 0; } * These would be in other places but having them here reduces the diffs. */ extern unsigned int kobjsize(const void *objp); -extern int is_in_rom(unsigned long); /* * No page table caches to initialise. diff --git a/include/asm-m68knommu/traps.h b/include/asm-m68knommu/traps.h index f2a81317cc10..d0671e5f8e29 100644 --- a/include/asm-m68knommu/traps.h +++ b/include/asm-m68knommu/traps.h @@ -16,6 +16,10 @@ typedef void (*e_vector)(void); extern e_vector vectors[]; +extern void init_vectors(void); +extern void enable_vector(unsigned int irq); +extern void disable_vector(unsigned int irq); +extern void ack_vector(unsigned int irq); #endif diff --git a/include/asm-m68knommu/uaccess.h b/include/asm-m68knommu/uaccess.h index 62b29b10bc6d..9ed9169a8849 100644 --- a/include/asm-m68knommu/uaccess.h +++ b/include/asm-m68knommu/uaccess.h @@ -15,12 +15,15 @@ #define access_ok(type,addr,size) _access_ok((unsigned long)(addr),(size)) +/* + * It is not enough to just have access_ok check for a real RAM address. + * This would disallow the case of code/ro-data running XIP in flash/rom. + * Ideally we would check the possible flash ranges too, but that is + * currently not so easy. + */ static inline int _access_ok(unsigned long addr, unsigned long size) { - extern unsigned long memory_start, memory_end; - - return (((addr >= memory_start) && (addr+size < memory_end)) || - (is_in_rom(addr) && is_in_rom(addr+size))); + return 1; } /* diff --git a/include/asm-mips/a.out.h b/include/asm-mips/a.out.h index ef33c3f13484..1ad60ba186d0 100644 --- a/include/asm-mips/a.out.h +++ b/include/asm-mips/a.out.h @@ -40,6 +40,7 @@ struct exec #ifdef CONFIG_64BIT #define STACK_TOP (current->thread.mflags & MF_32BIT_ADDR ? TASK_SIZE32 : TASK_SIZE) #endif +#define STACK_TOP_MAX TASK_SIZE #endif diff --git a/include/asm-parisc/a.out.h b/include/asm-parisc/a.out.h index 2a490cc9ec91..23e2c90943e5 100644 --- a/include/asm-parisc/a.out.h +++ b/include/asm-parisc/a.out.h @@ -23,6 +23,7 @@ struct exec * prumpf */ #define STACK_TOP TASK_SIZE +#define STACK_TOP_MAX DEFAULT_TASK_SIZE #endif diff --git a/include/asm-powerpc/a.out.h b/include/asm-powerpc/a.out.h index c7393a977364..5c5ea83f9349 100644 --- a/include/asm-powerpc/a.out.h +++ b/include/asm-powerpc/a.out.h @@ -26,9 +26,12 @@ struct exec #define STACK_TOP (test_thread_flag(TIF_32BIT) ? \ STACK_TOP_USER32 : STACK_TOP_USER64) +#define STACK_TOP_MAX STACK_TOP_USER64 + #else /* __powerpc64__ */ #define STACK_TOP TASK_SIZE +#define STACK_TOP_MAX STACK_TOP #endif /* __powerpc64__ */ #endif /* __KERNEL__ */ diff --git a/include/asm-powerpc/kprobes.h b/include/asm-powerpc/kprobes.h index 9537fda238b8..8b08b447d6f3 100644 --- a/include/asm-powerpc/kprobes.h +++ b/include/asm-powerpc/kprobes.h @@ -73,12 +73,10 @@ typedef unsigned int kprobe_opcode_t; } \ } -#define JPROBE_ENTRY(pentry) (kprobe_opcode_t *)((func_descr_t *)pentry) #define is_trap(instr) (IS_TW(instr) || IS_TD(instr) || \ IS_TWI(instr) || IS_TDI(instr)) #else /* Use stock kprobe_lookup_name since ppc32 doesn't use function descriptors */ -#define JPROBE_ENTRY(pentry) (kprobe_opcode_t *)(pentry) #define is_trap(instr) (IS_TW(instr) || IS_TWI(instr)) #endif diff --git a/include/asm-powerpc/percpu.h b/include/asm-powerpc/percpu.h index 2f2e3024fa61..73dc8ba4010d 100644 --- a/include/asm-powerpc/percpu.h +++ b/include/asm-powerpc/percpu.h @@ -20,6 +20,11 @@ #define DEFINE_PER_CPU(type, name) \ __attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name +#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \ + __attribute__((__section__(".data.percpu.shared_aligned"))) \ + __typeof__(type) per_cpu__##name \ + ____cacheline_aligned_in_smp + /* var is in discarded region: offset to particular copy we want */ #define per_cpu(var, cpu) (*RELOC_HIDE(&per_cpu__##var, __per_cpu_offset(cpu))) #define __get_cpu_var(var) (*RELOC_HIDE(&per_cpu__##var, __my_cpu_offset())) @@ -40,6 +45,8 @@ extern void setup_per_cpu_areas(void); #define DEFINE_PER_CPU(type, name) \ __typeof__(type) per_cpu__##name +#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \ + DEFINE_PER_CPU(type, name) #define per_cpu(var, cpu) (*((void)(cpu), &per_cpu__##var)) #define __get_cpu_var(var) per_cpu__##var diff --git a/include/asm-s390/a.out.h b/include/asm-s390/a.out.h index 72adee6ef338..46158dcaf517 100644 --- a/include/asm-s390/a.out.h +++ b/include/asm-s390/a.out.h @@ -32,6 +32,7 @@ struct exec #ifdef __KERNEL__ #define STACK_TOP TASK_SIZE +#define STACK_TOP_MAX DEFAULT_TASK_SIZE #endif diff --git a/include/asm-s390/kprobes.h b/include/asm-s390/kprobes.h index 830fe4c4eea6..340ba10446ea 100644 --- a/include/asm-s390/kprobes.h +++ b/include/asm-s390/kprobes.h @@ -46,8 +46,6 @@ typedef u16 kprobe_opcode_t; ? (MAX_STACK_SIZE) \ : (((unsigned long)current_thread_info()) + THREAD_SIZE - (ADDR))) -#define JPROBE_ENTRY(pentry) (kprobe_opcode_t *)(pentry) - #define ARCH_SUPPORTS_KRETPROBES #define ARCH_INACTIVE_KPROBE_COUNT 0 diff --git a/include/asm-s390/percpu.h b/include/asm-s390/percpu.h index 9ea7f1023e57..545857e64443 100644 --- a/include/asm-s390/percpu.h +++ b/include/asm-s390/percpu.h @@ -41,6 +41,11 @@ extern unsigned long __per_cpu_offset[NR_CPUS]; __attribute__((__section__(".data.percpu"))) \ __typeof__(type) per_cpu__##name +#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \ + __attribute__((__section__(".data.percpu.shared_aligned"))) \ + __typeof__(type) per_cpu__##name \ + ____cacheline_aligned_in_smp + #define __get_cpu_var(var) __reloc_hide(var,S390_lowcore.percpu_offset) #define __raw_get_cpu_var(var) __reloc_hide(var,S390_lowcore.percpu_offset) #define per_cpu(var,cpu) __reloc_hide(var,__per_cpu_offset[cpu]) @@ -59,6 +64,8 @@ do { \ #define DEFINE_PER_CPU(type, name) \ __typeof__(type) per_cpu__##name +#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \ + DEFINE_PER_CPU(type, name) #define __get_cpu_var(var) __reloc_hide(var,0) #define __raw_get_cpu_var(var) __reloc_hide(var,0) diff --git a/include/asm-sh/a.out.h b/include/asm-sh/a.out.h index 6e9fca9ee333..685d0f6125fa 100644 --- a/include/asm-sh/a.out.h +++ b/include/asm-sh/a.out.h @@ -20,6 +20,7 @@ struct exec #ifdef __KERNEL__ #define STACK_TOP TASK_SIZE +#define STACK_TOP_MAX STACK_TOP #endif diff --git a/include/asm-sh64/a.out.h b/include/asm-sh64/a.out.h index e1995e86b663..237ee4e5b72a 100644 --- a/include/asm-sh64/a.out.h +++ b/include/asm-sh64/a.out.h @@ -31,6 +31,7 @@ struct exec #ifdef __KERNEL__ #define STACK_TOP TASK_SIZE +#define STACK_TOP_MAX STACK_TOP #endif diff --git a/include/asm-sparc/a.out.h b/include/asm-sparc/a.out.h index 9090060a23e6..917e04250696 100644 --- a/include/asm-sparc/a.out.h +++ b/include/asm-sparc/a.out.h @@ -92,6 +92,7 @@ struct relocation_info /* used when header.a_machtype == M_SPARC */ #include <asm/page.h> #define STACK_TOP (PAGE_OFFSET - PAGE_SIZE) +#define STACK_TOP_MAX STACK_TOP #endif /* __KERNEL__ */ diff --git a/include/asm-sparc64/a.out.h b/include/asm-sparc64/a.out.h index eb3b8e90b279..902e07f89a42 100644 --- a/include/asm-sparc64/a.out.h +++ b/include/asm-sparc64/a.out.h @@ -101,6 +101,8 @@ struct relocation_info /* used when header.a_machtype == M_SPARC */ #define STACK_TOP (test_thread_flag(TIF_32BIT) ? \ STACK_TOP32 : STACK_TOP64) +#define STACK_TOP_MAX STACK_TOP64 + #endif #endif /* !(__ASSEMBLY__) */ diff --git a/include/asm-sparc64/kprobes.h b/include/asm-sparc64/kprobes.h index a331b7b0dff2..7f6774dca5f4 100644 --- a/include/asm-sparc64/kprobes.h +++ b/include/asm-sparc64/kprobes.h @@ -10,7 +10,6 @@ typedef u32 kprobe_opcode_t; #define BREAKPOINT_INSTRUCTION_2 0x91d02071 /* ta 0x71 */ #define MAX_INSN_SIZE 2 -#define JPROBE_ENTRY(pentry) (kprobe_opcode_t *)pentry #define arch_remove_kprobe(p) do {} while (0) #define ARCH_INACTIVE_KPROBE_COUNT 0 diff --git a/include/asm-sparc64/percpu.h b/include/asm-sparc64/percpu.h index 88db872ce2f8..caf8750792ff 100644 --- a/include/asm-sparc64/percpu.h +++ b/include/asm-sparc64/percpu.h @@ -18,6 +18,11 @@ extern unsigned long __per_cpu_shift; #define DEFINE_PER_CPU(type, name) \ __attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name +#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \ + __attribute__((__section__(".data.percpu.shared_aligned"))) \ + __typeof__(type) per_cpu__##name \ + ____cacheline_aligned_in_smp + register unsigned long __local_per_cpu_offset asm("g5"); /* var is in discarded region: offset to particular copy we want */ @@ -38,6 +43,8 @@ do { \ #define real_setup_per_cpu_areas() do { } while (0) #define DEFINE_PER_CPU(type, name) \ __typeof__(type) per_cpu__##name +#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \ + DEFINE_PER_CPU(type, name) #define per_cpu(var, cpu) (*((void)cpu, &per_cpu__##var)) #define __get_cpu_var(var) per_cpu__##var diff --git a/include/asm-um/a.out.h b/include/asm-um/a.out.h index 7016b893ac9d..78bc9eed26b2 100644 --- a/include/asm-um/a.out.h +++ b/include/asm-um/a.out.h @@ -17,4 +17,6 @@ extern int honeypot; #define STACK_TOP \ CHOOSE_MODE((honeypot ? host_task_size : task_size), task_size) +#define STACK_TOP_MAX STACK_TOP + #endif diff --git a/include/asm-x86_64/a.out.h b/include/asm-x86_64/a.out.h index 7255cde06538..e789300e41a5 100644 --- a/include/asm-x86_64/a.out.h +++ b/include/asm-x86_64/a.out.h @@ -21,7 +21,8 @@ struct exec #ifdef __KERNEL__ #include <linux/thread_info.h> -#define STACK_TOP TASK_SIZE +#define STACK_TOP TASK_SIZE +#define STACK_TOP_MAX TASK_SIZE64 #endif #endif /* __A_OUT_GNU_H__ */ diff --git a/include/asm-x86_64/kprobes.h b/include/asm-x86_64/kprobes.h index cf5317898fb0..7db825403e01 100644 --- a/include/asm-x86_64/kprobes.h +++ b/include/asm-x86_64/kprobes.h @@ -41,7 +41,6 @@ typedef u8 kprobe_opcode_t; ? (MAX_STACK_SIZE) \ : (((unsigned long)current_thread_info()) + THREAD_SIZE - (ADDR))) -#define JPROBE_ENTRY(pentry) (kprobe_opcode_t *)pentry #define ARCH_SUPPORTS_KRETPROBES #define ARCH_INACTIVE_KPROBE_COUNT 1 diff --git a/include/asm-x86_64/percpu.h b/include/asm-x86_64/percpu.h index c6fbb67eac90..5abd48270101 100644 --- a/include/asm-x86_64/percpu.h +++ b/include/asm-x86_64/percpu.h @@ -20,6 +20,11 @@ #define DEFINE_PER_CPU(type, name) \ __attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name +#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \ + __attribute__((__section__(".data.percpu.shared_aligned"))) \ + __typeof__(type) per_cpu__##name \ + ____cacheline_internodealigned_in_smp + /* var is in discarded region: offset to particular copy we want */ #define per_cpu(var, cpu) (*({ \ extern int simple_identifier_##var(void); \ @@ -46,6 +51,8 @@ extern void setup_per_cpu_areas(void); #define DEFINE_PER_CPU(type, name) \ __typeof__(type) per_cpu__##name +#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \ + DEFINE_PER_CPU(type, name) #define per_cpu(var, cpu) (*((void)(cpu), &per_cpu__##var)) #define __get_cpu_var(var) per_cpu__##var diff --git a/include/asm-xtensa/a.out.h b/include/asm-xtensa/a.out.h index ffc4dcfd6ac1..05a2f67c6768 100644 --- a/include/asm-xtensa/a.out.h +++ b/include/asm-xtensa/a.out.h @@ -17,6 +17,7 @@ /* Note: the kernel needs the a.out definitions, even if only ELF is used. */ #define STACK_TOP TASK_SIZE +#define STACK_TOP_MAX STACK_TOP struct exec { diff --git a/include/linux/acpi.h b/include/linux/acpi.h index fccd8b548d93..dc234c508a6f 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -122,7 +122,7 @@ extern struct acpi_mcfg_allocation *pci_mmcfg_config; extern int pci_mmcfg_config_num; extern int sbf_port; -extern unsigned long acpi_video_flags; +extern unsigned long acpi_realmode_flags; #else /* !CONFIG_ACPI */ diff --git a/include/linux/aio.h b/include/linux/aio.h index b903fc02bdb7..d10e608f232d 100644 --- a/include/linux/aio.h +++ b/include/linux/aio.h @@ -86,7 +86,7 @@ struct kioctx; */ struct kiocb { struct list_head ki_run_list; - long ki_flags; + unsigned long ki_flags; int ki_users; unsigned ki_key; /* id of this request */ diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index e1a708337be3..91c8c07fe8b7 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -6,11 +6,13 @@ struct pt_regs; /* - * MAX_ARG_PAGES defines the number of pages allocated for arguments - * and envelope for the new program. 32 should suffice, this gives - * a maximum env+arg of 128kB w/4KB pages! + * These are the maximum length and maximum number of strings passed to the + * execve() system call. MAX_ARG_STRLEN is essentially random but serves to + * prevent the kernel from being unduly impacted by misaddressed pointers. + * MAX_ARG_STRINGS is chosen to fit in a signed 32-bit integer. */ -#define MAX_ARG_PAGES 32 +#define MAX_ARG_STRLEN (PAGE_SIZE * 32) +#define MAX_ARG_STRINGS 0x7FFFFFFF /* sizeof(linux_binprm->buf) */ #define BINPRM_BUF_SIZE 128 @@ -24,7 +26,12 @@ struct pt_regs; */ struct linux_binprm{ char buf[BINPRM_BUF_SIZE]; +#ifdef CONFIG_MMU + struct vm_area_struct *vma; +#else +# define MAX_ARG_PAGES 32 struct page *page[MAX_ARG_PAGES]; +#endif struct mm_struct *mm; unsigned long p; /* current top of mem */ int sh_bang; @@ -40,6 +47,7 @@ struct linux_binprm{ unsigned interp_flags; unsigned interp_data; unsigned long loader, exec; + unsigned long argv_len; }; #define BINPRM_FLAGS_ENFORCE_NONDUMP_BIT 0 @@ -68,7 +76,7 @@ extern int register_binfmt(struct linux_binfmt *); extern int unregister_binfmt(struct linux_binfmt *); extern int prepare_binprm(struct linux_binprm *); -extern void remove_arg_zero(struct linux_binprm *); +extern int __must_check remove_arg_zero(struct linux_binprm *); extern int search_binary_handler(struct linux_binprm *,struct pt_regs *); extern int flush_old_exec(struct linux_binprm * bprm); @@ -85,6 +93,7 @@ extern int suid_dumpable; extern int setup_arg_pages(struct linux_binprm * bprm, unsigned long stack_top, int executable_stack); +extern int bprm_mm_init(struct linux_binprm *bprm); extern int copy_strings_kernel(int argc,char ** argv,struct linux_binprm *bprm); extern void compute_creds(struct linux_binprm *binprm); extern int do_coredump(long signr, int exit_code, struct pt_regs * regs); diff --git a/include/linux/coda_linux.h b/include/linux/coda_linux.h index e4ac016ad272..c4079b403e9e 100644 --- a/include/linux/coda_linux.h +++ b/include/linux/coda_linux.h @@ -43,9 +43,6 @@ int coda_revalidate_inode(struct dentry *); int coda_getattr(struct vfsmount *, struct dentry *, struct kstat *); int coda_setattr(struct dentry *, struct iattr *); -/* global variables */ -extern int coda_fake_statfs; - /* this file: heloers */ static __inline__ struct CodaFid *coda_i2f(struct inode *); static __inline__ char *coda_i2s(struct inode *); diff --git a/include/linux/coda_proc.h b/include/linux/coda_proc.h deleted file mode 100644 index 0dc1b0458e75..000000000000 --- a/include/linux/coda_proc.h +++ /dev/null @@ -1,76 +0,0 @@ -/* - * coda_statis.h - * - * CODA operation statistics - * - * (c) March, 1998 - * by Michihiro Kuramochi, Zhenyu Xia and Zhanyong Wan - * zhanyong.wan@yale.edu - * - */ - -#ifndef _CODA_PROC_H -#define _CODA_PROC_H - -void coda_sysctl_init(void); -void coda_sysctl_clean(void); - -#include <linux/sysctl.h> -#include <linux/coda_fs_i.h> -#include <linux/coda.h> - -/* these four files are presented to show the result of the statistics: - * - * /proc/fs/coda/vfs_stats - * cache_inv_stats - * - * these four files are presented to reset the statistics to 0: - * - * /proc/sys/coda/vfs_stats - * cache_inv_stats - */ - -/* VFS operation statistics */ -struct coda_vfs_stats -{ - /* file operations */ - int open; - int flush; - int release; - int fsync; - - /* dir operations */ - int readdir; - - /* inode operations */ - int create; - int lookup; - int link; - int unlink; - int symlink; - int mkdir; - int rmdir; - int rename; - int permission; - - /* symlink operatoins*/ - int follow_link; - int readlink; -}; - -/* cache invalidation statistics */ -struct coda_cache_inv_stats -{ - int flush; - int purge_user; - int zap_dir; - int zap_file; - int zap_vnode; - int purge_fid; - int replace; -}; - -/* these global variables hold the actual statistics data */ -extern struct coda_vfs_stats coda_vfs_stat; - -#endif /* _CODA_PROC_H */ diff --git a/include/linux/coda_psdev.h b/include/linux/coda_psdev.h index b541bb3d1f4b..aa8f454b3b77 100644 --- a/include/linux/coda_psdev.h +++ b/include/linux/coda_psdev.h @@ -8,11 +8,6 @@ struct kstatfs; -struct coda_sb_info -{ - struct venus_comm *sbi_vcomm; -}; - /* communication pending/processing queues */ struct venus_comm { u_long vc_seq; @@ -24,9 +19,9 @@ struct venus_comm { }; -static inline struct coda_sb_info *coda_sbp(struct super_block *sb) +static inline struct venus_comm *coda_vcp(struct super_block *sb) { - return ((struct coda_sb_info *)((sb)->s_fs_info)); + return (struct venus_comm *)((sb)->s_fs_info); } @@ -74,8 +69,6 @@ int venus_statfs(struct dentry *dentry, struct kstatfs *sfs); /* messages between coda filesystem in kernel and Venus */ -extern int coda_hard; -extern unsigned long coda_timeout; struct upc_req { struct list_head uc_chain; caddr_t uc_data; @@ -85,7 +78,6 @@ struct upc_req { u_short uc_opcode; /* copied from data to save lookup */ int uc_unique; wait_queue_head_t uc_sleep; /* process' wait queue */ - unsigned long uc_posttime; }; #define REQ_ASYNC 0x1 diff --git a/include/linux/device.h b/include/linux/device.h index be2debed70d2..d9f0a57f5a2f 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -572,6 +572,16 @@ dev_dbg(struct device * dev, const char * fmt, ...) } #endif +#ifdef VERBOSE_DEBUG +#define dev_vdbg dev_dbg +#else +static inline int __attribute__ ((format (printf, 2, 3))) +dev_vdbg(struct device * dev, const char * fmt, ...) +{ + return 0; +} +#endif + #define dev_err(dev, format, arg...) \ dev_printk(KERN_ERR , dev , format , ## arg) #define dev_info(dev, format, arg...) \ diff --git a/include/linux/edac.h b/include/linux/edac.h new file mode 100644 index 000000000000..eab451e69a91 --- /dev/null +++ b/include/linux/edac.h @@ -0,0 +1,29 @@ +/* + * Generic EDAC defs + * + * Author: Dave Jiang <djiang@mvista.com> + * + * 2006-2007 (c) MontaVista Software, Inc. This file is licensed under + * the terms of the GNU General Public License version 2. This program + * is licensed "as is" without any warranty of any kind, whether express + * or implied. + * + */ +#ifndef _LINUX_EDAC_H_ +#define _LINUX_EDAC_H_ + +#include <asm/atomic.h> + +#define EDAC_OPSTATE_INVAL -1 +#define EDAC_OPSTATE_POLL 0 +#define EDAC_OPSTATE_NMI 1 +#define EDAC_OPSTATE_INT 2 + +extern int edac_op_state; +extern int edac_err_assert; +extern atomic_t edac_handlers; + +extern int edac_handler_set(void); +extern void edac_atomic_assert_error(void); + +#endif diff --git a/include/linux/freezer.h b/include/linux/freezer.h index 2d38b1a74662..c8e02de737f6 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -25,7 +25,7 @@ static inline int freezing(struct task_struct *p) /* * Request that a process be frozen */ -static inline void freeze(struct task_struct *p) +static inline void set_freeze_flag(struct task_struct *p) { set_tsk_thread_flag(p, TIF_FREEZE); } @@ -33,7 +33,7 @@ static inline void freeze(struct task_struct *p) /* * Sometimes we may need to cancel the previous 'freeze' request */ -static inline void do_not_freeze(struct task_struct *p) +static inline void clear_freeze_flag(struct task_struct *p) { clear_tsk_thread_flag(p, TIF_FREEZE); } @@ -56,7 +56,7 @@ static inline int thaw_process(struct task_struct *p) wake_up_process(p); return 1; } - clear_tsk_thread_flag(p, TIF_FREEZE); + clear_freeze_flag(p); task_unlock(p); return 0; } @@ -129,7 +129,8 @@ static inline void set_freezable(void) #else static inline int frozen(struct task_struct *p) { return 0; } static inline int freezing(struct task_struct *p) { return 0; } -static inline void freeze(struct task_struct *p) { BUG(); } +static inline void set_freeze_flag(struct task_struct *p) {} +static inline void clear_freeze_flag(struct task_struct *p) {} static inline int thaw_process(struct task_struct *p) { return 1; } static inline void refrigerator(void) {} diff --git a/include/linux/fs.h b/include/linux/fs.h index 0b806c5e32eb..d33beadd9a43 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -697,20 +697,26 @@ struct fown_struct { * Track a single file's readahead state */ struct file_ra_state { - unsigned long start; /* Current window */ - unsigned long size; - unsigned long flags; /* ra flags RA_FLAG_xxx*/ - unsigned long cache_hit; /* cache hit count*/ - unsigned long prev_index; /* Cache last read() position */ - unsigned long ahead_start; /* Ahead window */ - unsigned long ahead_size; + pgoff_t start; /* where readahead started */ + unsigned long size; /* # of readahead pages */ + unsigned long async_size; /* do asynchronous readahead when + there are only # of pages ahead */ + unsigned long ra_pages; /* Maximum readahead window */ unsigned long mmap_hit; /* Cache hit stat for mmap accesses */ unsigned long mmap_miss; /* Cache miss stat for mmap accesses */ + unsigned long prev_index; /* Cache last read() position */ unsigned int prev_offset; /* Offset where last read() ended in a page */ }; -#define RA_FLAG_MISS 0x01 /* a cache miss occured against this file */ -#define RA_FLAG_INCACHE 0x02 /* file is already in cache */ + +/* + * Check if @index falls in the readahead windows. + */ +static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index) +{ + return (index >= ra->start && + index < ra->start + ra->size); +} struct file { /* @@ -862,7 +868,7 @@ extern void locks_init_lock(struct file_lock *); extern void locks_copy_lock(struct file_lock *, struct file_lock *); extern void locks_remove_posix(struct file *, fl_owner_t); extern void locks_remove_flock(struct file *); -extern int posix_test_lock(struct file *, struct file_lock *); +extern void posix_test_lock(struct file *, struct file_lock *); extern int posix_lock_file(struct file *, struct file_lock *, struct file_lock *); extern int posix_lock_file_wait(struct file *, struct file_lock *); extern int posix_unblock_lock(struct file *, struct file_lock *); @@ -873,6 +879,7 @@ extern int flock_lock_file_wait(struct file *filp, struct file_lock *fl); extern int __break_lease(struct inode *inode, unsigned int flags); extern void lease_get_mtime(struct inode *, struct timespec *time); extern int setlease(struct file *, long, struct file_lock **); +extern int vfs_setlease(struct file *, long, struct file_lock **); extern int lease_modify(struct file_lock **, int); extern int lock_may_read(struct inode *, loff_t start, unsigned long count); extern int lock_may_write(struct inode *, loff_t start, unsigned long count); @@ -1122,6 +1129,7 @@ struct file_operations { int (*flock) (struct file *, int, struct file_lock *); ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int); + int (*setlease)(struct file *, long, struct file_lock **); }; struct inode_operations { @@ -1461,7 +1469,7 @@ extern int alloc_chrdev_region(dev_t *, unsigned, unsigned, const char *); extern int register_chrdev_region(dev_t, unsigned, const char *); extern int register_chrdev(unsigned int, const char *, const struct file_operations *); -extern int unregister_chrdev(unsigned int, const char *); +extern void unregister_chrdev(unsigned int, const char *); extern void unregister_chrdev_region(dev_t, unsigned); extern int chrdev_open(struct inode *, struct file *); extern void chrdev_show(struct seq_file *,off_t); diff --git a/include/linux/fsl_devices.h b/include/linux/fsl_devices.h index 695741b0e420..1831b196c70a 100644 --- a/include/linux/fsl_devices.h +++ b/include/linux/fsl_devices.h @@ -53,6 +53,7 @@ struct gianfar_platform_data { u32 bus_id; u32 phy_id; u8 mac_addr[6]; + phy_interface_t interface; }; struct gianfar_mdio_data { diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 12c5e4e3135a..1fcb0033179e 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -103,21 +103,6 @@ __alloc_zeroed_user_highpage(gfp_t movableflags, #endif /** - * alloc_zeroed_user_highpage - Allocate a zeroed HIGHMEM page for a VMA - * @vma: The VMA the page is to be allocated for - * @vaddr: The virtual address the page will be inserted into - * - * This function will allocate a page for a VMA that the caller knows will - * not be able to move in the future using move_pages() or reclaim. If it - * is known that the page can move, use alloc_zeroed_user_highpage_movable - */ -static inline struct page * -alloc_zeroed_user_highpage(struct vm_area_struct *vma, unsigned long vaddr) -{ - return __alloc_zeroed_user_highpage(0, vma, vaddr); -} - -/** * alloc_zeroed_user_highpage_movable - Allocate a zeroed HIGHMEM page for a VMA that the caller knows can move * @vma: The VMA the page is to be allocated for * @vaddr: The virtual address the page will be inserted into diff --git a/include/linux/kobject.h b/include/linux/kobject.h index 06cbf41d32d2..aa2fe22b1baa 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -36,15 +36,24 @@ extern char uevent_helper[]; /* counter to tag the uevent, read only except for the kobject core */ extern u64 uevent_seqnum; -/* the actions here must match the proper string in lib/kobject_uevent.c */ -typedef int __bitwise kobject_action_t; +/* + * The actions here must match the index to the string array + * in lib/kobject_uevent.c + * + * Do not add new actions here without checking with the driver-core + * maintainers. Action strings are not meant to express subsystem + * or device specific properties. In most cases you want to send a + * kobject_uevent_env(kobj, KOBJ_CHANGE, env) with additional event + * specific variables added to the event environment. + */ enum kobject_action { - KOBJ_ADD = (__force kobject_action_t) 0x01, /* exclusive to core */ - KOBJ_REMOVE = (__force kobject_action_t) 0x02, /* exclusive to core */ - KOBJ_CHANGE = (__force kobject_action_t) 0x03, /* device state change */ - KOBJ_OFFLINE = (__force kobject_action_t) 0x04, /* device offline */ - KOBJ_ONLINE = (__force kobject_action_t) 0x05, /* device online */ - KOBJ_MOVE = (__force kobject_action_t) 0x06, /* device move */ + KOBJ_ADD, + KOBJ_REMOVE, + KOBJ_CHANGE, + KOBJ_MOVE, + KOBJ_ONLINE, + KOBJ_OFFLINE, + KOBJ_MAX }; struct kobject { diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 23adf6075ae4..51464d12a4e5 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -116,9 +116,12 @@ struct kprobe { */ struct jprobe { struct kprobe kp; - kprobe_opcode_t *entry; /* probe handling code to jump to */ + void *entry; /* probe handling code to jump to */ }; +/* For backward compatibility with old code using JPROBE_ENTRY() */ +#define JPROBE_ENTRY(handler) (handler) + DECLARE_PER_CPU(struct kprobe *, current_kprobe); DECLARE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); @@ -211,6 +214,7 @@ int longjmp_break_handler(struct kprobe *, struct pt_regs *); int register_jprobe(struct jprobe *p); void unregister_jprobe(struct jprobe *p); void jprobe_return(void); +unsigned long arch_deref_entry_point(void *); int register_kretprobe(struct kretprobe *rp); void unregister_kretprobe(struct kretprobe *rp); diff --git a/include/linux/lguest.h b/include/linux/lguest.h new file mode 100644 index 000000000000..500aace21ca7 --- /dev/null +++ b/include/linux/lguest.h @@ -0,0 +1,85 @@ +/* Things the lguest guest needs to know. Note: like all lguest interfaces, + * this is subject to wild and random change between versions. */ +#ifndef _ASM_LGUEST_H +#define _ASM_LGUEST_H + +#ifndef __ASSEMBLY__ +#include <asm/irq.h> + +#define LHCALL_FLUSH_ASYNC 0 +#define LHCALL_LGUEST_INIT 1 +#define LHCALL_CRASH 2 +#define LHCALL_LOAD_GDT 3 +#define LHCALL_NEW_PGTABLE 4 +#define LHCALL_FLUSH_TLB 5 +#define LHCALL_LOAD_IDT_ENTRY 6 +#define LHCALL_SET_STACK 7 +#define LHCALL_TS 8 +#define LHCALL_SET_CLOCKEVENT 9 +#define LHCALL_HALT 10 +#define LHCALL_GET_WALLCLOCK 11 +#define LHCALL_BIND_DMA 12 +#define LHCALL_SEND_DMA 13 +#define LHCALL_SET_PTE 14 +#define LHCALL_SET_PMD 15 +#define LHCALL_LOAD_TLS 16 + +#define LG_CLOCK_MIN_DELTA 100UL +#define LG_CLOCK_MAX_DELTA ULONG_MAX + +#define LGUEST_TRAP_ENTRY 0x1F + +static inline unsigned long +hcall(unsigned long call, + unsigned long arg1, unsigned long arg2, unsigned long arg3) +{ + asm volatile("int $" __stringify(LGUEST_TRAP_ENTRY) + : "=a"(call) + : "a"(call), "d"(arg1), "b"(arg2), "c"(arg3) + : "memory"); + return call; +} + +void async_hcall(unsigned long call, + unsigned long arg1, unsigned long arg2, unsigned long arg3); + +/* Can't use our min() macro here: needs to be a constant */ +#define LGUEST_IRQS (NR_IRQS < 32 ? NR_IRQS: 32) + +#define LHCALL_RING_SIZE 64 +struct hcall_ring +{ + u32 eax, edx, ebx, ecx; +}; + +/* All the good stuff happens here: guest registers it with LGUEST_INIT */ +struct lguest_data +{ +/* Fields which change during running: */ + /* 512 == enabled (same as eflags) */ + unsigned int irq_enabled; + /* Interrupts blocked by guest. */ + DECLARE_BITMAP(blocked_interrupts, LGUEST_IRQS); + + /* Virtual address of page fault. */ + unsigned long cr2; + + /* Async hypercall ring. 0xFF == done, 0 == pending. */ + u8 hcall_status[LHCALL_RING_SIZE]; + struct hcall_ring hcalls[LHCALL_RING_SIZE]; + +/* Fields initialized by the hypervisor at boot: */ + /* Memory not to try to access */ + unsigned long reserve_mem; + /* ID of this guest (used by network driver to set ethernet address) */ + u16 guestid; + /* KHz for the TSC clock. */ + u32 tsc_khz; + +/* Fields initialized by the guest at boot: */ + /* Instruction range to suppress interrupts even if enabled */ + unsigned long noirq_start, noirq_end; +}; +extern struct lguest_data lguest_data; +#endif /* __ASSEMBLY__ */ +#endif /* _ASM_LGUEST_H */ diff --git a/include/linux/lguest_bus.h b/include/linux/lguest_bus.h new file mode 100644 index 000000000000..c9b4e05fee49 --- /dev/null +++ b/include/linux/lguest_bus.h @@ -0,0 +1,48 @@ +#ifndef _ASM_LGUEST_DEVICE_H +#define _ASM_LGUEST_DEVICE_H +/* Everything you need to know about lguest devices. */ +#include <linux/device.h> +#include <linux/lguest.h> +#include <linux/lguest_launcher.h> + +struct lguest_device { + /* Unique busid, and index into lguest_page->devices[] */ + unsigned int index; + + struct device dev; + + /* Driver can hang data off here. */ + void *private; +}; + +/* By convention, each device can use irq index+1 if it wants to. */ +static inline int lgdev_irq(const struct lguest_device *dev) +{ + return dev->index + 1; +} + +/* dma args must not be vmalloced! */ +void lguest_send_dma(unsigned long key, struct lguest_dma *dma); +int lguest_bind_dma(unsigned long key, struct lguest_dma *dmas, + unsigned int num, u8 irq); +void lguest_unbind_dma(unsigned long key, struct lguest_dma *dmas); + +/* Map the virtual device space */ +void *lguest_map(unsigned long phys_addr, unsigned long pages); +void lguest_unmap(void *); + +struct lguest_driver { + const char *name; + struct module *owner; + u16 device_type; + int (*probe)(struct lguest_device *dev); + void (*remove)(struct lguest_device *dev); + + struct device_driver drv; +}; + +extern int register_lguest_driver(struct lguest_driver *drv); +extern void unregister_lguest_driver(struct lguest_driver *drv); + +extern struct lguest_device_desc *lguest_devices; /* Just past max_pfn */ +#endif /* _ASM_LGUEST_DEVICE_H */ diff --git a/include/linux/lguest_launcher.h b/include/linux/lguest_launcher.h new file mode 100644 index 000000000000..0ba414a40c80 --- /dev/null +++ b/include/linux/lguest_launcher.h @@ -0,0 +1,73 @@ +#ifndef _ASM_LGUEST_USER +#define _ASM_LGUEST_USER +/* Everything the "lguest" userspace program needs to know. */ +/* They can register up to 32 arrays of lguest_dma. */ +#define LGUEST_MAX_DMA 32 +/* At most we can dma 16 lguest_dma in one op. */ +#define LGUEST_MAX_DMA_SECTIONS 16 + +/* How many devices? Assume each one wants up to two dma arrays per device. */ +#define LGUEST_MAX_DEVICES (LGUEST_MAX_DMA/2) + +struct lguest_dma +{ + /* 0 if free to be used, filled by hypervisor. */ + u32 used_len; + unsigned long addr[LGUEST_MAX_DMA_SECTIONS]; + u16 len[LGUEST_MAX_DMA_SECTIONS]; +}; + +struct lguest_block_page +{ + /* 0 is a read, 1 is a write. */ + int type; + u32 sector; /* Offset in device = sector * 512. */ + u32 bytes; /* Length expected to be read/written in bytes */ + /* 0 = pending, 1 = done, 2 = done, error */ + int result; + u32 num_sectors; /* Disk length = num_sectors * 512 */ +}; + +/* There is a shared page of these. */ +struct lguest_net +{ + /* Simply the mac address (with multicast bit meaning promisc). */ + unsigned char mac[6]; +}; + +/* Where the Host expects the Guest to SEND_DMA console output to. */ +#define LGUEST_CONSOLE_DMA_KEY 0 + +/* We have a page of these descriptors in the lguest_device page. */ +struct lguest_device_desc { + u16 type; +#define LGUEST_DEVICE_T_CONSOLE 1 +#define LGUEST_DEVICE_T_NET 2 +#define LGUEST_DEVICE_T_BLOCK 3 + + u16 features; +#define LGUEST_NET_F_NOCSUM 0x4000 /* Don't bother checksumming */ +#define LGUEST_DEVICE_F_RANDOMNESS 0x8000 /* IRQ is fairly random */ + + u16 status; +/* 256 and above are device specific. */ +#define LGUEST_DEVICE_S_ACKNOWLEDGE 1 /* We have seen device. */ +#define LGUEST_DEVICE_S_DRIVER 2 /* We have found a driver */ +#define LGUEST_DEVICE_S_DRIVER_OK 4 /* Driver says OK! */ +#define LGUEST_DEVICE_S_REMOVED 8 /* Device has gone away. */ +#define LGUEST_DEVICE_S_REMOVED_ACK 16 /* Driver has been told. */ +#define LGUEST_DEVICE_S_FAILED 128 /* Something actually failed */ + + u16 num_pages; + u32 pfn; +}; + +/* Write command first word is a request. */ +enum lguest_req +{ + LHREQ_INITIALIZE, /* + pfnlimit, pgdir, start, pageoffset */ + LHREQ_GETDMA, /* + addr (returns &lguest_dma, irq in ->used_len) */ + LHREQ_IRQ, /* + irq */ + LHREQ_BREAK, /* + on/off flag (on blocks until someone does off) */ +}; +#endif /* _ASM_LGUEST_USER */ diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 14c937d345cb..0e843bf65877 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -1,7 +1,8 @@ /* * Runtime locking correctness validator * - * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> + * Copyright (C) 2006,2007 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> + * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> * * see Documentation/lockdep-design.txt for more details. */ @@ -9,6 +10,7 @@ #define __LINUX_LOCKDEP_H struct task_struct; +struct lockdep_map; #ifdef CONFIG_LOCKDEP @@ -114,8 +116,44 @@ struct lock_class { const char *name; int name_version; + +#ifdef CONFIG_LOCK_STAT + unsigned long contention_point[4]; +#endif +}; + +#ifdef CONFIG_LOCK_STAT +struct lock_time { + s64 min; + s64 max; + s64 total; + unsigned long nr; +}; + +enum bounce_type { + bounce_acquired_write, + bounce_acquired_read, + bounce_contended_write, + bounce_contended_read, + nr_bounce_types, + + bounce_acquired = bounce_acquired_write, + bounce_contended = bounce_contended_write, }; +struct lock_class_stats { + unsigned long contention_point[4]; + struct lock_time read_waittime; + struct lock_time write_waittime; + struct lock_time read_holdtime; + struct lock_time write_holdtime; + unsigned long bounces[nr_bounce_types]; +}; + +struct lock_class_stats lock_stats(struct lock_class *class); +void clear_lock_stats(struct lock_class *class); +#endif + /* * Map the lock object (the lock instance) to the lock-class object. * This is embedded into specific lock instances: @@ -124,6 +162,9 @@ struct lockdep_map { struct lock_class_key *key; struct lock_class *class_cache; const char *name; +#ifdef CONFIG_LOCK_STAT + int cpu; +#endif }; /* @@ -165,6 +206,10 @@ struct held_lock { unsigned long acquire_ip; struct lockdep_map *instance; +#ifdef CONFIG_LOCK_STAT + u64 waittime_stamp; + u64 holdtime_stamp; +#endif /* * The lock-stack is unified in that the lock chains of interrupt * contexts nest ontop of process context chains, but we 'separate' @@ -281,6 +326,30 @@ struct lock_class_key { }; #endif /* !LOCKDEP */ +#ifdef CONFIG_LOCK_STAT + +extern void lock_contended(struct lockdep_map *lock, unsigned long ip); +extern void lock_acquired(struct lockdep_map *lock); + +#define LOCK_CONTENDED(_lock, try, lock) \ +do { \ + if (!try(_lock)) { \ + lock_contended(&(_lock)->dep_map, _RET_IP_); \ + lock(_lock); \ + } \ + lock_acquired(&(_lock)->dep_map); \ +} while (0) + +#else /* CONFIG_LOCK_STAT */ + +#define lock_contended(lockdep_map, ip) do {} while (0) +#define lock_acquired(lockdep_map) do {} while (0) + +#define LOCK_CONTENDED(_lock, try, lock) \ + lock(_lock) + +#endif /* CONFIG_LOCK_STAT */ + #if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_GENERIC_HARDIRQS) extern void early_init_irq_lock_class(void); #else diff --git a/include/linux/mm.h b/include/linux/mm.h index a5c451816fdc..c456c3a1c28e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -168,6 +168,8 @@ extern unsigned int kobjsize(const void *objp); #define VM_INSERTPAGE 0x02000000 /* The vma has had "vm_insert_page()" done on it */ #define VM_ALWAYSDUMP 0x04000000 /* Always include in core dumps */ +#define VM_CAN_NONLINEAR 0x08000000 /* Has ->fault & does nonlinear pages */ + #ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */ #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS #endif @@ -190,6 +192,30 @@ extern unsigned int kobjsize(const void *objp); */ extern pgprot_t protection_map[16]; +#define FAULT_FLAG_WRITE 0x01 /* Fault was a write access */ +#define FAULT_FLAG_NONLINEAR 0x02 /* Fault was via a nonlinear mapping */ + + +/* + * vm_fault is filled by the the pagefault handler and passed to the vma's + * ->fault function. The vma's ->fault is responsible for returning a bitmask + * of VM_FAULT_xxx flags that give details about how the fault was handled. + * + * pgoff should be used in favour of virtual_address, if possible. If pgoff + * is used, one may set VM_CAN_NONLINEAR in the vma->vm_flags to get nonlinear + * mapping support. + */ +struct vm_fault { + unsigned int flags; /* FAULT_FLAG_xxx flags */ + pgoff_t pgoff; /* Logical page offset based on vma */ + void __user *virtual_address; /* Faulting virtual address */ + + struct page *page; /* ->fault handlers should return a + * page here, unless VM_FAULT_NOPAGE + * is set (which is also implied by + * VM_FAULT_ERROR). + */ +}; /* * These are the virtual MM functions - opening of an area, closing and @@ -199,9 +225,11 @@ extern pgprot_t protection_map[16]; struct vm_operations_struct { void (*open)(struct vm_area_struct * area); void (*close)(struct vm_area_struct * area); - struct page * (*nopage)(struct vm_area_struct * area, unsigned long address, int *type); - unsigned long (*nopfn)(struct vm_area_struct * area, unsigned long address); - int (*populate)(struct vm_area_struct * area, unsigned long address, unsigned long len, pgprot_t prot, unsigned long pgoff, int nonblock); + int (*fault)(struct vm_area_struct *vma, struct vm_fault *vmf); + struct page *(*nopage)(struct vm_area_struct *area, + unsigned long address, int *type); + unsigned long (*nopfn)(struct vm_area_struct *area, + unsigned long address); /* notification that a previously read-only page is about to become * writable, if an error is returned it will cause a SIGBUS */ @@ -655,7 +683,6 @@ static inline int page_mapped(struct page *page) */ #define NOPAGE_SIGBUS (NULL) #define NOPAGE_OOM ((struct page *) (-1)) -#define NOPAGE_REFAULT ((struct page *) (-2)) /* Return to userspace, rerun */ /* * Error return values for the *_nopfn functions @@ -669,16 +696,18 @@ static inline int page_mapped(struct page *page) * Used to decide whether a process gets delivered SIGBUS or * just gets major/minor fault counters bumped up. */ -#define VM_FAULT_OOM 0x00 -#define VM_FAULT_SIGBUS 0x01 -#define VM_FAULT_MINOR 0x02 -#define VM_FAULT_MAJOR 0x03 - -/* - * Special case for get_user_pages. - * Must be in a distinct bit from the above VM_FAULT_ flags. - */ -#define VM_FAULT_WRITE 0x10 + +#define VM_FAULT_MINOR 0 /* For backwards compat. Remove me quickly. */ + +#define VM_FAULT_OOM 0x0001 +#define VM_FAULT_SIGBUS 0x0002 +#define VM_FAULT_MAJOR 0x0004 +#define VM_FAULT_WRITE 0x0008 /* Special case for get_user_pages */ + +#define VM_FAULT_NOPAGE 0x0100 /* ->fault installed the pte, not return page */ +#define VM_FAULT_LOCKED 0x0200 /* ->fault locked the returned page */ + +#define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS) #define offset_in_page(p) ((unsigned long)(p) & ~PAGE_MASK) @@ -762,20 +791,10 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping, extern int vmtruncate(struct inode * inode, loff_t offset); extern int vmtruncate_range(struct inode * inode, loff_t offset, loff_t end); -extern int install_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, struct page *page, pgprot_t prot); -extern int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, unsigned long pgoff, pgprot_t prot); #ifdef CONFIG_MMU -extern int __handle_mm_fault(struct mm_struct *mm,struct vm_area_struct *vma, +extern int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write_access); - -static inline int handle_mm_fault(struct mm_struct *mm, - struct vm_area_struct *vma, unsigned long address, - int write_access) -{ - return __handle_mm_fault(mm, vma, address, write_access) & - (~VM_FAULT_WRITE); -} #else static inline int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, @@ -789,7 +808,6 @@ static inline int handle_mm_fault(struct mm_struct *mm, extern int make_pages_present(unsigned long addr, unsigned long end); extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write); -void install_arg_page(struct vm_area_struct *, struct page *, unsigned long); int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, int len, int write, int force, struct page **pages, struct vm_area_struct **vmas); @@ -806,9 +824,15 @@ int FASTCALL(set_page_dirty(struct page *page)); int set_page_dirty_lock(struct page *page); int clear_page_dirty_for_io(struct page *page); +extern unsigned long move_page_tables(struct vm_area_struct *vma, + unsigned long old_addr, struct vm_area_struct *new_vma, + unsigned long new_addr, unsigned long len); extern unsigned long do_mremap(unsigned long addr, unsigned long old_len, unsigned long new_len, unsigned long flags, unsigned long new_addr); +extern int mprotect_fixup(struct vm_area_struct *vma, + struct vm_area_struct **pprev, unsigned long start, + unsigned long end, unsigned long newflags); /* * A callback you can register to apply pressure to ageable caches. @@ -1104,9 +1128,7 @@ extern void truncate_inode_pages_range(struct address_space *, loff_t lstart, loff_t lend); /* generic vm_area_ops exported for stackable file systems */ -extern struct page *filemap_nopage(struct vm_area_struct *, unsigned long, int *); -extern int filemap_populate(struct vm_area_struct *, unsigned long, - unsigned long, pgprot_t, unsigned long, int); +extern int filemap_fault(struct vm_area_struct *, struct vm_fault *); /* mm/page-writeback.c */ int write_one_page(struct page *page, int wait); @@ -1121,13 +1143,20 @@ int do_page_cache_readahead(struct address_space *mapping, struct file *filp, pgoff_t offset, unsigned long nr_to_read); int force_page_cache_readahead(struct address_space *mapping, struct file *filp, pgoff_t offset, unsigned long nr_to_read); -unsigned long page_cache_readahead(struct address_space *mapping, - struct file_ra_state *ra, - struct file *filp, - pgoff_t offset, - unsigned long size); -void handle_ra_miss(struct address_space *mapping, - struct file_ra_state *ra, pgoff_t offset); + +void page_cache_sync_readahead(struct address_space *mapping, + struct file_ra_state *ra, + struct file *filp, + pgoff_t offset, + unsigned long size); + +void page_cache_async_readahead(struct address_space *mapping, + struct file_ra_state *ra, + struct file *filp, + struct page *pg, + pgoff_t offset, + unsigned long size); + unsigned long max_sane_readahead(unsigned long nr); /* Do stack extension */ @@ -1135,6 +1164,8 @@ extern int expand_stack(struct vm_area_struct *vma, unsigned long address); #ifdef CONFIG_IA64 extern int expand_upwards(struct vm_area_struct *vma, unsigned long address); #endif +extern int expand_stack_downwards(struct vm_area_struct *vma, + unsigned long address); /* Look up the first VMA which satisfies addr < vm_end, NULL if none. */ extern struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr); diff --git a/include/linux/namei.h b/include/linux/namei.h index b7dd24917f0d..6c38efbd810f 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -69,8 +69,8 @@ extern int FASTCALL(__user_walk_fd(int dfd, const char __user *, unsigned, struc #define user_path_walk_link(name,nd) \ __user_walk_fd(AT_FDCWD, name, 0, nd) extern int FASTCALL(path_lookup(const char *, unsigned, struct nameidata *)); -extern int FASTCALL(path_walk(const char *, struct nameidata *)); -extern int FASTCALL(link_path_walk(const char *, struct nameidata *)); +extern int vfs_path_lookup(struct dentry *, struct vfsmount *, + const char *, unsigned int, struct nameidata *); extern void path_release(struct nameidata *); extern void path_release_on_umount(struct nameidata *); diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h index 78feb7beff75..5cd192469096 100644 --- a/include/linux/nfsd/export.h +++ b/include/linux/nfsd/export.h @@ -116,18 +116,7 @@ struct svc_expkey { #define EX_NOHIDE(exp) ((exp)->ex_flags & NFSEXP_NOHIDE) #define EX_WGATHER(exp) ((exp)->ex_flags & NFSEXP_GATHERED_WRITES) -static inline int EX_RDONLY(struct svc_export *exp, struct svc_rqst *rqstp) -{ - struct exp_flavor_info *f; - struct exp_flavor_info *end = exp->ex_flavors + exp->ex_nflavors; - - for (f = exp->ex_flavors; f < end; f++) { - if (f->pseudoflavor == rqstp->rq_flavor) - return f->flags & NFSEXP_READONLY; - } - return exp->ex_flags & NFSEXP_READONLY; -} - +int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp); __be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp); /* diff --git a/include/linux/notifier.h b/include/linux/notifier.h index 576f2bb34cc8..be3f2bb6fcf3 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h @@ -212,5 +212,11 @@ extern int __srcu_notifier_call_chain(struct srcu_notifier_head *nh, #define CPU_DEAD_FROZEN (CPU_DEAD | CPU_TASKS_FROZEN) #define CPU_DYING_FROZEN (CPU_DYING | CPU_TASKS_FROZEN) +/* Hibernation and suspend events */ +#define PM_HIBERNATION_PREPARE 0x0001 /* Going to hibernate */ +#define PM_POST_HIBERNATION 0x0002 /* Hibernation finished */ +#define PM_SUSPEND_PREPARE 0x0003 /* Going to suspend the system */ +#define PM_POST_SUSPEND 0x0004 /* Suspend finished */ + #endif /* __KERNEL__ */ #endif /* _LINUX_NOTIFIER_H */ diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 731cd2ac3227..209d3a47f50f 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -90,6 +90,9 @@ #define PG_reclaim 17 /* To be reclaimed asap */ #define PG_buddy 19 /* Page is free, on buddy lists */ +/* PG_readahead is only used for file reads; PG_reclaim is only for writes */ +#define PG_readahead PG_reclaim /* Reminder to do async read-ahead */ + /* PG_owner_priv_1 users should have descriptive aliases */ #define PG_checked PG_owner_priv_1 /* Used by some filesystems */ #define PG_pinned PG_owner_priv_1 /* Xen pinned pagetable */ @@ -186,37 +189,15 @@ static inline void SetPageUptodate(struct page *page) #define __SetPagePrivate(page) __set_bit(PG_private, &(page)->flags) #define __ClearPagePrivate(page) __clear_bit(PG_private, &(page)->flags) +/* + * Only test-and-set exist for PG_writeback. The unconditional operators are + * risky: they bypass page accounting. + */ #define PageWriteback(page) test_bit(PG_writeback, &(page)->flags) -#define SetPageWriteback(page) \ - do { \ - if (!test_and_set_bit(PG_writeback, \ - &(page)->flags)) \ - inc_zone_page_state(page, NR_WRITEBACK); \ - } while (0) -#define TestSetPageWriteback(page) \ - ({ \ - int ret; \ - ret = test_and_set_bit(PG_writeback, \ - &(page)->flags); \ - if (!ret) \ - inc_zone_page_state(page, NR_WRITEBACK); \ - ret; \ - }) -#define ClearPageWriteback(page) \ - do { \ - if (test_and_clear_bit(PG_writeback, \ - &(page)->flags)) \ - dec_zone_page_state(page, NR_WRITEBACK); \ - } while (0) -#define TestClearPageWriteback(page) \ - ({ \ - int ret; \ - ret = test_and_clear_bit(PG_writeback, \ - &(page)->flags); \ - if (ret) \ - dec_zone_page_state(page, NR_WRITEBACK); \ - ret; \ - }) +#define TestSetPageWriteback(page) test_and_set_bit(PG_writeback, \ + &(page)->flags) +#define TestClearPageWriteback(page) test_and_clear_bit(PG_writeback, \ + &(page)->flags) #define PageBuddy(page) test_bit(PG_buddy, &(page)->flags) #define __SetPageBuddy(page) __set_bit(PG_buddy, &(page)->flags) @@ -226,6 +207,10 @@ static inline void SetPageUptodate(struct page *page) #define SetPageMappedToDisk(page) set_bit(PG_mappedtodisk, &(page)->flags) #define ClearPageMappedToDisk(page) clear_bit(PG_mappedtodisk, &(page)->flags) +#define PageReadahead(page) test_bit(PG_readahead, &(page)->flags) +#define SetPageReadahead(page) set_bit(PG_readahead, &(page)->flags) +#define ClearPageReadahead(page) clear_bit(PG_readahead, &(page)->flags) + #define PageReclaim(page) test_bit(PG_reclaim, &(page)->flags) #define SetPageReclaim(page) set_bit(PG_reclaim, &(page)->flags) #define ClearPageReclaim(page) clear_bit(PG_reclaim, &(page)->flags) diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 2c7add169539..b15c6498fe67 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -495,6 +495,8 @@ #define PCI_VENDOR_ID_AMD 0x1022 #define PCI_DEVICE_ID_AMD_K8_NB 0x1100 +#define PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP 0x1101 +#define PCI_DEVICE_ID_AMD_K8_NB_MEMCTL 0x1102 #define PCI_DEVICE_ID_AMD_K8_NB_MISC 0x1103 #define PCI_DEVICE_ID_AMD_LANCE 0x2000 #define PCI_DEVICE_ID_AMD_LANCE_HOME 0x2001 @@ -2209,6 +2211,7 @@ #define PCI_DEVICE_ID_INTEL_82915GM_IG 0x2592 #define PCI_DEVICE_ID_INTEL_82945G_HB 0x2770 #define PCI_DEVICE_ID_INTEL_82945G_IG 0x2772 +#define PCI_DEVICE_ID_INTEL_3000_HB 0x2778 #define PCI_DEVICE_ID_INTEL_82945GM_HB 0x27A0 #define PCI_DEVICE_ID_INTEL_82945GM_IG 0x27A2 #define PCI_DEVICE_ID_INTEL_ICH6_0 0x2640 diff --git a/include/linux/pm.h b/include/linux/pm.h index 273781c82e4d..ad3cc2eb0d34 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -101,6 +101,7 @@ struct pm_dev */ extern void (*pm_idle)(void); extern void (*pm_power_off)(void); +extern void (*pm_power_off_prepare)(void); typedef int __bitwise suspend_state_t; @@ -284,8 +285,6 @@ extern int device_prepare_suspend(pm_message_t state); #define device_may_wakeup(dev) \ (device_can_wakeup(dev) && (dev)->power.should_wakeup) -extern int dpm_runtime_suspend(struct device *, pm_message_t); -extern void dpm_runtime_resume(struct device *); extern void __suspend_report_result(const char *function, void *fn, int ret); #define suspend_report_result(fn, ret) \ @@ -317,15 +316,6 @@ static inline int device_suspend(pm_message_t state) #define device_set_wakeup_enable(dev,val) do{}while(0) #define device_may_wakeup(dev) (0) -static inline int dpm_runtime_suspend(struct device * dev, pm_message_t state) -{ - return 0; -} - -static inline void dpm_runtime_resume(struct device * dev) -{ -} - #define suspend_report_result(fn, ret) do { } while (0) static inline int call_platform_enable_wakeup(struct device *dev, int is_on) diff --git a/include/linux/sched.h b/include/linux/sched.h index 731edaca8ffd..94f624aef017 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -345,6 +345,27 @@ typedef unsigned long mm_counter_t; (mm)->hiwater_vm = (mm)->total_vm; \ } while (0) +extern void set_dumpable(struct mm_struct *mm, int value); +extern int get_dumpable(struct mm_struct *mm); + +/* mm flags */ +/* dumpable bits */ +#define MMF_DUMPABLE 0 /* core dump is permitted */ +#define MMF_DUMP_SECURELY 1 /* core file is readable only by root */ +#define MMF_DUMPABLE_BITS 2 + +/* coredump filter bits */ +#define MMF_DUMP_ANON_PRIVATE 2 +#define MMF_DUMP_ANON_SHARED 3 +#define MMF_DUMP_MAPPED_PRIVATE 4 +#define MMF_DUMP_MAPPED_SHARED 5 +#define MMF_DUMP_FILTER_SHIFT MMF_DUMPABLE_BITS +#define MMF_DUMP_FILTER_BITS 4 +#define MMF_DUMP_FILTER_MASK \ + (((1 << MMF_DUMP_FILTER_BITS) - 1) << MMF_DUMP_FILTER_SHIFT) +#define MMF_DUMP_FILTER_DEFAULT \ + ((1 << MMF_DUMP_ANON_PRIVATE) | (1 << MMF_DUMP_ANON_SHARED)) + struct mm_struct { struct vm_area_struct * mmap; /* list of VMAs */ struct rb_root mm_rb; @@ -402,7 +423,7 @@ struct mm_struct { unsigned int token_priority; unsigned int last_interval; - unsigned char dumpable:2; + unsigned long flags; /* Must use atomic bitops to access the bits */ /* coredumping support */ int core_waiters; diff --git a/include/linux/spinlock_types.h b/include/linux/spinlock_types.h index 210549ba4ef4..f6a3a951b79e 100644 --- a/include/linux/spinlock_types.h +++ b/include/linux/spinlock_types.h @@ -9,14 +9,14 @@ * Released under the General Public License (GPL). */ -#include <linux/lockdep.h> - #if defined(CONFIG_SMP) # include <asm/spinlock_types.h> #else # include <linux/spinlock_types_up.h> #endif +#include <linux/lockdep.h> + typedef struct { raw_spinlock_t raw_lock; #if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP) diff --git a/include/linux/spinlock_types_up.h b/include/linux/spinlock_types_up.h index 27644af20b7c..04135b0e198e 100644 --- a/include/linux/spinlock_types_up.h +++ b/include/linux/spinlock_types_up.h @@ -12,14 +12,10 @@ * Released under the General Public License (GPL). */ -#if defined(CONFIG_DEBUG_SPINLOCK) || \ - defined(CONFIG_DEBUG_LOCK_ALLOC) +#ifdef CONFIG_DEBUG_SPINLOCK typedef struct { volatile unsigned int slock; -#ifdef CONFIG_DEBUG_LOCK_ALLOC - struct lockdep_map dep_map; -#endif } raw_spinlock_t; #define __RAW_SPIN_LOCK_UNLOCKED { 1 } @@ -34,9 +30,6 @@ typedef struct { } raw_spinlock_t; typedef struct { /* no debug version on UP */ -#ifdef CONFIG_DEBUG_LOCK_ALLOC - struct lockdep_map dep_map; -#endif } raw_rwlock_t; #define __RAW_RW_LOCK_UNLOCKED { } diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h index 1d2b084c0185..e7fa657d0c49 100644 --- a/include/linux/stacktrace.h +++ b/include/linux/stacktrace.h @@ -13,7 +13,7 @@ extern void save_stack_trace(struct stack_trace *trace); extern void print_stack_trace(struct stack_trace *trace, int spaces); #else # define save_stack_trace(trace) do { } while (0) -# define print_stack_trace(trace) do { } while (0) +# define print_stack_trace(trace, spaces) do { } while (0) #endif #endif diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 9c7cb6430666..e8e6da394c92 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -43,14 +43,19 @@ static inline void pm_restore_console(void) {} * @prepare: prepare system for hibernation * @enter: shut down system after state has been saved to disk * @finish: finish/clean up after state has been reloaded + * @pre_restore: prepare system for the restoration from a hibernation image + * @restore_cleanup: clean up after a failing image restoration */ struct hibernation_ops { int (*prepare)(void); int (*enter)(void); void (*finish)(void); + int (*pre_restore)(void); + void (*restore_cleanup)(void); }; -#if defined(CONFIG_PM) && defined(CONFIG_SOFTWARE_SUSPEND) +#ifdef CONFIG_PM +#ifdef CONFIG_SOFTWARE_SUSPEND /* kernel/power/snapshot.c */ extern void __register_nosave_region(unsigned long b, unsigned long e, int km); static inline void register_nosave_region(unsigned long b, unsigned long e) @@ -68,16 +73,14 @@ extern unsigned long get_safe_page(gfp_t gfp_mask); extern void hibernation_set_ops(struct hibernation_ops *ops); extern int hibernate(void); -#else -static inline void register_nosave_region(unsigned long b, unsigned long e) {} -static inline void register_nosave_region_late(unsigned long b, unsigned long e) {} +#else /* CONFIG_SOFTWARE_SUSPEND */ static inline int swsusp_page_is_forbidden(struct page *p) { return 0; } static inline void swsusp_set_page_free(struct page *p) {} static inline void swsusp_unset_page_free(struct page *p) {} static inline void hibernation_set_ops(struct hibernation_ops *ops) {} static inline int hibernate(void) { return -ENOSYS; } -#endif /* defined(CONFIG_PM) && defined(CONFIG_SOFTWARE_SUSPEND) */ +#endif /* CONFIG_SOFTWARE_SUSPEND */ void save_processor_state(void); void restore_processor_state(void); @@ -85,4 +88,43 @@ struct saved_context; void __save_processor_state(struct saved_context *ctxt); void __restore_processor_state(struct saved_context *ctxt); +/* kernel/power/main.c */ +extern struct blocking_notifier_head pm_chain_head; + +static inline int register_pm_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_register(&pm_chain_head, nb); +} + +static inline int unregister_pm_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_unregister(&pm_chain_head, nb); +} + +#define pm_notifier(fn, pri) { \ + static struct notifier_block fn##_nb = \ + { .notifier_call = fn, .priority = pri }; \ + register_pm_notifier(&fn##_nb); \ +} +#else /* CONFIG_PM */ + +static inline int register_pm_notifier(struct notifier_block *nb) +{ + return 0; +} + +static inline int unregister_pm_notifier(struct notifier_block *nb) +{ + return 0; +} + +#define pm_notifier(fn, pri) do { (void)(fn); } while (0) +#endif /* CONFIG_PM */ + +#if !defined CONFIG_SOFTWARE_SUSPEND || !defined(CONFIG_PM) +static inline void register_nosave_region(unsigned long b, unsigned long e) +{ +} +#endif + #endif /* _LINUX_SWSUSP_H */ diff --git a/include/linux/uio_driver.h b/include/linux/uio_driver.h new file mode 100644 index 000000000000..44c28e94df50 --- /dev/null +++ b/include/linux/uio_driver.h @@ -0,0 +1,91 @@ +/* + * include/linux/uio_driver.h + * + * Copyright(C) 2005, Benedikt Spranger <b.spranger@linutronix.de> + * Copyright(C) 2005, Thomas Gleixner <tglx@linutronix.de> + * Copyright(C) 2006, Hans J. Koch <hjk@linutronix.de> + * Copyright(C) 2006, Greg Kroah-Hartman <greg@kroah.com> + * + * Userspace IO driver. + * + * Licensed under the GPLv2 only. + */ + +#ifndef _UIO_DRIVER_H_ +#define _UIO_DRIVER_H_ + +#include <linux/module.h> +#include <linux/fs.h> +#include <linux/interrupt.h> + +/** + * struct uio_mem - description of a UIO memory region + * @kobj: kobject for this mapping + * @addr: address of the device's memory + * @size: size of IO + * @memtype: type of memory addr points to + * @internal_addr: ioremap-ped version of addr, for driver internal use + */ +struct uio_mem { + struct kobject kobj; + unsigned long addr; + unsigned long size; + int memtype; + void __iomem *internal_addr; +}; + +#define MAX_UIO_MAPS 5 + +struct uio_device; + +/** + * struct uio_info - UIO device capabilities + * @uio_dev: the UIO device this info belongs to + * @name: device name + * @version: device driver version + * @mem: list of mappable memory regions, size==0 for end of list + * @irq: interrupt number or UIO_IRQ_CUSTOM + * @irq_flags: flags for request_irq() + * @priv: optional private data + * @handler: the device's irq handler + * @mmap: mmap operation for this uio device + * @open: open operation for this uio device + * @release: release operation for this uio device + */ +struct uio_info { + struct uio_device *uio_dev; + char *name; + char *version; + struct uio_mem mem[MAX_UIO_MAPS]; + long irq; + unsigned long irq_flags; + void *priv; + irqreturn_t (*handler)(int irq, struct uio_info *dev_info); + int (*mmap)(struct uio_info *info, struct vm_area_struct *vma); + int (*open)(struct uio_info *info, struct inode *inode); + int (*release)(struct uio_info *info, struct inode *inode); +}; + +extern int __must_check + __uio_register_device(struct module *owner, + struct device *parent, + struct uio_info *info); +static inline int __must_check + uio_register_device(struct device *parent, struct uio_info *info) +{ + return __uio_register_device(THIS_MODULE, parent, info); +} +extern void uio_unregister_device(struct uio_info *info); +extern void uio_event_notify(struct uio_info *info); + +/* defines for uio_device->irq */ +#define UIO_IRQ_CUSTOM -1 +#define UIO_IRQ_NONE -2 + +/* defines for uio_device->memtype */ +#define UIO_MEM_NONE 0 +#define UIO_MEM_PHYS 1 +#define UIO_MEM_LOGICAL 2 +#define UIO_MEM_VIRTUAL 3 + +#endif /* _LINUX_UIO_DRIVER_H_ */ diff --git a/ipc/shm.c b/ipc/shm.c index 242c3f66493a..d0259e3ad1c0 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -224,13 +224,12 @@ static void shm_close(struct vm_area_struct *vma) mutex_unlock(&shm_ids(ns).mutex); } -static struct page *shm_nopage(struct vm_area_struct *vma, - unsigned long address, int *type) +static int shm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { struct file *file = vma->vm_file; struct shm_file_data *sfd = shm_file_data(file); - return sfd->vm_ops->nopage(vma, address, type); + return sfd->vm_ops->fault(vma, vmf); } #ifdef CONFIG_NUMA @@ -269,6 +268,7 @@ static int shm_mmap(struct file * file, struct vm_area_struct * vma) if (ret != 0) return ret; sfd->vm_ops = vma->vm_ops; + BUG_ON(!sfd->vm_ops->fault); vma->vm_ops = &shm_vm_ops; shm_open(vma); @@ -327,7 +327,7 @@ static const struct file_operations shm_file_operations = { static struct vm_operations_struct shm_vm_ops = { .open = shm_open, /* callback for a new vm-area open */ .close = shm_close, /* callback for when the vm-area is released */ - .nopage = shm_nopage, + .fault = shm_fault, #if defined(CONFIG_NUMA) .set_policy = shm_set_policy, .get_policy = shm_get_policy, diff --git a/kernel/auditsc.c b/kernel/auditsc.c index b7640a5f382a..145cbb79c4b9 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -153,7 +153,7 @@ struct audit_aux_data_execve { struct audit_aux_data d; int argc; int envc; - char mem[0]; + struct mm_struct *mm; }; struct audit_aux_data_socketcall { @@ -831,6 +831,55 @@ static int audit_log_pid_context(struct audit_context *context, pid_t pid, return rc; } +static void audit_log_execve_info(struct audit_buffer *ab, + struct audit_aux_data_execve *axi) +{ + int i; + long len, ret; + const char __user *p = (const char __user *)axi->mm->arg_start; + char *buf; + + if (axi->mm != current->mm) + return; /* execve failed, no additional info */ + + for (i = 0; i < axi->argc; i++, p += len) { + len = strnlen_user(p, MAX_ARG_STRLEN); + /* + * We just created this mm, if we can't find the strings + * we just copied into it something is _very_ wrong. Similar + * for strings that are too long, we should not have created + * any. + */ + if (!len || len > MAX_ARG_STRLEN) { + WARN_ON(1); + send_sig(SIGKILL, current, 0); + } + + buf = kmalloc(len, GFP_KERNEL); + if (!buf) { + audit_panic("out of memory for argv string\n"); + break; + } + + ret = copy_from_user(buf, p, len); + /* + * There is no reason for this copy to be short. We just + * copied them here, and the mm hasn't been exposed to user- + * space yet. + */ + if (!ret) { + WARN_ON(1); + send_sig(SIGKILL, current, 0); + } + + audit_log_format(ab, "a%d=", i); + audit_log_untrustedstring(ab, buf); + audit_log_format(ab, "\n"); + + kfree(buf); + } +} + static void audit_log_exit(struct audit_context *context, struct task_struct *tsk) { int i, call_panic = 0; @@ -971,13 +1020,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts case AUDIT_EXECVE: { struct audit_aux_data_execve *axi = (void *)aux; - int i; - const char *p; - for (i = 0, p = axi->mem; i < axi->argc; i++) { - audit_log_format(ab, "a%d=", i); - p = audit_log_untrustedstring(ab, p); - audit_log_format(ab, "\n"); - } + audit_log_execve_info(ab, axi); break; } case AUDIT_SOCKETCALL: { @@ -1821,32 +1864,31 @@ int __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode return 0; } +int audit_argv_kb = 32; + int audit_bprm(struct linux_binprm *bprm) { struct audit_aux_data_execve *ax; struct audit_context *context = current->audit_context; - unsigned long p, next; - void *to; if (likely(!audit_enabled || !context || context->dummy)) return 0; - ax = kmalloc(sizeof(*ax) + PAGE_SIZE * MAX_ARG_PAGES - bprm->p, - GFP_KERNEL); + /* + * Even though the stack code doesn't limit the arg+env size any more, + * the audit code requires that _all_ arguments be logged in a single + * netlink skb. Hence cap it :-( + */ + if (bprm->argv_len > (audit_argv_kb << 10)) + return -E2BIG; + + ax = kmalloc(sizeof(*ax), GFP_KERNEL); if (!ax) return -ENOMEM; ax->argc = bprm->argc; ax->envc = bprm->envc; - for (p = bprm->p, to = ax->mem; p < MAX_ARG_PAGES*PAGE_SIZE; p = next) { - struct page *page = bprm->page[p / PAGE_SIZE]; - void *kaddr = kmap(page); - next = (p + PAGE_SIZE) & ~(PAGE_SIZE - 1); - memcpy(to, kaddr + (p & (PAGE_SIZE - 1)), next - p); - to += next - p; - kunmap(page); - } - + ax->mm = bprm->mm; ax->d.type = AUDIT_EXECVE; ax->d.next = context->aux; context->aux = (void *)ax; diff --git a/kernel/exit.c b/kernel/exit.c index e8af8d0c2483..464c2b172f07 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -45,6 +45,7 @@ #include <linux/resource.h> #include <linux/blkdev.h> #include <linux/task_io_accounting_ops.h> +#include <linux/freezer.h> #include <asm/uaccess.h> #include <asm/unistd.h> @@ -594,6 +595,8 @@ static void exit_mm(struct task_struct * tsk) tsk->mm = NULL; up_read(&mm->mmap_sem); enter_lazy_tlb(mm, current); + /* We don't want this task to be frozen prematurely */ + clear_freeze_flag(tsk); task_unlock(tsk); mmput(mm); } diff --git a/kernel/fork.c b/kernel/fork.c index ba39bdb2a7b8..469838998220 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -334,6 +334,8 @@ static struct mm_struct * mm_init(struct mm_struct * mm) atomic_set(&mm->mm_count, 1); init_rwsem(&mm->mmap_sem); INIT_LIST_HEAD(&mm->mmlist); + mm->flags = (current->mm) ? current->mm->flags + : MMF_DUMP_FILTER_DEFAULT; mm->core_waiters = 0; mm->nr_ptes = 0; set_mm_counter(mm, file_rss, 0); diff --git a/kernel/futex.c b/kernel/futex.c index 5c3f45d07c53..a12425051ee9 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -346,15 +346,20 @@ static int futex_handle_fault(unsigned long address, vma = find_vma(mm, address); if (vma && address >= vma->vm_start && (vma->vm_flags & VM_WRITE)) { - switch (handle_mm_fault(mm, vma, address, 1)) { - case VM_FAULT_MINOR: - ret = 0; - current->min_flt++; - break; - case VM_FAULT_MAJOR: + int fault; + fault = handle_mm_fault(mm, vma, address, 1); + if (unlikely((fault & VM_FAULT_ERROR))) { +#if 0 + /* XXX: let's do this when we verify it is OK */ + if (ret & VM_FAULT_OOM) + ret = -ENOMEM; +#endif + } else { ret = 0; - current->maj_flt++; - break; + if (fault & VM_FAULT_MAJOR) + current->maj_flt++; + else + current->min_flt++; } } if (!fshared) diff --git a/kernel/kmod.c b/kernel/kmod.c index 78d365c524ed..beedbdc64608 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -33,6 +33,8 @@ #include <linux/kernel.h> #include <linux/init.h> #include <linux/resource.h> +#include <linux/notifier.h> +#include <linux/suspend.h> #include <asm/uaccess.h> extern int max_threads; @@ -265,6 +267,88 @@ static void __call_usermodehelper(struct work_struct *work) } } +#ifdef CONFIG_PM +/* + * If set, call_usermodehelper_exec() will exit immediately returning -EBUSY + * (used for preventing user land processes from being created after the user + * land has been frozen during a system-wide hibernation or suspend operation). + */ +static int usermodehelper_disabled; + +/* Number of helpers running */ +static atomic_t running_helpers = ATOMIC_INIT(0); + +/* + * Wait queue head used by usermodehelper_pm_callback() to wait for all running + * helpers to finish. + */ +static DECLARE_WAIT_QUEUE_HEAD(running_helpers_waitq); + +/* + * Time to wait for running_helpers to become zero before the setting of + * usermodehelper_disabled in usermodehelper_pm_callback() fails + */ +#define RUNNING_HELPERS_TIMEOUT (5 * HZ) + +static int usermodehelper_pm_callback(struct notifier_block *nfb, + unsigned long action, + void *ignored) +{ + long retval; + + switch (action) { + case PM_HIBERNATION_PREPARE: + case PM_SUSPEND_PREPARE: + usermodehelper_disabled = 1; + smp_mb(); + /* + * From now on call_usermodehelper_exec() won't start any new + * helpers, so it is sufficient if running_helpers turns out to + * be zero at one point (it may be increased later, but that + * doesn't matter). + */ + retval = wait_event_timeout(running_helpers_waitq, + atomic_read(&running_helpers) == 0, + RUNNING_HELPERS_TIMEOUT); + if (retval) { + return NOTIFY_OK; + } else { + usermodehelper_disabled = 0; + return NOTIFY_BAD; + } + case PM_POST_HIBERNATION: + case PM_POST_SUSPEND: + usermodehelper_disabled = 0; + return NOTIFY_OK; + } + + return NOTIFY_DONE; +} + +static void helper_lock(void) +{ + atomic_inc(&running_helpers); + smp_mb__after_atomic_inc(); +} + +static void helper_unlock(void) +{ + if (atomic_dec_and_test(&running_helpers)) + wake_up(&running_helpers_waitq); +} + +static void register_pm_notifier_callback(void) +{ + pm_notifier(usermodehelper_pm_callback, 0); +} +#else /* CONFIG_PM */ +#define usermodehelper_disabled 0 + +static inline void helper_lock(void) {} +static inline void helper_unlock(void) {} +static inline void register_pm_notifier_callback(void) {} +#endif /* CONFIG_PM */ + /** * call_usermodehelper_setup - prepare to call a usermode helper * @path - path to usermode executable @@ -369,12 +453,13 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, DECLARE_COMPLETION_ONSTACK(done); int retval; + helper_lock(); if (sub_info->path[0] == '\0') { retval = 0; goto out; } - if (!khelper_wq) { + if (!khelper_wq || usermodehelper_disabled) { retval = -EBUSY; goto out; } @@ -390,6 +475,7 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, out: call_usermodehelper_freeinfo(sub_info); + helper_unlock(); return retval; } EXPORT_SYMBOL(call_usermodehelper_exec); @@ -431,4 +517,5 @@ void __init usermodehelper_init(void) { khelper_wq = create_singlethread_workqueue("khelper"); BUG_ON(!khelper_wq); + register_pm_notifier_callback(); } diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 9e47d8c493f3..3e9f513a728d 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -675,9 +675,18 @@ static struct notifier_block kprobe_exceptions_nb = { .priority = 0x7fffffff /* we need to be notified first */ }; +unsigned long __weak arch_deref_entry_point(void *entry) +{ + return (unsigned long)entry; +} int __kprobes register_jprobe(struct jprobe *jp) { + unsigned long addr = arch_deref_entry_point(jp->entry); + + if (!kernel_text_address(addr)) + return -EINVAL; + /* Todo: Verify probepoint is a function entry point */ jp->kp.pre_handler = setjmp_pre_handler; jp->kp.break_handler = longjmp_break_handler; diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c index 559deca5ed15..2565e1b6dd7b 100644 --- a/kernel/ksysfs.c +++ b/kernel/ksysfs.c @@ -62,6 +62,28 @@ static ssize_t kexec_crash_loaded_show(struct kset *kset, char *page) KERNEL_ATTR_RO(kexec_crash_loaded); #endif /* CONFIG_KEXEC */ +/* + * Make /sys/kernel/notes give the raw contents of our kernel .notes section. + */ +extern const char __start_notes __attribute__((weak)); +extern const char __stop_notes __attribute__((weak)); +#define notes_size (&__stop_notes - &__start_notes) + +static ssize_t notes_read(struct kobject *kobj, struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) +{ + memcpy(buf, &__start_notes + off, count); + return count; +} + +static struct bin_attribute notes_attr = { + .attr = { + .name = "notes", + .mode = S_IRUGO, + }, + .read = ¬es_read, +}; + decl_subsys(kernel, NULL, NULL); EXPORT_SYMBOL_GPL(kernel_subsys); @@ -88,6 +110,12 @@ static int __init ksysfs_init(void) error = sysfs_create_group(&kernel_subsys.kobj, &kernel_attr_group); + if (!error && notes_size > 0) { + notes_attr.size = notes_size; + error = sysfs_create_bin_file(&kernel_subsys.kobj, + ¬es_attr); + } + return error; } diff --git a/kernel/lockdep.c b/kernel/lockdep.c index edba2ffb43de..734da579ad13 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -5,7 +5,8 @@ * * Started by Ingo Molnar: * - * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> + * Copyright (C) 2006,2007 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> + * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> * * this code maps all the lock dependencies as they occur in a live kernel * and will warn about the following classes of locking bugs: @@ -37,11 +38,26 @@ #include <linux/debug_locks.h> #include <linux/irqflags.h> #include <linux/utsname.h> +#include <linux/hash.h> #include <asm/sections.h> #include "lockdep_internals.h" +#ifdef CONFIG_PROVE_LOCKING +int prove_locking = 1; +module_param(prove_locking, int, 0644); +#else +#define prove_locking 0 +#endif + +#ifdef CONFIG_LOCK_STAT +int lock_stat = 1; +module_param(lock_stat, int, 0644); +#else +#define lock_stat 0 +#endif + /* * lockdep_lock: protects the lockdep graph, the hashes and the * class/list/hash allocators. @@ -96,23 +112,6 @@ unsigned long nr_list_entries; static struct lock_list list_entries[MAX_LOCKDEP_ENTRIES]; /* - * Allocate a lockdep entry. (assumes the graph_lock held, returns - * with NULL on failure) - */ -static struct lock_list *alloc_list_entry(void) -{ - if (nr_list_entries >= MAX_LOCKDEP_ENTRIES) { - if (!debug_locks_off_graph_unlock()) - return NULL; - - printk("BUG: MAX_LOCKDEP_ENTRIES too low!\n"); - printk("turning off the locking correctness validator.\n"); - return NULL; - } - return list_entries + nr_list_entries++; -} - -/* * All data structures here are protected by the global debug_lock. * * Mutex key structs only get allocated, once during bootup, and never @@ -121,6 +120,117 @@ static struct lock_list *alloc_list_entry(void) unsigned long nr_lock_classes; static struct lock_class lock_classes[MAX_LOCKDEP_KEYS]; +#ifdef CONFIG_LOCK_STAT +static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS], lock_stats); + +static int lock_contention_point(struct lock_class *class, unsigned long ip) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(class->contention_point); i++) { + if (class->contention_point[i] == 0) { + class->contention_point[i] = ip; + break; + } + if (class->contention_point[i] == ip) + break; + } + + return i; +} + +static void lock_time_inc(struct lock_time *lt, s64 time) +{ + if (time > lt->max) + lt->max = time; + + if (time < lt->min || !lt->min) + lt->min = time; + + lt->total += time; + lt->nr++; +} + +static inline void lock_time_add(struct lock_time *src, struct lock_time *dst) +{ + dst->min += src->min; + dst->max += src->max; + dst->total += src->total; + dst->nr += src->nr; +} + +struct lock_class_stats lock_stats(struct lock_class *class) +{ + struct lock_class_stats stats; + int cpu, i; + + memset(&stats, 0, sizeof(struct lock_class_stats)); + for_each_possible_cpu(cpu) { + struct lock_class_stats *pcs = + &per_cpu(lock_stats, cpu)[class - lock_classes]; + + for (i = 0; i < ARRAY_SIZE(stats.contention_point); i++) + stats.contention_point[i] += pcs->contention_point[i]; + + lock_time_add(&pcs->read_waittime, &stats.read_waittime); + lock_time_add(&pcs->write_waittime, &stats.write_waittime); + + lock_time_add(&pcs->read_holdtime, &stats.read_holdtime); + lock_time_add(&pcs->write_holdtime, &stats.write_holdtime); + + for (i = 0; i < ARRAY_SIZE(stats.bounces); i++) + stats.bounces[i] += pcs->bounces[i]; + } + + return stats; +} + +void clear_lock_stats(struct lock_class *class) +{ + int cpu; + + for_each_possible_cpu(cpu) { + struct lock_class_stats *cpu_stats = + &per_cpu(lock_stats, cpu)[class - lock_classes]; + + memset(cpu_stats, 0, sizeof(struct lock_class_stats)); + } + memset(class->contention_point, 0, sizeof(class->contention_point)); +} + +static struct lock_class_stats *get_lock_stats(struct lock_class *class) +{ + return &get_cpu_var(lock_stats)[class - lock_classes]; +} + +static void put_lock_stats(struct lock_class_stats *stats) +{ + put_cpu_var(lock_stats); +} + +static void lock_release_holdtime(struct held_lock *hlock) +{ + struct lock_class_stats *stats; + s64 holdtime; + + if (!lock_stat) + return; + + holdtime = sched_clock() - hlock->holdtime_stamp; + + stats = get_lock_stats(hlock->class); + if (hlock->read) + lock_time_inc(&stats->read_holdtime, holdtime); + else + lock_time_inc(&stats->write_holdtime, holdtime); + put_lock_stats(stats); +} +#else +static inline void lock_release_holdtime(struct held_lock *hlock) +{ +} +#endif + /* * We keep a global list of all lock classes. The list only grows, * never shrinks. The list is only accessed with the lockdep @@ -133,24 +243,18 @@ LIST_HEAD(all_lock_classes); */ #define CLASSHASH_BITS (MAX_LOCKDEP_KEYS_BITS - 1) #define CLASSHASH_SIZE (1UL << CLASSHASH_BITS) -#define CLASSHASH_MASK (CLASSHASH_SIZE - 1) -#define __classhashfn(key) ((((unsigned long)key >> CLASSHASH_BITS) + (unsigned long)key) & CLASSHASH_MASK) +#define __classhashfn(key) hash_long((unsigned long)key, CLASSHASH_BITS) #define classhashentry(key) (classhash_table + __classhashfn((key))) static struct list_head classhash_table[CLASSHASH_SIZE]; -unsigned long nr_lock_chains; -static struct lock_chain lock_chains[MAX_LOCKDEP_CHAINS]; - /* * We put the lock dependency chains into a hash-table as well, to cache * their existence: */ #define CHAINHASH_BITS (MAX_LOCKDEP_CHAINS_BITS-1) #define CHAINHASH_SIZE (1UL << CHAINHASH_BITS) -#define CHAINHASH_MASK (CHAINHASH_SIZE - 1) -#define __chainhashfn(chain) \ - (((chain >> CHAINHASH_BITS) + chain) & CHAINHASH_MASK) +#define __chainhashfn(chain) hash_long(chain, CHAINHASH_BITS) #define chainhashentry(chain) (chainhash_table + __chainhashfn((chain))) static struct list_head chainhash_table[CHAINHASH_SIZE]; @@ -223,26 +327,6 @@ static int verbose(struct lock_class *class) return 0; } -#ifdef CONFIG_TRACE_IRQFLAGS - -static int hardirq_verbose(struct lock_class *class) -{ -#if HARDIRQ_VERBOSE - return class_filter(class); -#endif - return 0; -} - -static int softirq_verbose(struct lock_class *class) -{ -#if SOFTIRQ_VERBOSE - return class_filter(class); -#endif - return 0; -} - -#endif - /* * Stack-trace: tightly packed array of stack backtrace * addresses. Protected by the graph_lock. @@ -291,6 +375,11 @@ unsigned int max_recursion_depth; * about it later on, in lockdep_info(). */ static int lockdep_init_error; +static unsigned long lockdep_init_trace_data[20]; +static struct stack_trace lockdep_init_trace = { + .max_entries = ARRAY_SIZE(lockdep_init_trace_data), + .entries = lockdep_init_trace_data, +}; /* * Various lockdep statistics: @@ -482,6 +571,262 @@ static void print_lock_dependencies(struct lock_class *class, int depth) } } +static void print_kernel_version(void) +{ + printk("%s %.*s\n", init_utsname()->release, + (int)strcspn(init_utsname()->version, " "), + init_utsname()->version); +} + +static int very_verbose(struct lock_class *class) +{ +#if VERY_VERBOSE + return class_filter(class); +#endif + return 0; +} + +/* + * Is this the address of a static object: + */ +static int static_obj(void *obj) +{ + unsigned long start = (unsigned long) &_stext, + end = (unsigned long) &_end, + addr = (unsigned long) obj; +#ifdef CONFIG_SMP + int i; +#endif + + /* + * static variable? + */ + if ((addr >= start) && (addr < end)) + return 1; + +#ifdef CONFIG_SMP + /* + * percpu var? + */ + for_each_possible_cpu(i) { + start = (unsigned long) &__per_cpu_start + per_cpu_offset(i); + end = (unsigned long) &__per_cpu_start + PERCPU_ENOUGH_ROOM + + per_cpu_offset(i); + + if ((addr >= start) && (addr < end)) + return 1; + } +#endif + + /* + * module var? + */ + return is_module_address(addr); +} + +/* + * To make lock name printouts unique, we calculate a unique + * class->name_version generation counter: + */ +static int count_matching_names(struct lock_class *new_class) +{ + struct lock_class *class; + int count = 0; + + if (!new_class->name) + return 0; + + list_for_each_entry(class, &all_lock_classes, lock_entry) { + if (new_class->key - new_class->subclass == class->key) + return class->name_version; + if (class->name && !strcmp(class->name, new_class->name)) + count = max(count, class->name_version); + } + + return count + 1; +} + +/* + * Register a lock's class in the hash-table, if the class is not present + * yet. Otherwise we look it up. We cache the result in the lock object + * itself, so actual lookup of the hash should be once per lock object. + */ +static inline struct lock_class * +look_up_lock_class(struct lockdep_map *lock, unsigned int subclass) +{ + struct lockdep_subclass_key *key; + struct list_head *hash_head; + struct lock_class *class; + +#ifdef CONFIG_DEBUG_LOCKDEP + /* + * If the architecture calls into lockdep before initializing + * the hashes then we'll warn about it later. (we cannot printk + * right now) + */ + if (unlikely(!lockdep_initialized)) { + lockdep_init(); + lockdep_init_error = 1; + save_stack_trace(&lockdep_init_trace); + } +#endif + + /* + * Static locks do not have their class-keys yet - for them the key + * is the lock object itself: + */ + if (unlikely(!lock->key)) + lock->key = (void *)lock; + + /* + * NOTE: the class-key must be unique. For dynamic locks, a static + * lock_class_key variable is passed in through the mutex_init() + * (or spin_lock_init()) call - which acts as the key. For static + * locks we use the lock object itself as the key. + */ + BUILD_BUG_ON(sizeof(struct lock_class_key) > + sizeof(struct lockdep_map)); + + key = lock->key->subkeys + subclass; + + hash_head = classhashentry(key); + + /* + * We can walk the hash lockfree, because the hash only + * grows, and we are careful when adding entries to the end: + */ + list_for_each_entry(class, hash_head, hash_entry) { + if (class->key == key) { + WARN_ON_ONCE(class->name != lock->name); + return class; + } + } + + return NULL; +} + +/* + * Register a lock's class in the hash-table, if the class is not present + * yet. Otherwise we look it up. We cache the result in the lock object + * itself, so actual lookup of the hash should be once per lock object. + */ +static inline struct lock_class * +register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) +{ + struct lockdep_subclass_key *key; + struct list_head *hash_head; + struct lock_class *class; + unsigned long flags; + + class = look_up_lock_class(lock, subclass); + if (likely(class)) + return class; + + /* + * Debug-check: all keys must be persistent! + */ + if (!static_obj(lock->key)) { + debug_locks_off(); + printk("INFO: trying to register non-static key.\n"); + printk("the code is fine but needs lockdep annotation.\n"); + printk("turning off the locking correctness validator.\n"); + dump_stack(); + + return NULL; + } + + key = lock->key->subkeys + subclass; + hash_head = classhashentry(key); + + raw_local_irq_save(flags); + if (!graph_lock()) { + raw_local_irq_restore(flags); + return NULL; + } + /* + * We have to do the hash-walk again, to avoid races + * with another CPU: + */ + list_for_each_entry(class, hash_head, hash_entry) + if (class->key == key) + goto out_unlock_set; + /* + * Allocate a new key from the static array, and add it to + * the hash: + */ + if (nr_lock_classes >= MAX_LOCKDEP_KEYS) { + if (!debug_locks_off_graph_unlock()) { + raw_local_irq_restore(flags); + return NULL; + } + raw_local_irq_restore(flags); + + printk("BUG: MAX_LOCKDEP_KEYS too low!\n"); + printk("turning off the locking correctness validator.\n"); + return NULL; + } + class = lock_classes + nr_lock_classes++; + debug_atomic_inc(&nr_unused_locks); + class->key = key; + class->name = lock->name; + class->subclass = subclass; + INIT_LIST_HEAD(&class->lock_entry); + INIT_LIST_HEAD(&class->locks_before); + INIT_LIST_HEAD(&class->locks_after); + class->name_version = count_matching_names(class); + /* + * We use RCU's safe list-add method to make + * parallel walking of the hash-list safe: + */ + list_add_tail_rcu(&class->hash_entry, hash_head); + + if (verbose(class)) { + graph_unlock(); + raw_local_irq_restore(flags); + + printk("\nnew class %p: %s", class->key, class->name); + if (class->name_version > 1) + printk("#%d", class->name_version); + printk("\n"); + dump_stack(); + + raw_local_irq_save(flags); + if (!graph_lock()) { + raw_local_irq_restore(flags); + return NULL; + } + } +out_unlock_set: + graph_unlock(); + raw_local_irq_restore(flags); + + if (!subclass || force) + lock->class_cache = class; + + if (DEBUG_LOCKS_WARN_ON(class->subclass != subclass)) + return NULL; + + return class; +} + +#ifdef CONFIG_PROVE_LOCKING +/* + * Allocate a lockdep entry. (assumes the graph_lock held, returns + * with NULL on failure) + */ +static struct lock_list *alloc_list_entry(void) +{ + if (nr_list_entries >= MAX_LOCKDEP_ENTRIES) { + if (!debug_locks_off_graph_unlock()) + return NULL; + + printk("BUG: MAX_LOCKDEP_ENTRIES too low!\n"); + printk("turning off the locking correctness validator.\n"); + return NULL; + } + return list_entries + nr_list_entries++; +} + /* * Add a new dependency to the head of the list: */ @@ -542,13 +887,6 @@ print_circular_bug_entry(struct lock_list *target, unsigned int depth) return 0; } -static void print_kernel_version(void) -{ - printk("%s %.*s\n", init_utsname()->release, - (int)strcspn(init_utsname()->version, " "), - init_utsname()->version); -} - /* * When a circular dependency is detected, print the * header first: @@ -640,15 +978,7 @@ check_noncircular(struct lock_class *source, unsigned int depth) return 1; } -static int very_verbose(struct lock_class *class) -{ -#if VERY_VERBOSE - return class_filter(class); -#endif - return 0; -} #ifdef CONFIG_TRACE_IRQFLAGS - /* * Forwards and backwards subgraph searching, for the purposes of * proving that two subgraphs can be connected by a new dependency @@ -821,6 +1151,78 @@ check_usage(struct task_struct *curr, struct held_lock *prev, bit_backwards, bit_forwards, irqclass); } +static int +check_prev_add_irq(struct task_struct *curr, struct held_lock *prev, + struct held_lock *next) +{ + /* + * Prove that the new dependency does not connect a hardirq-safe + * lock with a hardirq-unsafe lock - to achieve this we search + * the backwards-subgraph starting at <prev>, and the + * forwards-subgraph starting at <next>: + */ + if (!check_usage(curr, prev, next, LOCK_USED_IN_HARDIRQ, + LOCK_ENABLED_HARDIRQS, "hard")) + return 0; + + /* + * Prove that the new dependency does not connect a hardirq-safe-read + * lock with a hardirq-unsafe lock - to achieve this we search + * the backwards-subgraph starting at <prev>, and the + * forwards-subgraph starting at <next>: + */ + if (!check_usage(curr, prev, next, LOCK_USED_IN_HARDIRQ_READ, + LOCK_ENABLED_HARDIRQS, "hard-read")) + return 0; + + /* + * Prove that the new dependency does not connect a softirq-safe + * lock with a softirq-unsafe lock - to achieve this we search + * the backwards-subgraph starting at <prev>, and the + * forwards-subgraph starting at <next>: + */ + if (!check_usage(curr, prev, next, LOCK_USED_IN_SOFTIRQ, + LOCK_ENABLED_SOFTIRQS, "soft")) + return 0; + /* + * Prove that the new dependency does not connect a softirq-safe-read + * lock with a softirq-unsafe lock - to achieve this we search + * the backwards-subgraph starting at <prev>, and the + * forwards-subgraph starting at <next>: + */ + if (!check_usage(curr, prev, next, LOCK_USED_IN_SOFTIRQ_READ, + LOCK_ENABLED_SOFTIRQS, "soft")) + return 0; + + return 1; +} + +static void inc_chains(void) +{ + if (current->hardirq_context) + nr_hardirq_chains++; + else { + if (current->softirq_context) + nr_softirq_chains++; + else + nr_process_chains++; + } +} + +#else + +static inline int +check_prev_add_irq(struct task_struct *curr, struct held_lock *prev, + struct held_lock *next) +{ + return 1; +} + +static inline void inc_chains(void) +{ + nr_process_chains++; +} + #endif static int @@ -922,47 +1324,10 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, if (!(check_noncircular(next->class, 0))) return print_circular_bug_tail(); -#ifdef CONFIG_TRACE_IRQFLAGS - /* - * Prove that the new dependency does not connect a hardirq-safe - * lock with a hardirq-unsafe lock - to achieve this we search - * the backwards-subgraph starting at <prev>, and the - * forwards-subgraph starting at <next>: - */ - if (!check_usage(curr, prev, next, LOCK_USED_IN_HARDIRQ, - LOCK_ENABLED_HARDIRQS, "hard")) + if (!check_prev_add_irq(curr, prev, next)) return 0; /* - * Prove that the new dependency does not connect a hardirq-safe-read - * lock with a hardirq-unsafe lock - to achieve this we search - * the backwards-subgraph starting at <prev>, and the - * forwards-subgraph starting at <next>: - */ - if (!check_usage(curr, prev, next, LOCK_USED_IN_HARDIRQ_READ, - LOCK_ENABLED_HARDIRQS, "hard-read")) - return 0; - - /* - * Prove that the new dependency does not connect a softirq-safe - * lock with a softirq-unsafe lock - to achieve this we search - * the backwards-subgraph starting at <prev>, and the - * forwards-subgraph starting at <next>: - */ - if (!check_usage(curr, prev, next, LOCK_USED_IN_SOFTIRQ, - LOCK_ENABLED_SOFTIRQS, "soft")) - return 0; - /* - * Prove that the new dependency does not connect a softirq-safe-read - * lock with a softirq-unsafe lock - to achieve this we search - * the backwards-subgraph starting at <prev>, and the - * forwards-subgraph starting at <next>: - */ - if (!check_usage(curr, prev, next, LOCK_USED_IN_SOFTIRQ_READ, - LOCK_ENABLED_SOFTIRQS, "soft")) - return 0; -#endif - /* * For recursive read-locks we do all the dependency checks, * but we dont store read-triggered dependencies (only * write-triggered dependencies). This ensures that only the @@ -1088,224 +1453,8 @@ out_bug: return 0; } - -/* - * Is this the address of a static object: - */ -static int static_obj(void *obj) -{ - unsigned long start = (unsigned long) &_stext, - end = (unsigned long) &_end, - addr = (unsigned long) obj; -#ifdef CONFIG_SMP - int i; -#endif - - /* - * static variable? - */ - if ((addr >= start) && (addr < end)) - return 1; - -#ifdef CONFIG_SMP - /* - * percpu var? - */ - for_each_possible_cpu(i) { - start = (unsigned long) &__per_cpu_start + per_cpu_offset(i); - end = (unsigned long) &__per_cpu_start + PERCPU_ENOUGH_ROOM - + per_cpu_offset(i); - - if ((addr >= start) && (addr < end)) - return 1; - } -#endif - - /* - * module var? - */ - return is_module_address(addr); -} - -/* - * To make lock name printouts unique, we calculate a unique - * class->name_version generation counter: - */ -static int count_matching_names(struct lock_class *new_class) -{ - struct lock_class *class; - int count = 0; - - if (!new_class->name) - return 0; - - list_for_each_entry(class, &all_lock_classes, lock_entry) { - if (new_class->key - new_class->subclass == class->key) - return class->name_version; - if (class->name && !strcmp(class->name, new_class->name)) - count = max(count, class->name_version); - } - - return count + 1; -} - -/* - * Register a lock's class in the hash-table, if the class is not present - * yet. Otherwise we look it up. We cache the result in the lock object - * itself, so actual lookup of the hash should be once per lock object. - */ -static inline struct lock_class * -look_up_lock_class(struct lockdep_map *lock, unsigned int subclass) -{ - struct lockdep_subclass_key *key; - struct list_head *hash_head; - struct lock_class *class; - -#ifdef CONFIG_DEBUG_LOCKDEP - /* - * If the architecture calls into lockdep before initializing - * the hashes then we'll warn about it later. (we cannot printk - * right now) - */ - if (unlikely(!lockdep_initialized)) { - lockdep_init(); - lockdep_init_error = 1; - } -#endif - - /* - * Static locks do not have their class-keys yet - for them the key - * is the lock object itself: - */ - if (unlikely(!lock->key)) - lock->key = (void *)lock; - - /* - * NOTE: the class-key must be unique. For dynamic locks, a static - * lock_class_key variable is passed in through the mutex_init() - * (or spin_lock_init()) call - which acts as the key. For static - * locks we use the lock object itself as the key. - */ - BUILD_BUG_ON(sizeof(struct lock_class_key) > sizeof(struct lock_class)); - - key = lock->key->subkeys + subclass; - - hash_head = classhashentry(key); - - /* - * We can walk the hash lockfree, because the hash only - * grows, and we are careful when adding entries to the end: - */ - list_for_each_entry(class, hash_head, hash_entry) - if (class->key == key) - return class; - - return NULL; -} - -/* - * Register a lock's class in the hash-table, if the class is not present - * yet. Otherwise we look it up. We cache the result in the lock object - * itself, so actual lookup of the hash should be once per lock object. - */ -static inline struct lock_class * -register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) -{ - struct lockdep_subclass_key *key; - struct list_head *hash_head; - struct lock_class *class; - unsigned long flags; - - class = look_up_lock_class(lock, subclass); - if (likely(class)) - return class; - - /* - * Debug-check: all keys must be persistent! - */ - if (!static_obj(lock->key)) { - debug_locks_off(); - printk("INFO: trying to register non-static key.\n"); - printk("the code is fine but needs lockdep annotation.\n"); - printk("turning off the locking correctness validator.\n"); - dump_stack(); - - return NULL; - } - - key = lock->key->subkeys + subclass; - hash_head = classhashentry(key); - - raw_local_irq_save(flags); - if (!graph_lock()) { - raw_local_irq_restore(flags); - return NULL; - } - /* - * We have to do the hash-walk again, to avoid races - * with another CPU: - */ - list_for_each_entry(class, hash_head, hash_entry) - if (class->key == key) - goto out_unlock_set; - /* - * Allocate a new key from the static array, and add it to - * the hash: - */ - if (nr_lock_classes >= MAX_LOCKDEP_KEYS) { - if (!debug_locks_off_graph_unlock()) { - raw_local_irq_restore(flags); - return NULL; - } - raw_local_irq_restore(flags); - - printk("BUG: MAX_LOCKDEP_KEYS too low!\n"); - printk("turning off the locking correctness validator.\n"); - return NULL; - } - class = lock_classes + nr_lock_classes++; - debug_atomic_inc(&nr_unused_locks); - class->key = key; - class->name = lock->name; - class->subclass = subclass; - INIT_LIST_HEAD(&class->lock_entry); - INIT_LIST_HEAD(&class->locks_before); - INIT_LIST_HEAD(&class->locks_after); - class->name_version = count_matching_names(class); - /* - * We use RCU's safe list-add method to make - * parallel walking of the hash-list safe: - */ - list_add_tail_rcu(&class->hash_entry, hash_head); - - if (verbose(class)) { - graph_unlock(); - raw_local_irq_restore(flags); - - printk("\nnew class %p: %s", class->key, class->name); - if (class->name_version > 1) - printk("#%d", class->name_version); - printk("\n"); - dump_stack(); - - raw_local_irq_save(flags); - if (!graph_lock()) { - raw_local_irq_restore(flags); - return NULL; - } - } -out_unlock_set: - graph_unlock(); - raw_local_irq_restore(flags); - - if (!subclass || force) - lock->class_cache = class; - - if (DEBUG_LOCKS_WARN_ON(class->subclass != subclass)) - return NULL; - - return class; -} +unsigned long nr_lock_chains; +static struct lock_chain lock_chains[MAX_LOCKDEP_CHAINS]; /* * Look up a dependency chain. If the key is not present yet then @@ -1366,21 +1515,72 @@ cache_hit: chain->chain_key = chain_key; list_add_tail_rcu(&chain->entry, hash_head); debug_atomic_inc(&chain_lookup_misses); -#ifdef CONFIG_TRACE_IRQFLAGS - if (current->hardirq_context) - nr_hardirq_chains++; - else { - if (current->softirq_context) - nr_softirq_chains++; - else - nr_process_chains++; - } -#else - nr_process_chains++; -#endif + inc_chains(); + + return 1; +} + +static int validate_chain(struct task_struct *curr, struct lockdep_map *lock, + struct held_lock *hlock, int chain_head) +{ + /* + * Trylock needs to maintain the stack of held locks, but it + * does not add new dependencies, because trylock can be done + * in any order. + * + * We look up the chain_key and do the O(N^2) check and update of + * the dependencies only if this is a new dependency chain. + * (If lookup_chain_cache() returns with 1 it acquires + * graph_lock for us) + */ + if (!hlock->trylock && (hlock->check == 2) && + lookup_chain_cache(curr->curr_chain_key, hlock->class)) { + /* + * Check whether last held lock: + * + * - is irq-safe, if this lock is irq-unsafe + * - is softirq-safe, if this lock is hardirq-unsafe + * + * And check whether the new lock's dependency graph + * could lead back to the previous lock. + * + * any of these scenarios could lead to a deadlock. If + * All validations + */ + int ret = check_deadlock(curr, hlock, lock, hlock->read); + + if (!ret) + return 0; + /* + * Mark recursive read, as we jump over it when + * building dependencies (just like we jump over + * trylock entries): + */ + if (ret == 2) + hlock->read = 2; + /* + * Add dependency only if this lock is not the head + * of the chain, and if it's not a secondary read-lock: + */ + if (!chain_head && ret != 2) + if (!check_prevs_add(curr, hlock)) + return 0; + graph_unlock(); + } else + /* after lookup_chain_cache(): */ + if (unlikely(!debug_locks)) + return 0; return 1; } +#else +static inline int validate_chain(struct task_struct *curr, + struct lockdep_map *lock, struct held_lock *hlock, + int chain_head) +{ + return 1; +} +#endif /* * We are building curr_chain_key incrementally, so double-check @@ -1425,6 +1625,57 @@ static void check_chain_key(struct task_struct *curr) #endif } +static int +print_usage_bug(struct task_struct *curr, struct held_lock *this, + enum lock_usage_bit prev_bit, enum lock_usage_bit new_bit) +{ + if (!debug_locks_off_graph_unlock() || debug_locks_silent) + return 0; + + printk("\n=================================\n"); + printk( "[ INFO: inconsistent lock state ]\n"); + print_kernel_version(); + printk( "---------------------------------\n"); + + printk("inconsistent {%s} -> {%s} usage.\n", + usage_str[prev_bit], usage_str[new_bit]); + + printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] takes:\n", + curr->comm, curr->pid, + trace_hardirq_context(curr), hardirq_count() >> HARDIRQ_SHIFT, + trace_softirq_context(curr), softirq_count() >> SOFTIRQ_SHIFT, + trace_hardirqs_enabled(curr), + trace_softirqs_enabled(curr)); + print_lock(this); + + printk("{%s} state was registered at:\n", usage_str[prev_bit]); + print_stack_trace(this->class->usage_traces + prev_bit, 1); + + print_irqtrace_events(curr); + printk("\nother info that might help us debug this:\n"); + lockdep_print_held_locks(curr); + + printk("\nstack backtrace:\n"); + dump_stack(); + + return 0; +} + +/* + * Print out an error if an invalid bit is set: + */ +static inline int +valid_state(struct task_struct *curr, struct held_lock *this, + enum lock_usage_bit new_bit, enum lock_usage_bit bad_bit) +{ + if (unlikely(this->class->usage_mask & (1 << bad_bit))) + return print_usage_bug(curr, this, bad_bit, new_bit); + return 1; +} + +static int mark_lock(struct task_struct *curr, struct held_lock *this, + enum lock_usage_bit new_bit); + #ifdef CONFIG_TRACE_IRQFLAGS /* @@ -1518,90 +1769,30 @@ void print_irqtrace_events(struct task_struct *curr) print_ip_sym(curr->softirq_disable_ip); } -#endif - -static int -print_usage_bug(struct task_struct *curr, struct held_lock *this, - enum lock_usage_bit prev_bit, enum lock_usage_bit new_bit) +static int hardirq_verbose(struct lock_class *class) { - if (!debug_locks_off_graph_unlock() || debug_locks_silent) - return 0; - - printk("\n=================================\n"); - printk( "[ INFO: inconsistent lock state ]\n"); - print_kernel_version(); - printk( "---------------------------------\n"); - - printk("inconsistent {%s} -> {%s} usage.\n", - usage_str[prev_bit], usage_str[new_bit]); - - printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] takes:\n", - curr->comm, curr->pid, - trace_hardirq_context(curr), hardirq_count() >> HARDIRQ_SHIFT, - trace_softirq_context(curr), softirq_count() >> SOFTIRQ_SHIFT, - trace_hardirqs_enabled(curr), - trace_softirqs_enabled(curr)); - print_lock(this); - - printk("{%s} state was registered at:\n", usage_str[prev_bit]); - print_stack_trace(this->class->usage_traces + prev_bit, 1); - - print_irqtrace_events(curr); - printk("\nother info that might help us debug this:\n"); - lockdep_print_held_locks(curr); - - printk("\nstack backtrace:\n"); - dump_stack(); - +#if HARDIRQ_VERBOSE + return class_filter(class); +#endif return 0; } -/* - * Print out an error if an invalid bit is set: - */ -static inline int -valid_state(struct task_struct *curr, struct held_lock *this, - enum lock_usage_bit new_bit, enum lock_usage_bit bad_bit) +static int softirq_verbose(struct lock_class *class) { - if (unlikely(this->class->usage_mask & (1 << bad_bit))) - return print_usage_bug(curr, this, bad_bit, new_bit); - return 1; +#if SOFTIRQ_VERBOSE + return class_filter(class); +#endif + return 0; } #define STRICT_READ_CHECKS 1 -/* - * Mark a lock with a usage bit, and validate the state transition: - */ -static int mark_lock(struct task_struct *curr, struct held_lock *this, - enum lock_usage_bit new_bit) +static int mark_lock_irq(struct task_struct *curr, struct held_lock *this, + enum lock_usage_bit new_bit) { - unsigned int new_mask = 1 << new_bit, ret = 1; - - /* - * If already set then do not dirty the cacheline, - * nor do any checks: - */ - if (likely(this->class->usage_mask & new_mask)) - return 1; - - if (!graph_lock()) - return 0; - /* - * Make sure we didnt race: - */ - if (unlikely(this->class->usage_mask & new_mask)) { - graph_unlock(); - return 1; - } - - this->class->usage_mask |= new_mask; + int ret = 1; - if (!save_trace(this->class->usage_traces + new_bit)) - return 0; - - switch (new_bit) { -#ifdef CONFIG_TRACE_IRQFLAGS + switch(new_bit) { case LOCK_USED_IN_HARDIRQ: if (!valid_state(curr, this, new_bit, LOCK_ENABLED_HARDIRQS)) return 0; @@ -1760,37 +1951,14 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this, if (softirq_verbose(this->class)) ret = 2; break; -#endif - case LOCK_USED: - /* - * Add it to the global list of classes: - */ - list_add_tail_rcu(&this->class->lock_entry, &all_lock_classes); - debug_atomic_dec(&nr_unused_locks); - break; default: - if (!debug_locks_off_graph_unlock()) - return 0; WARN_ON(1); - return 0; - } - - graph_unlock(); - - /* - * We must printk outside of the graph_lock: - */ - if (ret == 2) { - printk("\nmarked lock as {%s}:\n", usage_str[new_bit]); - print_lock(this); - print_irqtrace_events(curr); - dump_stack(); + break; } return ret; } -#ifdef CONFIG_TRACE_IRQFLAGS /* * Mark all held locks with a usage bit: */ @@ -1973,9 +2141,176 @@ void trace_softirqs_off(unsigned long ip) debug_atomic_inc(&redundant_softirqs_off); } +static int mark_irqflags(struct task_struct *curr, struct held_lock *hlock) +{ + /* + * If non-trylock use in a hardirq or softirq context, then + * mark the lock as used in these contexts: + */ + if (!hlock->trylock) { + if (hlock->read) { + if (curr->hardirq_context) + if (!mark_lock(curr, hlock, + LOCK_USED_IN_HARDIRQ_READ)) + return 0; + if (curr->softirq_context) + if (!mark_lock(curr, hlock, + LOCK_USED_IN_SOFTIRQ_READ)) + return 0; + } else { + if (curr->hardirq_context) + if (!mark_lock(curr, hlock, LOCK_USED_IN_HARDIRQ)) + return 0; + if (curr->softirq_context) + if (!mark_lock(curr, hlock, LOCK_USED_IN_SOFTIRQ)) + return 0; + } + } + if (!hlock->hardirqs_off) { + if (hlock->read) { + if (!mark_lock(curr, hlock, + LOCK_ENABLED_HARDIRQS_READ)) + return 0; + if (curr->softirqs_enabled) + if (!mark_lock(curr, hlock, + LOCK_ENABLED_SOFTIRQS_READ)) + return 0; + } else { + if (!mark_lock(curr, hlock, + LOCK_ENABLED_HARDIRQS)) + return 0; + if (curr->softirqs_enabled) + if (!mark_lock(curr, hlock, + LOCK_ENABLED_SOFTIRQS)) + return 0; + } + } + + return 1; +} + +static int separate_irq_context(struct task_struct *curr, + struct held_lock *hlock) +{ + unsigned int depth = curr->lockdep_depth; + + /* + * Keep track of points where we cross into an interrupt context: + */ + hlock->irq_context = 2*(curr->hardirq_context ? 1 : 0) + + curr->softirq_context; + if (depth) { + struct held_lock *prev_hlock; + + prev_hlock = curr->held_locks + depth-1; + /* + * If we cross into another context, reset the + * hash key (this also prevents the checking and the + * adding of the dependency to 'prev'): + */ + if (prev_hlock->irq_context != hlock->irq_context) + return 1; + } + return 0; +} + +#else + +static inline +int mark_lock_irq(struct task_struct *curr, struct held_lock *this, + enum lock_usage_bit new_bit) +{ + WARN_ON(1); + return 1; +} + +static inline int mark_irqflags(struct task_struct *curr, + struct held_lock *hlock) +{ + return 1; +} + +static inline int separate_irq_context(struct task_struct *curr, + struct held_lock *hlock) +{ + return 0; +} + #endif /* + * Mark a lock with a usage bit, and validate the state transition: + */ +static int mark_lock(struct task_struct *curr, struct held_lock *this, + enum lock_usage_bit new_bit) +{ + unsigned int new_mask = 1 << new_bit, ret = 1; + + /* + * If already set then do not dirty the cacheline, + * nor do any checks: + */ + if (likely(this->class->usage_mask & new_mask)) + return 1; + + if (!graph_lock()) + return 0; + /* + * Make sure we didnt race: + */ + if (unlikely(this->class->usage_mask & new_mask)) { + graph_unlock(); + return 1; + } + + this->class->usage_mask |= new_mask; + + if (!save_trace(this->class->usage_traces + new_bit)) + return 0; + + switch (new_bit) { + case LOCK_USED_IN_HARDIRQ: + case LOCK_USED_IN_SOFTIRQ: + case LOCK_USED_IN_HARDIRQ_READ: + case LOCK_USED_IN_SOFTIRQ_READ: + case LOCK_ENABLED_HARDIRQS: + case LOCK_ENABLED_SOFTIRQS: + case LOCK_ENABLED_HARDIRQS_READ: + case LOCK_ENABLED_SOFTIRQS_READ: + ret = mark_lock_irq(curr, this, new_bit); + if (!ret) + return 0; + break; + case LOCK_USED: + /* + * Add it to the global list of classes: + */ + list_add_tail_rcu(&this->class->lock_entry, &all_lock_classes); + debug_atomic_dec(&nr_unused_locks); + break; + default: + if (!debug_locks_off_graph_unlock()) + return 0; + WARN_ON(1); + return 0; + } + + graph_unlock(); + + /* + * We must printk outside of the graph_lock: + */ + if (ret == 2) { + printk("\nmarked lock as {%s}:\n", usage_str[new_bit]); + print_lock(this); + print_irqtrace_events(curr); + dump_stack(); + } + + return ret; +} + +/* * Initialize a lock instance's lock-class mapping info: */ void lockdep_init_map(struct lockdep_map *lock, const char *name, @@ -1999,6 +2334,9 @@ void lockdep_init_map(struct lockdep_map *lock, const char *name, lock->name = name; lock->key = key; lock->class_cache = NULL; +#ifdef CONFIG_LOCK_STAT + lock->cpu = raw_smp_processor_id(); +#endif if (subclass) register_lock_class(lock, subclass, 1); } @@ -2020,6 +2358,9 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, int chain_head = 0; u64 chain_key; + if (!prove_locking) + check = 1; + if (unlikely(!debug_locks)) return 0; @@ -2070,57 +2411,18 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, hlock->read = read; hlock->check = check; hlock->hardirqs_off = hardirqs_off; - - if (check != 2) - goto out_calc_hash; -#ifdef CONFIG_TRACE_IRQFLAGS - /* - * If non-trylock use in a hardirq or softirq context, then - * mark the lock as used in these contexts: - */ - if (!trylock) { - if (read) { - if (curr->hardirq_context) - if (!mark_lock(curr, hlock, - LOCK_USED_IN_HARDIRQ_READ)) - return 0; - if (curr->softirq_context) - if (!mark_lock(curr, hlock, - LOCK_USED_IN_SOFTIRQ_READ)) - return 0; - } else { - if (curr->hardirq_context) - if (!mark_lock(curr, hlock, LOCK_USED_IN_HARDIRQ)) - return 0; - if (curr->softirq_context) - if (!mark_lock(curr, hlock, LOCK_USED_IN_SOFTIRQ)) - return 0; - } - } - if (!hardirqs_off) { - if (read) { - if (!mark_lock(curr, hlock, - LOCK_ENABLED_HARDIRQS_READ)) - return 0; - if (curr->softirqs_enabled) - if (!mark_lock(curr, hlock, - LOCK_ENABLED_SOFTIRQS_READ)) - return 0; - } else { - if (!mark_lock(curr, hlock, - LOCK_ENABLED_HARDIRQS)) - return 0; - if (curr->softirqs_enabled) - if (!mark_lock(curr, hlock, - LOCK_ENABLED_SOFTIRQS)) - return 0; - } - } +#ifdef CONFIG_LOCK_STAT + hlock->waittime_stamp = 0; + hlock->holdtime_stamp = sched_clock(); #endif + + if (check == 2 && !mark_irqflags(curr, hlock)) + return 0; + /* mark it as used: */ if (!mark_lock(curr, hlock, LOCK_USED)) return 0; -out_calc_hash: + /* * Calculate the chain hash: it's the combined has of all the * lock keys along the dependency chain. We save the hash value @@ -2143,77 +2445,15 @@ out_calc_hash: } hlock->prev_chain_key = chain_key; - -#ifdef CONFIG_TRACE_IRQFLAGS - /* - * Keep track of points where we cross into an interrupt context: - */ - hlock->irq_context = 2*(curr->hardirq_context ? 1 : 0) + - curr->softirq_context; - if (depth) { - struct held_lock *prev_hlock; - - prev_hlock = curr->held_locks + depth-1; - /* - * If we cross into another context, reset the - * hash key (this also prevents the checking and the - * adding of the dependency to 'prev'): - */ - if (prev_hlock->irq_context != hlock->irq_context) { - chain_key = 0; - chain_head = 1; - } + if (separate_irq_context(curr, hlock)) { + chain_key = 0; + chain_head = 1; } -#endif chain_key = iterate_chain_key(chain_key, id); curr->curr_chain_key = chain_key; - /* - * Trylock needs to maintain the stack of held locks, but it - * does not add new dependencies, because trylock can be done - * in any order. - * - * We look up the chain_key and do the O(N^2) check and update of - * the dependencies only if this is a new dependency chain. - * (If lookup_chain_cache() returns with 1 it acquires - * graph_lock for us) - */ - if (!trylock && (check == 2) && lookup_chain_cache(chain_key, class)) { - /* - * Check whether last held lock: - * - * - is irq-safe, if this lock is irq-unsafe - * - is softirq-safe, if this lock is hardirq-unsafe - * - * And check whether the new lock's dependency graph - * could lead back to the previous lock. - * - * any of these scenarios could lead to a deadlock. If - * All validations - */ - int ret = check_deadlock(curr, hlock, lock, read); - - if (!ret) - return 0; - /* - * Mark recursive read, as we jump over it when - * building dependencies (just like we jump over - * trylock entries): - */ - if (ret == 2) - hlock->read = 2; - /* - * Add dependency only if this lock is not the head - * of the chain, and if it's not a secondary read-lock: - */ - if (!chain_head && ret != 2) - if (!check_prevs_add(curr, hlock)) - return 0; - graph_unlock(); - } else - /* after lookup_chain_cache(): */ - if (unlikely(!debug_locks)) - return 0; + if (!validate_chain(curr, lock, hlock, chain_head)) + return 0; curr->lockdep_depth++; check_chain_key(curr); @@ -2315,6 +2555,8 @@ lock_release_non_nested(struct task_struct *curr, return print_unlock_inbalance_bug(curr, lock, ip); found_it: + lock_release_holdtime(hlock); + /* * We have the right lock to unlock, 'hlock' points to it. * Now we remove it from the stack, and add back the other @@ -2367,6 +2609,8 @@ static int lock_release_nested(struct task_struct *curr, curr->curr_chain_key = hlock->prev_chain_key; + lock_release_holdtime(hlock); + #ifdef CONFIG_DEBUG_LOCKDEP hlock->prev_chain_key = 0; hlock->class = NULL; @@ -2441,6 +2685,9 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass, { unsigned long flags; + if (unlikely(!lock_stat && !prove_locking)) + return; + if (unlikely(current->lockdep_recursion)) return; @@ -2460,6 +2707,9 @@ void lock_release(struct lockdep_map *lock, int nested, unsigned long ip) { unsigned long flags; + if (unlikely(!lock_stat && !prove_locking)) + return; + if (unlikely(current->lockdep_recursion)) return; @@ -2473,6 +2723,166 @@ void lock_release(struct lockdep_map *lock, int nested, unsigned long ip) EXPORT_SYMBOL_GPL(lock_release); +#ifdef CONFIG_LOCK_STAT +static int +print_lock_contention_bug(struct task_struct *curr, struct lockdep_map *lock, + unsigned long ip) +{ + if (!debug_locks_off()) + return 0; + if (debug_locks_silent) + return 0; + + printk("\n=================================\n"); + printk( "[ BUG: bad contention detected! ]\n"); + printk( "---------------------------------\n"); + printk("%s/%d is trying to contend lock (", + curr->comm, curr->pid); + print_lockdep_cache(lock); + printk(") at:\n"); + print_ip_sym(ip); + printk("but there are no locks held!\n"); + printk("\nother info that might help us debug this:\n"); + lockdep_print_held_locks(curr); + + printk("\nstack backtrace:\n"); + dump_stack(); + + return 0; +} + +static void +__lock_contended(struct lockdep_map *lock, unsigned long ip) +{ + struct task_struct *curr = current; + struct held_lock *hlock, *prev_hlock; + struct lock_class_stats *stats; + unsigned int depth; + int i, point; + + depth = curr->lockdep_depth; + if (DEBUG_LOCKS_WARN_ON(!depth)) + return; + + prev_hlock = NULL; + for (i = depth-1; i >= 0; i--) { + hlock = curr->held_locks + i; + /* + * We must not cross into another context: + */ + if (prev_hlock && prev_hlock->irq_context != hlock->irq_context) + break; + if (hlock->instance == lock) + goto found_it; + prev_hlock = hlock; + } + print_lock_contention_bug(curr, lock, ip); + return; + +found_it: + hlock->waittime_stamp = sched_clock(); + + point = lock_contention_point(hlock->class, ip); + + stats = get_lock_stats(hlock->class); + if (point < ARRAY_SIZE(stats->contention_point)) + stats->contention_point[i]++; + if (lock->cpu != smp_processor_id()) + stats->bounces[bounce_contended + !!hlock->read]++; + put_lock_stats(stats); +} + +static void +__lock_acquired(struct lockdep_map *lock) +{ + struct task_struct *curr = current; + struct held_lock *hlock, *prev_hlock; + struct lock_class_stats *stats; + unsigned int depth; + u64 now; + s64 waittime = 0; + int i, cpu; + + depth = curr->lockdep_depth; + if (DEBUG_LOCKS_WARN_ON(!depth)) + return; + + prev_hlock = NULL; + for (i = depth-1; i >= 0; i--) { + hlock = curr->held_locks + i; + /* + * We must not cross into another context: + */ + if (prev_hlock && prev_hlock->irq_context != hlock->irq_context) + break; + if (hlock->instance == lock) + goto found_it; + prev_hlock = hlock; + } + print_lock_contention_bug(curr, lock, _RET_IP_); + return; + +found_it: + cpu = smp_processor_id(); + if (hlock->waittime_stamp) { + now = sched_clock(); + waittime = now - hlock->waittime_stamp; + hlock->holdtime_stamp = now; + } + + stats = get_lock_stats(hlock->class); + if (waittime) { + if (hlock->read) + lock_time_inc(&stats->read_waittime, waittime); + else + lock_time_inc(&stats->write_waittime, waittime); + } + if (lock->cpu != cpu) + stats->bounces[bounce_acquired + !!hlock->read]++; + put_lock_stats(stats); + + lock->cpu = cpu; +} + +void lock_contended(struct lockdep_map *lock, unsigned long ip) +{ + unsigned long flags; + + if (unlikely(!lock_stat)) + return; + + if (unlikely(current->lockdep_recursion)) + return; + + raw_local_irq_save(flags); + check_flags(flags); + current->lockdep_recursion = 1; + __lock_contended(lock, ip); + current->lockdep_recursion = 0; + raw_local_irq_restore(flags); +} +EXPORT_SYMBOL_GPL(lock_contended); + +void lock_acquired(struct lockdep_map *lock) +{ + unsigned long flags; + + if (unlikely(!lock_stat)) + return; + + if (unlikely(current->lockdep_recursion)) + return; + + raw_local_irq_save(flags); + check_flags(flags); + current->lockdep_recursion = 1; + __lock_acquired(lock); + current->lockdep_recursion = 0; + raw_local_irq_restore(flags); +} +EXPORT_SYMBOL_GPL(lock_acquired); +#endif + /* * Used by the testsuite, sanitize the validator state * after a simulated failure: @@ -2636,8 +3046,11 @@ void __init lockdep_info(void) sizeof(struct held_lock) * MAX_LOCK_DEPTH); #ifdef CONFIG_DEBUG_LOCKDEP - if (lockdep_init_error) - printk("WARNING: lockdep init error! Arch code didnt call lockdep_init() early enough?\n"); + if (lockdep_init_error) { + printk("WARNING: lockdep init error! Arch code didn't call lockdep_init() early enough?\n"); + printk("Call stack leading to lockdep invocation was:\n"); + print_stack_trace(&lockdep_init_trace, 0); + } #endif } diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c index 58f35e586ee3..9f17af4a2490 100644 --- a/kernel/lockdep_proc.c +++ b/kernel/lockdep_proc.c @@ -5,7 +5,8 @@ * * Started by Ingo Molnar: * - * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> + * Copyright (C) 2006,2007 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> + * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> * * Code for /proc/lockdep and /proc/lockdep_stats: * @@ -15,6 +16,10 @@ #include <linux/seq_file.h> #include <linux/kallsyms.h> #include <linux/debug_locks.h> +#include <linux/vmalloc.h> +#include <linux/sort.h> +#include <asm/uaccess.h> +#include <asm/div64.h> #include "lockdep_internals.h" @@ -271,8 +276,10 @@ static int lockdep_stats_show(struct seq_file *m, void *v) if (nr_list_entries) factor = sum_forward_deps / nr_list_entries; +#ifdef CONFIG_PROVE_LOCKING seq_printf(m, " dependency chains: %11lu [max: %lu]\n", nr_lock_chains, MAX_LOCKDEP_CHAINS); +#endif #ifdef CONFIG_TRACE_IRQFLAGS seq_printf(m, " in-hardirq chains: %11u\n", @@ -342,6 +349,292 @@ static const struct file_operations proc_lockdep_stats_operations = { .release = seq_release, }; +#ifdef CONFIG_LOCK_STAT + +struct lock_stat_data { + struct lock_class *class; + struct lock_class_stats stats; +}; + +struct lock_stat_seq { + struct lock_stat_data *iter; + struct lock_stat_data *iter_end; + struct lock_stat_data stats[MAX_LOCKDEP_KEYS]; +}; + +/* + * sort on absolute number of contentions + */ +static int lock_stat_cmp(const void *l, const void *r) +{ + const struct lock_stat_data *dl = l, *dr = r; + unsigned long nl, nr; + + nl = dl->stats.read_waittime.nr + dl->stats.write_waittime.nr; + nr = dr->stats.read_waittime.nr + dr->stats.write_waittime.nr; + + return nr - nl; +} + +static void seq_line(struct seq_file *m, char c, int offset, int length) +{ + int i; + + for (i = 0; i < offset; i++) + seq_puts(m, " "); + for (i = 0; i < length; i++) + seq_printf(m, "%c", c); + seq_puts(m, "\n"); +} + +static void snprint_time(char *buf, size_t bufsiz, s64 nr) +{ + unsigned long rem; + + rem = do_div(nr, 1000); /* XXX: do_div_signed */ + snprintf(buf, bufsiz, "%lld.%02d", (long long)nr, ((int)rem+5)/10); +} + +static void seq_time(struct seq_file *m, s64 time) +{ + char num[15]; + + snprint_time(num, sizeof(num), time); + seq_printf(m, " %14s", num); +} + +static void seq_lock_time(struct seq_file *m, struct lock_time *lt) +{ + seq_printf(m, "%14lu", lt->nr); + seq_time(m, lt->min); + seq_time(m, lt->max); + seq_time(m, lt->total); +} + +static void seq_stats(struct seq_file *m, struct lock_stat_data *data) +{ + char name[39]; + struct lock_class *class; + struct lock_class_stats *stats; + int i, namelen; + + class = data->class; + stats = &data->stats; + + namelen = 38; + if (class->name_version > 1) + namelen -= 2; /* XXX truncates versions > 9 */ + if (class->subclass) + namelen -= 2; + + if (!class->name) { + char str[KSYM_NAME_LEN]; + const char *key_name; + + key_name = __get_key_name(class->key, str); + snprintf(name, namelen, "%s", key_name); + } else { + snprintf(name, namelen, "%s", class->name); + } + namelen = strlen(name); + if (class->name_version > 1) { + snprintf(name+namelen, 3, "#%d", class->name_version); + namelen += 2; + } + if (class->subclass) { + snprintf(name+namelen, 3, "/%d", class->subclass); + namelen += 2; + } + + if (stats->write_holdtime.nr) { + if (stats->read_holdtime.nr) + seq_printf(m, "%38s-W:", name); + else + seq_printf(m, "%40s:", name); + + seq_printf(m, "%14lu ", stats->bounces[bounce_contended_write]); + seq_lock_time(m, &stats->write_waittime); + seq_printf(m, " %14lu ", stats->bounces[bounce_acquired_write]); + seq_lock_time(m, &stats->write_holdtime); + seq_puts(m, "\n"); + } + + if (stats->read_holdtime.nr) { + seq_printf(m, "%38s-R:", name); + seq_printf(m, "%14lu ", stats->bounces[bounce_contended_read]); + seq_lock_time(m, &stats->read_waittime); + seq_printf(m, " %14lu ", stats->bounces[bounce_acquired_read]); + seq_lock_time(m, &stats->read_holdtime); + seq_puts(m, "\n"); + } + + if (stats->read_waittime.nr + stats->write_waittime.nr == 0) + return; + + if (stats->read_holdtime.nr) + namelen += 2; + + for (i = 0; i < ARRAY_SIZE(class->contention_point); i++) { + char sym[KSYM_SYMBOL_LEN]; + char ip[32]; + + if (class->contention_point[i] == 0) + break; + + if (!i) + seq_line(m, '-', 40-namelen, namelen); + + sprint_symbol(sym, class->contention_point[i]); + snprintf(ip, sizeof(ip), "[<%p>]", + (void *)class->contention_point[i]); + seq_printf(m, "%40s %14lu %29s %s\n", name, + stats->contention_point[i], + ip, sym); + } + if (i) { + seq_puts(m, "\n"); + seq_line(m, '.', 0, 40 + 1 + 10 * (14 + 1)); + seq_puts(m, "\n"); + } +} + +static void seq_header(struct seq_file *m) +{ + seq_printf(m, "lock_stat version 0.2\n"); + seq_line(m, '-', 0, 40 + 1 + 10 * (14 + 1)); + seq_printf(m, "%40s %14s %14s %14s %14s %14s %14s %14s %14s " + "%14s %14s\n", + "class name", + "con-bounces", + "contentions", + "waittime-min", + "waittime-max", + "waittime-total", + "acq-bounces", + "acquisitions", + "holdtime-min", + "holdtime-max", + "holdtime-total"); + seq_line(m, '-', 0, 40 + 1 + 10 * (14 + 1)); + seq_printf(m, "\n"); +} + +static void *ls_start(struct seq_file *m, loff_t *pos) +{ + struct lock_stat_seq *data = m->private; + + if (data->iter == data->stats) + seq_header(m); + + if (data->iter == data->iter_end) + data->iter = NULL; + + return data->iter; +} + +static void *ls_next(struct seq_file *m, void *v, loff_t *pos) +{ + struct lock_stat_seq *data = m->private; + + (*pos)++; + + data->iter = v; + data->iter++; + if (data->iter == data->iter_end) + data->iter = NULL; + + return data->iter; +} + +static void ls_stop(struct seq_file *m, void *v) +{ +} + +static int ls_show(struct seq_file *m, void *v) +{ + struct lock_stat_seq *data = m->private; + + seq_stats(m, data->iter); + return 0; +} + +static struct seq_operations lockstat_ops = { + .start = ls_start, + .next = ls_next, + .stop = ls_stop, + .show = ls_show, +}; + +static int lock_stat_open(struct inode *inode, struct file *file) +{ + int res; + struct lock_class *class; + struct lock_stat_seq *data = vmalloc(sizeof(struct lock_stat_seq)); + + if (!data) + return -ENOMEM; + + res = seq_open(file, &lockstat_ops); + if (!res) { + struct lock_stat_data *iter = data->stats; + struct seq_file *m = file->private_data; + + data->iter = iter; + list_for_each_entry(class, &all_lock_classes, lock_entry) { + iter->class = class; + iter->stats = lock_stats(class); + iter++; + } + data->iter_end = iter; + + sort(data->stats, data->iter_end - data->iter, + sizeof(struct lock_stat_data), + lock_stat_cmp, NULL); + + m->private = data; + } else + vfree(data); + + return res; +} + +static ssize_t lock_stat_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct lock_class *class; + char c; + + if (count) { + if (get_user(c, buf)) + return -EFAULT; + + if (c != '0') + return count; + + list_for_each_entry(class, &all_lock_classes, lock_entry) + clear_lock_stats(class); + } + return count; +} + +static int lock_stat_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + + vfree(seq->private); + seq->private = NULL; + return seq_release(inode, file); +} + +static const struct file_operations proc_lock_stat_operations = { + .open = lock_stat_open, + .write = lock_stat_write, + .read = seq_read, + .llseek = seq_lseek, + .release = lock_stat_release, +}; +#endif /* CONFIG_LOCK_STAT */ + static int __init lockdep_proc_init(void) { struct proc_dir_entry *entry; @@ -354,6 +647,12 @@ static int __init lockdep_proc_init(void) if (entry) entry->proc_fops = &proc_lockdep_stats_operations; +#ifdef CONFIG_LOCK_STAT + entry = create_proc_entry("lock_stat", S_IRUSR, NULL); + if (entry) + entry->proc_fops = &proc_lock_stat_operations; +#endif + return 0; } diff --git a/kernel/mutex.c b/kernel/mutex.c index 303eab18484b..691b86564dd9 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c @@ -139,6 +139,12 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass) list_add_tail(&waiter.list, &lock->wait_list); waiter.task = task; + old_val = atomic_xchg(&lock->count, -1); + if (old_val == 1) + goto done; + + lock_contended(&lock->dep_map, _RET_IP_); + for (;;) { /* * Lets try to take the lock again - this is needed even if @@ -174,6 +180,8 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass) spin_lock_mutex(&lock->wait_lock, flags); } +done: + lock_acquired(&lock->dep_map); /* got the lock - rejoice! */ mutex_remove_waiter(lock, &waiter, task_thread_info(task)); debug_mutex_set_owner(lock, task_thread_info(task)); diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 495b7d4dd330..7358609e4735 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -33,13 +33,20 @@ config PM_DEBUG bool "Power Management Debug Support" depends on PM ---help--- - This option enables verbose debugging support in the Power Management - code. This is helpful when debugging and reporting various PM bugs, - like suspend support. + This option enables various debugging support in the Power Management + code. This is helpful when debugging and reporting PM bugs, like + suspend support. + +config PM_VERBOSE + bool "Verbose Power Management debugging" + depends on PM_DEBUG + default n + ---help--- + This option enables verbose messages from the Power Management code. config DISABLE_CONSOLE_SUSPEND bool "Keep console(s) enabled during suspend/resume (DANGEROUS)" - depends on PM && PM_DEBUG + depends on PM_DEBUG default n ---help--- This option turns off the console suspend mechanism that prevents @@ -50,7 +57,7 @@ config DISABLE_CONSOLE_SUSPEND config PM_TRACE bool "Suspend/resume event tracing" - depends on PM && PM_DEBUG && X86_32 && EXPERIMENTAL + depends on PM_DEBUG && X86_32 && EXPERIMENTAL default n ---help--- This enables some cheesy code to save the last PM event point in the @@ -65,18 +72,6 @@ config PM_TRACE CAUTION: this option will cause your machine's real-time clock to be set to an invalid time after a resume. -config PM_SYSFS_DEPRECATED - bool "Driver model /sys/devices/.../power/state files (DEPRECATED)" - depends on PM && SYSFS - default n - help - The driver model started out with a sysfs file intended to provide - a userspace hook for device power management. This feature has never - worked very well, except for limited testing purposes, and so it will - be removed. It's not clear that a generic mechanism could really - handle the wide variability of device power states; any replacements - are likely to be bus or driver specific. - config SOFTWARE_SUSPEND bool "Software Suspend (Hibernation)" depends on PM && SWAP && (((X86 || PPC64_SWSUSP) && (!SMP || SUSPEND_SMP)) || ((FRV || PPC32) && !SMP)) diff --git a/kernel/power/disk.c b/kernel/power/disk.c index f445b9cd60fb..324ac0188ce1 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -45,7 +45,7 @@ enum { static int hibernation_mode = HIBERNATION_SHUTDOWN; -struct hibernation_ops *hibernation_ops; +static struct hibernation_ops *hibernation_ops; /** * hibernation_set_ops - set the global hibernate operations @@ -54,7 +54,8 @@ struct hibernation_ops *hibernation_ops; void hibernation_set_ops(struct hibernation_ops *ops) { - if (ops && !(ops->prepare && ops->enter && ops->finish)) { + if (ops && !(ops->prepare && ops->enter && ops->finish + && ops->pre_restore && ops->restore_cleanup)) { WARN_ON(1); return; } @@ -74,9 +75,9 @@ void hibernation_set_ops(struct hibernation_ops *ops) * platform driver if so configured and return an error code if it fails */ -static int platform_prepare(void) +static int platform_prepare(int platform_mode) { - return (hibernation_mode == HIBERNATION_PLATFORM && hibernation_ops) ? + return (platform_mode && hibernation_ops) ? hibernation_ops->prepare() : 0; } @@ -85,13 +86,145 @@ static int platform_prepare(void) * using the platform driver (must be called after platform_prepare()) */ -static void platform_finish(void) +static void platform_finish(int platform_mode) { - if (hibernation_mode == HIBERNATION_PLATFORM && hibernation_ops) + if (platform_mode && hibernation_ops) hibernation_ops->finish(); } /** + * platform_pre_restore - prepare the platform for the restoration from a + * hibernation image. If the restore fails after this function has been + * called, platform_restore_cleanup() must be called. + */ + +static int platform_pre_restore(int platform_mode) +{ + return (platform_mode && hibernation_ops) ? + hibernation_ops->pre_restore() : 0; +} + +/** + * platform_restore_cleanup - switch the platform to the normal mode of + * operation after a failing restore. If platform_pre_restore() has been + * called before the failing restore, this function must be called too, + * regardless of the result of platform_pre_restore(). + */ + +static void platform_restore_cleanup(int platform_mode) +{ + if (platform_mode && hibernation_ops) + hibernation_ops->restore_cleanup(); +} + +/** + * hibernation_snapshot - quiesce devices and create the hibernation + * snapshot image. + * @platform_mode - if set, use the platform driver, if available, to + * prepare the platform frimware for the power transition. + * + * Must be called with pm_mutex held + */ + +int hibernation_snapshot(int platform_mode) +{ + int error; + + /* Free memory before shutting down devices. */ + error = swsusp_shrink_memory(); + if (error) + return error; + + suspend_console(); + error = device_suspend(PMSG_FREEZE); + if (error) + goto Resume_console; + + error = platform_prepare(platform_mode); + if (error) + goto Resume_devices; + + error = disable_nonboot_cpus(); + if (!error) { + if (hibernation_mode != HIBERNATION_TEST) { + in_suspend = 1; + error = swsusp_suspend(); + /* Control returns here after successful restore */ + } else { + printk("swsusp debug: Waiting for 5 seconds.\n"); + mdelay(5000); + } + } + enable_nonboot_cpus(); + Resume_devices: + platform_finish(platform_mode); + device_resume(); + Resume_console: + resume_console(); + return error; +} + +/** + * hibernation_restore - quiesce devices and restore the hibernation + * snapshot image. If successful, control returns in hibernation_snaphot() + * @platform_mode - if set, use the platform driver, if available, to + * prepare the platform frimware for the transition. + * + * Must be called with pm_mutex held + */ + +int hibernation_restore(int platform_mode) +{ + int error; + + pm_prepare_console(); + suspend_console(); + error = device_suspend(PMSG_PRETHAW); + if (error) + goto Finish; + + error = platform_pre_restore(platform_mode); + if (!error) { + error = disable_nonboot_cpus(); + if (!error) + error = swsusp_resume(); + enable_nonboot_cpus(); + } + platform_restore_cleanup(platform_mode); + device_resume(); + Finish: + resume_console(); + pm_restore_console(); + return error; +} + +/** + * hibernation_platform_enter - enter the hibernation state using the + * platform driver (if available) + */ + +int hibernation_platform_enter(void) +{ + int error; + + if (hibernation_ops) { + kernel_shutdown_prepare(SYSTEM_SUSPEND_DISK); + /* + * We have cancelled the power transition by running + * hibernation_ops->finish() before saving the image, so we + * should let the firmware know that we're going to enter the + * sleep state after all + */ + error = hibernation_ops->prepare(); + if (!error) + error = hibernation_ops->enter(); + } else { + error = -ENOSYS; + } + return error; +} + +/** * power_down - Shut the machine down for hibernation. * * Use the platform driver, if configured so; otherwise try @@ -111,11 +244,7 @@ static void power_down(void) kernel_restart(NULL); break; case HIBERNATION_PLATFORM: - if (hibernation_ops) { - kernel_shutdown_prepare(SYSTEM_SUSPEND_DISK); - hibernation_ops->enter(); - break; - } + hibernation_platform_enter(); } kernel_halt(); /* @@ -152,9 +281,16 @@ int hibernate(void) { int error; + mutex_lock(&pm_mutex); /* The snapshot device should not be opened while we're running */ - if (!atomic_add_unless(&snapshot_device_available, -1, 0)) - return -EBUSY; + if (!atomic_add_unless(&snapshot_device_available, -1, 0)) { + error = -EBUSY; + goto Unlock; + } + + error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE); + if (error) + goto Exit; /* Allocate memory management structures */ error = create_basic_memory_bitmaps(); @@ -165,75 +301,35 @@ int hibernate(void) if (error) goto Finish; - mutex_lock(&pm_mutex); if (hibernation_mode == HIBERNATION_TESTPROC) { printk("swsusp debug: Waiting for 5 seconds.\n"); mdelay(5000); goto Thaw; } + error = hibernation_snapshot(hibernation_mode == HIBERNATION_PLATFORM); + if (in_suspend && !error) { + unsigned int flags = 0; - /* Free memory before shutting down devices. */ - error = swsusp_shrink_memory(); - if (error) - goto Thaw; - - error = platform_prepare(); - if (error) - goto Thaw; - - suspend_console(); - error = device_suspend(PMSG_FREEZE); - if (error) { - printk(KERN_ERR "PM: Some devices failed to suspend\n"); - goto Resume_devices; - } - error = disable_nonboot_cpus(); - if (error) - goto Enable_cpus; - - if (hibernation_mode == HIBERNATION_TEST) { - printk("swsusp debug: Waiting for 5 seconds.\n"); - mdelay(5000); - goto Enable_cpus; - } - - pr_debug("PM: snapshotting memory.\n"); - in_suspend = 1; - error = swsusp_suspend(); - if (error) - goto Enable_cpus; - - if (in_suspend) { - enable_nonboot_cpus(); - platform_finish(); - device_resume(); - resume_console(); + if (hibernation_mode == HIBERNATION_PLATFORM) + flags |= SF_PLATFORM_MODE; pr_debug("PM: writing image.\n"); - error = swsusp_write(); + error = swsusp_write(flags); + swsusp_free(); if (!error) power_down(); - else { - swsusp_free(); - goto Thaw; - } } else { pr_debug("PM: Image restored successfully.\n"); + swsusp_free(); } - - swsusp_free(); - Enable_cpus: - enable_nonboot_cpus(); - Resume_devices: - platform_finish(); - device_resume(); - resume_console(); Thaw: - mutex_unlock(&pm_mutex); unprepare_processes(); Finish: free_basic_memory_bitmaps(); Exit: + pm_notifier_call_chain(PM_POST_HIBERNATION); atomic_inc(&snapshot_device_available); + Unlock: + mutex_unlock(&pm_mutex); return error; } @@ -253,6 +349,7 @@ int hibernate(void) static int software_resume(void) { int error; + unsigned int flags; mutex_lock(&pm_mutex); if (!swsusp_resume_device) { @@ -300,30 +397,12 @@ static int software_resume(void) pr_debug("PM: Reading swsusp image.\n"); - error = swsusp_read(); - if (error) { - swsusp_free(); - goto Thaw; - } - - pr_debug("PM: Preparing devices for restore.\n"); - - suspend_console(); - error = device_suspend(PMSG_PRETHAW); - if (error) - goto Free; - - error = disable_nonboot_cpus(); + error = swsusp_read(&flags); if (!error) - swsusp_resume(); + hibernation_restore(flags & SF_PLATFORM_MODE); - enable_nonboot_cpus(); - Free: - swsusp_free(); - device_resume(); - resume_console(); - Thaw: printk(KERN_ERR "PM: Restore failed, recovering.\n"); + swsusp_free(); unprepare_processes(); Done: free_basic_memory_bitmaps(); @@ -333,7 +412,7 @@ static int software_resume(void) Unlock: mutex_unlock(&pm_mutex); pr_debug("PM: Resume from disk failed.\n"); - return 0; + return error; } late_initcall(software_resume); diff --git a/kernel/power/main.c b/kernel/power/main.c index fc45ed22620f..32147b57c3bf 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -23,6 +23,8 @@ #include "power.h" +BLOCKING_NOTIFIER_HEAD(pm_chain_head); + /*This is just an arbitrary number */ #define FREE_PAGE_NUMBER (100) @@ -63,14 +65,11 @@ static inline void pm_finish(suspend_state_t state) /** * suspend_prepare - Do prep work before entering low-power state. - * @state: State we're entering. * - * This is common code that is called for each state that we're - * entering. Allocate a console, stop all processes, then make sure - * the platform can enter the requested state. + * This is common code that is called for each state that we're entering. + * Run suspend notifiers, allocate a console and stop all processes. */ - -static int suspend_prepare(suspend_state_t state) +static int suspend_prepare(void) { int error; unsigned int free_pages; @@ -78,6 +77,10 @@ static int suspend_prepare(suspend_state_t state) if (!pm_ops || !pm_ops->enter) return -EPERM; + error = pm_notifier_call_chain(PM_SUSPEND_PREPARE); + if (error) + goto Finish; + pm_prepare_console(); if (freeze_processes()) { @@ -85,46 +88,23 @@ static int suspend_prepare(suspend_state_t state) goto Thaw; } - if ((free_pages = global_page_state(NR_FREE_PAGES)) - < FREE_PAGE_NUMBER) { + free_pages = global_page_state(NR_FREE_PAGES); + if (free_pages < FREE_PAGE_NUMBER) { pr_debug("PM: free some memory\n"); shrink_all_memory(FREE_PAGE_NUMBER - free_pages); if (nr_free_pages() < FREE_PAGE_NUMBER) { error = -ENOMEM; printk(KERN_ERR "PM: No enough memory\n"); - goto Thaw; } } - - if (pm_ops->set_target) { - error = pm_ops->set_target(state); - if (error) - goto Thaw; - } - suspend_console(); - error = device_suspend(PMSG_SUSPEND); - if (error) { - printk(KERN_ERR "Some devices failed to suspend\n"); - goto Resume_console; - } - if (pm_ops->prepare) { - if ((error = pm_ops->prepare(state))) - goto Resume_devices; - } - - error = disable_nonboot_cpus(); if (!error) return 0; - enable_nonboot_cpus(); - pm_finish(state); - Resume_devices: - device_resume(); - Resume_console: - resume_console(); Thaw: thaw_processes(); pm_restore_console(); + Finish: + pm_notifier_call_chain(PM_POST_SUSPEND); return error; } @@ -140,6 +120,12 @@ void __attribute__ ((weak)) arch_suspend_enable_irqs(void) local_irq_enable(); } +/** + * suspend_enter - enter the desired system sleep state. + * @state: state to enter + * + * This function should be called after devices have been suspended. + */ int suspend_enter(suspend_state_t state) { int error = 0; @@ -159,23 +145,58 @@ int suspend_enter(suspend_state_t state) return error; } +/** + * suspend_devices_and_enter - suspend devices and enter the desired system sleep + * state. + * @state: state to enter + */ +int suspend_devices_and_enter(suspend_state_t state) +{ + int error; + + if (!pm_ops) + return -ENOSYS; + + if (pm_ops->set_target) { + error = pm_ops->set_target(state); + if (error) + return error; + } + suspend_console(); + error = device_suspend(PMSG_SUSPEND); + if (error) { + printk(KERN_ERR "Some devices failed to suspend\n"); + goto Resume_console; + } + if (pm_ops->prepare) { + error = pm_ops->prepare(state); + if (error) + goto Resume_devices; + } + error = disable_nonboot_cpus(); + if (!error) + suspend_enter(state); + + enable_nonboot_cpus(); + pm_finish(state); + Resume_devices: + device_resume(); + Resume_console: + resume_console(); + return error; +} /** * suspend_finish - Do final work before exiting suspend sequence. - * @state: State we're coming out of. * * Call platform code to clean up, restart processes, and free the * console that we've allocated. This is not called for suspend-to-disk. */ - -static void suspend_finish(suspend_state_t state) +static void suspend_finish(void) { - enable_nonboot_cpus(); - pm_finish(state); - device_resume(); - resume_console(); thaw_processes(); pm_restore_console(); + pm_notifier_call_chain(PM_POST_SUSPEND); } @@ -207,7 +228,6 @@ static inline int valid_state(suspend_state_t state) * Then, do the setup for suspend, enter the state, and cleaup (after * we've woken up). */ - static int enter_state(suspend_state_t state) { int error; @@ -218,14 +238,14 @@ static int enter_state(suspend_state_t state) return -EBUSY; pr_debug("PM: Preparing system for %s sleep\n", pm_states[state]); - if ((error = suspend_prepare(state))) + if ((error = suspend_prepare())) goto Unlock; pr_debug("PM: Entering %s sleep\n", pm_states[state]); - error = suspend_enter(state); + error = suspend_devices_and_enter(state); pr_debug("PM: Finishing wakeup.\n"); - suspend_finish(state); + suspend_finish(); Unlock: mutex_unlock(&pm_mutex); return error; diff --git a/kernel/power/power.h b/kernel/power/power.h index 51381487103f..5f24c786f8ec 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -25,7 +25,10 @@ struct swsusp_info { */ #define SPARE_PAGES ((1024 * 1024) >> PAGE_SHIFT) -extern struct hibernation_ops *hibernation_ops; +/* kernel/power/disk.c */ +extern int hibernation_snapshot(int platform_mode); +extern int hibernation_restore(int platform_mode); +extern int hibernation_platform_enter(void); #endif extern int pfn_is_nosave(unsigned long); @@ -152,16 +155,34 @@ extern sector_t alloc_swapdev_block(int swap); extern void free_all_swap_pages(int swap); extern int swsusp_swap_in_use(void); +/* + * Flags that can be passed from the hibernatig hernel to the "boot" kernel in + * the image header. + */ +#define SF_PLATFORM_MODE 1 + +/* kernel/power/disk.c */ extern int swsusp_check(void); extern int swsusp_shrink_memory(void); extern void swsusp_free(void); extern int swsusp_suspend(void); extern int swsusp_resume(void); -extern int swsusp_read(void); -extern int swsusp_write(void); +extern int swsusp_read(unsigned int *flags_p); +extern int swsusp_write(unsigned int flags); extern void swsusp_close(void); -extern int suspend_enter(suspend_state_t state); struct timeval; +/* kernel/power/swsusp.c */ extern void swsusp_show_speed(struct timeval *, struct timeval *, unsigned int, char *); + +/* kernel/power/main.c */ +extern int suspend_enter(suspend_state_t state); +extern int suspend_devices_and_enter(suspend_state_t state); +extern struct blocking_notifier_head pm_chain_head; + +static inline int pm_notifier_call_chain(unsigned long val) +{ + return (blocking_notifier_call_chain(&pm_chain_head, val, NULL) + == NOTIFY_BAD) ? -EINVAL : 0; +} diff --git a/kernel/power/process.c b/kernel/power/process.c index e0233d8422b9..3434940a3df1 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -40,7 +40,7 @@ static inline void frozen_process(void) current->flags |= PF_FROZEN; wmb(); } - clear_tsk_thread_flag(current, TIF_FREEZE); + clear_freeze_flag(current); } /* Refrigerator is place where frozen processes are stored :-). */ @@ -72,20 +72,19 @@ void refrigerator(void) schedule(); } pr_debug("%s left refrigerator\n", current->comm); - current->state = save; + __set_current_state(save); } -static inline void freeze_process(struct task_struct *p) +static void freeze_task(struct task_struct *p) { unsigned long flags; if (!freezing(p)) { rmb(); if (!frozen(p)) { + set_freeze_flag(p); if (p->state == TASK_STOPPED) force_sig_specific(SIGSTOP, p); - - freeze(p); spin_lock_irqsave(&p->sighand->siglock, flags); signal_wake_up(p, p->state == TASK_STOPPED); spin_unlock_irqrestore(&p->sighand->siglock, flags); @@ -99,19 +98,14 @@ static void cancel_freezing(struct task_struct *p) if (freezing(p)) { pr_debug(" clean up: %s\n", p->comm); - do_not_freeze(p); + clear_freeze_flag(p); spin_lock_irqsave(&p->sighand->siglock, flags); recalc_sigpending_and_wake(p); spin_unlock_irqrestore(&p->sighand->siglock, flags); } } -static inline int is_user_space(struct task_struct *p) -{ - return p->mm && !(p->flags & PF_BORROWED_MM); -} - -static unsigned int try_to_freeze_tasks(int freeze_user_space) +static int try_to_freeze_tasks(int freeze_user_space) { struct task_struct *g, *p; unsigned long end_time; @@ -122,26 +116,40 @@ static unsigned int try_to_freeze_tasks(int freeze_user_space) todo = 0; read_lock(&tasklist_lock); do_each_thread(g, p) { - if (!freezeable(p)) + if (frozen(p) || !freezeable(p)) continue; - if (frozen(p)) - continue; - - if (p->state == TASK_TRACED && frozen(p->parent)) { - cancel_freezing(p); - continue; + if (freeze_user_space) { + if (p->state == TASK_TRACED && + frozen(p->parent)) { + cancel_freezing(p); + continue; + } + /* + * Kernel threads should not have TIF_FREEZE set + * at this point, so we must ensure that either + * p->mm is not NULL *and* PF_BORROWED_MM is + * unset, or TIF_FRREZE is left unset. + * The task_lock() is necessary to prevent races + * with exit_mm() or use_mm()/unuse_mm() from + * occuring. + */ + task_lock(p); + if (!p->mm || (p->flags & PF_BORROWED_MM)) { + task_unlock(p); + continue; + } + freeze_task(p); + task_unlock(p); + } else { + freeze_task(p); } - if (freeze_user_space && !is_user_space(p)) - continue; - - freeze_process(p); if (!freezer_should_skip(p)) todo++; } while_each_thread(g, p); read_unlock(&tasklist_lock); yield(); /* Yield is okay here */ - if (todo && time_after(jiffies, end_time)) + if (time_after(jiffies, end_time)) break; } while (todo); @@ -152,49 +160,41 @@ static unsigned int try_to_freeze_tasks(int freeze_user_space) * but it cleans up leftover PF_FREEZE requests. */ printk("\n"); - printk(KERN_ERR "Stopping %s timed out after %d seconds " + printk(KERN_ERR "Freezing of %s timed out after %d seconds " "(%d tasks refusing to freeze):\n", - freeze_user_space ? "user space processes" : - "kernel threads", + freeze_user_space ? "user space " : "tasks ", TIMEOUT / HZ, todo); + show_state(); read_lock(&tasklist_lock); do_each_thread(g, p) { - if (freeze_user_space && !is_user_space(p)) - continue; - task_lock(p); - if (freezeable(p) && !frozen(p) && - !freezer_should_skip(p)) + if (freezing(p) && !freezer_should_skip(p)) printk(KERN_ERR " %s\n", p->comm); - cancel_freezing(p); task_unlock(p); } while_each_thread(g, p); read_unlock(&tasklist_lock); } - return todo; + return todo ? -EBUSY : 0; } /** * freeze_processes - tell processes to enter the refrigerator - * - * Returns 0 on success, or the number of processes that didn't freeze, - * although they were told to. */ int freeze_processes(void) { - unsigned int nr_unfrozen; + int error; printk("Stopping tasks ... "); - nr_unfrozen = try_to_freeze_tasks(FREEZER_USER_SPACE); - if (nr_unfrozen) - return nr_unfrozen; + error = try_to_freeze_tasks(FREEZER_USER_SPACE); + if (error) + return error; sys_sync(); - nr_unfrozen = try_to_freeze_tasks(FREEZER_KERNEL_THREADS); - if (nr_unfrozen) - return nr_unfrozen; + error = try_to_freeze_tasks(FREEZER_KERNEL_THREADS); + if (error) + return error; printk("done.\n"); BUG_ON(in_atomic()); @@ -210,7 +210,7 @@ static void thaw_tasks(int thaw_user_space) if (!freezeable(p)) continue; - if (is_user_space(p) == !thaw_user_space) + if (!p->mm == thaw_user_space) continue; thaw_process(p); diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 8b1a1b837145..917aba100575 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -33,8 +33,9 @@ extern char resume_file[]; #define SWSUSP_SIG "S1SUSPEND" struct swsusp_header { - char reserved[PAGE_SIZE - 20 - sizeof(sector_t)]; + char reserved[PAGE_SIZE - 20 - sizeof(sector_t) - sizeof(int)]; sector_t image; + unsigned int flags; /* Flags to pass to the "boot" kernel */ char orig_sig[10]; char sig[10]; } __attribute__((packed)); @@ -138,7 +139,7 @@ static int wait_on_bio_chain(struct bio **bio_chain) * Saving part */ -static int mark_swapfiles(sector_t start) +static int mark_swapfiles(sector_t start, unsigned int flags) { int error; @@ -148,6 +149,7 @@ static int mark_swapfiles(sector_t start) memcpy(swsusp_header->orig_sig,swsusp_header->sig, 10); memcpy(swsusp_header->sig,SWSUSP_SIG, 10); swsusp_header->image = start; + swsusp_header->flags = flags; error = bio_write_page(swsusp_resume_block, swsusp_header, NULL); } else { @@ -369,6 +371,7 @@ static int enough_swap(unsigned int nr_pages) /** * swsusp_write - Write entire image and metadata. + * @flags: flags to pass to the "boot" kernel in the image header * * It is important _NOT_ to umount filesystems at this point. We want * them synced (in case something goes wrong) but we DO not want to mark @@ -376,7 +379,7 @@ static int enough_swap(unsigned int nr_pages) * correctly, we'll mark system clean, anyway.) */ -int swsusp_write(void) +int swsusp_write(unsigned int flags) { struct swap_map_handle handle; struct snapshot_handle snapshot; @@ -415,7 +418,7 @@ int swsusp_write(void) if (!error) { flush_swap_writer(&handle); printk("S"); - error = mark_swapfiles(start); + error = mark_swapfiles(start, flags); printk("|\n"); } } @@ -540,13 +543,20 @@ static int load_image(struct swap_map_handle *handle, return error; } -int swsusp_read(void) +/** + * swsusp_read - read the hibernation image. + * @flags_p: flags passed by the "frozen" kernel in the image header should + * be written into this memeory location + */ + +int swsusp_read(unsigned int *flags_p) { int error; struct swap_map_handle handle; struct snapshot_handle snapshot; struct swsusp_info *header; + *flags_p = swsusp_header->flags; if (IS_ERR(resume_bdev)) { pr_debug("swsusp: block device not initialised\n"); return PTR_ERR(resume_bdev); diff --git a/kernel/power/user.c b/kernel/power/user.c index d65305b515b1..bd0723a7df3f 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -128,92 +128,6 @@ static ssize_t snapshot_write(struct file *filp, const char __user *buf, return res; } -static inline int platform_prepare(void) -{ - int error = 0; - - if (hibernation_ops) - error = hibernation_ops->prepare(); - - return error; -} - -static inline void platform_finish(void) -{ - if (hibernation_ops) - hibernation_ops->finish(); -} - -static inline int snapshot_suspend(int platform_suspend) -{ - int error; - - mutex_lock(&pm_mutex); - /* Free memory before shutting down devices. */ - error = swsusp_shrink_memory(); - if (error) - goto Finish; - - if (platform_suspend) { - error = platform_prepare(); - if (error) - goto Finish; - } - suspend_console(); - error = device_suspend(PMSG_FREEZE); - if (error) - goto Resume_devices; - - error = disable_nonboot_cpus(); - if (!error) { - in_suspend = 1; - error = swsusp_suspend(); - } - enable_nonboot_cpus(); - Resume_devices: - if (platform_suspend) - platform_finish(); - - device_resume(); - resume_console(); - Finish: - mutex_unlock(&pm_mutex); - return error; -} - -static inline int snapshot_restore(int platform_suspend) -{ - int error; - - mutex_lock(&pm_mutex); - pm_prepare_console(); - if (platform_suspend) { - error = platform_prepare(); - if (error) - goto Finish; - } - suspend_console(); - error = device_suspend(PMSG_PRETHAW); - if (error) - goto Resume_devices; - - error = disable_nonboot_cpus(); - if (!error) - error = swsusp_resume(); - - enable_nonboot_cpus(); - Resume_devices: - if (platform_suspend) - platform_finish(); - - device_resume(); - resume_console(); - Finish: - pm_restore_console(); - mutex_unlock(&pm_mutex); - return error; -} - static int snapshot_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg) { @@ -237,10 +151,14 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, if (data->frozen) break; mutex_lock(&pm_mutex); - if (freeze_processes()) { - thaw_processes(); - error = -EBUSY; + error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE); + if (!error) { + error = freeze_processes(); + if (error) + thaw_processes(); } + if (error) + pm_notifier_call_chain(PM_POST_HIBERNATION); mutex_unlock(&pm_mutex); if (!error) data->frozen = 1; @@ -251,6 +169,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, break; mutex_lock(&pm_mutex); thaw_processes(); + pm_notifier_call_chain(PM_POST_HIBERNATION); mutex_unlock(&pm_mutex); data->frozen = 0; break; @@ -260,7 +179,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, error = -EPERM; break; } - error = snapshot_suspend(data->platform_suspend); + error = hibernation_snapshot(data->platform_suspend); if (!error) error = put_user(in_suspend, (unsigned int __user *)arg); if (!error) @@ -274,7 +193,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, error = -EPERM; break; } - error = snapshot_restore(data->platform_suspend); + error = hibernation_restore(data->platform_suspend); break; case SNAPSHOT_FREE: @@ -336,47 +255,19 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, break; case SNAPSHOT_S2RAM: - if (!pm_ops) { - error = -ENOSYS; - break; - } - if (!data->frozen) { error = -EPERM; break; } - if (!mutex_trylock(&pm_mutex)) { error = -EBUSY; break; } - - if (pm_ops->prepare) { - error = pm_ops->prepare(PM_SUSPEND_MEM); - if (error) - goto OutS3; - } - - /* Put devices to sleep */ - suspend_console(); - error = device_suspend(PMSG_SUSPEND); - if (error) { - printk(KERN_ERR "Failed to suspend some devices.\n"); - } else { - error = disable_nonboot_cpus(); - if (!error) { - /* Enter S3, system is already frozen */ - suspend_enter(PM_SUSPEND_MEM); - enable_nonboot_cpus(); - } - /* Wake up devices */ - device_resume(); - } - resume_console(); - if (pm_ops->finish) - pm_ops->finish(PM_SUSPEND_MEM); - - OutS3: + /* + * Tasks are frozen and the notifiers have been called with + * PM_HIBERNATION_PREPARE + */ + error = suspend_devices_and_enter(PM_SUSPEND_MEM); mutex_unlock(&pm_mutex); break; @@ -386,19 +277,14 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, switch (arg) { case PMOPS_PREPARE: - if (hibernation_ops) { - data->platform_suspend = 1; - error = 0; - } else { - error = -ENOSYS; - } + data->platform_suspend = 1; + error = 0; break; case PMOPS_ENTER: - if (data->platform_suspend) { - kernel_shutdown_prepare(SYSTEM_SUSPEND_DISK); - error = hibernation_ops->enter(); - } + if (data->platform_suspend) + error = hibernation_platform_enter(); + break; case PMOPS_FINISH: diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 4a1745f1dadf..82a558b655da 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -142,7 +142,7 @@ static int may_attach(struct task_struct *task) return -EPERM; smp_rmb(); if (task->mm) - dumpable = task->mm->dumpable; + dumpable = get_dumpable(task->mm); if (!dumpable && !capable(CAP_SYS_PTRACE)) return -EPERM; diff --git a/kernel/relay.c b/kernel/relay.c index a615a8f513fc..510fbbd7b500 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -80,7 +80,7 @@ static struct vm_operations_struct relay_file_mmap_ops = { * * Caller should already have grabbed mmap_sem. */ -int relay_mmap_buf(struct rchan_buf *buf, struct vm_area_struct *vma) +static int relay_mmap_buf(struct rchan_buf *buf, struct vm_area_struct *vma) { unsigned long length = vma->vm_end - vma->vm_start; struct file *filp = vma->vm_file; @@ -145,7 +145,7 @@ depopulate: * * Returns channel buffer if successful, %NULL otherwise. */ -struct rchan_buf *relay_create_buf(struct rchan *chan) +static struct rchan_buf *relay_create_buf(struct rchan *chan) { struct rchan_buf *buf = kzalloc(sizeof(struct rchan_buf), GFP_KERNEL); if (!buf) @@ -175,7 +175,7 @@ free_buf: * * Should only be called from kref_put(). */ -void relay_destroy_channel(struct kref *kref) +static void relay_destroy_channel(struct kref *kref) { struct rchan *chan = container_of(kref, struct rchan, kref); kfree(chan); @@ -185,7 +185,7 @@ void relay_destroy_channel(struct kref *kref) * relay_destroy_buf - destroy an rchan_buf struct and associated buffer * @buf: the buffer struct */ -void relay_destroy_buf(struct rchan_buf *buf) +static void relay_destroy_buf(struct rchan_buf *buf) { struct rchan *chan = buf->chan; unsigned int i; @@ -210,7 +210,7 @@ void relay_destroy_buf(struct rchan_buf *buf) * rchan_buf_struct and the channel buffer. Should only be called from * kref_put(). */ -void relay_remove_buf(struct kref *kref) +static void relay_remove_buf(struct kref *kref) { struct rchan_buf *buf = container_of(kref, struct rchan_buf, kref); buf->chan->cb->remove_buf_file(buf->dentry); @@ -223,11 +223,10 @@ void relay_remove_buf(struct kref *kref) * * Returns 1 if the buffer is empty, 0 otherwise. */ -int relay_buf_empty(struct rchan_buf *buf) +static int relay_buf_empty(struct rchan_buf *buf) { return (buf->subbufs_produced - buf->subbufs_consumed) ? 0 : 1; } -EXPORT_SYMBOL_GPL(relay_buf_empty); /** * relay_buf_full - boolean, is the channel buffer full? diff --git a/kernel/rwsem.c b/kernel/rwsem.c index 9a87886b022e..1ec620c03064 100644 --- a/kernel/rwsem.c +++ b/kernel/rwsem.c @@ -20,7 +20,7 @@ void down_read(struct rw_semaphore *sem) might_sleep(); rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_); - __down_read(sem); + LOCK_CONTENDED(sem, __down_read_trylock, __down_read); } EXPORT_SYMBOL(down_read); @@ -47,7 +47,7 @@ void down_write(struct rw_semaphore *sem) might_sleep(); rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_); - __down_write(sem); + LOCK_CONTENDED(sem, __down_write_trylock, __down_write); } EXPORT_SYMBOL(down_write); @@ -111,7 +111,7 @@ void down_read_nested(struct rw_semaphore *sem, int subclass) might_sleep(); rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_); - __down_read(sem); + LOCK_CONTENDED(sem, __down_read_trylock, __down_read); } EXPORT_SYMBOL(down_read_nested); @@ -130,7 +130,7 @@ void down_write_nested(struct rw_semaphore *sem, int subclass) might_sleep(); rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_); - __down_write_nested(sem, subclass); + LOCK_CONTENDED(sem, __down_write_trylock, __down_write); } EXPORT_SYMBOL(down_write_nested); diff --git a/kernel/sched.c b/kernel/sched.c index cb31fb4a1379..645256b228c3 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -301,7 +301,7 @@ struct rq { struct lock_class_key rq_lock_key; }; -static DEFINE_PER_CPU(struct rq, runqueues) ____cacheline_aligned_in_smp; +static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); static DEFINE_MUTEX(sched_hotcpu_mutex); static inline void check_preempt_curr(struct rq *rq, struct task_struct *p) diff --git a/kernel/spinlock.c b/kernel/spinlock.c index 2c6c2bf85514..cd72424c2662 100644 --- a/kernel/spinlock.c +++ b/kernel/spinlock.c @@ -72,7 +72,7 @@ void __lockfunc _read_lock(rwlock_t *lock) { preempt_disable(); rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); - _raw_read_lock(lock); + LOCK_CONTENDED(lock, _raw_read_trylock, _raw_read_lock); } EXPORT_SYMBOL(_read_lock); @@ -88,8 +88,8 @@ unsigned long __lockfunc _spin_lock_irqsave(spinlock_t *lock) * _raw_spin_lock_flags() code, because lockdep assumes * that interrupts are not re-enabled during lock-acquire: */ -#ifdef CONFIG_PROVE_LOCKING - _raw_spin_lock(lock); +#ifdef CONFIG_LOCKDEP + LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock); #else _raw_spin_lock_flags(lock, &flags); #endif @@ -102,7 +102,7 @@ void __lockfunc _spin_lock_irq(spinlock_t *lock) local_irq_disable(); preempt_disable(); spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); - _raw_spin_lock(lock); + LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock); } EXPORT_SYMBOL(_spin_lock_irq); @@ -111,7 +111,7 @@ void __lockfunc _spin_lock_bh(spinlock_t *lock) local_bh_disable(); preempt_disable(); spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); - _raw_spin_lock(lock); + LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock); } EXPORT_SYMBOL(_spin_lock_bh); @@ -122,7 +122,7 @@ unsigned long __lockfunc _read_lock_irqsave(rwlock_t *lock) local_irq_save(flags); preempt_disable(); rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); - _raw_read_lock(lock); + LOCK_CONTENDED(lock, _raw_read_trylock, _raw_read_lock); return flags; } EXPORT_SYMBOL(_read_lock_irqsave); @@ -132,7 +132,7 @@ void __lockfunc _read_lock_irq(rwlock_t *lock) local_irq_disable(); preempt_disable(); rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); - _raw_read_lock(lock); + LOCK_CONTENDED(lock, _raw_read_trylock, _raw_read_lock); } EXPORT_SYMBOL(_read_lock_irq); @@ -141,7 +141,7 @@ void __lockfunc _read_lock_bh(rwlock_t *lock) local_bh_disable(); preempt_disable(); rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); - _raw_read_lock(lock); + LOCK_CONTENDED(lock, _raw_read_trylock, _raw_read_lock); } EXPORT_SYMBOL(_read_lock_bh); @@ -152,7 +152,7 @@ unsigned long __lockfunc _write_lock_irqsave(rwlock_t *lock) local_irq_save(flags); preempt_disable(); rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); - _raw_write_lock(lock); + LOCK_CONTENDED(lock, _raw_write_trylock, _raw_write_lock); return flags; } EXPORT_SYMBOL(_write_lock_irqsave); @@ -162,7 +162,7 @@ void __lockfunc _write_lock_irq(rwlock_t *lock) local_irq_disable(); preempt_disable(); rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); - _raw_write_lock(lock); + LOCK_CONTENDED(lock, _raw_write_trylock, _raw_write_lock); } EXPORT_SYMBOL(_write_lock_irq); @@ -171,7 +171,7 @@ void __lockfunc _write_lock_bh(rwlock_t *lock) local_bh_disable(); preempt_disable(); rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); - _raw_write_lock(lock); + LOCK_CONTENDED(lock, _raw_write_trylock, _raw_write_lock); } EXPORT_SYMBOL(_write_lock_bh); @@ -179,7 +179,7 @@ void __lockfunc _spin_lock(spinlock_t *lock) { preempt_disable(); spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); - _raw_spin_lock(lock); + LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock); } EXPORT_SYMBOL(_spin_lock); @@ -188,7 +188,7 @@ void __lockfunc _write_lock(rwlock_t *lock) { preempt_disable(); rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); - _raw_write_lock(lock); + LOCK_CONTENDED(lock, _raw_write_trylock, _raw_write_lock); } EXPORT_SYMBOL(_write_lock); @@ -289,7 +289,7 @@ void __lockfunc _spin_lock_nested(spinlock_t *lock, int subclass) { preempt_disable(); spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_); - _raw_spin_lock(lock); + LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock); } EXPORT_SYMBOL(_spin_lock_nested); @@ -305,8 +305,8 @@ unsigned long __lockfunc _spin_lock_irqsave_nested(spinlock_t *lock, int subclas * _raw_spin_lock_flags() code, because lockdep assumes * that interrupts are not re-enabled during lock-acquire: */ -#ifdef CONFIG_PROVE_SPIN_LOCKING - _raw_spin_lock(lock); +#ifdef CONFIG_LOCKDEP + LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock); #else _raw_spin_lock_flags(lock, &flags); #endif diff --git a/kernel/sys.c b/kernel/sys.c index 18987c7f6add..08562f419768 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -100,6 +100,13 @@ struct pid *cad_pid; EXPORT_SYMBOL(cad_pid); /* + * If set, this is used for preparing the system to power off. + */ + +void (*pm_power_off_prepare)(void); +EXPORT_SYMBOL(pm_power_off_prepare); + +/* * Notifier list for kernel code which wants to be called * at shutdown. This is used to stop any idling DMA operations * and the like. @@ -867,6 +874,8 @@ EXPORT_SYMBOL_GPL(kernel_halt); void kernel_power_off(void) { kernel_shutdown_prepare(SYSTEM_POWER_OFF); + if (pm_power_off_prepare) + pm_power_off_prepare(); printk(KERN_EMERG "Power down.\n"); machine_power_off(); } @@ -1027,7 +1036,7 @@ asmlinkage long sys_setregid(gid_t rgid, gid_t egid) return -EPERM; } if (new_egid != old_egid) { - current->mm->dumpable = suid_dumpable; + set_dumpable(current->mm, suid_dumpable); smp_wmb(); } if (rgid != (gid_t) -1 || @@ -1057,13 +1066,13 @@ asmlinkage long sys_setgid(gid_t gid) if (capable(CAP_SETGID)) { if (old_egid != gid) { - current->mm->dumpable = suid_dumpable; + set_dumpable(current->mm, suid_dumpable); smp_wmb(); } current->gid = current->egid = current->sgid = current->fsgid = gid; } else if ((gid == current->gid) || (gid == current->sgid)) { if (old_egid != gid) { - current->mm->dumpable = suid_dumpable; + set_dumpable(current->mm, suid_dumpable); smp_wmb(); } current->egid = current->fsgid = gid; @@ -1094,7 +1103,7 @@ static int set_user(uid_t new_ruid, int dumpclear) switch_uid(new_user); if (dumpclear) { - current->mm->dumpable = suid_dumpable; + set_dumpable(current->mm, suid_dumpable); smp_wmb(); } current->uid = new_ruid; @@ -1150,7 +1159,7 @@ asmlinkage long sys_setreuid(uid_t ruid, uid_t euid) return -EAGAIN; if (new_euid != old_euid) { - current->mm->dumpable = suid_dumpable; + set_dumpable(current->mm, suid_dumpable); smp_wmb(); } current->fsuid = current->euid = new_euid; @@ -1200,7 +1209,7 @@ asmlinkage long sys_setuid(uid_t uid) return -EPERM; if (old_euid != uid) { - current->mm->dumpable = suid_dumpable; + set_dumpable(current->mm, suid_dumpable); smp_wmb(); } current->fsuid = current->euid = uid; @@ -1245,7 +1254,7 @@ asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid) } if (euid != (uid_t) -1) { if (euid != current->euid) { - current->mm->dumpable = suid_dumpable; + set_dumpable(current->mm, suid_dumpable); smp_wmb(); } current->euid = euid; @@ -1295,7 +1304,7 @@ asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid) } if (egid != (gid_t) -1) { if (egid != current->egid) { - current->mm->dumpable = suid_dumpable; + set_dumpable(current->mm, suid_dumpable); smp_wmb(); } current->egid = egid; @@ -1341,7 +1350,7 @@ asmlinkage long sys_setfsuid(uid_t uid) uid == current->suid || uid == current->fsuid || capable(CAP_SETUID)) { if (uid != old_fsuid) { - current->mm->dumpable = suid_dumpable; + set_dumpable(current->mm, suid_dumpable); smp_wmb(); } current->fsuid = uid; @@ -1370,7 +1379,7 @@ asmlinkage long sys_setfsgid(gid_t gid) gid == current->sgid || gid == current->fsgid || capable(CAP_SETGID)) { if (gid != old_fsgid) { - current->mm->dumpable = suid_dumpable; + set_dumpable(current->mm, suid_dumpable); smp_wmb(); } current->fsgid = gid; @@ -2167,14 +2176,14 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, error = put_user(current->pdeath_signal, (int __user *)arg2); break; case PR_GET_DUMPABLE: - error = current->mm->dumpable; + error = get_dumpable(current->mm); break; case PR_SET_DUMPABLE: if (arg2 < 0 || arg2 > 1) { error = -EINVAL; break; } - current->mm->dumpable = arg2; + set_dumpable(current->mm, arg2); break; case PR_SET_UNALIGN: diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 44a1d699aad7..222299844ad1 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -78,6 +78,7 @@ extern int percpu_pagelist_fraction; extern int compat_log; extern int maps_protect; extern int sysctl_stat_interval; +extern int audit_argv_kb; /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ static int maxolduid = 65535; @@ -160,6 +161,8 @@ extern ctl_table inotify_table[]; int sysctl_legacy_va_layout; #endif +extern int prove_locking; +extern int lock_stat; /* The default sysctl tables: */ @@ -281,6 +284,26 @@ static ctl_table kern_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, +#ifdef CONFIG_PROVE_LOCKING + { + .ctl_name = CTL_UNNUMBERED, + .procname = "prove_locking", + .data = &prove_locking, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, +#endif +#ifdef CONFIG_LOCK_STAT + { + .ctl_name = CTL_UNNUMBERED, + .procname = "lock_stat", + .data = &lock_stat, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, +#endif { .ctl_name = CTL_UNNUMBERED, .procname = "sched_features", @@ -306,6 +329,16 @@ static ctl_table kern_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, +#ifdef CONFIG_AUDITSYSCALL + { + .ctl_name = CTL_UNNUMBERED, + .procname = "audit_argv_kb", + .data = &audit_argv_kb, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, +#endif { .ctl_name = KERN_CORE_PATTERN, .procname = "core_pattern", @@ -660,7 +693,7 @@ static ctl_table kern_table[] = { { .ctl_name = KERN_ACPI_VIDEO_FLAGS, .procname = "acpi_video_flags", - .data = &acpi_video_flags, + .data = &acpi_realmode_flags, .maxlen = sizeof (unsigned long), .mode = 0644, .proc_handler = &proc_doulongvec_minmax, @@ -715,13 +748,17 @@ static ctl_table kern_table[] = { .proc_handler = &proc_dostring, .strategy = &sysctl_string, }, - +/* + * NOTE: do not add new entries to this table unless you have read + * Documentation/sysctl/ctl_unnumbered.txt + */ { .ctl_name = 0 } }; /* Constants for minimum and maximum testing in vm_table. We use these as one-element integer vectors. */ static int zero; +static int two = 2; static int one_hundred = 100; @@ -1112,7 +1149,10 @@ static ctl_table fs_table[] = { .data = &lease_break_time, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &zero, + .extra2 = &two, }, { .ctl_name = FS_AIO_NR, diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 027d46c906e0..88c81026e003 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -401,7 +401,7 @@ static __always_inline int clocksource_bigadjust(s64 error, s64 *interval, * this is optimized for the most common adjustments of -1,0,1, * for other values we can do a bit more work. */ -static void clocksource_adjust(struct clocksource *clock, s64 offset) +static void clocksource_adjust(s64 offset) { s64 error, interval = clock->cycle_interval; int adj; @@ -472,7 +472,7 @@ void update_wall_time(void) } /* correct the clock when NTP error is too big */ - clocksource_adjust(clock, offset); + clocksource_adjust(offset); /* store full nanoseconds into xtime */ xtime.tv_nsec = (s64)clock->xtime_nsec >> clock->shift; diff --git a/kernel/timer.c b/kernel/timer.c index dbc03ab14eed..6ce1952eea7d 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -103,14 +103,14 @@ static inline tvec_base_t *tbase_get_base(tvec_base_t *base) static inline void timer_set_deferrable(struct timer_list *timer) { timer->base = ((tvec_base_t *)((unsigned long)(timer->base) | - TBASE_DEFERRABLE_FLAG)); + TBASE_DEFERRABLE_FLAG)); } static inline void timer_set_base(struct timer_list *timer, tvec_base_t *new_base) { timer->base = (tvec_base_t *)((unsigned long)(new_base) | - tbase_get_deferrable(timer->base)); + tbase_get_deferrable(timer->base)); } /** @@ -445,10 +445,10 @@ EXPORT_SYMBOL(__mod_timer); void add_timer_on(struct timer_list *timer, int cpu) { tvec_base_t *base = per_cpu(tvec_bases, cpu); - unsigned long flags; + unsigned long flags; timer_stats_timer_set_start_info(timer); - BUG_ON(timer_pending(timer) || !timer->function); + BUG_ON(timer_pending(timer) || !timer->function); spin_lock_irqsave(&base->lock, flags); timer_set_base(timer, base); internal_add_timer(base, timer); @@ -627,7 +627,7 @@ static inline void __run_timers(tvec_base_t *base) while (time_after_eq(jiffies, base->timer_jiffies)) { struct list_head work_list; struct list_head *head = &work_list; - int index = base->timer_jiffies & TVR_MASK; + int index = base->timer_jiffies & TVR_MASK; /* * Cascade timers: @@ -644,8 +644,8 @@ static inline void __run_timers(tvec_base_t *base) unsigned long data; timer = list_first_entry(head, struct timer_list,entry); - fn = timer->function; - data = timer->data; + fn = timer->function; + data = timer->data; timer_stats_account_timer(timer); @@ -689,8 +689,8 @@ static unsigned long __next_timer_interrupt(tvec_base_t *base) index = slot = timer_jiffies & TVR_MASK; do { list_for_each_entry(nte, base->tv1.vec + slot, entry) { - if (tbase_get_deferrable(nte->base)) - continue; + if (tbase_get_deferrable(nte->base)) + continue; found = 1; expires = nte->expires; @@ -834,7 +834,7 @@ void update_process_times(int user_tick) if (rcu_pending(cpu)) rcu_check_callbacks(cpu, user_tick); scheduler_tick(); - run_posix_cpu_timers(p); + run_posix_cpu_timers(p); } /* @@ -909,7 +909,7 @@ static inline void update_times(unsigned long ticks) update_wall_time(); calc_load(ticks); } - + /* * The 64-bit jiffies value is not atomic - you MUST NOT read it * without sampling the sequence number in xtime_lock. @@ -1105,7 +1105,7 @@ asmlinkage long sys_gettid(void) /** * do_sysinfo - fill in sysinfo struct * @info: pointer to buffer to fill - */ + */ int do_sysinfo(struct sysinfo *info) { unsigned long mem_total, sav_total; diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 640844024ffd..f3e0c2abcbd0 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -283,6 +283,17 @@ config LOCKDEP select KALLSYMS select KALLSYMS_ALL +config LOCK_STAT + bool "Lock usage statisitics" + depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT + select LOCKDEP + select DEBUG_SPINLOCK + select DEBUG_MUTEXES + select DEBUG_LOCK_ALLOC + default n + help + This feature enables tracking lock contention points + config DEBUG_LOCKDEP bool "Lock dependency engine debugging" depends on DEBUG_KERNEL && LOCKDEP diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index bd5ecbbafab1..6a80c784a8fb 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -33,25 +33,15 @@ static DEFINE_SPINLOCK(sequence_lock); static struct sock *uevent_sock; #endif -static char *action_to_string(enum kobject_action action) -{ - switch (action) { - case KOBJ_ADD: - return "add"; - case KOBJ_REMOVE: - return "remove"; - case KOBJ_CHANGE: - return "change"; - case KOBJ_OFFLINE: - return "offline"; - case KOBJ_ONLINE: - return "online"; - case KOBJ_MOVE: - return "move"; - default: - return NULL; - } -} +/* the strings here must match the enum in include/linux/kobject.h */ +const char *kobject_actions[] = { + "add", + "remove", + "change", + "move", + "online", + "offline", +}; /** * kobject_uevent_env - send an uevent with environmental data @@ -83,7 +73,7 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, pr_debug("%s\n", __FUNCTION__); - action_string = action_to_string(action); + action_string = kobject_actions[action]; if (!action_string) { pr_debug("kobject attempted to send uevent without action_string!\n"); return -EINVAL; diff --git a/mm/filemap.c b/mm/filemap.c index 5d5449f3d41c..49a6fe375d01 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -891,15 +891,20 @@ void do_generic_mapping_read(struct address_space *mapping, unsigned long nr, ret; cond_resched(); - if (index == next_index) - next_index = page_cache_readahead(mapping, &ra, filp, - index, last_index - index); - find_page: page = find_get_page(mapping, index); - if (unlikely(page == NULL)) { - handle_ra_miss(mapping, &ra, index); - goto no_cached_page; + if (!page) { + page_cache_sync_readahead(mapping, + &ra, filp, + index, last_index - index); + page = find_get_page(mapping, index); + if (unlikely(page == NULL)) + goto no_cached_page; + } + if (PageReadahead(page)) { + page_cache_async_readahead(mapping, + &ra, filp, page, + index, last_index - index); } if (!PageUptodate(page)) goto page_not_up_to_date; @@ -1051,6 +1056,7 @@ no_cached_page: out: *_ra = ra; + _ra->prev_index = prev_index; *ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset; if (cached_page) @@ -1301,62 +1307,62 @@ static int fastcall page_cache_read(struct file * file, unsigned long offset) #define MMAP_LOTSAMISS (100) /** - * filemap_nopage - read in file data for page fault handling - * @area: the applicable vm_area - * @address: target address to read in - * @type: returned with VM_FAULT_{MINOR,MAJOR} if not %NULL + * filemap_fault - read in file data for page fault handling + * @vma: vma in which the fault was taken + * @vmf: struct vm_fault containing details of the fault * - * filemap_nopage() is invoked via the vma operations vector for a + * filemap_fault() is invoked via the vma operations vector for a * mapped memory region to read in file data during a page fault. * * The goto's are kind of ugly, but this streamlines the normal case of having * it in the page cache, and handles the special cases reasonably without * having a lot of duplicated code. */ -struct page *filemap_nopage(struct vm_area_struct *area, - unsigned long address, int *type) +int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { int error; - struct file *file = area->vm_file; + struct file *file = vma->vm_file; struct address_space *mapping = file->f_mapping; struct file_ra_state *ra = &file->f_ra; struct inode *inode = mapping->host; struct page *page; - unsigned long size, pgoff; - int did_readaround = 0, majmin = VM_FAULT_MINOR; - - pgoff = ((address-area->vm_start) >> PAGE_CACHE_SHIFT) + area->vm_pgoff; + unsigned long size; + int did_readaround = 0; + int ret = 0; -retry_all: size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - if (pgoff >= size) + if (vmf->pgoff >= size) goto outside_data_content; /* If we don't want any read-ahead, don't bother */ - if (VM_RandomReadHint(area)) + if (VM_RandomReadHint(vma)) goto no_cached_page; /* - * The readahead code wants to be told about each and every page - * so it can build and shrink its windows appropriately - * - * For sequential accesses, we use the generic readahead logic. - */ - if (VM_SequentialReadHint(area)) - page_cache_readahead(mapping, ra, file, pgoff, 1); - - /* * Do we have something in the page cache already? */ retry_find: - page = find_get_page(mapping, pgoff); + page = find_lock_page(mapping, vmf->pgoff); + /* + * For sequential accesses, we use the generic readahead logic. + */ + if (VM_SequentialReadHint(vma)) { + if (!page) { + page_cache_sync_readahead(mapping, ra, file, + vmf->pgoff, 1); + page = find_lock_page(mapping, vmf->pgoff); + if (!page) + goto no_cached_page; + } + if (PageReadahead(page)) { + page_cache_async_readahead(mapping, ra, file, page, + vmf->pgoff, 1); + } + } + if (!page) { unsigned long ra_pages; - if (VM_SequentialReadHint(area)) { - handle_ra_miss(mapping, ra, pgoff); - goto no_cached_page; - } ra->mmap_miss++; /* @@ -1371,7 +1377,7 @@ retry_find: * check did_readaround, as this is an inner loop. */ if (!did_readaround) { - majmin = VM_FAULT_MAJOR; + ret = VM_FAULT_MAJOR; count_vm_event(PGMAJFAULT); } did_readaround = 1; @@ -1379,11 +1385,11 @@ retry_find: if (ra_pages) { pgoff_t start = 0; - if (pgoff > ra_pages / 2) - start = pgoff - ra_pages / 2; + if (vmf->pgoff > ra_pages / 2) + start = vmf->pgoff - ra_pages / 2; do_page_cache_readahead(mapping, file, start, ra_pages); } - page = find_get_page(mapping, pgoff); + page = find_lock_page(mapping, vmf->pgoff); if (!page) goto no_cached_page; } @@ -1392,35 +1398,42 @@ retry_find: ra->mmap_hit++; /* - * Ok, found a page in the page cache, now we need to check - * that it's up-to-date. + * We have a locked page in the page cache, now we need to check + * that it's up-to-date. If not, it is going to be due to an error. */ - if (!PageUptodate(page)) + if (unlikely(!PageUptodate(page))) goto page_not_uptodate; -success: + /* Must recheck i_size under page lock */ + size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + if (unlikely(vmf->pgoff >= size)) { + unlock_page(page); + goto outside_data_content; + } + /* * Found the page and have a reference on it. */ mark_page_accessed(page); - if (type) - *type = majmin; - return page; + ra->prev_index = page->index; + vmf->page = page; + return ret | VM_FAULT_LOCKED; outside_data_content: /* * An external ptracer can access pages that normally aren't * accessible.. */ - if (area->vm_mm == current->mm) - return NOPAGE_SIGBUS; + if (vma->vm_mm == current->mm) + return VM_FAULT_SIGBUS; + /* Fall through to the non-read-ahead case */ no_cached_page: /* * We're only likely to ever get here if MADV_RANDOM is in * effect. */ - error = page_cache_read(file, pgoff); + error = page_cache_read(file, vmf->pgoff); /* * The page we want has now been added to the page cache. @@ -1436,12 +1449,13 @@ no_cached_page: * to schedule I/O. */ if (error == -ENOMEM) - return NOPAGE_OOM; - return NOPAGE_SIGBUS; + return VM_FAULT_OOM; + return VM_FAULT_SIGBUS; page_not_uptodate: + /* IO error path */ if (!did_readaround) { - majmin = VM_FAULT_MAJOR; + ret = VM_FAULT_MAJOR; count_vm_event(PGMAJFAULT); } @@ -1451,217 +1465,21 @@ page_not_uptodate: * because there really aren't any performance issues here * and we need to check for errors. */ - lock_page(page); - - /* Somebody truncated the page on us? */ - if (!page->mapping) { - unlock_page(page); - page_cache_release(page); - goto retry_all; - } - - /* Somebody else successfully read it in? */ - if (PageUptodate(page)) { - unlock_page(page); - goto success; - } ClearPageError(page); error = mapping->a_ops->readpage(file, page); - if (!error) { - wait_on_page_locked(page); - if (PageUptodate(page)) - goto success; - } else if (error == AOP_TRUNCATED_PAGE) { - page_cache_release(page); - goto retry_find; - } - - /* - * Things didn't work out. Return zero to tell the - * mm layer so, possibly freeing the page cache page first. - */ - shrink_readahead_size_eio(file, ra); page_cache_release(page); - return NOPAGE_SIGBUS; -} -EXPORT_SYMBOL(filemap_nopage); - -static struct page * filemap_getpage(struct file *file, unsigned long pgoff, - int nonblock) -{ - struct address_space *mapping = file->f_mapping; - struct page *page; - int error; - /* - * Do we have something in the page cache already? - */ -retry_find: - page = find_get_page(mapping, pgoff); - if (!page) { - if (nonblock) - return NULL; - goto no_cached_page; - } - - /* - * Ok, found a page in the page cache, now we need to check - * that it's up-to-date. - */ - if (!PageUptodate(page)) { - if (nonblock) { - page_cache_release(page); - return NULL; - } - goto page_not_uptodate; - } - -success: - /* - * Found the page and have a reference on it. - */ - mark_page_accessed(page); - return page; - -no_cached_page: - error = page_cache_read(file, pgoff); - - /* - * The page we want has now been added to the page cache. - * In the unlikely event that someone removed it in the - * meantime, we'll just come back here and read it again. - */ - if (error >= 0) + if (!error || error == AOP_TRUNCATED_PAGE) goto retry_find; - /* - * An error return from page_cache_read can result if the - * system is low on memory, or a problem occurs while trying - * to schedule I/O. - */ - return NULL; - -page_not_uptodate: - lock_page(page); - - /* Did it get truncated while we waited for it? */ - if (!page->mapping) { - unlock_page(page); - goto err; - } - - /* Did somebody else get it up-to-date? */ - if (PageUptodate(page)) { - unlock_page(page); - goto success; - } - - error = mapping->a_ops->readpage(file, page); - if (!error) { - wait_on_page_locked(page); - if (PageUptodate(page)) - goto success; - } else if (error == AOP_TRUNCATED_PAGE) { - page_cache_release(page); - goto retry_find; - } - - /* - * Umm, take care of errors if the page isn't up-to-date. - * Try to re-read it _once_. We do this synchronously, - * because there really aren't any performance issues here - * and we need to check for errors. - */ - lock_page(page); - - /* Somebody truncated the page on us? */ - if (!page->mapping) { - unlock_page(page); - goto err; - } - /* Somebody else successfully read it in? */ - if (PageUptodate(page)) { - unlock_page(page); - goto success; - } - - ClearPageError(page); - error = mapping->a_ops->readpage(file, page); - if (!error) { - wait_on_page_locked(page); - if (PageUptodate(page)) - goto success; - } else if (error == AOP_TRUNCATED_PAGE) { - page_cache_release(page); - goto retry_find; - } - - /* - * Things didn't work out. Return zero to tell the - * mm layer so, possibly freeing the page cache page first. - */ -err: - page_cache_release(page); - - return NULL; -} - -int filemap_populate(struct vm_area_struct *vma, unsigned long addr, - unsigned long len, pgprot_t prot, unsigned long pgoff, - int nonblock) -{ - struct file *file = vma->vm_file; - struct address_space *mapping = file->f_mapping; - struct inode *inode = mapping->host; - unsigned long size; - struct mm_struct *mm = vma->vm_mm; - struct page *page; - int err; - - if (!nonblock) - force_page_cache_readahead(mapping, vma->vm_file, - pgoff, len >> PAGE_CACHE_SHIFT); - -repeat: - size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - if (pgoff + (len >> PAGE_CACHE_SHIFT) > size) - return -EINVAL; - - page = filemap_getpage(file, pgoff, nonblock); - - /* XXX: This is wrong, a filesystem I/O error may have happened. Fix that as - * done in shmem_populate calling shmem_getpage */ - if (!page && !nonblock) - return -ENOMEM; - - if (page) { - err = install_page(mm, vma, addr, page, prot); - if (err) { - page_cache_release(page); - return err; - } - } else if (vma->vm_flags & VM_NONLINEAR) { - /* No page was found just because we can't read it in now (being - * here implies nonblock != 0), but the page may exist, so set - * the PTE to fault it in later. */ - err = install_file_pte(mm, vma, addr, pgoff, prot); - if (err) - return err; - } - - len -= PAGE_SIZE; - addr += PAGE_SIZE; - pgoff++; - if (len) - goto repeat; - - return 0; + /* Things didn't work out. Return zero to tell the mm layer so. */ + shrink_readahead_size_eio(file, ra); + return VM_FAULT_SIGBUS; } -EXPORT_SYMBOL(filemap_populate); +EXPORT_SYMBOL(filemap_fault); struct vm_operations_struct generic_file_vm_ops = { - .nopage = filemap_nopage, - .populate = filemap_populate, + .fault = filemap_fault, }; /* This is used for a general mmap of a disk file */ @@ -1674,6 +1492,7 @@ int generic_file_mmap(struct file * file, struct vm_area_struct * vma) return -ENOEXEC; file_accessed(file); vma->vm_ops = &generic_file_vm_ops; + vma->vm_flags |= VM_CAN_NONLINEAR; return 0; } diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c index 65ffc321f0c0..53ee6a299635 100644 --- a/mm/filemap_xip.c +++ b/mm/filemap_xip.c @@ -205,62 +205,58 @@ __xip_unmap (struct address_space * mapping, } /* - * xip_nopage() is invoked via the vma operations vector for a + * xip_fault() is invoked via the vma operations vector for a * mapped memory region to read in file data during a page fault. * - * This function is derived from filemap_nopage, but used for execute in place + * This function is derived from filemap_fault, but used for execute in place */ -static struct page * -xip_file_nopage(struct vm_area_struct * area, - unsigned long address, - int *type) +static int xip_file_fault(struct vm_area_struct *area, struct vm_fault *vmf) { struct file *file = area->vm_file; struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; struct page *page; - unsigned long size, pgoff, endoff; + pgoff_t size; - pgoff = ((address - area->vm_start) >> PAGE_CACHE_SHIFT) - + area->vm_pgoff; - endoff = ((area->vm_end - area->vm_start) >> PAGE_CACHE_SHIFT) - + area->vm_pgoff; + /* XXX: are VM_FAULT_ codes OK? */ size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - if (pgoff >= size) - return NOPAGE_SIGBUS; + if (vmf->pgoff >= size) + return VM_FAULT_SIGBUS; - page = mapping->a_ops->get_xip_page(mapping, pgoff*(PAGE_SIZE/512), 0); + page = mapping->a_ops->get_xip_page(mapping, + vmf->pgoff*(PAGE_SIZE/512), 0); if (!IS_ERR(page)) goto out; if (PTR_ERR(page) != -ENODATA) - return NOPAGE_SIGBUS; + return VM_FAULT_OOM; /* sparse block */ if ((area->vm_flags & (VM_WRITE | VM_MAYWRITE)) && (area->vm_flags & (VM_SHARED| VM_MAYSHARE)) && (!(mapping->host->i_sb->s_flags & MS_RDONLY))) { /* maybe shared writable, allocate new block */ - page = mapping->a_ops->get_xip_page (mapping, - pgoff*(PAGE_SIZE/512), 1); + page = mapping->a_ops->get_xip_page(mapping, + vmf->pgoff*(PAGE_SIZE/512), 1); if (IS_ERR(page)) - return NOPAGE_SIGBUS; + return VM_FAULT_SIGBUS; /* unmap page at pgoff from all other vmas */ - __xip_unmap(mapping, pgoff); + __xip_unmap(mapping, vmf->pgoff); } else { /* not shared and writable, use xip_sparse_page() */ page = xip_sparse_page(); if (!page) - return NOPAGE_OOM; + return VM_FAULT_OOM; } out: page_cache_get(page); - return page; + vmf->page = page; + return 0; } static struct vm_operations_struct xip_file_vm_ops = { - .nopage = xip_file_nopage, + .fault = xip_file_fault, }; int xip_file_mmap(struct file * file, struct vm_area_struct * vma) @@ -269,6 +265,7 @@ int xip_file_mmap(struct file * file, struct vm_area_struct * vma) file_accessed(file); vma->vm_ops = &xip_file_vm_ops; + vma->vm_flags |= VM_CAN_NONLINEAR; return 0; } EXPORT_SYMBOL_GPL(xip_file_mmap); diff --git a/mm/fremap.c b/mm/fremap.c index 4e3f53dd5fd4..c395b1abf082 100644 --- a/mm/fremap.c +++ b/mm/fremap.c @@ -20,13 +20,14 @@ #include <asm/cacheflush.h> #include <asm/tlbflush.h> -static int zap_pte(struct mm_struct *mm, struct vm_area_struct *vma, +static void zap_pte(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { pte_t pte = *ptep; - struct page *page = NULL; if (pte_present(pte)) { + struct page *page; + flush_cache_page(vma, addr, pte_pfn(pte)); pte = ptep_clear_flush(vma, addr, ptep); page = vm_normal_page(vma, addr, pte); @@ -35,68 +36,21 @@ static int zap_pte(struct mm_struct *mm, struct vm_area_struct *vma, set_page_dirty(page); page_remove_rmap(page, vma); page_cache_release(page); + update_hiwater_rss(mm); + dec_mm_counter(mm, file_rss); } } else { if (!pte_file(pte)) free_swap_and_cache(pte_to_swp_entry(pte)); pte_clear_not_present_full(mm, addr, ptep, 0); } - return !!page; } /* - * Install a file page to a given virtual memory address, release any - * previously existing mapping. - */ -int install_page(struct mm_struct *mm, struct vm_area_struct *vma, - unsigned long addr, struct page *page, pgprot_t prot) -{ - struct inode *inode; - pgoff_t size; - int err = -ENOMEM; - pte_t *pte; - pte_t pte_val; - spinlock_t *ptl; - - pte = get_locked_pte(mm, addr, &ptl); - if (!pte) - goto out; - - /* - * This page may have been truncated. Tell the - * caller about it. - */ - err = -EINVAL; - inode = vma->vm_file->f_mapping->host; - size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - if (!page->mapping || page->index >= size) - goto unlock; - err = -ENOMEM; - if (page_mapcount(page) > INT_MAX/2) - goto unlock; - - if (pte_none(*pte) || !zap_pte(mm, vma, addr, pte)) - inc_mm_counter(mm, file_rss); - - flush_icache_page(vma, page); - pte_val = mk_pte(page, prot); - set_pte_at(mm, addr, pte, pte_val); - page_add_file_rmap(page); - update_mmu_cache(vma, addr, pte_val); - lazy_mmu_prot_update(pte_val); - err = 0; -unlock: - pte_unmap_unlock(pte, ptl); -out: - return err; -} -EXPORT_SYMBOL(install_page); - -/* * Install a file pte to a given virtual memory address, release any * previously existing mapping. */ -int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma, +static int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, unsigned long pgoff, pgprot_t prot) { int err = -ENOMEM; @@ -107,10 +61,8 @@ int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma, if (!pte) goto out; - if (!pte_none(*pte) && zap_pte(mm, vma, addr, pte)) { - update_hiwater_rss(mm); - dec_mm_counter(mm, file_rss); - } + if (!pte_none(*pte)) + zap_pte(mm, vma, addr, pte); set_pte_at(mm, addr, pte, pgoff_to_pte(pgoff)); /* @@ -126,6 +78,25 @@ out: return err; } +static int populate_range(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long addr, unsigned long size, pgoff_t pgoff) +{ + int err; + + do { + err = install_file_pte(mm, vma, addr, pgoff, vma->vm_page_prot); + if (err) + return err; + + size -= PAGE_SIZE; + addr += PAGE_SIZE; + pgoff++; + } while (size); + + return 0; + +} + /*** * sys_remap_file_pages - remap arbitrary pages of a shared backing store * file within an existing vma. @@ -183,41 +154,77 @@ asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size, * the single existing vma. vm_private_data is used as a * swapout cursor in a VM_NONLINEAR vma. */ - if (vma && (vma->vm_flags & VM_SHARED) && - (!vma->vm_private_data || (vma->vm_flags & VM_NONLINEAR)) && - vma->vm_ops && vma->vm_ops->populate && - end > start && start >= vma->vm_start && - end <= vma->vm_end) { - - /* Must set VM_NONLINEAR before any pages are populated. */ - if (pgoff != linear_page_index(vma, start) && - !(vma->vm_flags & VM_NONLINEAR)) { - if (!has_write_lock) { - up_read(&mm->mmap_sem); - down_write(&mm->mmap_sem); - has_write_lock = 1; - goto retry; - } - mapping = vma->vm_file->f_mapping; - spin_lock(&mapping->i_mmap_lock); - flush_dcache_mmap_lock(mapping); - vma->vm_flags |= VM_NONLINEAR; - vma_prio_tree_remove(vma, &mapping->i_mmap); - vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear); - flush_dcache_mmap_unlock(mapping); - spin_unlock(&mapping->i_mmap_lock); - } + if (!vma || !(vma->vm_flags & VM_SHARED)) + goto out; - err = vma->vm_ops->populate(vma, start, size, - vma->vm_page_prot, - pgoff, flags & MAP_NONBLOCK); + if (vma->vm_private_data && !(vma->vm_flags & VM_NONLINEAR)) + goto out; + + if (!vma->vm_flags & VM_CAN_NONLINEAR) + goto out; + if (end <= start || start < vma->vm_start || end > vma->vm_end) + goto out; + + /* Must set VM_NONLINEAR before any pages are populated. */ + if (!(vma->vm_flags & VM_NONLINEAR)) { + /* Don't need a nonlinear mapping, exit success */ + if (pgoff == linear_page_index(vma, start)) { + err = 0; + goto out; + } + + if (!has_write_lock) { + up_read(&mm->mmap_sem); + down_write(&mm->mmap_sem); + has_write_lock = 1; + goto retry; + } + mapping = vma->vm_file->f_mapping; /* - * We can't clear VM_NONLINEAR because we'd have to do - * it after ->populate completes, and that would prevent - * downgrading the lock. (Locks can't be upgraded). + * page_mkclean doesn't work on nonlinear vmas, so if + * dirty pages need to be accounted, emulate with linear + * vmas. */ + if (mapping_cap_account_dirty(mapping)) { + unsigned long addr; + + flags &= MAP_NONBLOCK; + addr = mmap_region(vma->vm_file, start, size, + flags, vma->vm_flags, pgoff, 1); + if (IS_ERR_VALUE(addr)) { + err = addr; + } else { + BUG_ON(addr != start); + err = 0; + } + goto out; + } + spin_lock(&mapping->i_mmap_lock); + flush_dcache_mmap_lock(mapping); + vma->vm_flags |= VM_NONLINEAR; + vma_prio_tree_remove(vma, &mapping->i_mmap); + vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear); + flush_dcache_mmap_unlock(mapping); + spin_unlock(&mapping->i_mmap_lock); + } + + err = populate_range(mm, vma, start, size, pgoff); + if (!err && !(flags & MAP_NONBLOCK)) { + if (unlikely(has_write_lock)) { + downgrade_write(&mm->mmap_sem); + has_write_lock = 0; + } + make_pages_present(start, start+size); } + + /* + * We can't clear VM_NONLINEAR because we'd have to do + * it after ->populate completes, and that would prevent + * downgrading the lock. (Locks can't be upgraded). + */ + +out: if (likely(!has_write_lock)) up_read(&mm->mmap_sem); else diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 6912bbf33faa..f127940ec24f 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -78,16 +78,13 @@ static struct page *dequeue_huge_page(struct vm_area_struct *vma, for (z = zonelist->zones; *z; z++) { nid = zone_to_nid(*z); if (cpuset_zone_allowed_softwall(*z, htlb_alloc_mask) && - !list_empty(&hugepage_freelists[nid])) - break; - } - - if (*z) { - page = list_entry(hugepage_freelists[nid].next, - struct page, lru); - list_del(&page->lru); - free_huge_pages--; - free_huge_pages_node[nid]--; + !list_empty(&hugepage_freelists[nid])) { + page = list_entry(hugepage_freelists[nid].next, + struct page, lru); + list_del(&page->lru); + free_huge_pages--; + free_huge_pages_node[nid]--; + } } return page; } @@ -107,15 +104,19 @@ static int alloc_fresh_huge_page(void) { static int prev_nid; struct page *page; - static DEFINE_SPINLOCK(nid_lock); int nid; - spin_lock(&nid_lock); + /* + * Copy static prev_nid to local nid, work on that, then copy it + * back to prev_nid afterwards: otherwise there's a window in which + * a racer might pass invalid nid MAX_NUMNODES to alloc_pages_node. + * But we don't need to use a spin_lock here: it really doesn't + * matter if occasionally a racer chooses the same nid as we do. + */ nid = next_node(prev_nid, node_online_map); if (nid == MAX_NUMNODES) nid = first_node(node_online_map); prev_nid = nid; - spin_unlock(&nid_lock); page = alloc_pages_node(nid, htlb_alloc_mask|__GFP_COMP|__GFP_NOWARN, HUGETLB_PAGE_ORDER); @@ -207,7 +208,7 @@ static void update_and_free_page(struct page *page) 1 << PG_dirty | 1 << PG_active | 1 << PG_reserved | 1 << PG_private | 1<< PG_writeback); } - page[1].lru.next = NULL; + set_compound_page_dtor(page, NULL); set_page_refcounted(page); __free_pages(page, HUGETLB_PAGE_ORDER); } @@ -316,15 +317,14 @@ unsigned long hugetlb_total_pages(void) * hugegpage VMA. do_page_fault() is supposed to trap this, so BUG is we get * this far. */ -static struct page *hugetlb_nopage(struct vm_area_struct *vma, - unsigned long address, int *unused) +static int hugetlb_vm_op_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { BUG(); - return NULL; + return 0; } struct vm_operations_struct hugetlb_vm_ops = { - .nopage = hugetlb_nopage, + .fault = hugetlb_vm_op_fault, }; static pte_t make_huge_pte(struct vm_area_struct *vma, struct page *page, @@ -470,7 +470,7 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma, avoidcopy = (page_count(old_page) == 1); if (avoidcopy) { set_huge_ptep_writable(vma, address, ptep); - return VM_FAULT_MINOR; + return 0; } page_cache_get(old_page); @@ -495,7 +495,7 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma, } page_cache_release(new_page); page_cache_release(old_page); - return VM_FAULT_MINOR; + return 0; } static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma, @@ -552,7 +552,7 @@ retry: if (idx >= size) goto backout; - ret = VM_FAULT_MINOR; + ret = 0; if (!pte_none(*ptep)) goto backout; @@ -603,7 +603,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, return ret; } - ret = VM_FAULT_MINOR; + ret = 0; spin_lock(&mm->page_table_lock); /* Check for a racing update before calling hugetlb_cow */ @@ -642,7 +642,7 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, spin_unlock(&mm->page_table_lock); ret = hugetlb_fault(mm, vma, vaddr, 0); spin_lock(&mm->page_table_lock); - if (ret == VM_FAULT_MINOR) + if (!(ret & VM_FAULT_MAJOR)) continue; remainder = 0; diff --git a/mm/memory.c b/mm/memory.c index 9c6ff7fffdc8..8aace3db3a54 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1047,7 +1047,8 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, if (pages) foll_flags |= FOLL_GET; if (!write && !(vma->vm_flags & VM_LOCKED) && - (!vma->vm_ops || !vma->vm_ops->nopage)) + (!vma->vm_ops || (!vma->vm_ops->nopage && + !vma->vm_ops->fault))) foll_flags |= FOLL_ANON; do { @@ -1067,31 +1068,30 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, cond_resched(); while (!(page = follow_page(vma, start, foll_flags))) { int ret; - ret = __handle_mm_fault(mm, vma, start, + ret = handle_mm_fault(mm, vma, start, foll_flags & FOLL_WRITE); + if (ret & VM_FAULT_ERROR) { + if (ret & VM_FAULT_OOM) + return i ? i : -ENOMEM; + else if (ret & VM_FAULT_SIGBUS) + return i ? i : -EFAULT; + BUG(); + } + if (ret & VM_FAULT_MAJOR) + tsk->maj_flt++; + else + tsk->min_flt++; + /* - * The VM_FAULT_WRITE bit tells us that do_wp_page has - * broken COW when necessary, even if maybe_mkwrite - * decided not to set pte_write. We can thus safely do - * subsequent page lookups as if they were reads. + * The VM_FAULT_WRITE bit tells us that + * do_wp_page has broken COW when necessary, + * even if maybe_mkwrite decided not to set + * pte_write. We can thus safely do subsequent + * page lookups as if they were reads. */ if (ret & VM_FAULT_WRITE) foll_flags &= ~FOLL_WRITE; - - switch (ret & ~VM_FAULT_WRITE) { - case VM_FAULT_MINOR: - tsk->min_flt++; - break; - case VM_FAULT_MAJOR: - tsk->maj_flt++; - break; - case VM_FAULT_SIGBUS: - return i ? i : -EFAULT; - case VM_FAULT_OOM: - return i ? i : -ENOMEM; - default: - BUG(); - } + cond_resched(); } if (pages) { @@ -1638,7 +1638,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, { struct page *old_page, *new_page; pte_t entry; - int reuse = 0, ret = VM_FAULT_MINOR; + int reuse = 0, ret = 0; struct page *dirty_page = NULL; old_page = vm_normal_page(vma, address, orig_pte); @@ -1765,6 +1765,15 @@ gotten: unlock: pte_unmap_unlock(page_table, ptl); if (dirty_page) { + /* + * Yes, Virginia, this is actually required to prevent a race + * with clear_page_dirty_for_io() from clearing the page dirty + * bit after it clear all dirty ptes, but before a racing + * do_wp_page installs a dirty pte. + * + * do_no_page is protected similarly. + */ + wait_on_page_locked(dirty_page); set_page_dirty_balance(dirty_page); put_page(dirty_page); } @@ -1831,6 +1840,13 @@ static int unmap_mapping_range_vma(struct vm_area_struct *vma, unsigned long restart_addr; int need_break; + /* + * files that support invalidating or truncating portions of the + * file from under mmaped areas must have their ->fault function + * return a locked page (and set VM_FAULT_LOCKED in the return). + * This provides synchronisation against concurrent unmapping here. + */ + again: restart_addr = vma->vm_truncate_count; if (is_restart_addr(restart_addr) && start_addr < restart_addr) { @@ -1959,17 +1975,8 @@ void unmap_mapping_range(struct address_space *mapping, spin_lock(&mapping->i_mmap_lock); - /* serialize i_size write against truncate_count write */ - smp_wmb(); - /* Protect against page faults, and endless unmapping loops */ + /* Protect against endless unmapping loops */ mapping->truncate_count++; - /* - * For archs where spin_lock has inclusive semantics like ia64 - * this smp_mb() will prevent to read pagetable contents - * before the truncate_count increment is visible to - * other cpus. - */ - smp_mb(); if (unlikely(is_restart_addr(mapping->truncate_count))) { if (mapping->truncate_count == 0) reset_vma_truncate_counts(mapping); @@ -2008,8 +2015,18 @@ int vmtruncate(struct inode * inode, loff_t offset) if (IS_SWAPFILE(inode)) goto out_busy; i_size_write(inode, offset); + + /* + * unmap_mapping_range is called twice, first simply for efficiency + * so that truncate_inode_pages does fewer single-page unmaps. However + * after this first call, and before truncate_inode_pages finishes, + * it is possible for private pages to be COWed, which remain after + * truncate_inode_pages finishes, hence the second unmap_mapping_range + * call must be made for correctness. + */ unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); truncate_inode_pages(mapping, offset); + unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); goto out_truncate; do_expand: @@ -2049,6 +2066,7 @@ int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end) down_write(&inode->i_alloc_sem); unmap_mapping_range(mapping, offset, (end - offset), 1); truncate_inode_pages_range(mapping, offset, end); + unmap_mapping_range(mapping, offset, (end - offset), 1); inode->i_op->truncate_range(inode, offset, end); up_write(&inode->i_alloc_sem); mutex_unlock(&inode->i_mutex); @@ -2130,7 +2148,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, struct page *page; swp_entry_t entry; pte_t pte; - int ret = VM_FAULT_MINOR; + int ret = 0; if (!pte_unmap_same(mm, pmd, page_table, orig_pte)) goto out; @@ -2198,15 +2216,15 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, unlock_page(page); if (write_access) { + /* XXX: We could OR the do_wp_page code with this one? */ if (do_wp_page(mm, vma, address, - page_table, pmd, ptl, pte) == VM_FAULT_OOM) + page_table, pmd, ptl, pte) & VM_FAULT_OOM) ret = VM_FAULT_OOM; goto out; } /* No need to invalidate - it was non-present before */ update_mmu_cache(vma, address, pte); - lazy_mmu_prot_update(pte); unlock: pte_unmap_unlock(page_table, ptl); out: @@ -2271,7 +2289,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, lazy_mmu_prot_update(entry); unlock: pte_unmap_unlock(page_table, ptl); - return VM_FAULT_MINOR; + return 0; release: page_cache_release(page); goto unlock; @@ -2280,10 +2298,10 @@ oom: } /* - * do_no_page() tries to create a new page mapping. It aggressively + * __do_fault() tries to create a new page mapping. It aggressively * tries to share with existing pages, but makes a separate copy if - * the "write_access" parameter is true in order to avoid the next - * page fault. + * the FAULT_FLAG_WRITE is set in the flags parameter in order to avoid + * the next page fault. * * As this is called only for pages that do not currently exist, we * do not need to flush old virtual caches or the TLB. @@ -2292,90 +2310,100 @@ oom: * but allow concurrent faults), and pte mapped but not yet locked. * We return with mmap_sem still held, but pte unmapped and unlocked. */ -static int do_no_page(struct mm_struct *mm, struct vm_area_struct *vma, +static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, pte_t *page_table, pmd_t *pmd, - int write_access) + pgoff_t pgoff, unsigned int flags, pte_t orig_pte) { spinlock_t *ptl; - struct page *new_page; - struct address_space *mapping = NULL; + struct page *page; pte_t entry; - unsigned int sequence = 0; - int ret = VM_FAULT_MINOR; int anon = 0; struct page *dirty_page = NULL; + struct vm_fault vmf; + int ret; + + vmf.virtual_address = (void __user *)(address & PAGE_MASK); + vmf.pgoff = pgoff; + vmf.flags = flags; + vmf.page = NULL; pte_unmap(page_table); BUG_ON(vma->vm_flags & VM_PFNMAP); - if (vma->vm_file) { - mapping = vma->vm_file->f_mapping; - sequence = mapping->truncate_count; - smp_rmb(); /* serializes i_size against truncate_count */ + if (likely(vma->vm_ops->fault)) { + ret = vma->vm_ops->fault(vma, &vmf); + if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE))) + return ret; + } else { + /* Legacy ->nopage path */ + ret = 0; + vmf.page = vma->vm_ops->nopage(vma, address & PAGE_MASK, &ret); + /* no page was available -- either SIGBUS or OOM */ + if (unlikely(vmf.page == NOPAGE_SIGBUS)) + return VM_FAULT_SIGBUS; + else if (unlikely(vmf.page == NOPAGE_OOM)) + return VM_FAULT_OOM; } -retry: - new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, &ret); + /* - * No smp_rmb is needed here as long as there's a full - * spin_lock/unlock sequence inside the ->nopage callback - * (for the pagecache lookup) that acts as an implicit - * smp_mb() and prevents the i_size read to happen - * after the next truncate_count read. + * For consistency in subsequent calls, make the faulted page always + * locked. */ - - /* no page was available -- either SIGBUS, OOM or REFAULT */ - if (unlikely(new_page == NOPAGE_SIGBUS)) - return VM_FAULT_SIGBUS; - else if (unlikely(new_page == NOPAGE_OOM)) - return VM_FAULT_OOM; - else if (unlikely(new_page == NOPAGE_REFAULT)) - return VM_FAULT_MINOR; + if (unlikely(!(ret & VM_FAULT_LOCKED))) + lock_page(vmf.page); + else + VM_BUG_ON(!PageLocked(vmf.page)); /* * Should we do an early C-O-W break? */ - if (write_access) { + page = vmf.page; + if (flags & FAULT_FLAG_WRITE) { if (!(vma->vm_flags & VM_SHARED)) { - struct page *page; - - if (unlikely(anon_vma_prepare(vma))) - goto oom; + anon = 1; + if (unlikely(anon_vma_prepare(vma))) { + ret = VM_FAULT_OOM; + goto out; + } page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address); - if (!page) - goto oom; - copy_user_highpage(page, new_page, address, vma); - page_cache_release(new_page); - new_page = page; - anon = 1; - + if (!page) { + ret = VM_FAULT_OOM; + goto out; + } + copy_user_highpage(page, vmf.page, address, vma); } else { - /* if the page will be shareable, see if the backing + /* + * If the page will be shareable, see if the backing * address space wants to know that the page is about - * to become writable */ - if (vma->vm_ops->page_mkwrite && - vma->vm_ops->page_mkwrite(vma, new_page) < 0 - ) { - page_cache_release(new_page); - return VM_FAULT_SIGBUS; + * to become writable + */ + if (vma->vm_ops->page_mkwrite) { + unlock_page(page); + if (vma->vm_ops->page_mkwrite(vma, page) < 0) { + ret = VM_FAULT_SIGBUS; + anon = 1; /* no anon but release vmf.page */ + goto out_unlocked; + } + lock_page(page); + /* + * XXX: this is not quite right (racy vs + * invalidate) to unlock and relock the page + * like this, however a better fix requires + * reworking page_mkwrite locking API, which + * is better done later. + */ + if (!page->mapping) { + ret = 0; + anon = 1; /* no anon but release vmf.page */ + goto out; + } } } + } page_table = pte_offset_map_lock(mm, pmd, address, &ptl); - /* - * For a file-backed vma, someone could have truncated or otherwise - * invalidated this page. If unmap_mapping_range got called, - * retry getting the page. - */ - if (mapping && unlikely(sequence != mapping->truncate_count)) { - pte_unmap_unlock(page_table, ptl); - page_cache_release(new_page); - cond_resched(); - sequence = mapping->truncate_count; - smp_rmb(); - goto retry; - } /* * This silly early PAGE_DIRTY setting removes a race @@ -2388,45 +2416,63 @@ retry: * handle that later. */ /* Only go through if we didn't race with anybody else... */ - if (pte_none(*page_table)) { - flush_icache_page(vma, new_page); - entry = mk_pte(new_page, vma->vm_page_prot); - if (write_access) + if (likely(pte_same(*page_table, orig_pte))) { + flush_icache_page(vma, page); + entry = mk_pte(page, vma->vm_page_prot); + if (flags & FAULT_FLAG_WRITE) entry = maybe_mkwrite(pte_mkdirty(entry), vma); set_pte_at(mm, address, page_table, entry); if (anon) { - inc_mm_counter(mm, anon_rss); - lru_cache_add_active(new_page); - page_add_new_anon_rmap(new_page, vma, address); + inc_mm_counter(mm, anon_rss); + lru_cache_add_active(page); + page_add_new_anon_rmap(page, vma, address); } else { inc_mm_counter(mm, file_rss); - page_add_file_rmap(new_page); - if (write_access) { - dirty_page = new_page; + page_add_file_rmap(page); + if (flags & FAULT_FLAG_WRITE) { + dirty_page = page; get_page(dirty_page); } } + + /* no need to invalidate: a not-present page won't be cached */ + update_mmu_cache(vma, address, entry); + lazy_mmu_prot_update(entry); } else { - /* One of our sibling threads was faster, back out. */ - page_cache_release(new_page); - goto unlock; + if (anon) + page_cache_release(page); + else + anon = 1; /* no anon but release faulted_page */ } - /* no need to invalidate: a not-present page shouldn't be cached */ - update_mmu_cache(vma, address, entry); - lazy_mmu_prot_update(entry); -unlock: pte_unmap_unlock(page_table, ptl); - if (dirty_page) { + +out: + unlock_page(vmf.page); +out_unlocked: + if (anon) + page_cache_release(vmf.page); + else if (dirty_page) { set_page_dirty_balance(dirty_page); put_page(dirty_page); } + return ret; -oom: - page_cache_release(new_page); - return VM_FAULT_OOM; } +static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long address, pte_t *page_table, pmd_t *pmd, + int write_access, pte_t orig_pte) +{ + pgoff_t pgoff = (((address & PAGE_MASK) + - vma->vm_start) >> PAGE_CACHE_SHIFT) + vma->vm_pgoff; + unsigned int flags = (write_access ? FAULT_FLAG_WRITE : 0); + + return __do_fault(mm, vma, address, page_table, pmd, pgoff, + flags, orig_pte); +} + + /* * do_no_pfn() tries to create a new page mapping for a page without * a struct_page backing it @@ -2450,7 +2496,6 @@ static noinline int do_no_pfn(struct mm_struct *mm, struct vm_area_struct *vma, spinlock_t *ptl; pte_t entry; unsigned long pfn; - int ret = VM_FAULT_MINOR; pte_unmap(page_table); BUG_ON(!(vma->vm_flags & VM_PFNMAP)); @@ -2462,7 +2507,7 @@ static noinline int do_no_pfn(struct mm_struct *mm, struct vm_area_struct *vma, else if (unlikely(pfn == NOPFN_SIGBUS)) return VM_FAULT_SIGBUS; else if (unlikely(pfn == NOPFN_REFAULT)) - return VM_FAULT_MINOR; + return 0; page_table = pte_offset_map_lock(mm, pmd, address, &ptl); @@ -2474,7 +2519,7 @@ static noinline int do_no_pfn(struct mm_struct *mm, struct vm_area_struct *vma, set_pte_at(mm, address, page_table, entry); } pte_unmap_unlock(page_table, ptl); - return ret; + return 0; } /* @@ -2486,33 +2531,30 @@ static noinline int do_no_pfn(struct mm_struct *mm, struct vm_area_struct *vma, * but allow concurrent faults), and pte mapped but not yet locked. * We return with mmap_sem still held, but pte unmapped and unlocked. */ -static int do_file_page(struct mm_struct *mm, struct vm_area_struct *vma, +static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, pte_t *page_table, pmd_t *pmd, int write_access, pte_t orig_pte) { + unsigned int flags = FAULT_FLAG_NONLINEAR | + (write_access ? FAULT_FLAG_WRITE : 0); pgoff_t pgoff; - int err; if (!pte_unmap_same(mm, pmd, page_table, orig_pte)) - return VM_FAULT_MINOR; + return 0; - if (unlikely(!(vma->vm_flags & VM_NONLINEAR))) { + if (unlikely(!(vma->vm_flags & VM_NONLINEAR) || + !(vma->vm_flags & VM_CAN_NONLINEAR))) { /* * Page table corrupted: show pte and kill process. */ print_bad_pte(vma, orig_pte, address); return VM_FAULT_OOM; } - /* We can then assume vm->vm_ops && vma->vm_ops->populate */ pgoff = pte_to_pgoff(orig_pte); - err = vma->vm_ops->populate(vma, address & PAGE_MASK, PAGE_SIZE, - vma->vm_page_prot, pgoff, 0); - if (err == -ENOMEM) - return VM_FAULT_OOM; - if (err) - return VM_FAULT_SIGBUS; - return VM_FAULT_MAJOR; + + return __do_fault(mm, vma, address, page_table, pmd, pgoff, + flags, orig_pte); } /* @@ -2539,10 +2581,9 @@ static inline int handle_pte_fault(struct mm_struct *mm, if (!pte_present(entry)) { if (pte_none(entry)) { if (vma->vm_ops) { - if (vma->vm_ops->nopage) - return do_no_page(mm, vma, address, - pte, pmd, - write_access); + if (vma->vm_ops->fault || vma->vm_ops->nopage) + return do_linear_fault(mm, vma, address, + pte, pmd, write_access, entry); if (unlikely(vma->vm_ops->nopfn)) return do_no_pfn(mm, vma, address, pte, pmd, write_access); @@ -2551,7 +2592,7 @@ static inline int handle_pte_fault(struct mm_struct *mm, pte, pmd, write_access); } if (pte_file(entry)) - return do_file_page(mm, vma, address, + return do_nonlinear_fault(mm, vma, address, pte, pmd, write_access, entry); return do_swap_page(mm, vma, address, pte, pmd, write_access, entry); @@ -2583,13 +2624,13 @@ static inline int handle_pte_fault(struct mm_struct *mm, } unlock: pte_unmap_unlock(pte, ptl); - return VM_FAULT_MINOR; + return 0; } /* * By the time we get here, we already hold the mm semaphore */ -int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, +int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write_access) { pgd_t *pgd; @@ -2618,7 +2659,7 @@ int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, return handle_pte_fault(mm, vma, address, pte, pmd, write_access); } -EXPORT_SYMBOL_GPL(__handle_mm_fault); +EXPORT_SYMBOL_GPL(handle_mm_fault); #ifndef __PAGETABLE_PUD_FOLDED /* @@ -2824,3 +2865,4 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in return buf - old_buf; } +EXPORT_SYMBOL_GPL(access_process_vm); diff --git a/mm/mmap.c b/mm/mmap.c index 144b4a290f2c..7afc7a7cec6f 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1165,12 +1165,8 @@ out: mm->locked_vm += len >> PAGE_SHIFT; make_pages_present(addr, addr + len); } - if (flags & MAP_POPULATE) { - up_write(&mm->mmap_sem); - sys_remap_file_pages(addr, len, 0, - pgoff, flags & MAP_NONBLOCK); - down_write(&mm->mmap_sem); - } + if ((flags & MAP_POPULATE) && !(flags & MAP_NONBLOCK)) + make_pages_present(addr, addr + len); return addr; unmap_and_free_vma: @@ -1575,33 +1571,11 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address) } #endif /* CONFIG_STACK_GROWSUP || CONFIG_IA64 */ -#ifdef CONFIG_STACK_GROWSUP -int expand_stack(struct vm_area_struct *vma, unsigned long address) -{ - return expand_upwards(vma, address); -} - -struct vm_area_struct * -find_extend_vma(struct mm_struct *mm, unsigned long addr) -{ - struct vm_area_struct *vma, *prev; - - addr &= PAGE_MASK; - vma = find_vma_prev(mm, addr, &prev); - if (vma && (vma->vm_start <= addr)) - return vma; - if (!prev || expand_stack(prev, addr)) - return NULL; - if (prev->vm_flags & VM_LOCKED) { - make_pages_present(addr, prev->vm_end); - } - return prev; -} -#else /* * vma is the first one with address < vma->vm_start. Have to extend vma. */ -int expand_stack(struct vm_area_struct *vma, unsigned long address) +static inline int expand_downwards(struct vm_area_struct *vma, + unsigned long address) { int error; @@ -1638,6 +1612,38 @@ int expand_stack(struct vm_area_struct *vma, unsigned long address) return error; } +int expand_stack_downwards(struct vm_area_struct *vma, unsigned long address) +{ + return expand_downwards(vma, address); +} + +#ifdef CONFIG_STACK_GROWSUP +int expand_stack(struct vm_area_struct *vma, unsigned long address) +{ + return expand_upwards(vma, address); +} + +struct vm_area_struct * +find_extend_vma(struct mm_struct *mm, unsigned long addr) +{ + struct vm_area_struct *vma, *prev; + + addr &= PAGE_MASK; + vma = find_vma_prev(mm, addr, &prev); + if (vma && (vma->vm_start <= addr)) + return vma; + if (!prev || expand_stack(prev, addr)) + return NULL; + if (prev->vm_flags & VM_LOCKED) + make_pages_present(addr, prev->vm_end); + return prev; +} +#else +int expand_stack(struct vm_area_struct *vma, unsigned long address) +{ + return expand_downwards(vma, address); +} + struct vm_area_struct * find_extend_vma(struct mm_struct * mm, unsigned long addr) { @@ -1655,9 +1661,8 @@ find_extend_vma(struct mm_struct * mm, unsigned long addr) start = vma->vm_start; if (expand_stack(vma, addr)) return NULL; - if (vma->vm_flags & VM_LOCKED) { + if (vma->vm_flags & VM_LOCKED) make_pages_present(addr, start); - } return vma; } #endif diff --git a/mm/mprotect.c b/mm/mprotect.c index 3b8f3c0c63f3..e8346c30abec 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -128,7 +128,7 @@ static void change_protection(struct vm_area_struct *vma, flush_tlb_range(vma, start, end); } -static int +int mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev, unsigned long start, unsigned long end, unsigned long newflags) { diff --git a/mm/mremap.c b/mm/mremap.c index bc7c52efc71b..8ea5c2412c6e 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -120,7 +120,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, #define LATENCY_LIMIT (64 * PAGE_SIZE) -static unsigned long move_page_tables(struct vm_area_struct *vma, +unsigned long move_page_tables(struct vm_area_struct *vma, unsigned long old_addr, struct vm_area_struct *new_vma, unsigned long new_addr, unsigned long len) { diff --git a/mm/nommu.c b/mm/nommu.c index 8bbbf147a794..1b105d28949f 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1341,11 +1341,10 @@ int in_gate_area_no_task(unsigned long addr) return 0; } -struct page *filemap_nopage(struct vm_area_struct *area, - unsigned long address, int *type) +int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { BUG(); - return NULL; + return 0; } /* diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 886ea0d5a136..63512a9ed57e 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -918,6 +918,9 @@ int clear_page_dirty_for_io(struct page *page) { struct address_space *mapping = page_mapping(page); + BUG_ON(!PageLocked(page)); + + ClearPageReclaim(page); if (mapping && mapping_cap_account_dirty(mapping)) { /* * Yes, Virginia, this is indeed insane. @@ -943,14 +946,19 @@ int clear_page_dirty_for_io(struct page *page) * We basically use the page "master dirty bit" * as a serialization point for all the different * threads doing their things. - * - * FIXME! We still have a race here: if somebody - * adds the page back to the page tables in - * between the "page_mkclean()" and the "TestClearPageDirty()", - * we might have it mapped without the dirty bit set. */ if (page_mkclean(page)) set_page_dirty(page); + /* + * We carefully synchronise fault handlers against + * installing a dirty pte and marking the page dirty + * at this point. We do this by having them hold the + * page lock at some point after installing their + * pte, but before marking the page dirty. + * Pages are always locked coming in here, so we get + * the desired exclusion. See mm/memory.c:do_wp_page() + * for more comments. + */ if (TestClearPageDirty(page)) { dec_zone_page_state(page, NR_FILE_DIRTY); return 1; @@ -979,6 +987,8 @@ int test_clear_page_writeback(struct page *page) } else { ret = TestClearPageWriteback(page); } + if (ret) + dec_zone_page_state(page, NR_WRITEBACK); return ret; } @@ -1004,6 +1014,8 @@ int test_set_page_writeback(struct page *page) } else { ret = TestSetPageWriteback(page); } + if (!ret) + inc_zone_page_state(page, NR_WRITEBACK); return ret; } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index e2a10b957f23..43cb3b3e1679 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -453,12 +453,6 @@ static inline int free_pages_check(struct page *page) 1 << PG_reserved | 1 << PG_buddy )))) bad_page(page); - /* - * PageReclaim == PageTail. It is only an error - * for PageReclaim to be set if PageCompound is clear. - */ - if (unlikely(!PageCompound(page) && PageReclaim(page))) - bad_page(page); if (PageDirty(page)) __ClearPageDirty(page); /* @@ -602,7 +596,6 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags) 1 << PG_locked | 1 << PG_active | 1 << PG_dirty | - 1 << PG_reclaim | 1 << PG_slab | 1 << PG_swapcache | 1 << PG_writeback | @@ -617,7 +610,7 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags) if (PageReserved(page)) return 1; - page->flags &= ~(1 << PG_uptodate | 1 << PG_error | + page->flags &= ~(1 << PG_uptodate | 1 << PG_error | 1 << PG_readahead | 1 << PG_referenced | 1 << PG_arch_1 | 1 << PG_owner_priv_1 | 1 << PG_mappedtodisk); set_page_private(page, 0); diff --git a/mm/readahead.c b/mm/readahead.c index 9861e883fe57..39bf45d43320 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -21,8 +21,16 @@ void default_unplug_io_fn(struct backing_dev_info *bdi, struct page *page) } EXPORT_SYMBOL(default_unplug_io_fn); +/* + * Convienent macros for min/max read-ahead pages. + * Note that MAX_RA_PAGES is rounded down, while MIN_RA_PAGES is rounded up. + * The latter is necessary for systems with large page size(i.e. 64k). + */ +#define MAX_RA_PAGES (VM_MAX_READAHEAD*1024 / PAGE_CACHE_SIZE) +#define MIN_RA_PAGES DIV_ROUND_UP(VM_MIN_READAHEAD*1024, PAGE_CACHE_SIZE) + struct backing_dev_info default_backing_dev_info = { - .ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE, + .ra_pages = MAX_RA_PAGES, .state = 0, .capabilities = BDI_CAP_MAP_COPY, .unplug_io_fn = default_unplug_io_fn, @@ -41,82 +49,6 @@ file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping) } EXPORT_SYMBOL_GPL(file_ra_state_init); -/* - * Return max readahead size for this inode in number-of-pages. - */ -static inline unsigned long get_max_readahead(struct file_ra_state *ra) -{ - return ra->ra_pages; -} - -static inline unsigned long get_min_readahead(struct file_ra_state *ra) -{ - return (VM_MIN_READAHEAD * 1024) / PAGE_CACHE_SIZE; -} - -static inline void reset_ahead_window(struct file_ra_state *ra) -{ - /* - * ... but preserve ahead_start + ahead_size value, - * see 'recheck:' label in page_cache_readahead(). - * Note: We never use ->ahead_size as rvalue without - * checking ->ahead_start != 0 first. - */ - ra->ahead_size += ra->ahead_start; - ra->ahead_start = 0; -} - -static inline void ra_off(struct file_ra_state *ra) -{ - ra->start = 0; - ra->flags = 0; - ra->size = 0; - reset_ahead_window(ra); - return; -} - -/* - * Set the initial window size, round to next power of 2 and square - * for small size, x 4 for medium, and x 2 for large - * for 128k (32 page) max ra - * 1-8 page = 32k initial, > 8 page = 128k initial - */ -static unsigned long get_init_ra_size(unsigned long size, unsigned long max) -{ - unsigned long newsize = roundup_pow_of_two(size); - - if (newsize <= max / 32) - newsize = newsize * 4; - else if (newsize <= max / 4) - newsize = newsize * 2; - else - newsize = max; - return newsize; -} - -/* - * Set the new window size, this is called only when I/O is to be submitted, - * not for each call to readahead. If a cache miss occured, reduce next I/O - * size, else increase depending on how close to max we are. - */ -static inline unsigned long get_next_ra_size(struct file_ra_state *ra) -{ - unsigned long max = get_max_readahead(ra); - unsigned long min = get_min_readahead(ra); - unsigned long cur = ra->size; - unsigned long newsize; - - if (ra->flags & RA_FLAG_MISS) { - ra->flags &= ~RA_FLAG_MISS; - newsize = max((cur - 2), min); - } else if (cur < max / 16) { - newsize = 4 * cur; - } else { - newsize = 2 * cur; - } - return min(newsize, max); -} - #define list_to_page(head) (list_entry((head)->prev, struct page, lru)) /** @@ -193,66 +125,6 @@ out: } /* - * Readahead design. - * - * The fields in struct file_ra_state represent the most-recently-executed - * readahead attempt: - * - * start: Page index at which we started the readahead - * size: Number of pages in that read - * Together, these form the "current window". - * Together, start and size represent the `readahead window'. - * prev_index: The page which the readahead algorithm most-recently inspected. - * It is mainly used to detect sequential file reading. - * If page_cache_readahead sees that it is again being called for - * a page which it just looked at, it can return immediately without - * making any state changes. - * offset: Offset in the prev_index where the last read ended - used for - * detection of sequential file reading. - * ahead_start, - * ahead_size: Together, these form the "ahead window". - * ra_pages: The externally controlled max readahead for this fd. - * - * When readahead is in the off state (size == 0), readahead is disabled. - * In this state, prev_index is used to detect the resumption of sequential I/O. - * - * The readahead code manages two windows - the "current" and the "ahead" - * windows. The intent is that while the application is walking the pages - * in the current window, I/O is underway on the ahead window. When the - * current window is fully traversed, it is replaced by the ahead window - * and the ahead window is invalidated. When this copying happens, the - * new current window's pages are probably still locked. So - * we submit a new batch of I/O immediately, creating a new ahead window. - * - * So: - * - * ----|----------------|----------------|----- - * ^start ^start+size - * ^ahead_start ^ahead_start+ahead_size - * - * ^ When this page is read, we submit I/O for the - * ahead window. - * - * A `readahead hit' occurs when a read request is made against a page which is - * the next sequential page. Ahead window calculations are done only when it - * is time to submit a new IO. The code ramps up the size agressively at first, - * but slow down as it approaches max_readhead. - * - * Any seek/ramdom IO will result in readahead being turned off. It will resume - * at the first sequential access. - * - * There is a special-case: if the first page which the application tries to - * read happens to be the first page of the file, it is assumed that a linear - * read is about to happen and the window is immediately set to the initial size - * based on I/O request size and the max_readahead. - * - * This function is to be called for every read request, rather than when - * it is time to perform readahead. It is called only once for the entire I/O - * regardless of size unless readahead is unable to start enough I/O to satisfy - * the request (I/O request > max_readahead). - */ - -/* * do_page_cache_readahead actually reads a chunk of disk. It allocates all * the pages first, then submits them all for I/O. This avoids the very bad * behaviour which would occur if page allocations are causing VM writeback. @@ -265,7 +137,8 @@ out: */ static int __do_page_cache_readahead(struct address_space *mapping, struct file *filp, - pgoff_t offset, unsigned long nr_to_read) + pgoff_t offset, unsigned long nr_to_read, + unsigned long lookahead_size) { struct inode *inode = mapping->host; struct page *page; @@ -278,7 +151,7 @@ __do_page_cache_readahead(struct address_space *mapping, struct file *filp, if (isize == 0) goto out; - end_index = ((isize - 1) >> PAGE_CACHE_SHIFT); + end_index = ((isize - 1) >> PAGE_CACHE_SHIFT); /* * Preallocate as many pages as we will need. @@ -286,7 +159,7 @@ __do_page_cache_readahead(struct address_space *mapping, struct file *filp, read_lock_irq(&mapping->tree_lock); for (page_idx = 0; page_idx < nr_to_read; page_idx++) { pgoff_t page_offset = offset + page_idx; - + if (page_offset > end_index) break; @@ -301,6 +174,8 @@ __do_page_cache_readahead(struct address_space *mapping, struct file *filp, break; page->index = page_offset; list_add(&page->lru, &page_pool); + if (page_idx == nr_to_read - lookahead_size) + SetPageReadahead(page); ret++; } read_unlock_irq(&mapping->tree_lock); @@ -337,7 +212,7 @@ int force_page_cache_readahead(struct address_space *mapping, struct file *filp, if (this_chunk > nr_to_read) this_chunk = nr_to_read; err = __do_page_cache_readahead(mapping, filp, - offset, this_chunk); + offset, this_chunk, 0); if (err < 0) { ret = err; break; @@ -350,28 +225,6 @@ int force_page_cache_readahead(struct address_space *mapping, struct file *filp, } /* - * Check how effective readahead is being. If the amount of started IO is - * less than expected then the file is partly or fully in pagecache and - * readahead isn't helping. - * - */ -static inline int check_ra_success(struct file_ra_state *ra, - unsigned long nr_to_read, unsigned long actual) -{ - if (actual == 0) { - ra->cache_hit += nr_to_read; - if (ra->cache_hit >= VM_MAX_CACHE_HIT) { - ra_off(ra); - ra->flags |= RA_FLAG_INCACHE; - return 0; - } - } else { - ra->cache_hit=0; - } - return 1; -} - -/* * This version skips the IO if the queue is read-congested, and will tell the * block layer to abandon the readahead if request allocation would block. * @@ -384,200 +237,237 @@ int do_page_cache_readahead(struct address_space *mapping, struct file *filp, if (bdi_read_congested(mapping->backing_dev_info)) return -1; - return __do_page_cache_readahead(mapping, filp, offset, nr_to_read); + return __do_page_cache_readahead(mapping, filp, offset, nr_to_read, 0); } /* - * Read 'nr_to_read' pages starting at page 'offset'. If the flag 'block' - * is set wait till the read completes. Otherwise attempt to read without - * blocking. - * Returns 1 meaning 'success' if read is successful without switching off - * readahead mode. Otherwise return failure. + * Given a desired number of PAGE_CACHE_SIZE readahead pages, return a + * sensible upper limit. */ -static int -blockable_page_cache_readahead(struct address_space *mapping, struct file *filp, - pgoff_t offset, unsigned long nr_to_read, - struct file_ra_state *ra, int block) +unsigned long max_sane_readahead(unsigned long nr) +{ + return min(nr, (node_page_state(numa_node_id(), NR_INACTIVE) + + node_page_state(numa_node_id(), NR_FREE_PAGES)) / 2); +} + +/* + * Submit IO for the read-ahead request in file_ra_state. + */ +static unsigned long ra_submit(struct file_ra_state *ra, + struct address_space *mapping, struct file *filp) { int actual; - if (!block && bdi_read_congested(mapping->backing_dev_info)) - return 0; + actual = __do_page_cache_readahead(mapping, filp, + ra->start, ra->size, ra->async_size); + + return actual; +} - actual = __do_page_cache_readahead(mapping, filp, offset, nr_to_read); +/* + * Set the initial window size, round to next power of 2 and square + * for small size, x 4 for medium, and x 2 for large + * for 128k (32 page) max ra + * 1-8 page = 32k initial, > 8 page = 128k initial + */ +static unsigned long get_init_ra_size(unsigned long size, unsigned long max) +{ + unsigned long newsize = roundup_pow_of_two(size); - return check_ra_success(ra, nr_to_read, actual); + if (newsize <= max / 32) + newsize = newsize * 4; + else if (newsize <= max / 4) + newsize = newsize * 2; + else + newsize = max; + + return newsize; } -static int make_ahead_window(struct address_space *mapping, struct file *filp, - struct file_ra_state *ra, int force) +/* + * Get the previous window size, ramp it up, and + * return it as the new window size. + */ +static unsigned long get_next_ra_size(struct file_ra_state *ra, + unsigned long max) { - int block, ret; - - ra->ahead_size = get_next_ra_size(ra); - ra->ahead_start = ra->start + ra->size; - - block = force || (ra->prev_index >= ra->ahead_start); - ret = blockable_page_cache_readahead(mapping, filp, - ra->ahead_start, ra->ahead_size, ra, block); - - if (!ret && !force) { - /* A read failure in blocking mode, implies pages are - * all cached. So we can safely assume we have taken - * care of all the pages requested in this call. - * A read failure in non-blocking mode, implies we are - * reading more pages than requested in this call. So - * we safely assume we have taken care of all the pages - * requested in this call. - * - * Just reset the ahead window in case we failed due to - * congestion. The ahead window will any way be closed - * in case we failed due to excessive page cache hits. - */ - reset_ahead_window(ra); - } + unsigned long cur = ra->size; + unsigned long newsize; - return ret; + if (cur < max / 16) + newsize = 4 * cur; + else + newsize = 2 * cur; + + return min(newsize, max); } -/** - * page_cache_readahead - generic adaptive readahead - * @mapping: address_space which holds the pagecache and I/O vectors - * @ra: file_ra_state which holds the readahead state - * @filp: passed on to ->readpage() and ->readpages() - * @offset: start offset into @mapping, in PAGE_CACHE_SIZE units - * @req_size: hint: total size of the read which the caller is performing in - * PAGE_CACHE_SIZE units +/* + * On-demand readahead design. + * + * The fields in struct file_ra_state represent the most-recently-executed + * readahead attempt: + * + * |<----- async_size ---------| + * |------------------- size -------------------->| + * |==================#===========================| + * ^start ^page marked with PG_readahead * - * page_cache_readahead() is the main function. If performs the adaptive - * readahead window size management and submits the readahead I/O. + * To overlap application thinking time and disk I/O time, we do + * `readahead pipelining': Do not wait until the application consumed all + * readahead pages and stalled on the missing page at readahead_index; + * Instead, submit an asynchronous readahead I/O as soon as there are + * only async_size pages left in the readahead window. Normally async_size + * will be equal to size, for maximum pipelining. * - * Note that @filp is purely used for passing on to the ->readpage[s]() - * handler: it may refer to a different file from @mapping (so we may not use - * @filp->f_mapping or @filp->f_path.dentry->d_inode here). - * Also, @ra may not be equal to &@filp->f_ra. + * In interleaved sequential reads, concurrent streams on the same fd can + * be invalidating each other's readahead state. So we flag the new readahead + * page at (start+size-async_size) with PG_readahead, and use it as readahead + * indicator. The flag won't be set on already cached pages, to avoid the + * readahead-for-nothing fuss, saving pointless page cache lookups. + * + * prev_index tracks the last visited page in the _previous_ read request. + * It should be maintained by the caller, and will be used for detecting + * small random reads. Note that the readahead algorithm checks loosely + * for sequential patterns. Hence interleaved reads might be served as + * sequential ones. + * + * There is a special-case: if the first page which the application tries to + * read happens to be the first page of the file, it is assumed that a linear + * read is about to happen and the window is immediately set to the initial size + * based on I/O request size and the max_readahead. * + * The code ramps up the readahead size aggressively at first, but slow down as + * it approaches max_readhead. + */ + +/* + * A minimal readahead algorithm for trivial sequential/random reads. */ -unsigned long -page_cache_readahead(struct address_space *mapping, struct file_ra_state *ra, - struct file *filp, pgoff_t offset, unsigned long req_size) +static unsigned long +ondemand_readahead(struct address_space *mapping, + struct file_ra_state *ra, struct file *filp, + bool hit_readahead_marker, pgoff_t offset, + unsigned long req_size) { - unsigned long max, newsize; + unsigned long max; /* max readahead pages */ int sequential; - /* - * We avoid doing extra work and bogusly perturbing the readahead - * window expansion logic. - */ - if (offset == ra->prev_index && --req_size) - ++offset; - - /* Note that prev_index == -1 if it is a first read */ - sequential = (offset == ra->prev_index + 1); - ra->prev_index = offset; - ra->prev_offset = 0; - - max = get_max_readahead(ra); - newsize = min(req_size, max); - - /* No readahead or sub-page sized read or file already in cache */ - if (newsize == 0 || (ra->flags & RA_FLAG_INCACHE)) - goto out; - - ra->prev_index += newsize - 1; + max = ra->ra_pages; + sequential = (offset - ra->prev_index <= 1UL) || (req_size > max); /* - * Special case - first read at start of file. We'll assume it's - * a whole-file read and grow the window fast. Or detect first - * sequential access + * It's the expected callback offset, assume sequential access. + * Ramp up sizes, and push forward the readahead window. */ - if (sequential && ra->size == 0) { - ra->size = get_init_ra_size(newsize, max); - ra->start = offset; - if (!blockable_page_cache_readahead(mapping, filp, offset, - ra->size, ra, 1)) - goto out; - - /* - * If the request size is larger than our max readahead, we - * at least want to be sure that we get 2 IOs in flight and - * we know that we will definitly need the new I/O. - * once we do this, subsequent calls should be able to overlap - * IOs,* thus preventing stalls. so issue the ahead window - * immediately. - */ - if (req_size >= max) - make_ahead_window(mapping, filp, ra, 1); - - goto out; + if (offset && (offset == (ra->start + ra->size - ra->async_size) || + offset == (ra->start + ra->size))) { + ra->start += ra->size; + ra->size = get_next_ra_size(ra, max); + ra->async_size = ra->size; + goto readit; } /* - * Now handle the random case: - * partial page reads and first access were handled above, - * so this must be the next page otherwise it is random + * Standalone, small read. + * Read as is, and do not pollute the readahead state. */ - if (!sequential) { - ra_off(ra); - blockable_page_cache_readahead(mapping, filp, offset, - newsize, ra, 1); - goto out; + if (!hit_readahead_marker && !sequential) { + return __do_page_cache_readahead(mapping, filp, + offset, req_size, 0); } /* - * If we get here we are doing sequential IO and this was not the first - * occurence (ie we have an existing window) + * It may be one of + * - first read on start of file + * - sequential cache miss + * - oversize random read + * Start readahead for it. */ - if (ra->ahead_start == 0) { /* no ahead window yet */ - if (!make_ahead_window(mapping, filp, ra, 0)) - goto recheck; - } + ra->start = offset; + ra->size = get_init_ra_size(req_size, max); + ra->async_size = ra->size > req_size ? ra->size - req_size : ra->size; /* - * Already have an ahead window, check if we crossed into it. - * If so, shift windows and issue a new ahead window. - * Only return the #pages that are in the current window, so that - * we get called back on the first page of the ahead window which - * will allow us to submit more IO. + * Hit on a marked page without valid readahead state. + * E.g. interleaved reads. + * Not knowing its readahead pos/size, bet on the minimal possible one. */ - if (ra->prev_index >= ra->ahead_start) { - ra->start = ra->ahead_start; - ra->size = ra->ahead_size; - make_ahead_window(mapping, filp, ra, 0); -recheck: - /* prev_index shouldn't overrun the ahead window */ - ra->prev_index = min(ra->prev_index, - ra->ahead_start + ra->ahead_size - 1); + if (hit_readahead_marker) { + ra->start++; + ra->size = get_next_ra_size(ra, max); } -out: - return ra->prev_index + 1; +readit: + return ra_submit(ra, mapping, filp); } -EXPORT_SYMBOL_GPL(page_cache_readahead); -/* - * handle_ra_miss() is called when it is known that a page which should have - * been present in the pagecache (we just did some readahead there) was in fact - * not found. This will happen if it was evicted by the VM (readahead - * thrashing) +/** + * page_cache_sync_readahead - generic file readahead + * @mapping: address_space which holds the pagecache and I/O vectors + * @ra: file_ra_state which holds the readahead state + * @filp: passed on to ->readpage() and ->readpages() + * @offset: start offset into @mapping, in pagecache page-sized units + * @req_size: hint: total size of the read which the caller is performing in + * pagecache pages * - * Turn on the cache miss flag in the RA struct, this will cause the RA code - * to reduce the RA size on the next read. + * page_cache_sync_readahead() should be called when a cache miss happened: + * it will submit the read. The readahead logic may decide to piggyback more + * pages onto the read request if access patterns suggest it will improve + * performance. */ -void handle_ra_miss(struct address_space *mapping, - struct file_ra_state *ra, pgoff_t offset) +void page_cache_sync_readahead(struct address_space *mapping, + struct file_ra_state *ra, struct file *filp, + pgoff_t offset, unsigned long req_size) { - ra->flags |= RA_FLAG_MISS; - ra->flags &= ~RA_FLAG_INCACHE; - ra->cache_hit = 0; + /* no read-ahead */ + if (!ra->ra_pages) + return; + + /* do read-ahead */ + ondemand_readahead(mapping, ra, filp, false, offset, req_size); } +EXPORT_SYMBOL_GPL(page_cache_sync_readahead); -/* - * Given a desired number of PAGE_CACHE_SIZE readahead pages, return a - * sensible upper limit. - */ -unsigned long max_sane_readahead(unsigned long nr) +/** + * page_cache_async_readahead - file readahead for marked pages + * @mapping: address_space which holds the pagecache and I/O vectors + * @ra: file_ra_state which holds the readahead state + * @filp: passed on to ->readpage() and ->readpages() + * @page: the page at @offset which has the PG_readahead flag set + * @offset: start offset into @mapping, in pagecache page-sized units + * @req_size: hint: total size of the read which the caller is performing in + * pagecache pages + * + * page_cache_async_ondemand() should be called when a page is used which + * has the PG_readahead flag: this is a marker to suggest that the application + * has used up enough of the readahead window that we should start pulling in + * more pages. */ +void +page_cache_async_readahead(struct address_space *mapping, + struct file_ra_state *ra, struct file *filp, + struct page *page, pgoff_t offset, + unsigned long req_size) { - return min(nr, (node_page_state(numa_node_id(), NR_INACTIVE) - + node_page_state(numa_node_id(), NR_FREE_PAGES)) / 2); + /* no read-ahead */ + if (!ra->ra_pages) + return; + + /* + * Same bit is used for PG_readahead and PG_reclaim. + */ + if (PageWriteback(page)) + return; + + ClearPageReadahead(page); + + /* + * Defer asynchronous read-ahead on IO congestion. + */ + if (bdi_read_congested(mapping->backing_dev_info)) + return; + + /* do read-ahead */ + ondemand_readahead(mapping, ra, filp, true, offset, req_size); } +EXPORT_SYMBOL_GPL(page_cache_async_readahead); diff --git a/mm/rmap.c b/mm/rmap.c index 61e492597a0b..fede5c7910be 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -621,8 +621,10 @@ void page_remove_rmap(struct page *page, struct vm_area_struct *vma) printk (KERN_EMERG " page->count = %x\n", page_count(page)); printk (KERN_EMERG " page->mapping = %p\n", page->mapping); print_symbol (KERN_EMERG " vma->vm_ops = %s\n", (unsigned long)vma->vm_ops); - if (vma->vm_ops) + if (vma->vm_ops) { print_symbol (KERN_EMERG " vma->vm_ops->nopage = %s\n", (unsigned long)vma->vm_ops->nopage); + print_symbol (KERN_EMERG " vma->vm_ops->fault = %s\n", (unsigned long)vma->vm_ops->fault); + } if (vma->vm_file && vma->vm_file->f_op) print_symbol (KERN_EMERG " vma->vm_file->f_op->mmap = %s\n", (unsigned long)vma->vm_file->f_op->mmap); BUG(); diff --git a/mm/shmem.c b/mm/shmem.c index 96fa79fb6ad3..ad155c7745dc 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -83,6 +83,7 @@ enum sgp_type { SGP_READ, /* don't exceed i_size, don't allocate page */ SGP_CACHE, /* don't exceed i_size, may allocate page */ SGP_WRITE, /* may exceed i_size, may allocate page */ + SGP_FAULT, /* same as SGP_CACHE, return with page locked */ }; static int shmem_getpage(struct inode *inode, unsigned long idx, @@ -1100,6 +1101,10 @@ static int shmem_getpage(struct inode *inode, unsigned long idx, if (idx >= SHMEM_MAX_INDEX) return -EFBIG; + + if (type) + *type = 0; + /* * Normally, filepage is NULL on entry, and either found * uptodate immediately, or allocated and zeroed, or read @@ -1133,9 +1138,9 @@ repeat: if (!swappage) { shmem_swp_unmap(entry); /* here we actually do the io */ - if (type && *type == VM_FAULT_MINOR) { + if (type && !(*type & VM_FAULT_MAJOR)) { __count_vm_event(PGMAJFAULT); - *type = VM_FAULT_MAJOR; + *type |= VM_FAULT_MAJOR; } spin_unlock(&info->lock); swappage = shmem_swapin(info, swap, idx); @@ -1289,8 +1294,10 @@ repeat: } done: if (*pagep != filepage) { - unlock_page(filepage); *pagep = filepage; + if (sgp != SGP_FAULT) + unlock_page(filepage); + } return 0; @@ -1302,72 +1309,21 @@ failed: return error; } -static struct page *shmem_nopage(struct vm_area_struct *vma, - unsigned long address, int *type) +static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { struct inode *inode = vma->vm_file->f_path.dentry->d_inode; - struct page *page = NULL; - unsigned long idx; int error; + int ret; - idx = (address - vma->vm_start) >> PAGE_SHIFT; - idx += vma->vm_pgoff; - idx >>= PAGE_CACHE_SHIFT - PAGE_SHIFT; - if (((loff_t) idx << PAGE_CACHE_SHIFT) >= i_size_read(inode)) - return NOPAGE_SIGBUS; + if (((loff_t)vmf->pgoff << PAGE_CACHE_SHIFT) >= i_size_read(inode)) + return VM_FAULT_SIGBUS; - error = shmem_getpage(inode, idx, &page, SGP_CACHE, type); + error = shmem_getpage(inode, vmf->pgoff, &vmf->page, SGP_FAULT, &ret); if (error) - return (error == -ENOMEM)? NOPAGE_OOM: NOPAGE_SIGBUS; - - mark_page_accessed(page); - return page; -} - -static int shmem_populate(struct vm_area_struct *vma, - unsigned long addr, unsigned long len, - pgprot_t prot, unsigned long pgoff, int nonblock) -{ - struct inode *inode = vma->vm_file->f_path.dentry->d_inode; - struct mm_struct *mm = vma->vm_mm; - enum sgp_type sgp = nonblock? SGP_QUICK: SGP_CACHE; - unsigned long size; + return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS); - size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT; - if (pgoff >= size || pgoff + (len >> PAGE_SHIFT) > size) - return -EINVAL; - - while ((long) len > 0) { - struct page *page = NULL; - int err; - /* - * Will need changing if PAGE_CACHE_SIZE != PAGE_SIZE - */ - err = shmem_getpage(inode, pgoff, &page, sgp, NULL); - if (err) - return err; - /* Page may still be null, but only if nonblock was set. */ - if (page) { - mark_page_accessed(page); - err = install_page(mm, vma, addr, page, prot); - if (err) { - page_cache_release(page); - return err; - } - } else if (vma->vm_flags & VM_NONLINEAR) { - /* No page was found just because we can't read it in - * now (being here implies nonblock != 0), but the page - * may exist, so set the PTE to fault it in later. */ - err = install_file_pte(mm, vma, addr, pgoff, prot); - if (err) - return err; - } - - len -= PAGE_SIZE; - addr += PAGE_SIZE; - pgoff++; - } - return 0; + mark_page_accessed(vmf->page); + return ret | VM_FAULT_LOCKED; } #ifdef CONFIG_NUMA @@ -1414,6 +1370,7 @@ static int shmem_mmap(struct file *file, struct vm_area_struct *vma) { file_accessed(file); vma->vm_ops = &shmem_vm_ops; + vma->vm_flags |= VM_CAN_NONLINEAR; return 0; } @@ -2459,8 +2416,7 @@ static const struct super_operations shmem_ops = { }; static struct vm_operations_struct shmem_vm_ops = { - .nopage = shmem_nopage, - .populate = shmem_populate, + .fault = shmem_fault, #ifdef CONFIG_NUMA .set_policy = shmem_set_policy, .get_policy = shmem_get_policy, diff --git a/mm/slab.c b/mm/slab.c index 96d30ee256ef..88bc6336ce3d 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -1163,7 +1163,7 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb, struct kmem_cache *cachep; struct kmem_list3 *l3 = NULL; int node = cpu_to_node(cpu); - int memsize = sizeof(struct kmem_list3); + const int memsize = sizeof(struct kmem_list3); switch (action) { case CPU_LOCK_ACQUIRE: diff --git a/mm/truncate.c b/mm/truncate.c index f47e46d1be3b..5cdfbc1a59fd 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -82,7 +82,7 @@ EXPORT_SYMBOL(cancel_dirty_page); /* * If truncate cannot remove the fs-private metadata from the page, the page * becomes anonymous. It will be left on the LRU and may even be mapped into - * user pagetables if we're racing with filemap_nopage(). + * user pagetables if we're racing with filemap_fault(). * * We need to bale out if page->mapping is no longer equal to the original * mapping. This happens a) when the VM reclaimed the page while we waited on @@ -192,6 +192,11 @@ void truncate_inode_pages_range(struct address_space *mapping, unlock_page(page); continue; } + if (page_mapped(page)) { + unmap_mapping_range(mapping, + (loff_t)page_index<<PAGE_CACHE_SHIFT, + PAGE_CACHE_SIZE, 0); + } truncate_complete_page(mapping, page); unlock_page(page); } @@ -229,6 +234,11 @@ void truncate_inode_pages_range(struct address_space *mapping, break; lock_page(page); wait_on_page_writeback(page); + if (page_mapped(page)) { + unmap_mapping_range(mapping, + (loff_t)page->index<<PAGE_CACHE_SHIFT, + PAGE_CACHE_SIZE, 0); + } if (page->index > next) next = page->index; next++; @@ -405,7 +415,7 @@ int invalidate_inode_pages2_range(struct address_space *mapping, break; } wait_on_page_writeback(page); - while (page_mapped(page)) { + if (page_mapped(page)) { if (!did_range_unmap) { /* * Zap the rest of the file in one hit. @@ -425,6 +435,7 @@ int invalidate_inode_pages2_range(struct address_space *mapping, PAGE_CACHE_SIZE, 0); } } + BUG_ON(page_mapped(page)); ret = do_launder_page(mapping, page); if (ret == 0 && !invalidate_complete_page2(mapping, page)) ret = -EIO; diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 3130c343088f..3cee76a8c9f0 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -164,6 +164,7 @@ int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages) flush_cache_vmap((unsigned long) area->addr, end); return err; } +EXPORT_SYMBOL_GPL(map_vm_area); static struct vm_struct *__get_vm_area_node(unsigned long size, unsigned long flags, unsigned long start, unsigned long end, @@ -242,6 +243,7 @@ struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags, { return __get_vm_area_node(size, flags, start, end, -1, GFP_KERNEL); } +EXPORT_SYMBOL_GPL(__get_vm_area); /** * get_vm_area - reserve a contingous kernel virtual area @@ -583,9 +585,9 @@ void *vmalloc_exec(unsigned long size) } #if defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA32) -#define GFP_VMALLOC32 GFP_DMA32 +#define GFP_VMALLOC32 GFP_DMA32 | GFP_KERNEL #elif defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA) -#define GFP_VMALLOC32 GFP_DMA +#define GFP_VMALLOC32 GFP_DMA | GFP_KERNEL #else #define GFP_VMALLOC32 GFP_KERNEL #endif diff --git a/net/core/sock.c b/net/core/sock.c index 091032a250c7..25d2557211c1 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -171,6 +171,19 @@ static const char *af_family_slock_key_strings[AF_MAX+1] = { "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" , "slock-AF_RXRPC" , "slock-AF_MAX" }; +static const char *af_family_clock_key_strings[AF_MAX+1] = { + "clock-AF_UNSPEC", "clock-AF_UNIX" , "clock-AF_INET" , + "clock-AF_AX25" , "clock-AF_IPX" , "clock-AF_APPLETALK", + "clock-AF_NETROM", "clock-AF_BRIDGE" , "clock-AF_ATMPVC" , + "clock-AF_X25" , "clock-AF_INET6" , "clock-AF_ROSE" , + "clock-AF_DECnet", "clock-AF_NETBEUI" , "clock-AF_SECURITY" , + "clock-AF_KEY" , "clock-AF_NETLINK" , "clock-AF_PACKET" , + "clock-AF_ASH" , "clock-AF_ECONET" , "clock-AF_ATMSVC" , + "clock-21" , "clock-AF_SNA" , "clock-AF_IRDA" , + "clock-AF_PPPOX" , "clock-AF_WANPIPE" , "clock-AF_LLC" , + "clock-27" , "clock-28" , "clock-29" , + "clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_MAX" +}; #endif /* @@ -941,8 +954,9 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority) rwlock_init(&newsk->sk_dst_lock); rwlock_init(&newsk->sk_callback_lock); - lockdep_set_class(&newsk->sk_callback_lock, - af_callback_keys + newsk->sk_family); + lockdep_set_class_and_name(&newsk->sk_callback_lock, + af_callback_keys + newsk->sk_family, + af_family_clock_key_strings[newsk->sk_family]); newsk->sk_dst_cache = NULL; newsk->sk_wmem_queued = 0; @@ -1530,8 +1544,9 @@ void sock_init_data(struct socket *sock, struct sock *sk) rwlock_init(&sk->sk_dst_lock); rwlock_init(&sk->sk_callback_lock); - lockdep_set_class(&sk->sk_callback_lock, - af_callback_keys + sk->sk_family); + lockdep_set_class_and_name(&sk->sk_callback_lock, + af_callback_keys + sk->sk_family, + af_family_clock_key_strings[sk->sk_family]); sk->sk_state_change = sock_def_wakeup; sk->sk_data_ready = sock_def_readable; diff --git a/net/dccp/probe.c b/net/dccp/probe.c index 43a3adb027e7..bae10b0f2fc3 100644 --- a/net/dccp/probe.c +++ b/net/dccp/probe.c @@ -112,7 +112,7 @@ static struct jprobe dccp_send_probe = { .kp = { .symbol_name = "dccp_sendmsg", }, - .entry = JPROBE_ENTRY(jdccp_sendmsg), + .entry = jdccp_sendmsg, }; static int dccpprobe_open(struct inode *inode, struct file *file) diff --git a/net/ieee80211/ieee80211_wx.c b/net/ieee80211/ieee80211_wx.c index 523a137d49dd..465b73d50532 100644 --- a/net/ieee80211/ieee80211_wx.c +++ b/net/ieee80211/ieee80211_wx.c @@ -90,14 +90,11 @@ static char *ieee80211_translate_scan(struct ieee80211_device *ieee, } /* Add channel and frequency */ + /* Note : userspace automatically computes channel using iwrange */ iwe.cmd = SIOCGIWFREQ; - iwe.u.freq.m = network->channel; - iwe.u.freq.e = 0; - iwe.u.freq.i = 0; - start = iwe_stream_add_event(start, stop, &iwe, IW_EV_FREQ_LEN); - iwe.u.freq.m = ieee80211_channel_to_freq(ieee, network->channel); iwe.u.freq.e = 6; + iwe.u.freq.i = 0; start = iwe_stream_add_event(start, stop, &iwe, IW_EV_FREQ_LEN); /* Add encryption capability */ diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index 3da9d73d1b52..27c7918e442a 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c @@ -177,7 +177,7 @@ static int ct_open(struct inode *inode, struct file *file) struct ct_iter_state *st; int ret; - st = kmalloc(sizeof(struct ct_iter_state), GFP_KERNEL); + st = kzalloc(sizeof(struct ct_iter_state), GFP_KERNEL); if (st == NULL) return -ENOMEM; ret = seq_open(file, &ct_seq_ops); @@ -185,7 +185,6 @@ static int ct_open(struct inode *inode, struct file *file) goto out_free; seq = file->private_data; seq->private = st; - memset(st, 0, sizeof(struct ct_iter_state)); return ret; out_free: kfree(st); diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index f37d5928921a..b76398d1b454 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c @@ -130,7 +130,7 @@ static struct jprobe tcp_jprobe = { .kp = { .symbol_name = "tcp_rcv_established", }, - .entry = JPROBE_ENTRY(jtcp_rcv_established), + .entry = jtcp_rcv_established, }; static int tcpprobe_open(struct inode * inode, struct file * file) diff --git a/net/mac80211/ieee80211_rate.c b/net/mac80211/ieee80211_rate.c index 16e850864b8a..2118de04fc35 100644 --- a/net/mac80211/ieee80211_rate.c +++ b/net/mac80211/ieee80211_rate.c @@ -24,11 +24,10 @@ int ieee80211_rate_control_register(struct rate_control_ops *ops) { struct rate_control_alg *alg; - alg = kmalloc(sizeof(*alg), GFP_KERNEL); + alg = kzalloc(sizeof(*alg), GFP_KERNEL); if (alg == NULL) { return -ENOMEM; } - memset(alg, 0, sizeof(*alg)); alg->ops = ops; mutex_lock(&rate_ctrl_mutex); diff --git a/net/mac80211/ieee80211_sta.c b/net/mac80211/ieee80211_sta.c index ba2bf8f0a347..22b11786327a 100644 --- a/net/mac80211/ieee80211_sta.c +++ b/net/mac80211/ieee80211_sta.c @@ -1327,10 +1327,9 @@ ieee80211_rx_bss_add(struct net_device *dev, u8 *bssid) struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee80211_sta_bss *bss; - bss = kmalloc(sizeof(*bss), GFP_ATOMIC); + bss = kzalloc(sizeof(*bss), GFP_ATOMIC); if (!bss) return NULL; - memset(bss, 0, sizeof(*bss)); atomic_inc(&bss->users); atomic_inc(&bss->users); memcpy(bss->bssid, bssid, ETH_ALEN); diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index ffb6ff8c3528..fc847cc63be6 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -198,7 +198,7 @@ static int ct_open(struct inode *inode, struct file *file) struct ct_iter_state *st; int ret; - st = kmalloc(sizeof(struct ct_iter_state), GFP_KERNEL); + st = kzalloc(sizeof(struct ct_iter_state), GFP_KERNEL); if (st == NULL) return -ENOMEM; ret = seq_open(file, &ct_seq_ops); @@ -206,7 +206,6 @@ static int ct_open(struct inode *inode, struct file *file) goto out_free; seq = file->private_data; seq->private = st; - memset(st, 0, sizeof(struct ct_iter_state)); return ret; out_free: kfree(st); diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index e787b6a43eee..5b2b6fb244f2 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -460,21 +460,19 @@ static struct dentry_operations rpc_dentry_operations = { static int rpc_lookup_parent(char *path, struct nameidata *nd) { + struct vfsmount *mnt; + if (path[0] == '\0') return -ENOENT; - nd->mnt = rpc_get_mount(); - if (IS_ERR(nd->mnt)) { + + mnt = rpc_get_mount(); + if (IS_ERR(mnt)) { printk(KERN_WARNING "%s: %s failed to mount " "pseudofilesystem \n", __FILE__, __FUNCTION__); - return PTR_ERR(nd->mnt); + return PTR_ERR(mnt); } - mntget(nd->mnt); - nd->dentry = dget(rpc_mount->mnt_root); - nd->last_type = LAST_ROOT; - nd->flags = LOOKUP_PARENT; - nd->depth = 0; - if (path_walk(path, nd)) { + if (vfs_path_lookup(mnt->mnt_root, mnt, path, LOOKUP_PARENT, nd)) { printk(KERN_WARNING "%s: %s failed to find path %s\n", __FILE__, __FUNCTION__, path); rpc_put_mount(); diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 9dfc9127acdd..d8473eefcd23 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -1052,12 +1052,11 @@ int tipc_nametbl_init(void) { int array_size = sizeof(struct hlist_head) * tipc_nametbl_size; - table.types = kmalloc(array_size, GFP_ATOMIC); + table.types = kzalloc(array_size, GFP_ATOMIC); if (!table.types) return -ENOMEM; write_lock_bh(&tipc_nametbl_lock); - memset(table.types, 0, array_size); table.local_publ_count = 0; write_unlock_bh(&tipc_nametbl_lock); return 0; diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 25e20a27fc59..73751ab6ec0c 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -9,7 +9,7 @@ use strict; my $P = $0; $P =~ s@.*/@@g; -my $V = '0.07'; +my $V = '0.08'; use Getopt::Long qw(:config no_auto_abbrev); @@ -47,16 +47,14 @@ my $removal = 'Documentation/feature-removal-schedule.txt'; if ($tree && -f $removal) { open(REMOVE, "<$removal") || die "$P: $removal: open failed - $!\n"; while (<REMOVE>) { - if (/^Files:\s+(.*\S)/) { - for my $file (split(/[, ]+/, $1)) { - if ($file =~ m@include/(.*)@) { + if (/^Check:\s+(.*\S)/) { + for my $entry (split(/[, ]+/, $1)) { + if ($entry =~ m@include/(.*)@) { push(@dep_includes, $1); - } - } - } elsif (/^Funcs:\s+(.*\S)/) { - for my $func (split(/[, ]+/, $1)) { - push(@dep_functions, $func); + } elsif ($entry !~ m@/@) { + push(@dep_functions, $entry); + } } } } @@ -153,7 +151,7 @@ sub sanitise_line { } sub ctx_block_get { - my ($linenr, $remain, $outer, $open, $close) = @_; + my ($linenr, $remain, $outer, $open, $close, $off) = @_; my $line; my $start = $linenr - 1; my $blk = ''; @@ -161,38 +159,58 @@ sub ctx_block_get { my @c; my @res = (); + my $level = 0; for ($line = $start; $remain > 0; $line++) { next if ($rawlines[$line] =~ /^-/); $remain--; $blk .= $rawlines[$line]; + foreach my $c (split(//, $rawlines[$line])) { + ##print "C<$c>L<$level><$open$close>O<$off>\n"; + if ($off > 0) { + $off--; + next; + } - @o = ($blk =~ /$open/g); - @c = ($blk =~ /$close/g); + if ($c eq $close && $level > 0) { + $level--; + last if ($level == 0); + } elsif ($c eq $open) { + $level++; + } + } - if (!$outer || (scalar(@o) - scalar(@c)) == 1) { + if (!$outer || $level <= 1) { push(@res, $rawlines[$line]); } - last if (scalar(@o) == scalar(@c)); + last if ($level == 0); } - return @res; + return ($level, @res); } sub ctx_block_outer { my ($linenr, $remain) = @_; - return ctx_block_get($linenr, $remain, 1, '\{', '\}'); + my ($level, @r) = ctx_block_get($linenr, $remain, 1, '{', '}', 0); + return @r; } sub ctx_block { my ($linenr, $remain) = @_; - return ctx_block_get($linenr, $remain, 0, '\{', '\}'); + my ($level, @r) = ctx_block_get($linenr, $remain, 0, '{', '}', 0); + return @r; } sub ctx_statement { + my ($linenr, $remain, $off) = @_; + + my ($level, @r) = ctx_block_get($linenr, $remain, 0, '(', ')', $off); + return @r; +} +sub ctx_block_level { my ($linenr, $remain) = @_; - return ctx_block_get($linenr, $remain, 0, '\(', '\)'); + return ctx_block_get($linenr, $remain, 0, '{', '}', 0); } sub ctx_locate_comment { @@ -246,16 +264,23 @@ sub cat_vet { return $vet; } +my @report = (); +sub report { + push(@report, $_[0]); +} +sub report_dump { + @report; +} sub ERROR { - print "ERROR: $_[0]\n"; + report("ERROR: $_[0]\n"); our $clean = 0; } sub WARN { - print "WARNING: $_[0]\n"; + report("WARNING: $_[0]\n"); our $clean = 0; } sub CHK { - print "CHECK: $_[0]\n"; + report("CHECK: $_[0]\n"); our $clean = 0; } @@ -318,7 +343,10 @@ sub process { (?:\s*\*+\s*const|\s*\*+)? }x; my $Declare = qr{(?:$Storage\s+)?$Type}; - my $Attribute = qr{__read_mostly|__init|__initdata}; + my $Attribute = qr{const|__read_mostly|__init|__initdata|__meminit}; + + my $Member = qr{->$Ident|\.$Ident|\[[^]]*\]}; + my $Lval = qr{$Ident(?:$Member)*}; # Pre-scan the patch looking for any __setup documentation. my @setup_docs = (); @@ -509,7 +537,7 @@ sub process { # if/while/etc brace do not go on next line, unless defining a do while loop, # or if that brace on the next line is for something else if ($line =~ /\b(?:(if|while|for|switch)\s*\(|do\b|else\b)/ && $line !~ /^.#/) { - my @ctx = ctx_statement($linenr, $realcnt); + my @ctx = ctx_statement($linenr, $realcnt, 0); my $ctx_ln = $linenr + $#ctx + 1; my $ctx_cnt = $realcnt - $#ctx - 1; my $ctx = join("\n", @ctx); @@ -521,7 +549,7 @@ sub process { ##warn "line<$line>\nctx<$ctx>\nnext<$lines[$ctx_ln - 1]>"; if ($ctx !~ /{\s*/ && $ctx_cnt > 0 && $lines[$ctx_ln - 1] =~ /^\+\s*{/) { - ERROR("That { should be on the previous line\n" . + ERROR("That open brace { should be on the previous line\n" . "$here\n$ctx\n$lines[$ctx_ln - 1]"); } } @@ -535,6 +563,12 @@ sub process { next; } +# check for initialisation to aggregates open brace on the next line + if ($prevline =~ /$Declare\s*$Ident\s*=\s*$/ && + $line =~ /^.\s*{/) { + ERROR("That open brace { should be on the previous line\n" . $hereprev); + } + # # Checks which are anchored on the added line. # @@ -570,8 +604,13 @@ sub process { } } +# check for external initialisers. + if ($line =~ /^.$Type\s*$Ident\s*=\s*(0|NULL);/) { + ERROR("do not initialise externals to 0 or NULL\n" . + $herecurr); + } # check for static initialisers. - if ($line=~/\s*static\s.*=\s+(0|NULL);/) { + if ($line =~ /\s*static\s.*=\s*(0|NULL);/) { ERROR("do not initialise statics to 0 or NULL\n" . $herecurr); } @@ -593,11 +632,11 @@ sub process { ERROR("\"(foo $1 )\" should be \"(foo $1)\"\n" . $herecurr); - } elsif ($line =~ m{$NonptrType(\*+)(?:\s+const)?\s+[A-Za-z\d_]+}) { + } elsif ($line =~ m{$NonptrType(\*+)(?:\s+$Attribute)?\s+[A-Za-z\d_]+}) { ERROR("\"foo$1 bar\" should be \"foo $1bar\"\n" . $herecurr); - } elsif ($line =~ m{$NonptrType\s+(\*+)(?!\s+const)\s+[A-Za-z\d_]+}) { + } elsif ($line =~ m{$NonptrType\s+(\*+)(?!\s+$Attribute)\s+[A-Za-z\d_]+}) { ERROR("\"foo $1 bar\" should be \"foo $1bar\"\n" . $herecurr); } @@ -614,7 +653,7 @@ sub process { # to try and find and validate the current printk. In summary the current # printk includes all preceeding printk's which have no newline on the end. # we assume the first bad printk is the one to report. - if ($line =~ /\bprintk\((?!KERN_)/) { + if ($line =~ /\bprintk\((?!KERN_)\s*"/) { my $ok = 0; for (my $ln = $linenr - 1; $ln >= $first_line; $ln--) { #print "CHECK<$lines[$ln - 1]\n"; @@ -639,6 +678,12 @@ sub process { ERROR("open brace '{' following function declarations go on the next line\n" . $herecurr); } +# check for spaces between functions and their parentheses. + if ($line =~ /($Ident)\s+\(/ && + $1 !~ /^(?:if|for|while|switch|return|volatile)$/ && + $line !~ /$Type\s+\(/ && $line !~ /^.\#\s*define\b/) { + ERROR("no space between function name and open parenthesis '('\n" . $herecurr); + } # Check operator spacing. # Note we expand the line with the leading + as the real # line will be displayed with the leading + and the tabs @@ -647,7 +692,7 @@ sub process { $opline = expand_tabs($opline); $opline =~ s/^./ /; if (!($line=~/\#\s*include/)) { - my @elements = split(/(<<=|>>=|<=|>=|==|!=|\+=|-=|\*=|\/=|%=|\^=|\|=|&=|->|<<|>>|<|>|=|!|~|&&|\|\||,|\^|\+\+|--|;|&|\||\+|-|\*|\/\/|\/)/, $opline); + my @elements = split(/(<<=|>>=|<=|>=|==|!=|\+=|-=|\*=|\/=|%=|\^=|\|=|&=|=>|->|<<|>>|<|>|=|!|~|&&|\|\||,|\^|\+\+|--|;|&|\||\+|-|\*|\/\/|\/)/, $opline); my $off = 0; for (my $n = 0; $n < $#elements; $n += 2) { $off += length($elements[$n]); @@ -773,6 +818,18 @@ sub process { } } +# check for multiple assignments + if ($line =~ /^.\s*$Lval\s*=\s*$Lval\s*=(?!=)/) { + WARN("multiple assignments should be avoided\n" . $herecurr); + } + +# check for multiple declarations, allowing for a function declaration +# continuation. + if ($line =~ /^.\s*$Type\s+$Ident(?:\s*=[^,{]*)?\s*,\s*$Ident.*/ && + $line !~ /^.\s*$Type\s+$Ident(?:\s*=[^,{]*)?\s*,\s*$Type\s*$Ident.*/) { + WARN("declaring multiple variables together should be avoided\n" . $herecurr); + } + #need space before brace following if, while, etc if ($line =~ /\(.*\){/ || $line =~ /do{/) { ERROR("need a space before the open brace '{'\n" . $herecurr); @@ -847,13 +904,18 @@ sub process { # or the one below. my $ln = $linenr; my $cnt = $realcnt; + my $off = 0; - # If the macro starts on the define line start there. - if ($prevline !~ m{^.#\s*define\s*$Ident(?:\([^\)]*\))?\s*\\\s*$}) { + # If the macro starts on the define line start + # grabbing the statement after the identifier + $prevline =~ m{^(.#\s*define\s*$Ident(?:\([^\)]*\))?\s*)(.*)\\\s*$}; + ##print "1<$1> 2<$2>\n"; + if ($2 ne '') { + $off = length($1); $ln--; $cnt++; } - my @ctx = ctx_statement($ln, $cnt); + my @ctx = ctx_statement($ln, $cnt, $off); my $ctx_ln = $ln + $#ctx + 1; my $ctx = join("\n", @ctx); @@ -873,6 +935,44 @@ sub process { } } +# check for redundant bracing round if etc + if ($line =~ /\b(if|while|for|else)\b/) { + # Locate the end of the opening statement. + my @control = ctx_statement($linenr, $realcnt, 0); + my $nr = $linenr + (scalar(@control) - 1); + my $cnt = $realcnt - (scalar(@control) - 1); + + my $off = $realcnt - $cnt; + #print "$off: line<$line>end<" . $lines[$nr - 1] . ">\n"; + + # If this is is a braced statement group check it + if ($lines[$nr - 1] =~ /{\s*$/) { + my ($lvl, @block) = ctx_block_level($nr, $cnt); + + my $stmt = join(' ', @block); + $stmt =~ s/^[^{]*{//; + $stmt =~ s/}[^}]*$//; + + #print "block<" . join(' ', @block) . "><" . scalar(@block) . ">\n"; + #print "stmt<$stmt>\n\n"; + + # Count the ;'s if there is fewer than two + # then there can only be one statement, + # if there is a brace inside we cannot + # trivially detect if its one statement. + # Also nested if's often require braces to + # disambiguate the else binding so shhh there. + my @semi = ($stmt =~ /;/g); + ##print "semi<" . scalar(@semi) . ">\n"; + if ($lvl == 0 && scalar(@semi) < 2 && + $stmt !~ /{/ && $stmt !~ /\bif\b/) { + my $herectx = "$here\n" . join("\n", @control, @block[1 .. $#block]) . "\n"; + shift(@block); + ERROR("braces {} are not necessary for single statement blocks\n" . $herectx); + } + } + } + # don't include deprecated include files (uses RAW line) for my $inc (@dep_includes) { if ($rawline =~ m@\#\s*include\s*\<$inc>@) { @@ -898,6 +998,14 @@ sub process { $herecurr); } +# check for needless kfree() checks + if ($prevline =~ /\bif\s*\(([^\)]*)\)/) { + my $expr = $1; + if ($line =~ /\bkfree\(\Q$expr\E\);/) { + WARN("kfree(NULL) is safe this check is probabally not required\n" . $hereprev); + } + } + # warn about #ifdefs in C files # if ($line =~ /^.#\s*if(|n)def/ && ($realfile =~ /\.c$/)) { # print "#ifdef in C files should be avoided\n"; @@ -952,6 +1060,9 @@ sub process { ERROR("Missing Signed-off-by: line(s)\n"); } + if ($clean == 0 && ($chk_patch || $is_patch)) { + print report_dump(); + } if ($clean == 1 && $quiet == 0) { print "Your patch has no obvious style problems and is ready for submission.\n" } diff --git a/scripts/kernel-doc b/scripts/kernel-doc index e5bf649e516a..1f5835115cad 100755 --- a/scripts/kernel-doc +++ b/scripts/kernel-doc @@ -154,6 +154,7 @@ use strict; my $errors = 0; my $warnings = 0; +my $anon_struct_union = 0; # match expressions used to find embedded type information my $type_constant = '\%([-_\w]+)'; @@ -403,7 +404,11 @@ sub output_highlight { print $lineprefix, $blankline; } else { $line =~ s/\\\\\\/\&/g; - print $lineprefix, $line; + if ($output_mode eq "man" && substr($line, 0, 1) eq ".") { + print "\\&$line"; + } else { + print $lineprefix, $line; + } } print "\n"; } @@ -718,6 +723,7 @@ sub output_struct_xml(%) { # pointer-to-function print " $1 $parameter) ($2);\n"; } elsif ($type =~ m/^(.*?)\s*(:.*)/) { + # bitfield print " $1 $parameter$2;\n"; } else { print " ".$type." ".$parameter.";\n"; @@ -1260,6 +1266,7 @@ sub output_struct_text(%) { # pointer-to-function print "\t$1 $parameter) ($2);\n"; } elsif ($type =~ m/^(.*?)\s*(:.*)/) { + # bitfield print "\t$1 $parameter$2;\n"; } else { print "\t".$type." ".$parameter.";\n"; @@ -1510,8 +1517,13 @@ sub push_parameter($$$) { my $param = shift; my $type = shift; my $file = shift; - my $anon = 0; + if (($anon_struct_union == 1) && ($type eq "") && + ($param eq "}")) { + return; # ignore the ending }; from anon. struct/union + } + + $anon_struct_union = 0; my $param_name = $param; $param_name =~ s/\[.*//; @@ -1530,16 +1542,16 @@ sub push_parameter($$$) { # handle unnamed (anonymous) union or struct: { $type = $param; - $param = "{unnamed_" . $param. "}"; + $param = "{unnamed_" . $param . "}"; $parameterdescs{$param} = "anonymous\n"; - $anon = 1; + $anon_struct_union = 1; } # warn if parameter has no description # (but ignore ones starting with # as these are not parameters # but inline preprocessor statements); # also ignore unnamed structs/unions; - if (!$anon) { + if (!$anon_struct_union) { if (!defined $parameterdescs{$param_name} && $param_name !~ /^#/) { $parameterdescs{$param_name} = $undescribed; @@ -1691,6 +1703,8 @@ sub process_state3_function($$) { my $x = shift; my $file = shift; + $x =~ s@\/\/.*$@@gos; # strip C99-style comments to end of line + if ($x =~ m#\s*/\*\s+MACDOC\s*#io || ($x =~ /^#/ && $x !~ /^#define/)) { # do nothing } @@ -1713,6 +1727,8 @@ sub process_state3_type($$) { $x =~ s@[\r\n]+@ @gos; # strip newlines/cr's. $x =~ s@^\s+@@gos; # strip leading spaces $x =~ s@\s+$@@gos; # strip trailing spaces + $x =~ s@\/\/.*$@@gos; # strip C99-style comments to end of line + if ($x =~ /^#/) { # To distinguish preprocessor directive from regular declaration later. $x .= ";"; @@ -1796,7 +1812,7 @@ sub process_file($) { $state = 2; if (/-(.*)/) { - # strip leading/trailing/multiple spaces #RDD:T: + # strip leading/trailing/multiple spaces $descr= $1; $descr =~ s/^\s*//; $descr =~ s/\s*$//; diff --git a/security/commoncap.c b/security/commoncap.c index 384379ede4fd..338606eb7238 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -148,7 +148,7 @@ void cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe) if (bprm->e_uid != current->uid || bprm->e_gid != current->gid || !cap_issubset (new_permitted, current->cap_permitted)) { - current->mm->dumpable = suid_dumpable; + set_dumpable(current->mm, suid_dumpable); if (unsafe & ~LSM_UNSAFE_PTRACE_CAP) { if (!capable(CAP_SETUID)) { diff --git a/security/dummy.c b/security/dummy.c index d6a112ce2975..19d813d5e083 100644 --- a/security/dummy.c +++ b/security/dummy.c @@ -130,7 +130,7 @@ static void dummy_bprm_free_security (struct linux_binprm *bprm) static void dummy_bprm_apply_creds (struct linux_binprm *bprm, int unsafe) { if (bprm->e_uid != current->uid || bprm->e_gid != current->gid) { - current->mm->dumpable = suid_dumpable; + set_dumpable(current->mm, suid_dumpable); if ((unsafe & ~LSM_UNSAFE_PTRACE_CAP) && !capable(CAP_SETUID)) { bprm->e_uid = current->uid; diff --git a/sound/core/seq/seq_virmidi.c b/sound/core/seq/seq_virmidi.c index 061a7c61402a..e11790f6debe 100644 --- a/sound/core/seq/seq_virmidi.c +++ b/sound/core/seq/seq_virmidi.c @@ -363,7 +363,7 @@ static int snd_virmidi_dev_attach_seq(struct snd_virmidi_dev *rdev) if (rdev->client >= 0) return 0; - pinfo = kmalloc(sizeof(*pinfo), GFP_KERNEL); + pinfo = kzalloc(sizeof(*pinfo), GFP_KERNEL); if (!pinfo) { err = -ENOMEM; goto __error; @@ -380,7 +380,6 @@ static int snd_virmidi_dev_attach_seq(struct snd_virmidi_dev *rdev) rdev->client = client; /* create a port */ - memset(pinfo, 0, sizeof(*pinfo)); pinfo->addr.client = client; sprintf(pinfo->name, "VirMIDI %d-%d", rdev->card->number, rdev->device); /* set all capabilities */ diff --git a/sound/core/sound.c b/sound/core/sound.c index 70600df94d62..8dc7a3b32b98 100644 --- a/sound/core/sound.c +++ b/sound/core/sound.c @@ -446,8 +446,7 @@ static void __exit alsa_sound_exit(void) { snd_info_minor_unregister(); snd_info_done(); - if (unregister_chrdev(major, "alsa") != 0) - snd_printk(KERN_ERR "unable to unregister major device number %d\n", major); + unregister_chrdev(major, "alsa"); } module_init(alsa_sound_init) |