diff options
361 files changed, 10136 insertions, 4303 deletions
@@ -32,6 +32,7 @@ Christoph Hellwig <hch@lst.de> Corey Minyard <minyard@acm.org> David Brownell <david-b@pacbell.net> David Woodhouse <dwmw2@shinybook.infradead.org> +Dmitry Eremin-Solenikov <dbaryshkov@gmail.com> Domen Puncer <domen@coderock.org> Douglas Gilbert <dougg@torque.net> Ed L. Cashin <ecashin@coraid.com> diff --git a/Documentation/ABI/testing/sysfs-devices-memory b/Documentation/ABI/testing/sysfs-devices-memory index 7a16fe1e2270..9fe91c02ee40 100644 --- a/Documentation/ABI/testing/sysfs-devices-memory +++ b/Documentation/ABI/testing/sysfs-devices-memory @@ -6,7 +6,6 @@ Description: internal state of the kernel memory blocks. Files could be added or removed dynamically to represent hot-add/remove operations. - Users: hotplug memory add/remove tools https://w3.opensource.ibm.com/projects/powerpc-utils/ @@ -19,6 +18,56 @@ Description: This is useful for a user-level agent to determine identify removable sections of the memory before attempting potentially expensive hot-remove memory operation +Users: hotplug memory remove tools + https://w3.opensource.ibm.com/projects/powerpc-utils/ + +What: /sys/devices/system/memory/memoryX/phys_device +Date: September 2008 +Contact: Badari Pulavarty <pbadari@us.ibm.com> +Description: + The file /sys/devices/system/memory/memoryX/phys_device + is read-only and is designed to show the name of physical + memory device. Implementation is currently incomplete. +What: /sys/devices/system/memory/memoryX/phys_index +Date: September 2008 +Contact: Badari Pulavarty <pbadari@us.ibm.com> +Description: + The file /sys/devices/system/memory/memoryX/phys_index + is read-only and contains the section ID in hexadecimal + which is equivalent to decimal X contained in the + memory section directory name. + +What: /sys/devices/system/memory/memoryX/state +Date: September 2008 +Contact: Badari Pulavarty <pbadari@us.ibm.com> +Description: + The file /sys/devices/system/memory/memoryX/state + is read-write. When read, it's contents show the + online/offline state of the memory section. When written, + root can toggle the the online/offline state of a removable + memory section (see removable file description above) + using the following commands. + # echo online > /sys/devices/system/memory/memoryX/state + # echo offline > /sys/devices/system/memory/memoryX/state + + For example, if /sys/devices/system/memory/memory22/removable + contains a value of 1 and + /sys/devices/system/memory/memory22/state contains the + string "online" the following command can be executed by + by root to offline that section. + # echo offline > /sys/devices/system/memory/memory22/state Users: hotplug memory remove tools https://w3.opensource.ibm.com/projects/powerpc-utils/ + +What: /sys/devices/system/node/nodeX/memoryY +Date: September 2008 +Contact: Gary Hade <garyhade@us.ibm.com> +Description: + When CONFIG_NUMA is enabled + /sys/devices/system/node/nodeX/memoryY is a symbolic link that + points to the corresponding /sys/devices/system/memory/memoryY + memory section directory. For example, the following symbolic + link is created for memory section 9 on node0. + /sys/devices/system/node/node0/memory9 -> ../../memory/memory9 + diff --git a/Documentation/DMA-mapping.txt b/Documentation/DMA-mapping.txt index c74fec8c2351..b2a4d6d244d9 100644 --- a/Documentation/DMA-mapping.txt +++ b/Documentation/DMA-mapping.txt @@ -26,7 +26,7 @@ mapped only for the time they are actually used and unmapped after the DMA transfer. The following API will work of course even on platforms where no such -hardware exists, see e.g. include/asm-i386/pci.h for how it is implemented on +hardware exists, see e.g. arch/x86/include/asm/pci.h for how it is implemented on top of the virt_to_bus interface. First of all, you should make sure diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index ccec55394380..cfbfa15a46ba 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -397,7 +397,7 @@ prototypes: }; locking rules: - All except ->poll() may block. + All may block. BKL llseek: no (see below) read: no diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index 71df353e367c..32e94635484f 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -1385,6 +1385,15 @@ swapcache reclaim. Decreasing vfs_cache_pressure causes the kernel to prefer to retain dentry and inode caches. Increasing vfs_cache_pressure beyond 100 causes the kernel to prefer to reclaim dentries and inodes. +dirty_background_bytes +---------------------- + +Contains the amount of dirty memory at which the pdflush background writeback +daemon will start writeback. + +If dirty_background_bytes is written, dirty_background_ratio becomes a function +of its value (dirty_background_bytes / the amount of dirtyable system memory). + dirty_background_ratio ---------------------- @@ -1393,14 +1402,29 @@ pages + file cache, not including locked pages and HugePages), the number of pages at which the pdflush background writeback daemon will start writing out dirty data. +If dirty_background_ratio is written, dirty_background_bytes becomes a function +of its value (dirty_background_ratio * the amount of dirtyable system memory). + +dirty_bytes +----------- + +Contains the amount of dirty memory at which a process generating disk writes +will itself start writeback. + +If dirty_bytes is written, dirty_ratio becomes a function of its value +(dirty_bytes / the amount of dirtyable system memory). + dirty_ratio ------------------ +----------- Contains, as a percentage of the dirtyable system memory (free pages + mapped pages + file cache, not including locked pages and HugePages), the number of pages at which a process which is generating disk writes will itself start writing out dirty data. +If dirty_ratio is written, dirty_bytes becomes a function of its value +(dirty_ratio * the amount of dirtyable system memory). + dirty_writeback_centisecs ------------------------- diff --git a/Documentation/hwmon/adt7470 b/Documentation/hwmon/adt7470 index 75d13ca147cc..8ce4aa0a0f55 100644 --- a/Documentation/hwmon/adt7470 +++ b/Documentation/hwmon/adt7470 @@ -31,15 +31,11 @@ Each of the measured inputs (temperature, fan speed) has corresponding high/low limit values. The ADT7470 will signal an ALARM if any measured value exceeds either limit. -The ADT7470 DOES NOT sample all inputs continuously. A single pin on the -ADT7470 is connected to a multitude of thermal diodes, but the chip must be -instructed explicitly to read the multitude of diodes. If you want to use -automatic fan control mode, you must manually read any of the temperature -sensors or the fan control algorithm will not run. The chip WILL NOT DO THIS -AUTOMATICALLY; this must be done from userspace. This may be a bug in the chip -design, given that many other AD chips take care of this. The driver will not -read the registers more often than once every 5 seconds. Further, -configuration data is only read once per minute. +The ADT7470 samples all inputs continuously. A kernel thread is started up for +the purpose of periodically querying the temperature sensors, thus allowing the +automatic fan pwm control to set the fan speed. The driver will not read the +registers more often than once every 5 seconds. Further, configuration data is +only read once per minute. Special Features ---------------- @@ -72,5 +68,6 @@ pwm#_auto_point2_temp. Notes ----- -As stated above, the temperature inputs must be read periodically from -userspace in order for the automatic pwm algorithm to run. +The temperature inputs no longer need to be read periodically from userspace in +order for the automatic pwm algorithm to run. This was the case for earlier +versions of the driver. diff --git a/Documentation/ide/warm-plug-howto.txt b/Documentation/ide/warm-plug-howto.txt index d5885468b072..98152bcd515a 100644 --- a/Documentation/ide/warm-plug-howto.txt +++ b/Documentation/ide/warm-plug-howto.txt @@ -11,3 +11,8 @@ unplug old device(s) and plug new device(s) # echo -n "1" > /sys/class/ide_port/idex/scan done + +NOTE: please make sure that partitions are unmounted and that there are +no other active references to devices before doing "delete_devices" step, +also do not attempt "scan" step on devices currently in use -- otherwise +results may be unpredictable and lead to data loss if you're unlucky diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt index 824699174436..f1d639903325 100644 --- a/Documentation/ioctl/ioctl-number.txt +++ b/Documentation/ioctl/ioctl-number.txt @@ -84,7 +84,7 @@ Code Seq# Include File Comments 'B' C0-FF advanced bbus <mailto:maassen@uni-freiburg.de> 'C' all linux/soundcard.h -'D' all asm-s390/dasd.h +'D' all arch/s390/include/asm/dasd.h 'E' all linux/input.h 'F' all linux/fb.h 'H' all linux/hiddev.h @@ -105,7 +105,7 @@ Code Seq# Include File Comments 'S' 80-81 scsi/scsi_ioctl.h conflict! 'S' 82-FF scsi/scsi.h conflict! 'T' all linux/soundcard.h conflict! -'T' all asm-i386/ioctls.h conflict! +'T' all arch/x86/include/asm/ioctls.h conflict! 'U' 00-EF linux/drivers/usb/usb.h 'V' all linux/vt.h 'W' 00-1F linux/watchdog.h conflict! @@ -120,7 +120,7 @@ Code Seq# Include File Comments <mailto:natalia@nikhefk.nikhef.nl> 'c' 00-7F linux/comstats.h conflict! 'c' 00-7F linux/coda.h conflict! -'c' 80-9F asm-s390/chsc.h +'c' 80-9F arch/s390/include/asm/chsc.h 'd' 00-FF linux/char/drm/drm/h conflict! 'd' 00-DF linux/video_decoder.h conflict! 'd' F0-FF linux/digi1.h @@ -170,7 +170,7 @@ Code Seq# Include File Comments <mailto:oe@port.de> 0x80 00-1F linux/fb.h 0x81 00-1F linux/videotext.h -0x89 00-06 asm-i386/sockios.h +0x89 00-06 arch/x86/include/asm/sockios.h 0x89 0B-DF linux/sockios.h 0x89 E0-EF linux/sockios.h SIOCPROTOPRIVATE range 0x89 F0-FF linux/sockios.h SIOCDEVPRIVATE range diff --git a/Documentation/kernel-doc-nano-HOWTO.txt b/Documentation/kernel-doc-nano-HOWTO.txt index c6841eee9598..d73fbd2b2b45 100644 --- a/Documentation/kernel-doc-nano-HOWTO.txt +++ b/Documentation/kernel-doc-nano-HOWTO.txt @@ -71,6 +71,11 @@ The @argument descriptions must begin on the very next line following this opening short function description line, with no intervening empty comment lines. +If a function parameter is "..." (varargs), it should be listed in +kernel-doc notation as: + * @...: description + + Example kernel-doc data structure comment. /** @@ -282,6 +287,32 @@ struct my_struct { }; +Including documentation blocks in source files +---------------------------------------------- + +To facilitate having source code and comments close together, you can +include kernel-doc documentation blocks that are free-form comments +instead of being kernel-doc for functions, structures, unions, +enums, or typedefs. This could be used for something like a +theory of operation for a driver or library code, for example. + +This is done by using a DOC: section keyword with a section title. E.g.: + +/** + * DOC: Theory of Operation + * + * The whizbang foobar is a dilly of a gizmo. It can do whatever you + * want it to do, at any time. It reads your mind. Here's how it works. + * + * foo bar splat + * + * The only drawback to this gizmo is that is can sometimes damage + * hardware, software, or its subject(s). + */ + +DOC: sections are used in SGML templates files as indicated below. + + How to make new SGML template files ----------------------------------- @@ -302,6 +333,9 @@ exported using EXPORT_SYMBOL. !F<filename> <function [functions...]> is replaced by the documentation, in <filename>, for the functions listed. +!P<filename> <section title> is replaced by the contents of the DOC: +section titled <section title> from <filename>. +Spaces are allowed in <section title>; do not quote the <section title>. Tim. */ <twaugh@redhat.com> diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index a2d8805c03d5..0b3f6711d2f1 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -469,8 +469,8 @@ and is between 256 and 4096 characters. It is defined in the file clearcpuid=BITNUM [X86] Disable CPUID feature X for the kernel. See - include/asm-x86/cpufeature.h for the valid bit numbers. - Note the Linux specific bits are not necessarily + arch/x86/include/asm/cpufeature.h for the valid bit + numbers. Note the Linux specific bits are not necessarily stable over kernel options, but the vendor specific ones should be. Also note that user programs calling CPUID directly @@ -551,6 +551,11 @@ and is between 256 and 4096 characters. It is defined in the file not work reliably with all consoles, but is known to work with serial and VGA consoles. + coredump_filter= + [KNL] Change the default value for + /proc/<pid>/coredump_filter. + See also Documentation/filesystems/proc.txt. + cpcihp_generic= [HW,PCI] Generic port I/O CompactPCI driver Format: <first_slot>,<last_slot>,<port>,<enum_bit>[,<debug>] @@ -1117,6 +1122,8 @@ and is between 256 and 4096 characters. It is defined in the file If there are multiple matching configurations changing the same attribute, the last one is used. + lmb=debug [KNL] Enable lmb debug messages. + load_ramdisk= [RAM] List of ramdisks to load from floppy See Documentation/blockdev/ramdisk.txt. @@ -1569,6 +1576,10 @@ and is between 256 and 4096 characters. It is defined in the file nr_uarts= [SERIAL] maximum number of UARTs to be registered. + ohci1394_dma=early [HW] enable debugging via the ohci1394 driver. + See Documentation/debugging-via-ohci1394.txt for more + info. + olpc_ec_timeout= [OLPC] ms delay when issuing EC commands Rather than timing out after 20 ms if an EC command is not properly ACKed, override the length @@ -1793,10 +1804,10 @@ and is between 256 and 4096 characters. It is defined in the file autoconfiguration. Ranges are in pairs (memory base and size). - dynamic_printk - Enables pr_debug()/dev_dbg() calls if - CONFIG_DYNAMIC_PRINTK_DEBUG has been enabled. These can also - be switched on/off via <debugfs>/dynamic_printk/modules + dynamic_printk Enables pr_debug()/dev_dbg() calls if + CONFIG_DYNAMIC_PRINTK_DEBUG has been enabled. + These can also be switched on/off via + <debugfs>/dynamic_printk/modules print-fatal-signals= [KNL] debug: print fatal signals @@ -1884,7 +1895,7 @@ and is between 256 and 4096 characters. It is defined in the file reboot= [BUGS=X86-32,BUGS=ARM,BUGS=IA-64] Rebooting mode Format: <reboot_mode>[,<reboot_mode2>[,...]] - See arch/*/kernel/reboot.c or arch/*/kernel/process.c + See arch/*/kernel/reboot.c or arch/*/kernel/process.c relax_domain_level= [KNL, SMP] Set scheduler's default relax_domain_level. @@ -2432,8 +2443,8 @@ and is between 256 and 4096 characters. It is defined in the file Format: <irq>,<irq_mask>,<io>,<full_duplex>,<do_sound>,<lockup_hack>[,<irq2>[,<irq3>[,<irq4>]]] - norandmaps Don't use address space randomization - Equivalent to echo 0 > /proc/sys/kernel/randomize_va_space + norandmaps Don't use address space randomization. Equivalent to + echo 0 > /proc/sys/kernel/randomize_va_space ______________________________________________________________________ diff --git a/Documentation/kprobes.txt b/Documentation/kprobes.txt index a79633d702bf..48b3de90eb1e 100644 --- a/Documentation/kprobes.txt +++ b/Documentation/kprobes.txt @@ -497,7 +497,10 @@ The first column provides the kernel address where the probe is inserted. The second column identifies the type of probe (k - kprobe, r - kretprobe and j - jprobe), while the third column specifies the symbol+offset of the probe. If the probed function belongs to a module, the module name -is also specified. +is also specified. Following columns show probe status. If the probe is on +a virtual address that is no longer valid (module init sections, module +virtual addresses that correspond to modules that've been unloaded), +such probes are marked with [GONE]. /debug/kprobes/enabled: Turn kprobes ON/OFF diff --git a/Documentation/magic-number.txt b/Documentation/magic-number.txt index 95070028d15e..505f19607542 100644 --- a/Documentation/magic-number.txt +++ b/Documentation/magic-number.txt @@ -125,14 +125,14 @@ TRIDENT_CARD_MAGIC 0x5072696E trident_card sound/oss/trident.c ROUTER_MAGIC 0x524d4157 wan_device include/linux/wanrouter.h SCC_MAGIC 0x52696368 gs_port drivers/char/scc.h SAVEKMSG_MAGIC1 0x53415645 savekmsg arch/*/amiga/config.c -GDA_MAGIC 0x58464552 gda include/asm-mips64/sn/gda.h +GDA_MAGIC 0x58464552 gda arch/mips/include/asm/sn/gda.h RED_MAGIC1 0x5a2cf071 (any) mm/slab.c STL_PORTMAGIC 0x5a7182c9 stlport include/linux/stallion.h EEPROM_MAGIC_VALUE 0x5ab478d2 lanai_dev drivers/atm/lanai.c HDLCDRV_MAGIC 0x5ac6e778 hdlcdrv_state include/linux/hdlcdrv.h EPCA_MAGIC 0x5c6df104 channel include/linux/epca.h PCXX_MAGIC 0x5c6df104 channel drivers/char/pcxx.h -KV_MAGIC 0x5f4b565f kernel_vars_s include/asm-mips64/sn/klkernvars.h +KV_MAGIC 0x5f4b565f kernel_vars_s arch/mips/include/asm/sn/klkernvars.h I810_STATE_MAGIC 0x63657373 i810_state sound/oss/i810_audio.c TRIDENT_STATE_MAGIC 0x63657373 trient_state sound/oss/trident.c M3_CARD_MAGIC 0x646e6f50 m3_card sound/oss/maestro3.c @@ -158,7 +158,7 @@ CCB_MAGIC 0xf2691ad2 ccb drivers/scsi/ncr53c8xx.c QUEUE_MAGIC_FREE 0xf7e1c9a3 queue_entry drivers/scsi/arm/queue.c QUEUE_MAGIC_USED 0xf7e1cc33 queue_entry drivers/scsi/arm/queue.c HTB_CMAGIC 0xFEFAFEF1 htb_class net/sched/sch_htb.c -NMI_MAGIC 0x48414d4d455201 nmi_s include/asm-mips64/sn/nmi.h +NMI_MAGIC 0x48414d4d455201 nmi_s arch/mips/include/asm/sn/nmi.h Note that there are also defined special per-driver magic numbers in sound memory management. See include/sound/sndmagic.h for complete list of them. Many diff --git a/Documentation/memory-hotplug.txt b/Documentation/memory-hotplug.txt index 168117bd6ee8..4c2ecf537a4a 100644 --- a/Documentation/memory-hotplug.txt +++ b/Documentation/memory-hotplug.txt @@ -124,7 +124,7 @@ config options. This option can be kernel module too. -------------------------------- -3 sysfs files for memory hotplug +4 sysfs files for memory hotplug -------------------------------- All sections have their device information under /sys/devices/system/memory as @@ -138,11 +138,12 @@ For example, assume 1GiB section size. A device for a memory starting at (0x100000000 / 1Gib = 4) This device covers address range [0x100000000 ... 0x140000000) -Under each section, you can see 3 files. +Under each section, you can see 4 files. /sys/devices/system/memory/memoryXXX/phys_index /sys/devices/system/memory/memoryXXX/phys_device /sys/devices/system/memory/memoryXXX/state +/sys/devices/system/memory/memoryXXX/removable 'phys_index' : read-only and contains section id, same as XXX. 'state' : read-write @@ -150,10 +151,20 @@ Under each section, you can see 3 files. at write: user can specify "online", "offline" command 'phys_device': read-only: designed to show the name of physical memory device. This is not well implemented now. +'removable' : read-only: contains an integer value indicating + whether the memory section is removable or not + removable. A value of 1 indicates that the memory + section is removable and a value of 0 indicates that + it is not removable. NOTE: These directories/files appear after physical memory hotplug phase. +If CONFIG_NUMA is enabled the +/sys/devices/system/memory/memoryXXX memory section +directories can also be accessed via symbolic links located in +the /sys/devices/system/node/node* directories. For example: +/sys/devices/system/node/node0/memory9 -> ../../memory/memory9 -------------------------------- 4. Physical memory hot-add phase @@ -365,7 +376,6 @@ node if necessary. - allowing memory hot-add to ZONE_MOVABLE. maybe we need some switch like sysctl or new control file. - showing memory section and physical device relationship. - - showing memory section and node relationship (maybe good for NUMA) - showing memory section is under ZONE_MOVABLE or not - test and make it better memory offlining. - support HugeTLB page migration and offlining. diff --git a/Documentation/mips/AU1xxx_IDE.README b/Documentation/mips/AU1xxx_IDE.README index 25a6ed1aaa5b..f54962aea84d 100644 --- a/Documentation/mips/AU1xxx_IDE.README +++ b/Documentation/mips/AU1xxx_IDE.README @@ -44,7 +44,7 @@ FILES, CONFIGS AND COMPATABILITY Two files are introduced: - a) 'include/asm-mips/mach-au1x00/au1xxx_ide.h' + a) 'arch/mips/include/asm/mach-au1x00/au1xxx_ide.h' containes : struct _auide_hwif timing parameters for PIO mode 0/1/2/3/4 timing parameters for MWDMA 0/1/2 diff --git a/Documentation/powerpc/cpu_features.txt b/Documentation/powerpc/cpu_features.txt index 472739880e87..ffa4183fdb8b 100644 --- a/Documentation/powerpc/cpu_features.txt +++ b/Documentation/powerpc/cpu_features.txt @@ -31,7 +31,7 @@ anyways). After detecting the processor type, the kernel patches out sections of code that shouldn't be used by writing nop's over it. Using cpufeatures requires -just 2 macros (found in include/asm-ppc/cputable.h), as seen in head.S +just 2 macros (found in arch/powerpc/include/asm/cputable.h), as seen in head.S transfer_to_handler: #ifdef CONFIG_ALTIVEC diff --git a/Documentation/s390/Debugging390.txt b/Documentation/s390/Debugging390.txt index d30a281c570f..10711d9f0788 100644 --- a/Documentation/s390/Debugging390.txt +++ b/Documentation/s390/Debugging390.txt @@ -1402,7 +1402,7 @@ Syscalls are implemented on Linux for S390 by the Supervisor call instruction (S possibilities of these as the instruction is made up of a 0xA opcode & the second byte being the syscall number. They are traced using the simple command. TR SVC <Optional value or range> -the syscalls are defined in linux/include/asm-s390/unistd.h +the syscalls are defined in linux/arch/s390/include/asm/unistd.h e.g. to trace all file opens just do TR SVC 5 ( as this is the syscall number of open ) diff --git a/Documentation/s390/cds.txt b/Documentation/s390/cds.txt index c4b7b2bd369a..480a78ef5a1e 100644 --- a/Documentation/s390/cds.txt +++ b/Documentation/s390/cds.txt @@ -98,7 +98,7 @@ platform. Some of the interface routines are specific to Linux/390 and some of them can be found on other Linux platforms implementations too. Miscellaneous function prototypes, data declarations, and macro definitions can be found in the architecture specific C header file -linux/include/asm-s390/irq.h. +linux/arch/s390/include/asm/irq.h. Overview of CDS interface concepts diff --git a/Documentation/s390/s390dbf.txt b/Documentation/s390/s390dbf.txt index e05420973698..2d10053dd97e 100644 --- a/Documentation/s390/s390dbf.txt +++ b/Documentation/s390/s390dbf.txt @@ -2,7 +2,7 @@ S390 Debug Feature ================== files: arch/s390/kernel/debug.c - include/asm-s390/debug.h + arch/s390/include/asm/debug.h Description: ------------ diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index d79eeda7a699..cd05994a49e6 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt @@ -41,7 +41,8 @@ Currently, these files are in /proc/sys/vm: ============================================================== -dirty_ratio, dirty_background_ratio, dirty_expire_centisecs, +dirty_bytes, dirty_ratio, dirty_background_bytes, +dirty_background_ratio, dirty_expire_centisecs, dirty_writeback_centisecs, highmem_is_dirtyable, vfs_cache_pressure, laptop_mode, block_dump, swap_token_timeout, drop-caches, hugepages_treat_as_movable: diff --git a/Documentation/vm/unevictable-lru.txt b/Documentation/vm/unevictable-lru.txt index 125eed560e5a..0706a7282a8c 100644 --- a/Documentation/vm/unevictable-lru.txt +++ b/Documentation/vm/unevictable-lru.txt @@ -137,13 +137,6 @@ shrink_page_list() where they will be detected when vmscan walks the reverse map in try_to_unmap(). If try_to_unmap() returns SWAP_MLOCK, shrink_page_list() will cull the page at that point. -Note that for anonymous pages, shrink_page_list() attempts to add the page to -the swap cache before it tries to unmap the page. To avoid this unnecessary -consumption of swap space, shrink_page_list() calls try_to_munlock() to check -whether any VM_LOCKED vmas map the page without attempting to unmap the page. -If try_to_munlock() returns SWAP_MLOCK, shrink_page_list() will cull the page -without consuming swap space. try_to_munlock() will be described below. - To "cull" an unevictable page, vmscan simply puts the page back on the lru list using putback_lru_page()--the inverse operation to isolate_lru_page()-- after dropping the page lock. Because the condition which makes the page @@ -190,8 +183,8 @@ several places: in the VM_LOCKED flag being set for the vma. 3) in the fault path, if mlocked pages are "culled" in the fault path, and when a VM_LOCKED stack segment is expanded. -4) as mentioned above, in vmscan:shrink_page_list() with attempting to - reclaim a page in a VM_LOCKED vma--via try_to_unmap() or try_to_munlock(). +4) as mentioned above, in vmscan:shrink_page_list() when attempting to + reclaim a page in a VM_LOCKED vma via try_to_unmap(). Mlocked pages become unlocked and rescued from the unevictable list when: @@ -260,9 +253,9 @@ mlock_fixup() filters several classes of "special" vmas: 2) vmas mapping hugetlbfs page are already effectively pinned into memory. We don't need nor want to mlock() these pages. However, to preserve the - prior behavior of mlock()--before the unevictable/mlock changes--mlock_fixup() - will call make_pages_present() in the hugetlbfs vma range to allocate the - huge pages and populate the ptes. + prior behavior of mlock()--before the unevictable/mlock changes-- + mlock_fixup() will call make_pages_present() in the hugetlbfs vma range + to allocate the huge pages and populate the ptes. 3) vmas with VM_DONTEXPAND|VM_RESERVED are generally user space mappings of kernel pages, such as the vdso page, relay channel pages, etc. These pages @@ -322,7 +315,7 @@ __mlock_vma_pages_range()--the same function used to mlock a vma range-- passing a flag to indicate that munlock() is being performed. Because the vma access protections could have been changed to PROT_NONE after -faulting in and mlocking some pages, get_user_pages() was unreliable for visiting +faulting in and mlocking pages, get_user_pages() was unreliable for visiting these pages for munlocking. Because we don't want to leave pages mlocked(), get_user_pages() was enhanced to accept a flag to ignore the permissions when fetching the pages--all of which should be resident as a result of previous @@ -416,8 +409,8 @@ Mlocked Pages: munmap()/exit()/exec() System Call Handling When unmapping an mlocked region of memory, whether by an explicit call to munmap() or via an internal unmap from exit() or exec() processing, we must munlock the pages if we're removing the last VM_LOCKED vma that maps the pages. -Before the unevictable/mlock changes, mlocking did not mark the pages in any way, -so unmapping them required no processing. +Before the unevictable/mlock changes, mlocking did not mark the pages in any +way, so unmapping them required no processing. To munlock a range of memory under the unevictable/mlock infrastructure, the munmap() hander and task address space tear down function call @@ -517,12 +510,10 @@ couldn't be mlocked. Mlocked pages: try_to_munlock() Reverse Map Scan TODO/FIXME: a better name might be page_mlocked()--analogous to the -page_referenced() reverse map walker--especially if we continue to call this -from shrink_page_list(). See related TODO/FIXME below. +page_referenced() reverse map walker. -When munlock_vma_page()--see "Mlocked Pages: munlock()/munlockall() System -Call Handling" above--tries to munlock a page, or when shrink_page_list() -encounters an anonymous page that is not yet in the swap cache, they need to +When munlock_vma_page()--see "Mlocked Pages: munlock()/munlockall() +System Call Handling" above--tries to munlock a page, it needs to determine whether or not the page is mapped by any VM_LOCKED vma, without actually attempting to unmap all ptes from the page. For this purpose, the unevictable/mlock infrastructure introduced a variant of try_to_unmap() called @@ -535,10 +526,7 @@ for VM_LOCKED vmas. When such a vma is found for anonymous pages and file pages mapped in linear VMAs, as in the try_to_unmap() case, the functions attempt to acquire the associated mmap semphore, mlock the page via mlock_vma_page() and return SWAP_MLOCK. This effectively undoes the -pre-clearing of the page's PG_mlocked done by munlock_vma_page() and informs -shrink_page_list() that the anonymous page should be culled rather than added -to the swap cache in preparation for a try_to_unmap() that will almost -certainly fail. +pre-clearing of the page's PG_mlocked done by munlock_vma_page. If try_to_unmap() is unable to acquire a VM_LOCKED vma's associated mmap semaphore, it will return SWAP_AGAIN. This will allow shrink_page_list() @@ -557,10 +545,7 @@ However, the scan can terminate when it encounters a VM_LOCKED vma and can successfully acquire the vma's mmap semphore for read and mlock the page. Although try_to_munlock() can be called many [very many!] times when munlock()ing a large region or tearing down a large address space that has been -mlocked via mlockall(), overall this is a fairly rare event. In addition, -although shrink_page_list() calls try_to_munlock() for every anonymous page that -it handles that is not yet in the swap cache, on average anonymous pages will -have very short reverse map lists. +mlocked via mlockall(), overall this is a fairly rare event. Mlocked Page: Page Reclaim in shrink_*_list() @@ -588,8 +573,8 @@ Some examples of these unevictable pages on the LRU lists are: munlock_vma_page() was forced to let the page back on to the normal LRU list for vmscan to handle. -shrink_inactive_list() also culls any unevictable pages that it finds -on the inactive lists, again diverting them to the appropriate zone's unevictable +shrink_inactive_list() also culls any unevictable pages that it finds on +the inactive lists, again diverting them to the appropriate zone's unevictable lru list. shrink_inactive_list() should only see SHM_LOCKed pages that became SHM_LOCKed after shrink_active_list() had moved them to the inactive list, or pages mapped into VM_LOCKED vmas that munlock_vma_page() couldn't isolate from @@ -597,19 +582,7 @@ the lru to recheck via try_to_munlock(). shrink_inactive_list() won't notice the latter, but will pass on to shrink_page_list(). shrink_page_list() again culls obviously unevictable pages that it could -encounter for similar reason to shrink_inactive_list(). As already discussed, -shrink_page_list() proactively looks for anonymous pages that should have -PG_mlocked set but don't--these would not be detected by page_evictable()--to -avoid adding them to the swap cache unnecessarily. File pages mapped into +encounter for similar reason to shrink_inactive_list(). Pages mapped into VM_LOCKED vmas but without PG_mlocked set will make it all the way to -try_to_unmap(). shrink_page_list() will divert them to the unevictable list when -try_to_unmap() returns SWAP_MLOCK, as discussed above. - -TODO/FIXME: If we can enhance the swap cache to reliably remove entries -with page_count(page) > 2, as long as all ptes are mapped to the page and -not the swap entry, we can probably remove the call to try_to_munlock() in -shrink_page_list() and just remove the page from the swap cache when -try_to_unmap() returns SWAP_MLOCK. Currently, remove_exclusive_swap_page() -doesn't seem to allow that. - - +try_to_unmap(). shrink_page_list() will divert them to the unevictable list +when try_to_unmap() returns SWAP_MLOCK, as discussed above. diff --git a/Documentation/x86/zero-page.txt b/Documentation/x86/zero-page.txt index 169ad423a3d1..4f913857b8a2 100644 --- a/Documentation/x86/zero-page.txt +++ b/Documentation/x86/zero-page.txt @@ -3,7 +3,7 @@ protocol of kernel. These should be filled by bootloader or 16-bit real-mode setup code of the kernel. References/settings to it mainly are in: - include/asm-x86/bootparam.h + arch/x86/include/asm/bootparam.h Offset Proto Name Meaning diff --git a/MAINTAINERS b/MAINTAINERS index 141aff67bd6d..094dd52d7309 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -616,7 +616,7 @@ M: mkpetch@internode.on.net S: Maintained ARM/TOSA MACHINE SUPPORT -P: Dmitry Baryshkov +P: Dmitry Eremin-Solenikov M: dbaryshkov@gmail.com P: Dirk Opfer M: dirk@opfer-online.de @@ -1092,11 +1092,8 @@ S: Maintained CHECKPATCH P: Andy Whitcroft -M: apw@shadowen.org -P: Randy Dunlap -M: rdunlap@xenotime.net -P: Joel Schopp -M: jschopp@austin.ibm.com +M: apw@canonical.com +L: linux-kernel@vger.kernel.org S: Supported CISCO 10G ETHERNET DRIVER diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h index ca88e54dec93..62b363584b2b 100644 --- a/arch/alpha/include/asm/atomic.h +++ b/arch/alpha/include/asm/atomic.h @@ -1,6 +1,7 @@ #ifndef _ALPHA_ATOMIC_H #define _ALPHA_ATOMIC_H +#include <linux/types.h> #include <asm/barrier.h> #include <asm/system.h> @@ -13,14 +14,6 @@ */ -/* - * Counter is volatile to make sure gcc doesn't try to be clever - * and move things around on us. We need to use _exactly_ the address - * the user gave us, not some alias that contains the same information. - */ -typedef struct { volatile int counter; } atomic_t; -typedef struct { volatile long counter; } atomic64_t; - #define ATOMIC_INIT(i) ( (atomic_t) { (i) } ) #define ATOMIC64_INIT(i) ( (atomic64_t) { (i) } ) diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h index 325f881ccb50..ee99723b3a6c 100644 --- a/arch/arm/include/asm/atomic.h +++ b/arch/arm/include/asm/atomic.h @@ -12,10 +12,9 @@ #define __ASM_ARM_ATOMIC_H #include <linux/compiler.h> +#include <linux/types.h> #include <asm/system.h> -typedef struct { volatile int counter; } atomic_t; - #define ATOMIC_INIT(i) { (i) } #ifdef __KERNEL__ diff --git a/arch/arm/kernel/kprobes.c b/arch/arm/kernel/kprobes.c index 3f9abe0e9aff..f692efddd449 100644 --- a/arch/arm/kernel/kprobes.c +++ b/arch/arm/kernel/kprobes.c @@ -92,9 +92,7 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p) void __kprobes arch_remove_kprobe(struct kprobe *p) { if (p->ainsn.insn) { - mutex_lock(&kprobe_mutex); free_insn_slot(p->ainsn.insn, 0); - mutex_unlock(&kprobe_mutex); p->ainsn.insn = NULL; } } diff --git a/arch/arm/mach-s3c2410/include/mach/spi.h b/arch/arm/mach-s3c2410/include/mach/spi.h index 774f3adfe8ad..1d300fb112b1 100644 --- a/arch/arm/mach-s3c2410/include/mach/spi.h +++ b/arch/arm/mach-s3c2410/include/mach/spi.h @@ -14,7 +14,7 @@ #define __ASM_ARCH_SPI_H __FILE__ struct s3c2410_spi_info { - unsigned long pin_cs; /* simple gpio cs */ + int pin_cs; /* simple gpio cs */ unsigned int num_cs; /* total chipselects */ int bus_num; /* bus number to use. */ diff --git a/arch/avr32/include/asm/atomic.h b/arch/avr32/include/asm/atomic.h index 7ef3862a73d0..318815107748 100644 --- a/arch/avr32/include/asm/atomic.h +++ b/arch/avr32/include/asm/atomic.h @@ -14,9 +14,9 @@ #ifndef __ASM_AVR32_ATOMIC_H #define __ASM_AVR32_ATOMIC_H +#include <linux/types.h> #include <asm/system.h> -typedef struct { volatile int counter; } atomic_t; #define ATOMIC_INIT(i) { (i) } #define atomic_read(v) ((v)->counter) diff --git a/arch/avr32/kernel/traps.c b/arch/avr32/kernel/traps.c index 0d987373bc01..d547c8df157d 100644 --- a/arch/avr32/kernel/traps.c +++ b/arch/avr32/kernel/traps.c @@ -7,6 +7,7 @@ */ #include <linux/bug.h> +#include <linux/hardirq.h> #include <linux/init.h> #include <linux/kallsyms.h> #include <linux/kdebug.h> diff --git a/arch/blackfin/include/asm/atomic.h b/arch/blackfin/include/asm/atomic.h index 7cf508718605..25776c19064b 100644 --- a/arch/blackfin/include/asm/atomic.h +++ b/arch/blackfin/include/asm/atomic.h @@ -1,6 +1,7 @@ #ifndef __ARCH_BLACKFIN_ATOMIC__ #define __ARCH_BLACKFIN_ATOMIC__ +#include <linux/types.h> #include <asm/system.h> /* local_irq_XXX() */ /* @@ -13,9 +14,6 @@ * Tony Kou (tonyko@lineo.ca) Lineo Inc. 2001 */ -typedef struct { - int counter; -} atomic_t; #define ATOMIC_INIT(i) { (i) } #define atomic_read(v) ((v)->counter) diff --git a/arch/cris/include/asm/atomic.h b/arch/cris/include/asm/atomic.h index f71ea686a2ea..5718dd8902a1 100644 --- a/arch/cris/include/asm/atomic.h +++ b/arch/cris/include/asm/atomic.h @@ -4,7 +4,7 @@ #define __ASM_CRIS_ATOMIC__ #include <linux/compiler.h> - +#include <linux/types.h> #include <asm/system.h> #include <arch/atomic.h> @@ -13,8 +13,6 @@ * resource counting etc.. */ -typedef struct { volatile int counter; } atomic_t; - #define ATOMIC_INIT(i) { (i) } #define atomic_read(v) ((v)->counter) diff --git a/arch/h8300/include/asm/atomic.h b/arch/h8300/include/asm/atomic.h index b4cf0ea97ede..833186c8dc3b 100644 --- a/arch/h8300/include/asm/atomic.h +++ b/arch/h8300/include/asm/atomic.h @@ -1,12 +1,13 @@ #ifndef __ARCH_H8300_ATOMIC__ #define __ARCH_H8300_ATOMIC__ +#include <linux/types.h> + /* * Atomic operations that C can't guarantee us. Useful for * resource counting etc.. */ -typedef struct { int counter; } atomic_t; #define ATOMIC_INIT(i) { (i) } #define atomic_read(v) ((v)->counter) diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h index 50c2b83fd5a0..d37292bd9875 100644 --- a/arch/ia64/include/asm/atomic.h +++ b/arch/ia64/include/asm/atomic.h @@ -17,12 +17,6 @@ #include <asm/intrinsics.h> #include <asm/system.h> -/* - * On IA-64, counter must always be volatile to ensure that that the - * memory accesses are ordered. - */ -typedef struct { volatile __s32 counter; } atomic_t; -typedef struct { volatile __s64 counter; } atomic64_t; #define ATOMIC_INIT(i) ((atomic_t) { (i) }) #define ATOMIC64_INIT(i) ((atomic64_t) { (i) }) diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c index f07688da947c..097b84d54e73 100644 --- a/arch/ia64/kernel/kprobes.c +++ b/arch/ia64/kernel/kprobes.c @@ -670,9 +670,11 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p) void __kprobes arch_remove_kprobe(struct kprobe *p) { - mutex_lock(&kprobe_mutex); - free_insn_slot(p->ainsn.insn, p->ainsn.inst_flag & INST_FLAG_BOOSTABLE); - mutex_unlock(&kprobe_mutex); + if (p->ainsn.insn) { + free_insn_slot(p->ainsn.insn, + p->ainsn.inst_flag & INST_FLAG_BOOSTABLE); + p->ainsn.insn = NULL; + } } /* * We are resuming execution after a single step fault, so the pt_regs diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 054bcd9439aa..56e12903973c 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -692,7 +692,7 @@ int arch_add_memory(int nid, u64 start, u64 size) pgdat = NODE_DATA(nid); zone = pgdat->node_zones + ZONE_NORMAL; - ret = __add_pages(zone, start_pfn, nr_pages); + ret = __add_pages(nid, zone, start_pfn, nr_pages); if (ret) printk("%s: Problem encountered in __add_pages() as ret=%d\n", diff --git a/arch/m68knommu/include/asm/atomic.h b/arch/m68knommu/include/asm/atomic.h index d5632a305dae..6bb674855a3f 100644 --- a/arch/m68knommu/include/asm/atomic.h +++ b/arch/m68knommu/include/asm/atomic.h @@ -1,6 +1,7 @@ #ifndef __ARCH_M68KNOMMU_ATOMIC__ #define __ARCH_M68KNOMMU_ATOMIC__ +#include <linux/types.h> #include <asm/system.h> /* @@ -12,7 +13,6 @@ * We do not have SMP m68k systems, so we don't have to deal with that. */ -typedef struct { int counter; } atomic_t; #define ATOMIC_INIT(i) { (i) } #define atomic_read(v) ((v)->counter) diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h index 1232be3885b0..c996c3b4d074 100644 --- a/arch/mips/include/asm/atomic.h +++ b/arch/mips/include/asm/atomic.h @@ -15,13 +15,12 @@ #define _ASM_ATOMIC_H #include <linux/irqflags.h> +#include <linux/types.h> #include <asm/barrier.h> #include <asm/cpu-features.h> #include <asm/war.h> #include <asm/system.h> -typedef struct { volatile int counter; } atomic_t; - #define ATOMIC_INIT(i) { (i) } /* @@ -404,8 +403,6 @@ static __inline__ int atomic_add_unless(atomic_t *v, int a, int u) #ifdef CONFIG_64BIT -typedef struct { volatile long counter; } atomic64_t; - #define ATOMIC64_INIT(i) { (i) } /* diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h index 57fcc4a5ebb4..edbfe25c5fc1 100644 --- a/arch/parisc/include/asm/atomic.h +++ b/arch/parisc/include/asm/atomic.h @@ -155,14 +155,11 @@ static inline unsigned long __cmpxchg_local(volatile void *ptr, #define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n)) #endif -/* Note that we need not lock read accesses - aligned word writes/reads - * are atomic, so a reader never sees unconsistent values. - * - * Cache-line alignment would conflict with, for example, linux/module.h +/* + * Note that we need not lock read accesses - aligned word writes/reads + * are atomic, so a reader never sees inconsistent values. */ -typedef struct { volatile int counter; } atomic_t; - /* It's possible to reduce all atomic operations to either * __atomic_add_return, atomic_set and atomic_read (the latter * is there only for consistency). @@ -260,8 +257,6 @@ static __inline__ int atomic_add_unless(atomic_t *v, int a, int u) #ifdef CONFIG_64BIT -typedef struct { volatile s64 counter; } atomic64_t; - #define ATOMIC64_INIT(i) ((atomic64_t) { (i) }) static __inline__ int diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h index 499be5bdd6fa..b401950f5259 100644 --- a/arch/powerpc/include/asm/atomic.h +++ b/arch/powerpc/include/asm/atomic.h @@ -5,7 +5,7 @@ * PowerPC atomic operations */ -typedef struct { int counter; } atomic_t; +#include <linux/types.h> #ifdef __KERNEL__ #include <linux/compiler.h> @@ -251,8 +251,6 @@ static __inline__ int atomic_dec_if_positive(atomic_t *v) #ifdef __powerpc64__ -typedef struct { long counter; } atomic64_t; - #define ATOMIC64_INIT(i) { (i) } static __inline__ long atomic64_read(const atomic64_t *v) diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h index 26f0d0ab27a5..b1dafb6a9743 100644 --- a/arch/powerpc/include/asm/hugetlb.h +++ b/arch/powerpc/include/asm/hugetlb.h @@ -18,6 +18,12 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep); /* + * The version of vma_mmu_pagesize() in arch/powerpc/mm/hugetlbpage.c needs + * to override the version in mm/hugetlb.c + */ +#define vma_mmu_pagesize vma_mmu_pagesize + +/* * If the arch doesn't supply something else, assume that hugepage * size aligned regions are ok without further preparation. */ diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index de79915452c8..989edcdf0297 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -96,9 +96,10 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p) void __kprobes arch_remove_kprobe(struct kprobe *p) { - mutex_lock(&kprobe_mutex); - free_insn_slot(p->ainsn.insn, 0); - mutex_unlock(&kprobe_mutex); + if (p->ainsn.insn) { + free_insn_slot(p->ainsn.insn, 0); + p->ainsn.insn = NULL; + } } static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 201c7a5486cb..9920d6a7cf29 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -512,6 +512,13 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1, 0); } +unsigned long vma_mmu_pagesize(struct vm_area_struct *vma) +{ + unsigned int psize = get_slice_psize(vma->vm_mm, vma->vm_start); + + return 1UL << mmu_psize_to_shift(psize); +} + /* * Called by asm hashtable.S for doing lazy icache flush */ diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 53b06ebb3f2f..f00f09a77f12 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -132,7 +132,7 @@ int arch_add_memory(int nid, u64 start, u64 size) /* this should work for most non-highmem platforms */ zone = pgdata->node_zones; - return __add_pages(zone, start_pfn, nr_pages); + return __add_pages(nid, zone, start_pfn, nr_pages); } #endif /* CONFIG_MEMORY_HOTPLUG */ diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h index 2d184655bc5d..de432f2de2d2 100644 --- a/arch/s390/include/asm/atomic.h +++ b/arch/s390/include/asm/atomic.h @@ -2,6 +2,7 @@ #define __ARCH_S390_ATOMIC__ #include <linux/compiler.h> +#include <linux/types.h> /* * include/asm-s390/atomic.h @@ -23,9 +24,6 @@ * S390 uses 'Compare And Swap' for atomicity in SMP enviroment */ -typedef struct { - int counter; -} __attribute__ ((aligned (4))) atomic_t; #define ATOMIC_INIT(i) { (i) } #ifdef __KERNEL__ @@ -149,9 +147,6 @@ static __inline__ int atomic_add_unless(atomic_t *v, int a, int u) #undef __CS_LOOP #ifdef __s390x__ -typedef struct { - long long counter; -} __attribute__ ((aligned (8))) atomic64_t; #define ATOMIC64_INIT(i) { (i) } #if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 2) diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index 569079ec4ff0..9b92856632cf 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -218,9 +218,10 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p) void __kprobes arch_remove_kprobe(struct kprobe *p) { - mutex_lock(&kprobe_mutex); - free_insn_slot(p->ainsn.insn, 0); - mutex_unlock(&kprobe_mutex); + if (p->ainsn.insn) { + free_insn_slot(p->ainsn.insn, 0); + p->ainsn.insn = NULL; + } } static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 158b0d6d7046..f0258ca3b17e 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -183,7 +183,7 @@ int arch_add_memory(int nid, u64 start, u64 size) rc = vmem_add_mapping(start, size); if (rc) return rc; - rc = __add_pages(zone, PFN_DOWN(start), PFN_DOWN(size)); + rc = __add_pages(nid, zone, PFN_DOWN(start), PFN_DOWN(size)); if (rc) vmem_remove_mapping(start, size); return rc; diff --git a/arch/sh/include/asm/atomic.h b/arch/sh/include/asm/atomic.h index c043ef003028..6327ffbb1992 100644 --- a/arch/sh/include/asm/atomic.h +++ b/arch/sh/include/asm/atomic.h @@ -7,16 +7,15 @@ * */ -typedef struct { volatile int counter; } atomic_t; +#include <linux/compiler.h> +#include <linux/types.h> +#include <asm/system.h> #define ATOMIC_INIT(i) ( (atomic_t) { (i) } ) #define atomic_read(v) ((v)->counter) #define atomic_set(v,i) ((v)->counter = (i)) -#include <linux/compiler.h> -#include <asm/system.h> - #if defined(CONFIG_GUSA_RB) #include <asm/atomic-grb.h> #elif defined(CONFIG_CPU_SH4A) diff --git a/arch/sh/kernel/traps_32.c b/arch/sh/kernel/traps_32.c index 88807a2aacc3..c0aa3d83ec0e 100644 --- a/arch/sh/kernel/traps_32.c +++ b/arch/sh/kernel/traps_32.c @@ -13,6 +13,7 @@ */ #include <linux/kernel.h> #include <linux/ptrace.h> +#include <linux/hardirq.h> #include <linux/init.h> #include <linux/spinlock.h> #include <linux/module.h> diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index 6cbef8caeb56..3edf297c829b 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -311,7 +311,8 @@ int arch_add_memory(int nid, u64 start, u64 size) pgdat = NODE_DATA(nid); /* We only have ZONE_NORMAL, so this is easy.. */ - ret = __add_pages(pgdat->node_zones + ZONE_NORMAL, start_pfn, nr_pages); + ret = __add_pages(nid, pgdat->node_zones + ZONE_NORMAL, + start_pfn, nr_pages); if (unlikely(ret)) printk("%s: Failed, __add_pages() == %d\n", __func__, ret); diff --git a/arch/sparc/include/asm/atomic_32.h b/arch/sparc/include/asm/atomic_32.h index 5c944b5a8040..ce465975a6a5 100644 --- a/arch/sparc/include/asm/atomic_32.h +++ b/arch/sparc/include/asm/atomic_32.h @@ -13,8 +13,6 @@ #include <linux/types.h> -typedef struct { volatile int counter; } atomic_t; - #ifdef __KERNEL__ #define ATOMIC_INIT(i) { (i) } diff --git a/arch/sparc/include/asm/atomic_64.h b/arch/sparc/include/asm/atomic_64.h index 5982c5ae7f07..a0a706492696 100644 --- a/arch/sparc/include/asm/atomic_64.h +++ b/arch/sparc/include/asm/atomic_64.h @@ -10,9 +10,6 @@ #include <linux/types.h> #include <asm/system.h> -typedef struct { volatile int counter; } atomic_t; -typedef struct { volatile __s64 counter; } atomic64_t; - #define ATOMIC_INIT(i) { (i) } #define ATOMIC64_INIT(i) { (i) } diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index 44e490419495..7384d8accfe7 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -64,11 +64,10 @@ good_area: do { int fault; -survive: + fault = handle_mm_fault(mm, vma, address, is_write); if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) { - err = -ENOMEM; goto out_of_memory; } else if (fault & VM_FAULT_SIGBUS) { err = -EACCES; @@ -104,18 +103,14 @@ out: out_nosemaphore: return err; -/* - * We ran out of memory, or some other thing happened to us that made - * us unable to handle the page fault gracefully. - */ out_of_memory: - if (is_global_init(current)) { - up_read(&mm->mmap_sem); - yield(); - down_read(&mm->mmap_sem); - goto survive; - } - goto out; + /* + * We ran out of memory, call the OOM killer, and return the userspace + * (which will retry the fault, or kill us if we got oom-killed). + */ + up_read(&mm->mmap_sem); + pagefault_out_of_memory(); + return 0; } static void bad_segv(struct faultinfo fi, unsigned long ip) @@ -214,9 +209,6 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user, si.si_addr = (void __user *)address; current->thread.arch.faultinfo = fi; force_sig_info(SIGBUS, &si, current); - } else if (err == -ENOMEM) { - printk(KERN_INFO "VM: killing process %s\n", current->comm); - do_exit(SIGKILL); } else { BUG_ON(err != -EFAULT); si.si_signo = SIGSEGV; diff --git a/arch/x86/include/asm/atomic_32.h b/arch/x86/include/asm/atomic_32.h index ad5b9f6ecddf..85b46fba4229 100644 --- a/arch/x86/include/asm/atomic_32.h +++ b/arch/x86/include/asm/atomic_32.h @@ -2,6 +2,7 @@ #define _ASM_X86_ATOMIC_32_H #include <linux/compiler.h> +#include <linux/types.h> #include <asm/processor.h> #include <asm/cmpxchg.h> @@ -10,15 +11,6 @@ * resource counting etc.. */ -/* - * Make sure gcc doesn't try to be clever and move things around - * on us. We need to use _exactly_ the address the user gave us, - * not some alias that contains the same information. - */ -typedef struct { - int counter; -} atomic_t; - #define ATOMIC_INIT(i) { (i) } /** diff --git a/arch/x86/include/asm/atomic_64.h b/arch/x86/include/asm/atomic_64.h index 279d2a731f3f..8c21731984da 100644 --- a/arch/x86/include/asm/atomic_64.h +++ b/arch/x86/include/asm/atomic_64.h @@ -1,25 +1,15 @@ #ifndef _ASM_X86_ATOMIC_64_H #define _ASM_X86_ATOMIC_64_H +#include <linux/types.h> #include <asm/alternative.h> #include <asm/cmpxchg.h> -/* atomic_t should be 32 bit signed type */ - /* * Atomic operations that C can't guarantee us. Useful for * resource counting etc.. */ -/* - * Make sure gcc doesn't try to be clever and move things around - * on us. We need to use _exactly_ the address the user gave us, - * not some alias that contains the same information. - */ -typedef struct { - int counter; -} atomic_t; - #define ATOMIC_INIT(i) { (i) } /** @@ -191,11 +181,7 @@ static inline int atomic_sub_return(int i, atomic_t *v) #define atomic_inc_return(v) (atomic_add_return(1, v)) #define atomic_dec_return(v) (atomic_sub_return(1, v)) -/* An 64bit atomic type */ - -typedef struct { - long counter; -} atomic64_t; +/* The 64-bit atomic type */ #define ATOMIC64_INIT(i) { (i) } diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h deleted file mode 100644 index 8b064bd9c553..000000000000 --- a/arch/x86/include/asm/unwind.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef _ASM_X86_UNWIND_H -#define _ASM_X86_UNWIND_H - -#define UNW_PC(frame) ((void)(frame), 0UL) -#define UNW_SP(frame) ((void)(frame), 0UL) -#define UNW_FP(frame) ((void)(frame), 0UL) - -static inline int arch_unw_user_mode(const void *info) -{ - return 0; -} - -#endif /* _ASM_X86_UNWIND_H */ diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 6c27679ec6aa..eead6f8f9218 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -376,9 +376,10 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p) void __kprobes arch_remove_kprobe(struct kprobe *p) { - mutex_lock(&kprobe_mutex); - free_insn_slot(p->ainsn.insn, (p->ainsn.boostable == 1)); - mutex_unlock(&kprobe_mutex); + if (p->ainsn.insn) { + free_insn_slot(p->ainsn.insn, (p->ainsn.boostable == 1)); + p->ainsn.insn = NULL; + } } static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index ce6650eb64e9..c9a666cdd3db 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -20,7 +20,6 @@ #include <linux/module.h> #include <linux/ptrace.h> #include <linux/string.h> -#include <linux/unwind.h> #include <linux/delay.h> #include <linux/errno.h> #include <linux/kexec.h> @@ -51,7 +50,6 @@ #include <asm/debugreg.h> #include <asm/atomic.h> #include <asm/system.h> -#include <asm/unwind.h> #include <asm/traps.h> #include <asm/desc.h> #include <asm/i387.h> diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 57ec8c86a877..9e268b6b204e 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -667,7 +667,6 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) if (unlikely(in_atomic() || !mm)) goto bad_area_nosemaphore; -again: /* * When running in the kernel we expect faults to occur only to * addresses in user space. All other faults represent errors in the @@ -859,25 +858,14 @@ no_context: oops_end(flags, regs, sig); #endif -/* - * We ran out of memory, or some other thing happened to us that made - * us unable to handle the page fault gracefully. - */ out_of_memory: + /* + * We ran out of memory, call the OOM killer, and return the userspace + * (which will retry the fault, or kill us if we got oom-killed). + */ up_read(&mm->mmap_sem); - if (is_global_init(tsk)) { - yield(); - /* - * Re-lookup the vma - in theory the vma tree might - * have changed: - */ - goto again; - } - - printk("VM: killing process %s\n", tsk->comm); - if (error_code & PF_USER) - do_group_exit(SIGKILL); - goto no_context; + pagefault_out_of_memory(); + return; do_sigbus: up_read(&mm->mmap_sem); diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index f99a6c6c432e..544d724caeee 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -1079,7 +1079,7 @@ int arch_add_memory(int nid, u64 start, u64 size) unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; - return __add_pages(zone, start_pfn, nr_pages); + return __add_pages(nid, zone, start_pfn, nr_pages); } #endif diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 9f7a0d24d42a..54c437e96541 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -857,7 +857,7 @@ int arch_add_memory(int nid, u64 start, u64 size) if (last_mapped_pfn > max_pfn_mapped) max_pfn_mapped = last_mapped_pfn; - ret = __add_pages(zone, start_pfn, nr_pages); + ret = __add_pages(nid, zone, start_pfn, nr_pages); WARN_ON_ONCE(ret); return ret; diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 38aca048e951..66a9d8145562 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -41,6 +41,7 @@ #include <linux/pm_qos_params.h> #include <linux/clockchips.h> #include <linux/cpuidle.h> +#include <linux/irqflags.h> /* * Include the apic definitions for x86 to have the APIC timer related defines diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 5260e9e0df48..989429cfed88 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -347,8 +347,9 @@ static inline int memory_probe_init(void) * section belongs to... */ -static int add_memory_block(unsigned long node_id, struct mem_section *section, - unsigned long state, int phys_device) +static int add_memory_block(int nid, struct mem_section *section, + unsigned long state, int phys_device, + enum mem_add_context context) { struct memory_block *mem = kzalloc(sizeof(*mem), GFP_KERNEL); int ret = 0; @@ -370,6 +371,10 @@ static int add_memory_block(unsigned long node_id, struct mem_section *section, ret = mem_create_simple_file(mem, phys_device); if (!ret) ret = mem_create_simple_file(mem, removable); + if (!ret) { + if (context == HOTPLUG) + ret = register_mem_sect_under_node(mem, nid); + } return ret; } @@ -382,7 +387,7 @@ static int add_memory_block(unsigned long node_id, struct mem_section *section, * * This could be made generic for all sysdev classes. */ -static struct memory_block *find_memory_block(struct mem_section *section) +struct memory_block *find_memory_block(struct mem_section *section) { struct kobject *kobj; struct sys_device *sysdev; @@ -411,6 +416,7 @@ int remove_memory_block(unsigned long node_id, struct mem_section *section, struct memory_block *mem; mem = find_memory_block(section); + unregister_mem_sect_under_nodes(mem); mem_remove_simple_file(mem, phys_index); mem_remove_simple_file(mem, state); mem_remove_simple_file(mem, phys_device); @@ -424,9 +430,9 @@ int remove_memory_block(unsigned long node_id, struct mem_section *section, * need an interface for the VM to add new memory regions, * but without onlining it. */ -int register_new_memory(struct mem_section *section) +int register_new_memory(int nid, struct mem_section *section) { - return add_memory_block(0, section, MEM_OFFLINE, 0); + return add_memory_block(nid, section, MEM_OFFLINE, 0, HOTPLUG); } int unregister_memory_section(struct mem_section *section) @@ -458,7 +464,8 @@ int __init memory_dev_init(void) for (i = 0; i < NR_MEM_SECTIONS; i++) { if (!present_section_nr(i)) continue; - err = add_memory_block(0, __nr_to_section(i), MEM_ONLINE, 0); + err = add_memory_block(0, __nr_to_section(i), MEM_ONLINE, + 0, BOOT); if (!ret) ret = err; } diff --git a/drivers/base/node.c b/drivers/base/node.c index 91636cd8b6c9..43fa90b837ee 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -6,6 +6,7 @@ #include <linux/module.h> #include <linux/init.h> #include <linux/mm.h> +#include <linux/memory.h> #include <linux/node.h> #include <linux/hugetlb.h> #include <linux/cpumask.h> @@ -248,6 +249,105 @@ int unregister_cpu_under_node(unsigned int cpu, unsigned int nid) return 0; } +#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE +#define page_initialized(page) (page->lru.next) + +static int get_nid_for_pfn(unsigned long pfn) +{ + struct page *page; + + if (!pfn_valid_within(pfn)) + return -1; + page = pfn_to_page(pfn); + if (!page_initialized(page)) + return -1; + return pfn_to_nid(pfn); +} + +/* register memory section under specified node if it spans that node */ +int register_mem_sect_under_node(struct memory_block *mem_blk, int nid) +{ + unsigned long pfn, sect_start_pfn, sect_end_pfn; + + if (!mem_blk) + return -EFAULT; + if (!node_online(nid)) + return 0; + sect_start_pfn = section_nr_to_pfn(mem_blk->phys_index); + sect_end_pfn = sect_start_pfn + PAGES_PER_SECTION - 1; + for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) { + int page_nid; + + page_nid = get_nid_for_pfn(pfn); + if (page_nid < 0) + continue; + if (page_nid != nid) + continue; + return sysfs_create_link_nowarn(&node_devices[nid].sysdev.kobj, + &mem_blk->sysdev.kobj, + kobject_name(&mem_blk->sysdev.kobj)); + } + /* mem section does not span the specified node */ + return 0; +} + +/* unregister memory section under all nodes that it spans */ +int unregister_mem_sect_under_nodes(struct memory_block *mem_blk) +{ + nodemask_t unlinked_nodes; + unsigned long pfn, sect_start_pfn, sect_end_pfn; + + if (!mem_blk) + return -EFAULT; + nodes_clear(unlinked_nodes); + sect_start_pfn = section_nr_to_pfn(mem_blk->phys_index); + sect_end_pfn = sect_start_pfn + PAGES_PER_SECTION - 1; + for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) { + unsigned int nid; + + nid = get_nid_for_pfn(pfn); + if (nid < 0) + continue; + if (!node_online(nid)) + continue; + if (node_test_and_set(nid, unlinked_nodes)) + continue; + sysfs_remove_link(&node_devices[nid].sysdev.kobj, + kobject_name(&mem_blk->sysdev.kobj)); + } + return 0; +} + +static int link_mem_sections(int nid) +{ + unsigned long start_pfn = NODE_DATA(nid)->node_start_pfn; + unsigned long end_pfn = start_pfn + NODE_DATA(nid)->node_spanned_pages; + unsigned long pfn; + int err = 0; + + for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) { + unsigned long section_nr = pfn_to_section_nr(pfn); + struct mem_section *mem_sect; + struct memory_block *mem_blk; + int ret; + + if (!present_section_nr(section_nr)) + continue; + mem_sect = __nr_to_section(section_nr); + mem_blk = find_memory_block(mem_sect); + ret = register_mem_sect_under_node(mem_blk, nid); + if (!err) + err = ret; + + /* discard ref obtained in find_memory_block() */ + kobject_put(&mem_blk->sysdev.kobj); + } + return err; +} +#else +static int link_mem_sections(int nid) { return 0; } +#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */ + int register_one_node(int nid) { int error = 0; @@ -267,6 +367,9 @@ int register_one_node(int nid) if (cpu_to_node(cpu) == nid) register_cpu_under_node(cpu, nid); } + + /* link memory sections under this node */ + error = link_mem_sections(nid); } return error; diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index 1697043119bd..35914b6e1d2a 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -841,7 +841,7 @@ config JS_RTC config GEN_RTC tristate "Generic /dev/rtc emulation" - depends on RTC!=y && !IA64 && !ARM && !M32R && !MIPS && !SPARC && !FRV && !S390 && !SUPERH && !AVR32 + depends on RTC!=y && !IA64 && !ARM && !M32R && !MIPS && !SPARC && !FRV && !S390 && !SUPERH && !AVR32 && !BLACKFIN ---help--- If you say Y here and create a character special file /dev/rtc with major number 10 and minor number 135 using mknod ("man mknod"), you diff --git a/drivers/char/consolemap.c b/drivers/char/consolemap.c index 4246b8e36cb3..45d3e80156d4 100644 --- a/drivers/char/consolemap.c +++ b/drivers/char/consolemap.c @@ -554,7 +554,7 @@ int con_set_unimap(struct vc_data *vc, ushort ct, struct unipair __user *list) __get_user(fontpos, &list->fontpos); if ((err1 = con_insert_unipair(p, unicode,fontpos)) != 0) err = err1; - list++; + list++; } if (con_unify_unimap(vc, p)) diff --git a/drivers/char/mem.c b/drivers/char/mem.c index 6431f6921a67..3586b3b3df3f 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -425,9 +425,6 @@ static ssize_t read_oldmem(struct file *file, char __user *buf, } #endif -extern long vread(char *buf, char *addr, unsigned long count); -extern long vwrite(char *buf, char *addr, unsigned long count); - #ifdef CONFIG_DEVKMEM /* * This function reads the *virtual* memory as seen by the kernel. diff --git a/drivers/char/random.c b/drivers/char/random.c index c7afc068c28d..7c13581ca9cd 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -407,7 +407,7 @@ struct entropy_store { /* read-write data: */ spinlock_t lock; unsigned add_ptr; - int entropy_count; /* Must at no time exceed ->POOLBITS! */ + int entropy_count; int input_rotate; }; @@ -767,11 +767,10 @@ static size_t account(struct entropy_store *r, size_t nbytes, int min, { unsigned long flags; - BUG_ON(r->entropy_count > r->poolinfo->POOLBITS); - /* Hold lock while accounting */ spin_lock_irqsave(&r->lock, flags); + BUG_ON(r->entropy_count > r->poolinfo->POOLBITS); DEBUG_ENT("trying to extract %d bits from %s\n", nbytes * 8, r->name); diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c index 94966edfb44d..d41b9f6f7903 100644 --- a/drivers/char/sysrq.c +++ b/drivers/char/sysrq.c @@ -82,7 +82,7 @@ static void sysrq_handle_loglevel(int key, struct tty_struct *tty) } static struct sysrq_key_op sysrq_loglevel_op = { .handler = sysrq_handle_loglevel, - .help_msg = "loglevel0-8", + .help_msg = "loglevel(0-9)", .action_msg = "Changing Loglevel", .enable_mask = SYSRQ_ENABLE_LOG, }; @@ -233,7 +233,7 @@ static void sysrq_handle_showallcpus(int key, struct tty_struct *tty) static struct sysrq_key_op sysrq_showallcpus_op = { .handler = sysrq_handle_showallcpus, - .help_msg = "aLlcpus", + .help_msg = "show-backtrace-all-active-cpus(L)", .action_msg = "Show backtrace of all active CPUs", .enable_mask = SYSRQ_ENABLE_DUMP, }; @@ -247,7 +247,7 @@ static void sysrq_handle_showregs(int key, struct tty_struct *tty) } static struct sysrq_key_op sysrq_showregs_op = { .handler = sysrq_handle_showregs, - .help_msg = "showPc", + .help_msg = "show-registers(P)", .action_msg = "Show Regs", .enable_mask = SYSRQ_ENABLE_DUMP, }; @@ -258,7 +258,7 @@ static void sysrq_handle_showstate(int key, struct tty_struct *tty) } static struct sysrq_key_op sysrq_showstate_op = { .handler = sysrq_handle_showstate, - .help_msg = "showTasks", + .help_msg = "show-task-states(T)", .action_msg = "Show State", .enable_mask = SYSRQ_ENABLE_DUMP, }; @@ -269,7 +269,7 @@ static void sysrq_handle_showstate_blocked(int key, struct tty_struct *tty) } static struct sysrq_key_op sysrq_showstate_blocked_op = { .handler = sysrq_handle_showstate_blocked, - .help_msg = "shoW-blocked-tasks", + .help_msg = "show-blocked-tasks(W)", .action_msg = "Show Blocked State", .enable_mask = SYSRQ_ENABLE_DUMP, }; @@ -297,7 +297,7 @@ static void sysrq_handle_showmem(int key, struct tty_struct *tty) } static struct sysrq_key_op sysrq_showmem_op = { .handler = sysrq_handle_showmem, - .help_msg = "showMem", + .help_msg = "show-memory-usage(M)", .action_msg = "Show Memory", .enable_mask = SYSRQ_ENABLE_DUMP, }; @@ -323,7 +323,7 @@ static void sysrq_handle_term(int key, struct tty_struct *tty) } static struct sysrq_key_op sysrq_term_op = { .handler = sysrq_handle_term, - .help_msg = "tErm", + .help_msg = "terminate-all-tasks(E)", .action_msg = "Terminate All Tasks", .enable_mask = SYSRQ_ENABLE_SIGNAL, }; @@ -341,7 +341,7 @@ static void sysrq_handle_moom(int key, struct tty_struct *tty) } static struct sysrq_key_op sysrq_moom_op = { .handler = sysrq_handle_moom, - .help_msg = "Full", + .help_msg = "memory-full-oom-kill(F)", .action_msg = "Manual OOM execution", .enable_mask = SYSRQ_ENABLE_SIGNAL, }; @@ -353,7 +353,7 @@ static void sysrq_handle_kill(int key, struct tty_struct *tty) } static struct sysrq_key_op sysrq_kill_op = { .handler = sysrq_handle_kill, - .help_msg = "kIll", + .help_msg = "kill-all-tasks(I)", .action_msg = "Kill All Tasks", .enable_mask = SYSRQ_ENABLE_SIGNAL, }; @@ -364,7 +364,7 @@ static void sysrq_handle_unrt(int key, struct tty_struct *tty) } static struct sysrq_key_op sysrq_unrt_op = { .handler = sysrq_handle_unrt, - .help_msg = "Nice", + .help_msg = "nice-all-RT-tasks(N)", .action_msg = "Nice All RT Tasks", .enable_mask = SYSRQ_ENABLE_RTNICE, }; diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index e2667a8c2997..eee47fd16d79 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -109,6 +109,13 @@ config EDAC_X38 Support for error detection and correction on the Intel X38 server chipsets. +config EDAC_I5400 + tristate "Intel 5400 (Seaburg) chipsets" + depends on EDAC_MM_EDAC && PCI && X86 + help + Support for error detection and correction the Intel + i5400 MCH chipset (Seaburg). + config EDAC_I82860 tristate "Intel 82860" depends on EDAC_MM_EDAC && PCI && X86_32 diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index 62c2d9bad8dc..b75196927de3 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile @@ -20,6 +20,7 @@ endif obj-$(CONFIG_EDAC_AMD76X) += amd76x_edac.o obj-$(CONFIG_EDAC_I5000) += i5000_edac.o obj-$(CONFIG_EDAC_I5100) += i5100_edac.o +obj-$(CONFIG_EDAC_I5400) += i5400_edac.o obj-$(CONFIG_EDAC_E7XXX) += e7xxx_edac.o obj-$(CONFIG_EDAC_E752X) += e752x_edac.o obj-$(CONFIG_EDAC_I82443BXGX) += i82443bxgx_edac.o diff --git a/drivers/edac/edac_device.c b/drivers/edac/edac_device.c index 4041e9143283..ca9113e1c106 100644 --- a/drivers/edac/edac_device.c +++ b/drivers/edac/edac_device.c @@ -333,7 +333,7 @@ static int add_edac_dev_to_global_list(struct edac_device_ctl_info *edac_dev) fail0: edac_printk(KERN_WARNING, EDAC_MC, "%s (%s) %s %s already assigned %d\n", - rover->dev->bus_id, edac_dev_name(rover), + dev_name(rover->dev), edac_dev_name(rover), rover->mod_name, rover->ctl_name, rover->dev_idx); return 1; diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index d110392d48f4..25d66940b4fa 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -401,7 +401,7 @@ static int add_mc_to_global_list(struct mem_ctl_info *mci) fail0: edac_printk(KERN_WARNING, EDAC_MC, - "%s (%s) %s %s already assigned %d\n", p->dev->bus_id, + "%s (%s) %s %s already assigned %d\n", dev_name(p->dev), edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx); return 1; diff --git a/drivers/edac/edac_pci.c b/drivers/edac/edac_pci.c index 22ec9d5d4312..5d3c8083a40e 100644 --- a/drivers/edac/edac_pci.c +++ b/drivers/edac/edac_pci.c @@ -150,7 +150,7 @@ static int add_edac_pci_to_global_list(struct edac_pci_ctl_info *pci) fail0: edac_printk(KERN_WARNING, EDAC_PCI, "%s (%s) %s %s already assigned %d\n", - rover->dev->bus_id, edac_dev_name(rover), + dev_name(rover->dev), edac_dev_name(rover), rover->mod_name, rover->ctl_name, rover->pci_idx); return 1; diff --git a/drivers/edac/edac_pci_sysfs.c b/drivers/edac/edac_pci_sysfs.c index 5c153dccc95e..422728cfe994 100644 --- a/drivers/edac/edac_pci_sysfs.c +++ b/drivers/edac/edac_pci_sysfs.c @@ -569,7 +569,7 @@ static void edac_pci_dev_parity_test(struct pci_dev *dev) local_irq_restore(flags); - debugf4("PCI STATUS= 0x%04x %s\n", status, dev->dev.bus_id); + debugf4("PCI STATUS= 0x%04x %s\n", status, dev_name(&dev->dev)); /* check the status reg for errors on boards NOT marked as broken * if broken, we cannot trust any of the status bits @@ -600,13 +600,13 @@ static void edac_pci_dev_parity_test(struct pci_dev *dev) } - debugf4("PCI HEADER TYPE= 0x%02x %s\n", header_type, dev->dev.bus_id); + debugf4("PCI HEADER TYPE= 0x%02x %s\n", header_type, dev_name(&dev->dev)); if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE) { /* On bridges, need to examine secondary status register */ status = get_pci_parity_status(dev, 1); - debugf4("PCI SEC_STATUS= 0x%04x %s\n", status, dev->dev.bus_id); + debugf4("PCI SEC_STATUS= 0x%04x %s\n", status, dev_name(&dev->dev)); /* check the secondary status reg for errors, * on NOT broken boards diff --git a/drivers/edac/i5400_edac.c b/drivers/edac/i5400_edac.c new file mode 100644 index 000000000000..b08b6d8e2dc7 --- /dev/null +++ b/drivers/edac/i5400_edac.c @@ -0,0 +1,1476 @@ +/* + * Intel 5400 class Memory Controllers kernel module (Seaburg) + * + * This file may be distributed under the terms of the + * GNU General Public License. + * + * Copyright (c) 2008 by: + * Ben Woodard <woodard@redhat.com> + * Mauro Carvalho Chehab <mchehab@redhat.com> + * + * Red Hat Inc. http://www.redhat.com + * + * Forked and adapted from the i5000_edac driver which was + * written by Douglas Thompson Linux Networx <norsk5@xmission.com> + * + * This module is based on the following document: + * + * Intel 5400 Chipset Memory Controller Hub (MCH) - Datasheet + * http://developer.intel.com/design/chipsets/datashts/313070.htm + * + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/pci.h> +#include <linux/pci_ids.h> +#include <linux/slab.h> +#include <linux/edac.h> +#include <linux/mmzone.h> + +#include "edac_core.h" + +/* + * Alter this version for the I5400 module when modifications are made + */ +#define I5400_REVISION " Ver: 1.0.0 " __DATE__ + +#define EDAC_MOD_STR "i5400_edac" + +#define i5400_printk(level, fmt, arg...) \ + edac_printk(level, "i5400", fmt, ##arg) + +#define i5400_mc_printk(mci, level, fmt, arg...) \ + edac_mc_chipset_printk(mci, level, "i5400", fmt, ##arg) + +/* Limits for i5400 */ +#define NUM_MTRS_PER_BRANCH 4 +#define CHANNELS_PER_BRANCH 2 +#define MAX_CHANNELS 4 +#define MAX_DIMMS (MAX_CHANNELS * 4) /* Up to 4 DIMM's per channel */ +#define MAX_CSROWS (MAX_DIMMS * 2) /* max possible csrows per channel */ + +/* Device 16, + * Function 0: System Address + * Function 1: Memory Branch Map, Control, Errors Register + * Function 2: FSB Error Registers + * + * All 3 functions of Device 16 (0,1,2) share the SAME DID and + * uses PCI_DEVICE_ID_INTEL_5400_ERR for device 16 (0,1,2), + * PCI_DEVICE_ID_INTEL_5400_FBD0 and PCI_DEVICE_ID_INTEL_5400_FBD1 + * for device 21 (0,1). + */ + + /* OFFSETS for Function 0 */ +#define AMBASE 0x48 /* AMB Mem Mapped Reg Region Base */ +#define MAXCH 0x56 /* Max Channel Number */ +#define MAXDIMMPERCH 0x57 /* Max DIMM PER Channel Number */ + + /* OFFSETS for Function 1 */ +#define TOLM 0x6C +#define REDMEMB 0x7C +#define REC_ECC_LOCATOR_ODD(x) ((x) & 0x3fe00) /* bits [17:9] indicate ODD, [8:0] indicate EVEN */ +#define MIR0 0x80 +#define MIR1 0x84 +#define AMIR0 0x8c +#define AMIR1 0x90 + + /* Fatal error registers */ +#define FERR_FAT_FBD 0x98 /* also called as FERR_FAT_FB_DIMM at datasheet */ +#define FERR_FAT_FBDCHAN (3<<28) /* channel index where the highest-order error occurred */ + +#define NERR_FAT_FBD 0x9c +#define FERR_NF_FBD 0xa0 /* also called as FERR_NFAT_FB_DIMM at datasheet */ + + /* Non-fatal error register */ +#define NERR_NF_FBD 0xa4 + + /* Enable error mask */ +#define EMASK_FBD 0xa8 + +#define ERR0_FBD 0xac +#define ERR1_FBD 0xb0 +#define ERR2_FBD 0xb4 +#define MCERR_FBD 0xb8 + + /* No OFFSETS for Device 16 Function 2 */ + +/* + * Device 21, + * Function 0: Memory Map Branch 0 + * + * Device 22, + * Function 0: Memory Map Branch 1 + */ + + /* OFFSETS for Function 0 */ +#define AMBPRESENT_0 0x64 +#define AMBPRESENT_1 0x66 +#define MTR0 0x80 +#define MTR1 0x82 +#define MTR2 0x84 +#define MTR3 0x86 + + /* OFFSETS for Function 1 */ +#define NRECFGLOG 0x74 +#define RECFGLOG 0x78 +#define NRECMEMA 0xbe +#define NRECMEMB 0xc0 +#define NRECFB_DIMMA 0xc4 +#define NRECFB_DIMMB 0xc8 +#define NRECFB_DIMMC 0xcc +#define NRECFB_DIMMD 0xd0 +#define NRECFB_DIMME 0xd4 +#define NRECFB_DIMMF 0xd8 +#define REDMEMA 0xdC +#define RECMEMA 0xf0 +#define RECMEMB 0xf4 +#define RECFB_DIMMA 0xf8 +#define RECFB_DIMMB 0xec +#define RECFB_DIMMC 0xf0 +#define RECFB_DIMMD 0xf4 +#define RECFB_DIMME 0xf8 +#define RECFB_DIMMF 0xfC + +/* + * Error indicator bits and masks + * Error masks are according with Table 5-17 of i5400 datasheet + */ + +enum error_mask { + EMASK_M1 = 1<<0, /* Memory Write error on non-redundant retry */ + EMASK_M2 = 1<<1, /* Memory or FB-DIMM configuration CRC read error */ + EMASK_M3 = 1<<2, /* Reserved */ + EMASK_M4 = 1<<3, /* Uncorrectable Data ECC on Replay */ + EMASK_M5 = 1<<4, /* Aliased Uncorrectable Non-Mirrored Demand Data ECC */ + EMASK_M6 = 1<<5, /* Unsupported on i5400 */ + EMASK_M7 = 1<<6, /* Aliased Uncorrectable Resilver- or Spare-Copy Data ECC */ + EMASK_M8 = 1<<7, /* Aliased Uncorrectable Patrol Data ECC */ + EMASK_M9 = 1<<8, /* Non-Aliased Uncorrectable Non-Mirrored Demand Data ECC */ + EMASK_M10 = 1<<9, /* Unsupported on i5400 */ + EMASK_M11 = 1<<10, /* Non-Aliased Uncorrectable Resilver- or Spare-Copy Data ECC */ + EMASK_M12 = 1<<11, /* Non-Aliased Uncorrectable Patrol Data ECC */ + EMASK_M13 = 1<<12, /* Memory Write error on first attempt */ + EMASK_M14 = 1<<13, /* FB-DIMM Configuration Write error on first attempt */ + EMASK_M15 = 1<<14, /* Memory or FB-DIMM configuration CRC read error */ + EMASK_M16 = 1<<15, /* Channel Failed-Over Occurred */ + EMASK_M17 = 1<<16, /* Correctable Non-Mirrored Demand Data ECC */ + EMASK_M18 = 1<<17, /* Unsupported on i5400 */ + EMASK_M19 = 1<<18, /* Correctable Resilver- or Spare-Copy Data ECC */ + EMASK_M20 = 1<<19, /* Correctable Patrol Data ECC */ + EMASK_M21 = 1<<20, /* FB-DIMM Northbound parity error on FB-DIMM Sync Status */ + EMASK_M22 = 1<<21, /* SPD protocol Error */ + EMASK_M23 = 1<<22, /* Non-Redundant Fast Reset Timeout */ + EMASK_M24 = 1<<23, /* Refresh error */ + EMASK_M25 = 1<<24, /* Memory Write error on redundant retry */ + EMASK_M26 = 1<<25, /* Redundant Fast Reset Timeout */ + EMASK_M27 = 1<<26, /* Correctable Counter Threshold Exceeded */ + EMASK_M28 = 1<<27, /* DIMM-Spare Copy Completed */ + EMASK_M29 = 1<<28, /* DIMM-Isolation Completed */ +}; + +/* + * Names to translate bit error into something useful + */ +static const char *error_name[] = { + [0] = "Memory Write error on non-redundant retry", + [1] = "Memory or FB-DIMM configuration CRC read error", + /* Reserved */ + [3] = "Uncorrectable Data ECC on Replay", + [4] = "Aliased Uncorrectable Non-Mirrored Demand Data ECC", + /* M6 Unsupported on i5400 */ + [6] = "Aliased Uncorrectable Resilver- or Spare-Copy Data ECC", + [7] = "Aliased Uncorrectable Patrol Data ECC", + [8] = "Non-Aliased Uncorrectable Non-Mirrored Demand Data ECC", + /* M10 Unsupported on i5400 */ + [10] = "Non-Aliased Uncorrectable Resilver- or Spare-Copy Data ECC", + [11] = "Non-Aliased Uncorrectable Patrol Data ECC", + [12] = "Memory Write error on first attempt", + [13] = "FB-DIMM Configuration Write error on first attempt", + [14] = "Memory or FB-DIMM configuration CRC read error", + [15] = "Channel Failed-Over Occurred", + [16] = "Correctable Non-Mirrored Demand Data ECC", + /* M18 Unsupported on i5400 */ + [18] = "Correctable Resilver- or Spare-Copy Data ECC", + [19] = "Correctable Patrol Data ECC", + [20] = "FB-DIMM Northbound parity error on FB-DIMM Sync Status", + [21] = "SPD protocol Error", + [22] = "Non-Redundant Fast Reset Timeout", + [23] = "Refresh error", + [24] = "Memory Write error on redundant retry", + [25] = "Redundant Fast Reset Timeout", + [26] = "Correctable Counter Threshold Exceeded", + [27] = "DIMM-Spare Copy Completed", + [28] = "DIMM-Isolation Completed", +}; + +/* Fatal errors */ +#define ERROR_FAT_MASK (EMASK_M1 | \ + EMASK_M2 | \ + EMASK_M23) + +/* Correctable errors */ +#define ERROR_NF_CORRECTABLE (EMASK_M27 | \ + EMASK_M20 | \ + EMASK_M19 | \ + EMASK_M18 | \ + EMASK_M17 | \ + EMASK_M16) +#define ERROR_NF_DIMM_SPARE (EMASK_M29 | \ + EMASK_M28) +#define ERROR_NF_SPD_PROTOCOL (EMASK_M22) +#define ERROR_NF_NORTH_CRC (EMASK_M21) + +/* Recoverable errors */ +#define ERROR_NF_RECOVERABLE (EMASK_M26 | \ + EMASK_M25 | \ + EMASK_M24 | \ + EMASK_M15 | \ + EMASK_M14 | \ + EMASK_M13 | \ + EMASK_M12 | \ + EMASK_M11 | \ + EMASK_M9 | \ + EMASK_M8 | \ + EMASK_M7 | \ + EMASK_M5) + +/* uncorrectable errors */ +#define ERROR_NF_UNCORRECTABLE (EMASK_M4) + +/* mask to all non-fatal errors */ +#define ERROR_NF_MASK (ERROR_NF_CORRECTABLE | \ + ERROR_NF_UNCORRECTABLE | \ + ERROR_NF_RECOVERABLE | \ + ERROR_NF_DIMM_SPARE | \ + ERROR_NF_SPD_PROTOCOL | \ + ERROR_NF_NORTH_CRC) + +/* + * Define error masks for the several registers + */ + +/* Enable all fatal and non fatal errors */ +#define ENABLE_EMASK_ALL (ERROR_FAT_MASK | ERROR_NF_MASK) + +/* mask for fatal error registers */ +#define FERR_FAT_MASK ERROR_FAT_MASK + +/* masks for non-fatal error register */ +static inline int to_nf_mask(unsigned int mask) +{ + return (mask & EMASK_M29) | (mask >> 3); +}; + +static inline int from_nf_ferr(unsigned int mask) +{ + return (mask & EMASK_M29) | /* Bit 28 */ + (mask & ((1 << 28) - 1) << 3); /* Bits 0 to 27 */ +}; + +#define FERR_NF_MASK to_nf_mask(ERROR_NF_MASK) +#define FERR_NF_CORRECTABLE to_nf_mask(ERROR_NF_CORRECTABLE) +#define FERR_NF_DIMM_SPARE to_nf_mask(ERROR_NF_DIMM_SPARE) +#define FERR_NF_SPD_PROTOCOL to_nf_mask(ERROR_NF_SPD_PROTOCOL) +#define FERR_NF_NORTH_CRC to_nf_mask(ERROR_NF_NORTH_CRC) +#define FERR_NF_RECOVERABLE to_nf_mask(ERROR_NF_RECOVERABLE) +#define FERR_NF_UNCORRECTABLE to_nf_mask(ERROR_NF_UNCORRECTABLE) + +/* Defines to extract the vaious fields from the + * MTRx - Memory Technology Registers + */ +#define MTR_DIMMS_PRESENT(mtr) ((mtr) & (1 << 10)) +#define MTR_DIMMS_ETHROTTLE(mtr) ((mtr) & (1 << 9)) +#define MTR_DRAM_WIDTH(mtr) (((mtr) & (1 << 8)) ? 8 : 4) +#define MTR_DRAM_BANKS(mtr) (((mtr) & (1 << 6)) ? 8 : 4) +#define MTR_DRAM_BANKS_ADDR_BITS(mtr) ((MTR_DRAM_BANKS(mtr) == 8) ? 3 : 2) +#define MTR_DIMM_RANK(mtr) (((mtr) >> 5) & 0x1) +#define MTR_DIMM_RANK_ADDR_BITS(mtr) (MTR_DIMM_RANK(mtr) ? 2 : 1) +#define MTR_DIMM_ROWS(mtr) (((mtr) >> 2) & 0x3) +#define MTR_DIMM_ROWS_ADDR_BITS(mtr) (MTR_DIMM_ROWS(mtr) + 13) +#define MTR_DIMM_COLS(mtr) ((mtr) & 0x3) +#define MTR_DIMM_COLS_ADDR_BITS(mtr) (MTR_DIMM_COLS(mtr) + 10) + +/* This applies to FERR_NF_FB-DIMM as well as FERR_FAT_FB-DIMM */ +static inline int extract_fbdchan_indx(u32 x) +{ + return (x>>28) & 0x3; +} + +#ifdef CONFIG_EDAC_DEBUG +/* MTR NUMROW */ +static const char *numrow_toString[] = { + "8,192 - 13 rows", + "16,384 - 14 rows", + "32,768 - 15 rows", + "65,536 - 16 rows" +}; + +/* MTR NUMCOL */ +static const char *numcol_toString[] = { + "1,024 - 10 columns", + "2,048 - 11 columns", + "4,096 - 12 columns", + "reserved" +}; +#endif + +/* Device name and register DID (Device ID) */ +struct i5400_dev_info { + const char *ctl_name; /* name for this device */ + u16 fsb_mapping_errors; /* DID for the branchmap,control */ +}; + +/* Table of devices attributes supported by this driver */ +static const struct i5400_dev_info i5400_devs[] = { + { + .ctl_name = "I5400", + .fsb_mapping_errors = PCI_DEVICE_ID_INTEL_5400_ERR, + }, +}; + +struct i5400_dimm_info { + int megabytes; /* size, 0 means not present */ + int dual_rank; +}; + +/* driver private data structure */ +struct i5400_pvt { + struct pci_dev *system_address; /* 16.0 */ + struct pci_dev *branchmap_werrors; /* 16.1 */ + struct pci_dev *fsb_error_regs; /* 16.2 */ + struct pci_dev *branch_0; /* 21.0 */ + struct pci_dev *branch_1; /* 22.0 */ + + u16 tolm; /* top of low memory */ + u64 ambase; /* AMB BAR */ + + u16 mir0, mir1; + + u16 b0_mtr[NUM_MTRS_PER_BRANCH]; /* Memory Technlogy Reg */ + u16 b0_ambpresent0; /* Branch 0, Channel 0 */ + u16 b0_ambpresent1; /* Brnach 0, Channel 1 */ + + u16 b1_mtr[NUM_MTRS_PER_BRANCH]; /* Memory Technlogy Reg */ + u16 b1_ambpresent0; /* Branch 1, Channel 8 */ + u16 b1_ambpresent1; /* Branch 1, Channel 1 */ + + /* DIMM information matrix, allocating architecture maximums */ + struct i5400_dimm_info dimm_info[MAX_CSROWS][MAX_CHANNELS]; + + /* Actual values for this controller */ + int maxch; /* Max channels */ + int maxdimmperch; /* Max DIMMs per channel */ +}; + +/* I5400 MCH error information retrieved from Hardware */ +struct i5400_error_info { + /* These registers are always read from the MC */ + u32 ferr_fat_fbd; /* First Errors Fatal */ + u32 nerr_fat_fbd; /* Next Errors Fatal */ + u32 ferr_nf_fbd; /* First Errors Non-Fatal */ + u32 nerr_nf_fbd; /* Next Errors Non-Fatal */ + + /* These registers are input ONLY if there was a Recoverable Error */ + u32 redmemb; /* Recoverable Mem Data Error log B */ + u16 recmema; /* Recoverable Mem Error log A */ + u32 recmemb; /* Recoverable Mem Error log B */ + + /* These registers are input ONLY if there was a Non-Rec Error */ + u16 nrecmema; /* Non-Recoverable Mem log A */ + u16 nrecmemb; /* Non-Recoverable Mem log B */ + +}; + +/* note that nrec_rdwr changed from NRECMEMA to NRECMEMB between the 5000 and + 5400 better to use an inline function than a macro in this case */ +static inline int nrec_bank(struct i5400_error_info *info) +{ + return ((info->nrecmema) >> 12) & 0x7; +} +static inline int nrec_rank(struct i5400_error_info *info) +{ + return ((info->nrecmema) >> 8) & 0xf; +} +static inline int nrec_buf_id(struct i5400_error_info *info) +{ + return ((info->nrecmema)) & 0xff; +} +static inline int nrec_rdwr(struct i5400_error_info *info) +{ + return (info->nrecmemb) >> 31; +} +/* This applies to both NREC and REC string so it can be used with nrec_rdwr + and rec_rdwr */ +static inline const char *rdwr_str(int rdwr) +{ + return rdwr ? "Write" : "Read"; +} +static inline int nrec_cas(struct i5400_error_info *info) +{ + return ((info->nrecmemb) >> 16) & 0x1fff; +} +static inline int nrec_ras(struct i5400_error_info *info) +{ + return (info->nrecmemb) & 0xffff; +} +static inline int rec_bank(struct i5400_error_info *info) +{ + return ((info->recmema) >> 12) & 0x7; +} +static inline int rec_rank(struct i5400_error_info *info) +{ + return ((info->recmema) >> 8) & 0xf; +} +static inline int rec_rdwr(struct i5400_error_info *info) +{ + return (info->recmemb) >> 31; +} +static inline int rec_cas(struct i5400_error_info *info) +{ + return ((info->recmemb) >> 16) & 0x1fff; +} +static inline int rec_ras(struct i5400_error_info *info) +{ + return (info->recmemb) & 0xffff; +} + +static struct edac_pci_ctl_info *i5400_pci; + +/* + * i5400_get_error_info Retrieve the hardware error information from + * the hardware and cache it in the 'info' + * structure + */ +static void i5400_get_error_info(struct mem_ctl_info *mci, + struct i5400_error_info *info) +{ + struct i5400_pvt *pvt; + u32 value; + + pvt = mci->pvt_info; + + /* read in the 1st FATAL error register */ + pci_read_config_dword(pvt->branchmap_werrors, FERR_FAT_FBD, &value); + + /* Mask only the bits that the doc says are valid + */ + value &= (FERR_FAT_FBDCHAN | FERR_FAT_MASK); + + /* If there is an error, then read in the + NEXT FATAL error register and the Memory Error Log Register A + */ + if (value & FERR_FAT_MASK) { + info->ferr_fat_fbd = value; + + /* harvest the various error data we need */ + pci_read_config_dword(pvt->branchmap_werrors, + NERR_FAT_FBD, &info->nerr_fat_fbd); + pci_read_config_word(pvt->branchmap_werrors, + NRECMEMA, &info->nrecmema); + pci_read_config_word(pvt->branchmap_werrors, + NRECMEMB, &info->nrecmemb); + + /* Clear the error bits, by writing them back */ + pci_write_config_dword(pvt->branchmap_werrors, + FERR_FAT_FBD, value); + } else { + info->ferr_fat_fbd = 0; + info->nerr_fat_fbd = 0; + info->nrecmema = 0; + info->nrecmemb = 0; + } + + /* read in the 1st NON-FATAL error register */ + pci_read_config_dword(pvt->branchmap_werrors, FERR_NF_FBD, &value); + + /* If there is an error, then read in the 1st NON-FATAL error + * register as well */ + if (value & FERR_NF_MASK) { + info->ferr_nf_fbd = value; + + /* harvest the various error data we need */ + pci_read_config_dword(pvt->branchmap_werrors, + NERR_NF_FBD, &info->nerr_nf_fbd); + pci_read_config_word(pvt->branchmap_werrors, + RECMEMA, &info->recmema); + pci_read_config_dword(pvt->branchmap_werrors, + RECMEMB, &info->recmemb); + pci_read_config_dword(pvt->branchmap_werrors, + REDMEMB, &info->redmemb); + + /* Clear the error bits, by writing them back */ + pci_write_config_dword(pvt->branchmap_werrors, + FERR_NF_FBD, value); + } else { + info->ferr_nf_fbd = 0; + info->nerr_nf_fbd = 0; + info->recmema = 0; + info->recmemb = 0; + info->redmemb = 0; + } +} + +/* + * i5400_proccess_non_recoverable_info(struct mem_ctl_info *mci, + * struct i5400_error_info *info, + * int handle_errors); + * + * handle the Intel FATAL and unrecoverable errors, if any + */ +static void i5400_proccess_non_recoverable_info(struct mem_ctl_info *mci, + struct i5400_error_info *info, + unsigned long allErrors) +{ + char msg[EDAC_MC_LABEL_LEN + 1 + 90 + 80]; + int branch; + int channel; + int bank; + int buf_id; + int rank; + int rdwr; + int ras, cas; + int errnum; + char *type = NULL; + + if (!allErrors) + return; /* if no error, return now */ + + if (allErrors & ERROR_FAT_MASK) + type = "FATAL"; + else if (allErrors & FERR_NF_UNCORRECTABLE) + type = "NON-FATAL uncorrected"; + else + type = "NON-FATAL recoverable"; + + /* ONLY ONE of the possible error bits will be set, as per the docs */ + + branch = extract_fbdchan_indx(info->ferr_fat_fbd); + channel = branch; + + /* Use the NON-Recoverable macros to extract data */ + bank = nrec_bank(info); + rank = nrec_rank(info); + buf_id = nrec_buf_id(info); + rdwr = nrec_rdwr(info); + ras = nrec_ras(info); + cas = nrec_cas(info); + + debugf0("\t\tCSROW= %d Channels= %d,%d (Branch= %d " + "DRAM Bank= %d Buffer ID = %d rdwr= %s ras= %d cas= %d)\n", + rank, channel, channel + 1, branch >> 1, bank, + buf_id, rdwr_str(rdwr), ras, cas); + + /* Only 1 bit will be on */ + errnum = find_first_bit(&allErrors, ARRAY_SIZE(error_name)); + + /* Form out message */ + snprintf(msg, sizeof(msg), + "%s (Branch=%d DRAM-Bank=%d Buffer ID = %d RDWR=%s " + "RAS=%d CAS=%d %s Err=0x%lx (%s))", + type, branch >> 1, bank, buf_id, rdwr_str(rdwr), ras, cas, + type, allErrors, error_name[errnum]); + + /* Call the helper to output message */ + edac_mc_handle_fbd_ue(mci, rank, channel, channel + 1, msg); +} + +/* + * i5400_process_fatal_error_info(struct mem_ctl_info *mci, + * struct i5400_error_info *info, + * int handle_errors); + * + * handle the Intel NON-FATAL errors, if any + */ +static void i5400_process_nonfatal_error_info(struct mem_ctl_info *mci, + struct i5400_error_info *info) +{ + char msg[EDAC_MC_LABEL_LEN + 1 + 90 + 80]; + unsigned long allErrors; + int branch; + int channel; + int bank; + int rank; + int rdwr; + int ras, cas; + int errnum; + + /* mask off the Error bits that are possible */ + allErrors = from_nf_ferr(info->ferr_nf_fbd & FERR_NF_MASK); + if (!allErrors) + return; /* if no error, return now */ + + /* ONLY ONE of the possible error bits will be set, as per the docs */ + + if (allErrors & (ERROR_NF_UNCORRECTABLE | ERROR_NF_RECOVERABLE)) { + i5400_proccess_non_recoverable_info(mci, info, allErrors); + return; + } + + /* Correctable errors */ + if (allErrors & ERROR_NF_CORRECTABLE) { + debugf0("\tCorrected bits= 0x%lx\n", allErrors); + + branch = extract_fbdchan_indx(info->ferr_nf_fbd); + + channel = 0; + if (REC_ECC_LOCATOR_ODD(info->redmemb)) + channel = 1; + + /* Convert channel to be based from zero, instead of + * from branch base of 0 */ + channel += branch; + + bank = rec_bank(info); + rank = rec_rank(info); + rdwr = rec_rdwr(info); + ras = rec_ras(info); + cas = rec_cas(info); + + /* Only 1 bit will be on */ + errnum = find_first_bit(&allErrors, ARRAY_SIZE(error_name)); + + debugf0("\t\tCSROW= %d Channel= %d (Branch %d " + "DRAM Bank= %d rdwr= %s ras= %d cas= %d)\n", + rank, channel, branch >> 1, bank, + rdwr_str(rdwr), ras, cas); + + /* Form out message */ + snprintf(msg, sizeof(msg), + "Corrected error (Branch=%d DRAM-Bank=%d RDWR=%s " + "RAS=%d CAS=%d, CE Err=0x%lx (%s))", + branch >> 1, bank, rdwr_str(rdwr), ras, cas, + allErrors, error_name[errnum]); + + /* Call the helper to output message */ + edac_mc_handle_fbd_ce(mci, rank, channel, msg); + + return; + } + + /* Miscelaneous errors */ + errnum = find_first_bit(&allErrors, ARRAY_SIZE(error_name)); + + branch = extract_fbdchan_indx(info->ferr_nf_fbd); + + i5400_mc_printk(mci, KERN_EMERG, + "Non-Fatal misc error (Branch=%d Err=%#lx (%s))", + branch >> 1, allErrors, error_name[errnum]); +} + +/* + * i5400_process_error_info Process the error info that is + * in the 'info' structure, previously retrieved from hardware + */ +static void i5400_process_error_info(struct mem_ctl_info *mci, + struct i5400_error_info *info) +{ u32 allErrors; + + /* First handle any fatal errors that occurred */ + allErrors = (info->ferr_fat_fbd & FERR_FAT_MASK); + i5400_proccess_non_recoverable_info(mci, info, allErrors); + + /* now handle any non-fatal errors that occurred */ + i5400_process_nonfatal_error_info(mci, info); +} + +/* + * i5400_clear_error Retrieve any error from the hardware + * but do NOT process that error. + * Used for 'clearing' out of previous errors + * Called by the Core module. + */ +static void i5400_clear_error(struct mem_ctl_info *mci) +{ + struct i5400_error_info info; + + i5400_get_error_info(mci, &info); +} + +/* + * i5400_check_error Retrieve and process errors reported by the + * hardware. Called by the Core module. + */ +static void i5400_check_error(struct mem_ctl_info *mci) +{ + struct i5400_error_info info; + debugf4("MC%d: " __FILE__ ": %s()\n", mci->mc_idx, __func__); + i5400_get_error_info(mci, &info); + i5400_process_error_info(mci, &info); +} + +/* + * i5400_put_devices 'put' all the devices that we have + * reserved via 'get' + */ +static void i5400_put_devices(struct mem_ctl_info *mci) +{ + struct i5400_pvt *pvt; + + pvt = mci->pvt_info; + + /* Decrement usage count for devices */ + pci_dev_put(pvt->branch_1); + pci_dev_put(pvt->branch_0); + pci_dev_put(pvt->fsb_error_regs); + pci_dev_put(pvt->branchmap_werrors); +} + +/* + * i5400_get_devices Find and perform 'get' operation on the MCH's + * device/functions we want to reference for this driver + * + * Need to 'get' device 16 func 1 and func 2 + */ +static int i5400_get_devices(struct mem_ctl_info *mci, int dev_idx) +{ + struct i5400_pvt *pvt; + struct pci_dev *pdev; + + pvt = mci->pvt_info; + pvt->branchmap_werrors = NULL; + pvt->fsb_error_regs = NULL; + pvt->branch_0 = NULL; + pvt->branch_1 = NULL; + + /* Attempt to 'get' the MCH register we want */ + pdev = NULL; + while (!pvt->branchmap_werrors || !pvt->fsb_error_regs) { + pdev = pci_get_device(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_5400_ERR, pdev); + if (!pdev) { + /* End of list, leave */ + i5400_printk(KERN_ERR, + "'system address,Process Bus' " + "device not found:" + "vendor 0x%x device 0x%x ERR funcs " + "(broken BIOS?)\n", + PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_5400_ERR); + goto error; + } + + /* Store device 16 funcs 1 and 2 */ + switch (PCI_FUNC(pdev->devfn)) { + case 1: + pvt->branchmap_werrors = pdev; + break; + case 2: + pvt->fsb_error_regs = pdev; + break; + } + } + + debugf1("System Address, processor bus- PCI Bus ID: %s %x:%x\n", + pci_name(pvt->system_address), + pvt->system_address->vendor, pvt->system_address->device); + debugf1("Branchmap, control and errors - PCI Bus ID: %s %x:%x\n", + pci_name(pvt->branchmap_werrors), + pvt->branchmap_werrors->vendor, pvt->branchmap_werrors->device); + debugf1("FSB Error Regs - PCI Bus ID: %s %x:%x\n", + pci_name(pvt->fsb_error_regs), + pvt->fsb_error_regs->vendor, pvt->fsb_error_regs->device); + + pvt->branch_0 = pci_get_device(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_5400_FBD0, NULL); + if (!pvt->branch_0) { + i5400_printk(KERN_ERR, + "MC: 'BRANCH 0' device not found:" + "vendor 0x%x device 0x%x Func 0 (broken BIOS?)\n", + PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_5400_FBD0); + goto error; + } + + /* If this device claims to have more than 2 channels then + * fetch Branch 1's information + */ + if (pvt->maxch < CHANNELS_PER_BRANCH) + return 0; + + pvt->branch_1 = pci_get_device(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_5400_FBD1, NULL); + if (!pvt->branch_1) { + i5400_printk(KERN_ERR, + "MC: 'BRANCH 1' device not found:" + "vendor 0x%x device 0x%x Func 0 " + "(broken BIOS?)\n", + PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_5400_FBD1); + goto error; + } + + return 0; + +error: + i5400_put_devices(mci); + return -ENODEV; +} + +/* + * determine_amb_present + * + * the information is contained in NUM_MTRS_PER_BRANCH different + * registers determining which of the NUM_MTRS_PER_BRANCH requires + * knowing which channel is in question + * + * 2 branches, each with 2 channels + * b0_ambpresent0 for channel '0' + * b0_ambpresent1 for channel '1' + * b1_ambpresent0 for channel '2' + * b1_ambpresent1 for channel '3' + */ +static int determine_amb_present_reg(struct i5400_pvt *pvt, int channel) +{ + int amb_present; + + if (channel < CHANNELS_PER_BRANCH) { + if (channel & 0x1) + amb_present = pvt->b0_ambpresent1; + else + amb_present = pvt->b0_ambpresent0; + } else { + if (channel & 0x1) + amb_present = pvt->b1_ambpresent1; + else + amb_present = pvt->b1_ambpresent0; + } + + return amb_present; +} + +/* + * determine_mtr(pvt, csrow, channel) + * + * return the proper MTR register as determine by the csrow and desired channel + */ +static int determine_mtr(struct i5400_pvt *pvt, int csrow, int channel) +{ + int mtr; + int n; + + /* There is one MTR for each slot pair of FB-DIMMs, + Each slot may have one or two ranks (2 csrows), + Each slot pair may be at branch 0 or branch 1. + So, csrow should be divided by eight + */ + n = csrow >> 3; + + if (n >= NUM_MTRS_PER_BRANCH) { + debugf0("ERROR: trying to access an invalid csrow: %d\n", + csrow); + return 0; + } + + if (channel < CHANNELS_PER_BRANCH) + mtr = pvt->b0_mtr[n]; + else + mtr = pvt->b1_mtr[n]; + + return mtr; +} + +/* + */ +static void decode_mtr(int slot_row, u16 mtr) +{ + int ans; + + ans = MTR_DIMMS_PRESENT(mtr); + + debugf2("\tMTR%d=0x%x: DIMMs are %s\n", slot_row, mtr, + ans ? "Present" : "NOT Present"); + if (!ans) + return; + + debugf2("\t\tWIDTH: x%d\n", MTR_DRAM_WIDTH(mtr)); + + debugf2("\t\tELECTRICAL THROTTLING is %s\n", + MTR_DIMMS_ETHROTTLE(mtr) ? "enabled" : "disabled"); + + debugf2("\t\tNUMBANK: %d bank(s)\n", MTR_DRAM_BANKS(mtr)); + debugf2("\t\tNUMRANK: %s\n", MTR_DIMM_RANK(mtr) ? "double" : "single"); + debugf2("\t\tNUMROW: %s\n", numrow_toString[MTR_DIMM_ROWS(mtr)]); + debugf2("\t\tNUMCOL: %s\n", numcol_toString[MTR_DIMM_COLS(mtr)]); +} + +static void handle_channel(struct i5400_pvt *pvt, int csrow, int channel, + struct i5400_dimm_info *dinfo) +{ + int mtr; + int amb_present_reg; + int addrBits; + + mtr = determine_mtr(pvt, csrow, channel); + if (MTR_DIMMS_PRESENT(mtr)) { + amb_present_reg = determine_amb_present_reg(pvt, channel); + + /* Determine if there is a DIMM present in this DIMM slot */ + if (amb_present_reg & (1 << (csrow >> 1))) { + dinfo->dual_rank = MTR_DIMM_RANK(mtr); + + if (!((dinfo->dual_rank == 0) && + ((csrow & 0x1) == 0x1))) { + /* Start with the number of bits for a Bank + * on the DRAM */ + addrBits = MTR_DRAM_BANKS_ADDR_BITS(mtr); + /* Add thenumber of ROW bits */ + addrBits += MTR_DIMM_ROWS_ADDR_BITS(mtr); + /* add the number of COLUMN bits */ + addrBits += MTR_DIMM_COLS_ADDR_BITS(mtr); + + addrBits += 6; /* add 64 bits per DIMM */ + addrBits -= 20; /* divide by 2^^20 */ + addrBits -= 3; /* 8 bits per bytes */ + + dinfo->megabytes = 1 << addrBits; + } + } + } +} + +/* + * calculate_dimm_size + * + * also will output a DIMM matrix map, if debug is enabled, for viewing + * how the DIMMs are populated + */ +static void calculate_dimm_size(struct i5400_pvt *pvt) +{ + struct i5400_dimm_info *dinfo; + int csrow, max_csrows; + char *p, *mem_buffer; + int space, n; + int channel; + + /* ================= Generate some debug output ================= */ + space = PAGE_SIZE; + mem_buffer = p = kmalloc(space, GFP_KERNEL); + if (p == NULL) { + i5400_printk(KERN_ERR, "MC: %s:%s() kmalloc() failed\n", + __FILE__, __func__); + return; + } + + /* Scan all the actual CSROWS (which is # of DIMMS * 2) + * and calculate the information for each DIMM + * Start with the highest csrow first, to display it first + * and work toward the 0th csrow + */ + max_csrows = pvt->maxdimmperch * 2; + for (csrow = max_csrows - 1; csrow >= 0; csrow--) { + + /* on an odd csrow, first output a 'boundary' marker, + * then reset the message buffer */ + if (csrow & 0x1) { + n = snprintf(p, space, "---------------------------" + "--------------------------------"); + p += n; + space -= n; + debugf2("%s\n", mem_buffer); + p = mem_buffer; + space = PAGE_SIZE; + } + n = snprintf(p, space, "csrow %2d ", csrow); + p += n; + space -= n; + + for (channel = 0; channel < pvt->maxch; channel++) { + dinfo = &pvt->dimm_info[csrow][channel]; + handle_channel(pvt, csrow, channel, dinfo); + n = snprintf(p, space, "%4d MB | ", dinfo->megabytes); + p += n; + space -= n; + } + debugf2("%s\n", mem_buffer); + p = mem_buffer; + space = PAGE_SIZE; + } + + /* Output the last bottom 'boundary' marker */ + n = snprintf(p, space, "---------------------------" + "--------------------------------"); + p += n; + space -= n; + debugf2("%s\n", mem_buffer); + p = mem_buffer; + space = PAGE_SIZE; + + /* now output the 'channel' labels */ + n = snprintf(p, space, " "); + p += n; + space -= n; + for (channel = 0; channel < pvt->maxch; channel++) { + n = snprintf(p, space, "channel %d | ", channel); + p += n; + space -= n; + } + + /* output the last message and free buffer */ + debugf2("%s\n", mem_buffer); + kfree(mem_buffer); +} + +/* + * i5400_get_mc_regs read in the necessary registers and + * cache locally + * + * Fills in the private data members + */ +static void i5400_get_mc_regs(struct mem_ctl_info *mci) +{ + struct i5400_pvt *pvt; + u32 actual_tolm; + u16 limit; + int slot_row; + int maxch; + int maxdimmperch; + int way0, way1; + + pvt = mci->pvt_info; + + pci_read_config_dword(pvt->system_address, AMBASE, + (u32 *) &pvt->ambase); + pci_read_config_dword(pvt->system_address, AMBASE + sizeof(u32), + ((u32 *) &pvt->ambase) + sizeof(u32)); + + maxdimmperch = pvt->maxdimmperch; + maxch = pvt->maxch; + + debugf2("AMBASE= 0x%lx MAXCH= %d MAX-DIMM-Per-CH= %d\n", + (long unsigned int)pvt->ambase, pvt->maxch, pvt->maxdimmperch); + + /* Get the Branch Map regs */ + pci_read_config_word(pvt->branchmap_werrors, TOLM, &pvt->tolm); + pvt->tolm >>= 12; + debugf2("\nTOLM (number of 256M regions) =%u (0x%x)\n", pvt->tolm, + pvt->tolm); + + actual_tolm = (u32) ((1000l * pvt->tolm) >> (30 - 28)); + debugf2("Actual TOLM byte addr=%u.%03u GB (0x%x)\n", + actual_tolm/1000, actual_tolm % 1000, pvt->tolm << 28); + + pci_read_config_word(pvt->branchmap_werrors, MIR0, &pvt->mir0); + pci_read_config_word(pvt->branchmap_werrors, MIR1, &pvt->mir1); + + /* Get the MIR[0-1] regs */ + limit = (pvt->mir0 >> 4) & 0x0fff; + way0 = pvt->mir0 & 0x1; + way1 = pvt->mir0 & 0x2; + debugf2("MIR0: limit= 0x%x WAY1= %u WAY0= %x\n", limit, way1, way0); + limit = (pvt->mir1 >> 4) & 0xfff; + way0 = pvt->mir1 & 0x1; + way1 = pvt->mir1 & 0x2; + debugf2("MIR1: limit= 0x%x WAY1= %u WAY0= %x\n", limit, way1, way0); + + /* Get the set of MTR[0-3] regs by each branch */ + for (slot_row = 0; slot_row < NUM_MTRS_PER_BRANCH; slot_row++) { + int where = MTR0 + (slot_row * sizeof(u32)); + + /* Branch 0 set of MTR registers */ + pci_read_config_word(pvt->branch_0, where, + &pvt->b0_mtr[slot_row]); + + debugf2("MTR%d where=0x%x B0 value=0x%x\n", slot_row, where, + pvt->b0_mtr[slot_row]); + + if (pvt->maxch < CHANNELS_PER_BRANCH) { + pvt->b1_mtr[slot_row] = 0; + continue; + } + + /* Branch 1 set of MTR registers */ + pci_read_config_word(pvt->branch_1, where, + &pvt->b1_mtr[slot_row]); + debugf2("MTR%d where=0x%x B1 value=0x%x\n", slot_row, where, + pvt->b1_mtr[slot_row]); + } + + /* Read and dump branch 0's MTRs */ + debugf2("\nMemory Technology Registers:\n"); + debugf2(" Branch 0:\n"); + for (slot_row = 0; slot_row < NUM_MTRS_PER_BRANCH; slot_row++) + decode_mtr(slot_row, pvt->b0_mtr[slot_row]); + + pci_read_config_word(pvt->branch_0, AMBPRESENT_0, + &pvt->b0_ambpresent0); + debugf2("\t\tAMB-Branch 0-present0 0x%x:\n", pvt->b0_ambpresent0); + pci_read_config_word(pvt->branch_0, AMBPRESENT_1, + &pvt->b0_ambpresent1); + debugf2("\t\tAMB-Branch 0-present1 0x%x:\n", pvt->b0_ambpresent1); + + /* Only if we have 2 branchs (4 channels) */ + if (pvt->maxch < CHANNELS_PER_BRANCH) { + pvt->b1_ambpresent0 = 0; + pvt->b1_ambpresent1 = 0; + } else { + /* Read and dump branch 1's MTRs */ + debugf2(" Branch 1:\n"); + for (slot_row = 0; slot_row < NUM_MTRS_PER_BRANCH; slot_row++) + decode_mtr(slot_row, pvt->b1_mtr[slot_row]); + + pci_read_config_word(pvt->branch_1, AMBPRESENT_0, + &pvt->b1_ambpresent0); + debugf2("\t\tAMB-Branch 1-present0 0x%x:\n", + pvt->b1_ambpresent0); + pci_read_config_word(pvt->branch_1, AMBPRESENT_1, + &pvt->b1_ambpresent1); + debugf2("\t\tAMB-Branch 1-present1 0x%x:\n", + pvt->b1_ambpresent1); + } + + /* Go and determine the size of each DIMM and place in an + * orderly matrix */ + calculate_dimm_size(pvt); +} + +/* + * i5400_init_csrows Initialize the 'csrows' table within + * the mci control structure with the + * addressing of memory. + * + * return: + * 0 success + * 1 no actual memory found on this MC + */ +static int i5400_init_csrows(struct mem_ctl_info *mci) +{ + struct i5400_pvt *pvt; + struct csrow_info *p_csrow; + int empty, channel_count; + int max_csrows; + int mtr; + int csrow_megs; + int channel; + int csrow; + + pvt = mci->pvt_info; + + channel_count = pvt->maxch; + max_csrows = pvt->maxdimmperch * 2; + + empty = 1; /* Assume NO memory */ + + for (csrow = 0; csrow < max_csrows; csrow++) { + p_csrow = &mci->csrows[csrow]; + + p_csrow->csrow_idx = csrow; + + /* use branch 0 for the basis */ + mtr = determine_mtr(pvt, csrow, 0); + + /* if no DIMMS on this row, continue */ + if (!MTR_DIMMS_PRESENT(mtr)) + continue; + + /* FAKE OUT VALUES, FIXME */ + p_csrow->first_page = 0 + csrow * 20; + p_csrow->last_page = 9 + csrow * 20; + p_csrow->page_mask = 0xFFF; + + p_csrow->grain = 8; + + csrow_megs = 0; + for (channel = 0; channel < pvt->maxch; channel++) + csrow_megs += pvt->dimm_info[csrow][channel].megabytes; + + p_csrow->nr_pages = csrow_megs << 8; + + /* Assume DDR2 for now */ + p_csrow->mtype = MEM_FB_DDR2; + + /* ask what device type on this row */ + if (MTR_DRAM_WIDTH(mtr)) + p_csrow->dtype = DEV_X8; + else + p_csrow->dtype = DEV_X4; + + p_csrow->edac_mode = EDAC_S8ECD8ED; + + empty = 0; + } + + return empty; +} + +/* + * i5400_enable_error_reporting + * Turn on the memory reporting features of the hardware + */ +static void i5400_enable_error_reporting(struct mem_ctl_info *mci) +{ + struct i5400_pvt *pvt; + u32 fbd_error_mask; + + pvt = mci->pvt_info; + + /* Read the FBD Error Mask Register */ + pci_read_config_dword(pvt->branchmap_werrors, EMASK_FBD, + &fbd_error_mask); + + /* Enable with a '0' */ + fbd_error_mask &= ~(ENABLE_EMASK_ALL); + + pci_write_config_dword(pvt->branchmap_werrors, EMASK_FBD, + fbd_error_mask); +} + +/* + * i5400_get_dimm_and_channel_counts(pdev, &num_csrows, &num_channels) + * + * ask the device how many channels are present and how many CSROWS + * as well + */ +static void i5400_get_dimm_and_channel_counts(struct pci_dev *pdev, + int *num_dimms_per_channel, + int *num_channels) +{ + u8 value; + + /* Need to retrieve just how many channels and dimms per channel are + * supported on this memory controller + */ + pci_read_config_byte(pdev, MAXDIMMPERCH, &value); + *num_dimms_per_channel = (int)value * 2; + + pci_read_config_byte(pdev, MAXCH, &value); + *num_channels = (int)value; +} + +/* + * i5400_probe1 Probe for ONE instance of device to see if it is + * present. + * return: + * 0 for FOUND a device + * < 0 for error code + */ +static int i5400_probe1(struct pci_dev *pdev, int dev_idx) +{ + struct mem_ctl_info *mci; + struct i5400_pvt *pvt; + int num_channels; + int num_dimms_per_channel; + int num_csrows; + + if (dev_idx >= ARRAY_SIZE(i5400_devs)) + return -EINVAL; + + debugf0("MC: " __FILE__ ": %s(), pdev bus %u dev=0x%x fn=0x%x\n", + __func__, + pdev->bus->number, + PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); + + /* We only are looking for func 0 of the set */ + if (PCI_FUNC(pdev->devfn) != 0) + return -ENODEV; + + /* Ask the devices for the number of CSROWS and CHANNELS so + * that we can calculate the memory resources, etc + * + * The Chipset will report what it can handle which will be greater + * or equal to what the motherboard manufacturer will implement. + * + * As we don't have a motherboard identification routine to determine + * actual number of slots/dimms per channel, we thus utilize the + * resource as specified by the chipset. Thus, we might have + * have more DIMMs per channel than actually on the mobo, but this + * allows the driver to support upto the chipset max, without + * some fancy mobo determination. + */ + i5400_get_dimm_and_channel_counts(pdev, &num_dimms_per_channel, + &num_channels); + num_csrows = num_dimms_per_channel * 2; + + debugf0("MC: %s(): Number of - Channels= %d DIMMS= %d CSROWS= %d\n", + __func__, num_channels, num_dimms_per_channel, num_csrows); + + /* allocate a new MC control structure */ + mci = edac_mc_alloc(sizeof(*pvt), num_csrows, num_channels, 0); + + if (mci == NULL) + return -ENOMEM; + + debugf0("MC: " __FILE__ ": %s(): mci = %p\n", __func__, mci); + + mci->dev = &pdev->dev; /* record ptr to the generic device */ + + pvt = mci->pvt_info; + pvt->system_address = pdev; /* Record this device in our private */ + pvt->maxch = num_channels; + pvt->maxdimmperch = num_dimms_per_channel; + + /* 'get' the pci devices we want to reserve for our use */ + if (i5400_get_devices(mci, dev_idx)) + goto fail0; + + /* Time to get serious */ + i5400_get_mc_regs(mci); /* retrieve the hardware registers */ + + mci->mc_idx = 0; + mci->mtype_cap = MEM_FLAG_FB_DDR2; + mci->edac_ctl_cap = EDAC_FLAG_NONE; + mci->edac_cap = EDAC_FLAG_NONE; + mci->mod_name = "i5400_edac.c"; + mci->mod_ver = I5400_REVISION; + mci->ctl_name = i5400_devs[dev_idx].ctl_name; + mci->dev_name = pci_name(pdev); + mci->ctl_page_to_phys = NULL; + + /* Set the function pointer to an actual operation function */ + mci->edac_check = i5400_check_error; + + /* initialize the MC control structure 'csrows' table + * with the mapping and control information */ + if (i5400_init_csrows(mci)) { + debugf0("MC: Setting mci->edac_cap to EDAC_FLAG_NONE\n" + " because i5400_init_csrows() returned nonzero " + "value\n"); + mci->edac_cap = EDAC_FLAG_NONE; /* no csrows found */ + } else { + debugf1("MC: Enable error reporting now\n"); + i5400_enable_error_reporting(mci); + } + + /* add this new MC control structure to EDAC's list of MCs */ + if (edac_mc_add_mc(mci)) { + debugf0("MC: " __FILE__ + ": %s(): failed edac_mc_add_mc()\n", __func__); + /* FIXME: perhaps some code should go here that disables error + * reporting if we just enabled it + */ + goto fail1; + } + + i5400_clear_error(mci); + + /* allocating generic PCI control info */ + i5400_pci = edac_pci_create_generic_ctl(&pdev->dev, EDAC_MOD_STR); + if (!i5400_pci) { + printk(KERN_WARNING + "%s(): Unable to create PCI control\n", + __func__); + printk(KERN_WARNING + "%s(): PCI error report via EDAC not setup\n", + __func__); + } + + return 0; + + /* Error exit unwinding stack */ +fail1: + + i5400_put_devices(mci); + +fail0: + edac_mc_free(mci); + return -ENODEV; +} + +/* + * i5400_init_one constructor for one instance of device + * + * returns: + * negative on error + * count (>= 0) + */ +static int __devinit i5400_init_one(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + int rc; + + debugf0("MC: " __FILE__ ": %s()\n", __func__); + + /* wake up device */ + rc = pci_enable_device(pdev); + if (rc == -EIO) + return rc; + + /* now probe and enable the device */ + return i5400_probe1(pdev, id->driver_data); +} + +/* + * i5400_remove_one destructor for one instance of device + * + */ +static void __devexit i5400_remove_one(struct pci_dev *pdev) +{ + struct mem_ctl_info *mci; + + debugf0(__FILE__ ": %s()\n", __func__); + + if (i5400_pci) + edac_pci_release_generic_ctl(i5400_pci); + + mci = edac_mc_del_mc(&pdev->dev); + if (!mci) + return; + + /* retrieve references to resources, and free those resources */ + i5400_put_devices(mci); + + edac_mc_free(mci); +} + +/* + * pci_device_id table for which devices we are looking for + * + * The "E500P" device is the first device supported. + */ +static const struct pci_device_id i5400_pci_tbl[] __devinitdata = { + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_5400_ERR)}, + {0,} /* 0 terminated list. */ +}; + +MODULE_DEVICE_TABLE(pci, i5400_pci_tbl); + +/* + * i5400_driver pci_driver structure for this module + * + */ +static struct pci_driver i5400_driver = { + .name = "i5400_edac", + .probe = i5400_init_one, + .remove = __devexit_p(i5400_remove_one), + .id_table = i5400_pci_tbl, +}; + +/* + * i5400_init Module entry function + * Try to initialize this module for its devices + */ +static int __init i5400_init(void) +{ + int pci_rc; + + debugf2("MC: " __FILE__ ": %s()\n", __func__); + + /* Ensure that the OPSTATE is set correctly for POLL or NMI */ + opstate_init(); + + pci_rc = pci_register_driver(&i5400_driver); + + return (pci_rc < 0) ? pci_rc : 0; +} + +/* + * i5400_exit() Module exit function + * Unregister the driver + */ +static void __exit i5400_exit(void) +{ + debugf2("MC: " __FILE__ ": %s()\n", __func__); + pci_unregister_driver(&i5400_driver); +} + +module_init(i5400_init); +module_exit(i5400_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Ben Woodard <woodard@redhat.com>"); +MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>"); +MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)"); +MODULE_DESCRIPTION("MC Driver for Intel I5400 memory controllers - " + I5400_REVISION); + +module_param(edac_op_state, int, 0444); +MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI"); diff --git a/drivers/edac/i82875p_edac.c b/drivers/edac/i82875p_edac.c index ebb037b78758..b2d83b95033d 100644 --- a/drivers/edac/i82875p_edac.c +++ b/drivers/edac/i82875p_edac.c @@ -311,9 +311,7 @@ static int i82875p_setup_overfl_dev(struct pci_dev *pdev, } /* cache is irrelevant for PCI bus reads/writes */ - window = ioremap_nocache(pci_resource_start(dev, 0), - pci_resource_len(dev, 0)); - + window = pci_ioremap_bar(dev, 0); if (window == NULL) { i82875p_printk(KERN_ERR, "%s(): Failed to ioremap bar6\n", __func__); diff --git a/drivers/edac/mpc85xx_edac.c b/drivers/edac/mpc85xx_edac.c index 0cfcb2d075a0..853ef37ec006 100644 --- a/drivers/edac/mpc85xx_edac.c +++ b/drivers/edac/mpc85xx_edac.c @@ -630,27 +630,22 @@ static int mpc85xx_l2_err_remove(struct of_device *op) } static struct of_device_id mpc85xx_l2_err_of_match[] = { - { - .compatible = "fsl,8540-l2-cache-controller", - }, - { - .compatible = "fsl,8541-l2-cache-controller", - }, - { - .compatible = "fsl,8544-l2-cache-controller", - }, - { - .compatible = "fsl,8548-l2-cache-controller", - }, - { - .compatible = "fsl,8555-l2-cache-controller", - }, - { - .compatible = "fsl,8568-l2-cache-controller", - }, - { - .compatible = "fsl,mpc8572-l2-cache-controller", - }, +/* deprecate the fsl,85.. forms in the future, 2.6.30? */ + { .compatible = "fsl,8540-l2-cache-controller", }, + { .compatible = "fsl,8541-l2-cache-controller", }, + { .compatible = "fsl,8544-l2-cache-controller", }, + { .compatible = "fsl,8548-l2-cache-controller", }, + { .compatible = "fsl,8555-l2-cache-controller", }, + { .compatible = "fsl,8568-l2-cache-controller", }, + { .compatible = "fsl,mpc8536-l2-cache-controller", }, + { .compatible = "fsl,mpc8540-l2-cache-controller", }, + { .compatible = "fsl,mpc8541-l2-cache-controller", }, + { .compatible = "fsl,mpc8544-l2-cache-controller", }, + { .compatible = "fsl,mpc8548-l2-cache-controller", }, + { .compatible = "fsl,mpc8555-l2-cache-controller", }, + { .compatible = "fsl,mpc8560-l2-cache-controller", }, + { .compatible = "fsl,mpc8568-l2-cache-controller", }, + { .compatible = "fsl,mpc8572-l2-cache-controller", }, {}, }; @@ -967,27 +962,22 @@ static int mpc85xx_mc_err_remove(struct of_device *op) } static struct of_device_id mpc85xx_mc_err_of_match[] = { - { - .compatible = "fsl,8540-memory-controller", - }, - { - .compatible = "fsl,8541-memory-controller", - }, - { - .compatible = "fsl,8544-memory-controller", - }, - { - .compatible = "fsl,8548-memory-controller", - }, - { - .compatible = "fsl,8555-memory-controller", - }, - { - .compatible = "fsl,8568-memory-controller", - }, - { - .compatible = "fsl,mpc8572-memory-controller", - }, +/* deprecate the fsl,85.. forms in the future, 2.6.30? */ + { .compatible = "fsl,8540-memory-controller", }, + { .compatible = "fsl,8541-memory-controller", }, + { .compatible = "fsl,8544-memory-controller", }, + { .compatible = "fsl,8548-memory-controller", }, + { .compatible = "fsl,8555-memory-controller", }, + { .compatible = "fsl,8568-memory-controller", }, + { .compatible = "fsl,mpc8536-memory-controller", }, + { .compatible = "fsl,mpc8540-memory-controller", }, + { .compatible = "fsl,mpc8541-memory-controller", }, + { .compatible = "fsl,mpc8544-memory-controller", }, + { .compatible = "fsl,mpc8548-memory-controller", }, + { .compatible = "fsl,mpc8555-memory-controller", }, + { .compatible = "fsl,mpc8560-memory-controller", }, + { .compatible = "fsl,mpc8568-memory-controller", }, + { .compatible = "fsl,mpc8572-memory-controller", }, {}, }; diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c index 78b989d202a3..d76adfea5df7 100644 --- a/drivers/firmware/dmi_scan.c +++ b/drivers/firmware/dmi_scan.c @@ -468,8 +468,8 @@ const char *dmi_get_system_info(int field) EXPORT_SYMBOL(dmi_get_system_info); /** - * dmi_name_in_serial - Check if string is in the DMI product serial - * information. + * dmi_name_in_serial - Check if string is in the DMI product serial information + * @str: string to check for */ int dmi_name_in_serial(const char *str) { @@ -585,6 +585,8 @@ EXPORT_SYMBOL_GPL(dmi_walk); /** * dmi_match - compare a string to the dmi field (if exists) + * @f: DMI field identifier + * @str: string to compare the DMI field to * * Returns true if the requested field equals to the str (including NULL). */ diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index 48f49d93d249..3d2565441b36 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -95,7 +95,7 @@ config GPIO_MAX732X number for these GPIOs. config GPIO_PCA953X - tristate "PCA953x, PCA955x, and MAX7310 I/O ports" + tristate "PCA953x, PCA955x, TCA64xx, and MAX7310 I/O ports" depends on I2C help Say yes here to provide access to several register-oriented @@ -104,9 +104,10 @@ config GPIO_PCA953X 4 bits: pca9536, pca9537 - 8 bits: max7310, pca9534, pca9538, pca9554, pca9557 + 8 bits: max7310, pca9534, pca9538, pca9554, pca9557, + tca6408 - 16 bits: pca9535, pca9539, pca9555 + 16 bits: pca9535, pca9539, pca9555, tca6416 This driver can also be built as a module. If so, the module will be called pca953x. diff --git a/drivers/gpio/pca953x.c b/drivers/gpio/pca953x.c index 9ceeb89f1325..37f35388a2ae 100644 --- a/drivers/gpio/pca953x.c +++ b/drivers/gpio/pca953x.c @@ -33,7 +33,12 @@ static const struct i2c_device_id pca953x_id[] = { { "pca9554", 8, }, { "pca9555", 16, }, { "pca9557", 8, }, + { "max7310", 8, }, + { "pca6107", 8, }, + { "tca6408", 8, }, + { "tca6416", 16, }, + /* NYET: { "tca6424", 24, }, */ { } }; MODULE_DEVICE_TABLE(i2c, pca953x_id); @@ -47,9 +52,6 @@ struct pca953x_chip { struct gpio_chip gpio_chip; }; -/* NOTE: we can't currently rely on fault codes to come from SMBus - * calls, so we map all errors to EIO here and return zero otherwise. - */ static int pca953x_write_reg(struct pca953x_chip *chip, int reg, uint16_t val) { int ret; @@ -61,7 +63,7 @@ static int pca953x_write_reg(struct pca953x_chip *chip, int reg, uint16_t val) if (ret < 0) { dev_err(&chip->client->dev, "failed writing register\n"); - return -EIO; + return ret; } return 0; @@ -78,7 +80,7 @@ static int pca953x_read_reg(struct pca953x_chip *chip, int reg, uint16_t *val) if (ret < 0) { dev_err(&chip->client->dev, "failed reading register\n"); - return -EIO; + return ret; } *val = (uint16_t)ret; diff --git a/drivers/gpio/twl4030-gpio.c b/drivers/gpio/twl4030-gpio.c index 37d3eec8730a..afad14792141 100644 --- a/drivers/gpio/twl4030-gpio.c +++ b/drivers/gpio/twl4030-gpio.c @@ -202,37 +202,6 @@ static int twl4030_get_gpio_datain(int gpio) return ret; } -/* - * Configure debounce timing value for a GPIO pin on TWL4030 - */ -int twl4030_set_gpio_debounce(int gpio, int enable) -{ - u8 d_bnk = gpio >> 3; - u8 d_msk = BIT(gpio & 0x7); - u8 reg = 0; - u8 base = 0; - int ret = 0; - - if (unlikely((gpio >= TWL4030_GPIO_MAX) - || !(gpio_usage_count & BIT(gpio)))) - return -EPERM; - - base = REG_GPIO_DEBEN1 + d_bnk; - mutex_lock(&gpio_lock); - ret = gpio_twl4030_read(base); - if (ret >= 0) { - if (enable) - reg = ret | d_msk; - else - reg = ret & ~d_msk; - - ret = gpio_twl4030_write(base, reg); - } - mutex_unlock(&gpio_lock); - return ret; -} -EXPORT_SYMBOL(twl4030_set_gpio_debounce); - /*----------------------------------------------------------------------*/ static int twl_request(struct gpio_chip *chip, unsigned offset) @@ -405,6 +374,23 @@ static int __devinit gpio_twl4030_pulls(u32 ups, u32 downs) REG_GPIOPUPDCTR1, 5); } +static int __devinit gpio_twl4030_debounce(u32 debounce, u8 mmc_cd) +{ + u8 message[4]; + + /* 30 msec of debouncing is always used for MMC card detect, + * and is optional for everything else. + */ + message[1] = (debounce & 0xff) | (mmc_cd & 0x03); + debounce >>= 8; + message[2] = (debounce & 0xff); + debounce >>= 8; + message[3] = (debounce & 0x03); + + return twl4030_i2c_write(TWL4030_MODULE_GPIO, message, + REG_GPIO_DEBEN1, 3); +} + static int gpio_twl4030_remove(struct platform_device *pdev); static int __devinit gpio_twl4030_probe(struct platform_device *pdev) @@ -439,6 +425,12 @@ no_irqs: pdata->pullups, pdata->pulldowns, ret); + ret = gpio_twl4030_debounce(pdata->debounce, pdata->mmc_cd); + if (ret) + dev_dbg(&pdev->dev, "debounce %.03x %.01x --> %d\n", + pdata->debounce, pdata->mmc_cd, + ret); + twl_gpiochip.base = pdata->gpio_base; twl_gpiochip.ngpio = TWL4030_GPIO_MAX; twl_gpiochip.dev = &pdev->dev; diff --git a/drivers/gpu/drm/drm_fops.c b/drivers/gpu/drm/drm_fops.c index 3733e36d135e..b06a53715853 100644 --- a/drivers/gpu/drm/drm_fops.c +++ b/drivers/gpu/drm/drm_fops.c @@ -183,6 +183,10 @@ int drm_stub_open(struct inode *inode, struct file *filp) old_fops = filp->f_op; filp->f_op = fops_get(&dev->driver->fops); + if (filp->f_op == NULL) { + filp->f_op = old_fops; + goto out; + } if (filp->f_op->open && (err = filp->f_op->open(inode, filp))) { fops_put(filp->f_op); filp->f_op = fops_get(old_fops); diff --git a/drivers/hwmon/adt7462.c b/drivers/hwmon/adt7462.c index 66107b4dc12a..1852f27bac51 100644 --- a/drivers/hwmon/adt7462.c +++ b/drivers/hwmon/adt7462.c @@ -204,8 +204,6 @@ I2C_CLIENT_INSMOD_1(adt7462); #define MASK_AND_SHIFT(value, prefix) \ (((value) & prefix##_MASK) >> prefix##_SHIFT) -#define ROUND_DIV(x, divisor) (((x) + ((divisor) / 2)) / (divisor)) - struct adt7462_data { struct device *hwmon_dev; struct attribute_group attrs; @@ -840,7 +838,7 @@ static ssize_t set_temp_min(struct device *dev, if (strict_strtol(buf, 10, &temp) || !temp_enabled(data, attr->index)) return -EINVAL; - temp = ROUND_DIV(temp, 1000) + 64; + temp = DIV_ROUND_CLOSEST(temp, 1000) + 64; temp = SENSORS_LIMIT(temp, 0, 255); mutex_lock(&data->lock); @@ -878,7 +876,7 @@ static ssize_t set_temp_max(struct device *dev, if (strict_strtol(buf, 10, &temp) || !temp_enabled(data, attr->index)) return -EINVAL; - temp = ROUND_DIV(temp, 1000) + 64; + temp = DIV_ROUND_CLOSEST(temp, 1000) + 64; temp = SENSORS_LIMIT(temp, 0, 255); mutex_lock(&data->lock); @@ -943,7 +941,7 @@ static ssize_t set_volt_max(struct device *dev, return -EINVAL; temp *= 1000; /* convert mV to uV */ - temp = ROUND_DIV(temp, x); + temp = DIV_ROUND_CLOSEST(temp, x); temp = SENSORS_LIMIT(temp, 0, 255); mutex_lock(&data->lock); @@ -985,7 +983,7 @@ static ssize_t set_volt_min(struct device *dev, return -EINVAL; temp *= 1000; /* convert mV to uV */ - temp = ROUND_DIV(temp, x); + temp = DIV_ROUND_CLOSEST(temp, x); temp = SENSORS_LIMIT(temp, 0, 255); mutex_lock(&data->lock); @@ -1250,7 +1248,7 @@ static ssize_t set_pwm_hyst(struct device *dev, if (strict_strtol(buf, 10, &temp)) return -EINVAL; - temp = ROUND_DIV(temp, 1000); + temp = DIV_ROUND_CLOSEST(temp, 1000); temp = SENSORS_LIMIT(temp, 0, 15); /* package things up */ @@ -1337,7 +1335,7 @@ static ssize_t set_pwm_tmin(struct device *dev, if (strict_strtol(buf, 10, &temp)) return -EINVAL; - temp = ROUND_DIV(temp, 1000) + 64; + temp = DIV_ROUND_CLOSEST(temp, 1000) + 64; temp = SENSORS_LIMIT(temp, 0, 255); mutex_lock(&data->lock); diff --git a/drivers/hwmon/adt7470.c b/drivers/hwmon/adt7470.c index 1311a595147e..633e1a1e9d79 100644 --- a/drivers/hwmon/adt7470.c +++ b/drivers/hwmon/adt7470.c @@ -28,6 +28,7 @@ #include <linux/mutex.h> #include <linux/delay.h> #include <linux/log2.h> +#include <linux/kthread.h> /* Addresses to scan */ static const unsigned short normal_i2c[] = { 0x2C, 0x2E, 0x2F, I2C_CLIENT_END }; @@ -74,6 +75,7 @@ I2C_CLIENT_INSMOD_1(adt7470); #define ADT7470_REG_PWM12_CFG 0x68 #define ADT7470_PWM2_AUTO_MASK 0x40 #define ADT7470_PWM1_AUTO_MASK 0x80 +#define ADT7470_PWM_AUTO_MASK 0xC0 #define ADT7470_REG_PWM34_CFG 0x69 #define ADT7470_PWM3_AUTO_MASK 0x40 #define ADT7470_PWM4_AUTO_MASK 0x80 @@ -128,8 +130,11 @@ I2C_CLIENT_INSMOD_1(adt7470); /* How often do we reread sensor limit values? (In jiffies) */ #define LIMIT_REFRESH_INTERVAL (60 * HZ) -/* sleep 1s while gathering temperature data */ -#define TEMP_COLLECTION_TIME 1000 +/* Wait at least 200ms per sensor for 10 sensors */ +#define TEMP_COLLECTION_TIME 2000 + +/* auto update thing won't fire more than every 2s */ +#define AUTO_UPDATE_INTERVAL 2000 /* datasheet says to divide this number by the fan reading to get fan rpm */ #define FAN_PERIOD_TO_RPM(x) ((90000 * 60) / (x)) @@ -137,8 +142,6 @@ I2C_CLIENT_INSMOD_1(adt7470); #define FAN_PERIOD_INVALID 65535 #define FAN_DATA_VALID(x) ((x) && (x) != FAN_PERIOD_INVALID) -#define ROUND_DIV(x, divisor) (((x) + ((divisor) / 2)) / (divisor)) - struct adt7470_data { struct device *hwmon_dev; struct attribute_group attrs; @@ -148,6 +151,9 @@ struct adt7470_data { unsigned long sensors_last_updated; /* In jiffies */ unsigned long limits_last_updated; /* In jiffies */ + int num_temp_sensors; /* -1 = probe */ + int temperatures_probed; + s8 temp[ADT7470_TEMP_COUNT]; s8 temp_min[ADT7470_TEMP_COUNT]; s8 temp_max[ADT7470_TEMP_COUNT]; @@ -163,6 +169,10 @@ struct adt7470_data { u8 pwm_min[ADT7470_PWM_COUNT]; s8 pwm_tmin[ADT7470_PWM_COUNT]; u8 pwm_auto_temp[ADT7470_PWM_COUNT]; + + struct task_struct *auto_update; + struct completion auto_update_stop; + unsigned int auto_update_interval; }; static int adt7470_probe(struct i2c_client *client, @@ -220,40 +230,126 @@ static void adt7470_init_client(struct i2c_client *client) } } -static struct adt7470_data *adt7470_update_device(struct device *dev) +/* Probe for temperature sensors. Assumes lock is held */ +static int adt7470_read_temperatures(struct i2c_client *client, + struct adt7470_data *data) { - struct i2c_client *client = to_i2c_client(dev); - struct adt7470_data *data = i2c_get_clientdata(client); - unsigned long local_jiffies = jiffies; - u8 cfg; + unsigned long res; int i; + u8 cfg, pwm[4], pwm_cfg[2]; - mutex_lock(&data->lock); - if (time_before(local_jiffies, data->sensors_last_updated + - SENSOR_REFRESH_INTERVAL) - && data->sensors_valid) - goto no_sensor_update; + /* save pwm[1-4] config register */ + pwm_cfg[0] = i2c_smbus_read_byte_data(client, ADT7470_REG_PWM_CFG(0)); + pwm_cfg[1] = i2c_smbus_read_byte_data(client, ADT7470_REG_PWM_CFG(2)); + + /* set manual pwm to whatever it is set to now */ + for (i = 0; i < ADT7470_FAN_COUNT; i++) + pwm[i] = i2c_smbus_read_byte_data(client, ADT7470_REG_PWM(i)); + + /* put pwm in manual mode */ + i2c_smbus_write_byte_data(client, ADT7470_REG_PWM_CFG(0), + pwm_cfg[0] & ~(ADT7470_PWM_AUTO_MASK)); + i2c_smbus_write_byte_data(client, ADT7470_REG_PWM_CFG(2), + pwm_cfg[1] & ~(ADT7470_PWM_AUTO_MASK)); + + /* write pwm control to whatever it was */ + for (i = 0; i < ADT7470_FAN_COUNT; i++) + i2c_smbus_write_byte_data(client, ADT7470_REG_PWM(i), pwm[i]); /* start reading temperature sensors */ cfg = i2c_smbus_read_byte_data(client, ADT7470_REG_CFG); cfg |= 0x80; i2c_smbus_write_byte_data(client, ADT7470_REG_CFG, cfg); - /* - * Delay is 200ms * number of tmp05 sensors. Too bad - * there's no way to figure out how many are connected. - * For now, assume 1s will work. - */ - msleep(TEMP_COLLECTION_TIME); + /* Delay is 200ms * number of temp sensors. */ + res = msleep_interruptible((data->num_temp_sensors >= 0 ? + data->num_temp_sensors * 200 : + TEMP_COLLECTION_TIME)); /* done reading temperature sensors */ cfg = i2c_smbus_read_byte_data(client, ADT7470_REG_CFG); cfg &= ~0x80; i2c_smbus_write_byte_data(client, ADT7470_REG_CFG, cfg); - for (i = 0; i < ADT7470_TEMP_COUNT; i++) + /* restore pwm[1-4] config registers */ + i2c_smbus_write_byte_data(client, ADT7470_REG_PWM_CFG(0), pwm_cfg[0]); + i2c_smbus_write_byte_data(client, ADT7470_REG_PWM_CFG(2), pwm_cfg[1]); + + if (res) { + printk(KERN_ERR "ha ha, interrupted"); + return -EAGAIN; + } + + /* Only count fans if we have to */ + if (data->num_temp_sensors >= 0) + return 0; + + for (i = 0; i < ADT7470_TEMP_COUNT; i++) { data->temp[i] = i2c_smbus_read_byte_data(client, ADT7470_TEMP_REG(i)); + if (data->temp[i]) + data->num_temp_sensors = i + 1; + } + data->temperatures_probed = 1; + return 0; +} + +static int adt7470_update_thread(void *p) +{ + struct i2c_client *client = p; + struct adt7470_data *data = i2c_get_clientdata(client); + + while (!kthread_should_stop()) { + mutex_lock(&data->lock); + adt7470_read_temperatures(client, data); + mutex_unlock(&data->lock); + if (kthread_should_stop()) + break; + msleep_interruptible(data->auto_update_interval); + } + + complete_all(&data->auto_update_stop); + return 0; +} + +static struct adt7470_data *adt7470_update_device(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct adt7470_data *data = i2c_get_clientdata(client); + unsigned long local_jiffies = jiffies; + u8 cfg; + int i; + int need_sensors = 1; + int need_limits = 1; + + /* + * Figure out if we need to update the shadow registers. + * Lockless means that we may occasionally report out of + * date data. + */ + if (time_before(local_jiffies, data->sensors_last_updated + + SENSOR_REFRESH_INTERVAL) && + data->sensors_valid) + need_sensors = 0; + + if (time_before(local_jiffies, data->limits_last_updated + + LIMIT_REFRESH_INTERVAL) && + data->limits_valid) + need_limits = 0; + + if (!need_sensors && !need_limits) + return data; + + mutex_lock(&data->lock); + if (!need_sensors) + goto no_sensor_update; + + if (!data->temperatures_probed) + adt7470_read_temperatures(client, data); + else + for (i = 0; i < ADT7470_TEMP_COUNT; i++) + data->temp[i] = i2c_smbus_read_byte_data(client, + ADT7470_TEMP_REG(i)); for (i = 0; i < ADT7470_FAN_COUNT; i++) data->fan[i] = adt7470_read_word_data(client, @@ -302,9 +398,7 @@ static struct adt7470_data *adt7470_update_device(struct device *dev) data->sensors_valid = 1; no_sensor_update: - if (time_before(local_jiffies, data->limits_last_updated + - LIMIT_REFRESH_INTERVAL) - && data->limits_valid) + if (!need_limits) goto out; for (i = 0; i < ADT7470_TEMP_COUNT; i++) { @@ -338,6 +432,66 @@ out: return data; } +static ssize_t show_auto_update_interval(struct device *dev, + struct device_attribute *devattr, + char *buf) +{ + struct adt7470_data *data = adt7470_update_device(dev); + return sprintf(buf, "%d\n", data->auto_update_interval); +} + +static ssize_t set_auto_update_interval(struct device *dev, + struct device_attribute *devattr, + const char *buf, + size_t count) +{ + struct i2c_client *client = to_i2c_client(dev); + struct adt7470_data *data = i2c_get_clientdata(client); + long temp; + + if (strict_strtol(buf, 10, &temp)) + return -EINVAL; + + temp = SENSORS_LIMIT(temp, 0, 60000); + + mutex_lock(&data->lock); + data->auto_update_interval = temp; + mutex_unlock(&data->lock); + + return count; +} + +static ssize_t show_num_temp_sensors(struct device *dev, + struct device_attribute *devattr, + char *buf) +{ + struct adt7470_data *data = adt7470_update_device(dev); + return sprintf(buf, "%d\n", data->num_temp_sensors); +} + +static ssize_t set_num_temp_sensors(struct device *dev, + struct device_attribute *devattr, + const char *buf, + size_t count) +{ + struct i2c_client *client = to_i2c_client(dev); + struct adt7470_data *data = i2c_get_clientdata(client); + long temp; + + if (strict_strtol(buf, 10, &temp)) + return -EINVAL; + + temp = SENSORS_LIMIT(temp, -1, 10); + + mutex_lock(&data->lock); + data->num_temp_sensors = temp; + if (temp < 0) + data->temperatures_probed = 0; + mutex_unlock(&data->lock); + + return count; +} + static ssize_t show_temp_min(struct device *dev, struct device_attribute *devattr, char *buf) @@ -360,7 +514,7 @@ static ssize_t set_temp_min(struct device *dev, if (strict_strtol(buf, 10, &temp)) return -EINVAL; - temp = ROUND_DIV(temp, 1000); + temp = DIV_ROUND_CLOSEST(temp, 1000); temp = SENSORS_LIMIT(temp, 0, 255); mutex_lock(&data->lock); @@ -394,7 +548,7 @@ static ssize_t set_temp_max(struct device *dev, if (strict_strtol(buf, 10, &temp)) return -EINVAL; - temp = ROUND_DIV(temp, 1000); + temp = DIV_ROUND_CLOSEST(temp, 1000); temp = SENSORS_LIMIT(temp, 0, 255); mutex_lock(&data->lock); @@ -671,7 +825,7 @@ static ssize_t set_pwm_tmin(struct device *dev, if (strict_strtol(buf, 10, &temp)) return -EINVAL; - temp = ROUND_DIV(temp, 1000); + temp = DIV_ROUND_CLOSEST(temp, 1000); temp = SENSORS_LIMIT(temp, 0, 255); mutex_lock(&data->lock); @@ -804,6 +958,10 @@ static ssize_t show_alarm(struct device *dev, } static DEVICE_ATTR(alarm_mask, S_IRUGO, show_alarm_mask, NULL); +static DEVICE_ATTR(num_temp_sensors, S_IWUSR | S_IRUGO, show_num_temp_sensors, + set_num_temp_sensors); +static DEVICE_ATTR(auto_update_interval, S_IWUSR | S_IRUGO, + show_auto_update_interval, set_auto_update_interval); static SENSOR_DEVICE_ATTR(temp1_max, S_IWUSR | S_IRUGO, show_temp_max, set_temp_max, 0); @@ -976,6 +1134,8 @@ static SENSOR_DEVICE_ATTR(pwm4_auto_channels_temp, S_IWUSR | S_IRUGO, static struct attribute *adt7470_attr[] = { &dev_attr_alarm_mask.attr, + &dev_attr_num_temp_sensors.attr, + &dev_attr_auto_update_interval.attr, &sensor_dev_attr_temp1_max.dev_attr.attr, &sensor_dev_attr_temp2_max.dev_attr.attr, &sensor_dev_attr_temp3_max.dev_attr.attr, @@ -1108,6 +1268,9 @@ static int adt7470_probe(struct i2c_client *client, goto exit; } + data->num_temp_sensors = -1; + data->auto_update_interval = AUTO_UPDATE_INTERVAL; + i2c_set_clientdata(client, data); mutex_init(&data->lock); @@ -1127,8 +1290,16 @@ static int adt7470_probe(struct i2c_client *client, goto exit_remove; } + init_completion(&data->auto_update_stop); + data->auto_update = kthread_run(adt7470_update_thread, client, + dev_name(data->hwmon_dev)); + if (IS_ERR(data->auto_update)) + goto exit_unregister; + return 0; +exit_unregister: + hwmon_device_unregister(data->hwmon_dev); exit_remove: sysfs_remove_group(&client->dev.kobj, &data->attrs); exit_free: @@ -1141,6 +1312,8 @@ static int adt7470_remove(struct i2c_client *client) { struct adt7470_data *data = i2c_get_clientdata(client); + kthread_stop(data->auto_update); + wait_for_completion(&data->auto_update_stop); hwmon_device_unregister(data->hwmon_dev); sysfs_remove_group(&client->dev.kobj, &data->attrs); kfree(data); diff --git a/drivers/hwmon/adt7473.c b/drivers/hwmon/adt7473.c index 18aa30866a6c..0a6ce2367b42 100644 --- a/drivers/hwmon/adt7473.c +++ b/drivers/hwmon/adt7473.c @@ -129,8 +129,6 @@ I2C_CLIENT_INSMOD_1(adt7473); #define FAN_PERIOD_INVALID 65535 #define FAN_DATA_VALID(x) ((x) && (x) != FAN_PERIOD_INVALID) -#define ROUND_DIV(x, divisor) (((x) + ((divisor) / 2)) / (divisor)) - struct adt7473_data { struct device *hwmon_dev; struct attribute_group attrs; @@ -459,7 +457,7 @@ static ssize_t set_temp_min(struct device *dev, if (strict_strtol(buf, 10, &temp)) return -EINVAL; - temp = ROUND_DIV(temp, 1000); + temp = DIV_ROUND_CLOSEST(temp, 1000); temp = encode_temp(data->temp_twos_complement, temp); mutex_lock(&data->lock); @@ -495,7 +493,7 @@ static ssize_t set_temp_max(struct device *dev, if (strict_strtol(buf, 10, &temp)) return -EINVAL; - temp = ROUND_DIV(temp, 1000); + temp = DIV_ROUND_CLOSEST(temp, 1000); temp = encode_temp(data->temp_twos_complement, temp); mutex_lock(&data->lock); @@ -720,7 +718,7 @@ static ssize_t set_temp_tmax(struct device *dev, if (strict_strtol(buf, 10, &temp)) return -EINVAL; - temp = ROUND_DIV(temp, 1000); + temp = DIV_ROUND_CLOSEST(temp, 1000); temp = encode_temp(data->temp_twos_complement, temp); mutex_lock(&data->lock); @@ -756,7 +754,7 @@ static ssize_t set_temp_tmin(struct device *dev, if (strict_strtol(buf, 10, &temp)) return -EINVAL; - temp = ROUND_DIV(temp, 1000); + temp = DIV_ROUND_CLOSEST(temp, 1000); temp = encode_temp(data->temp_twos_complement, temp); mutex_lock(&data->lock); diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c index 086c2a5cef0b..dca47a591baf 100644 --- a/drivers/hwmon/applesmc.c +++ b/drivers/hwmon/applesmc.c @@ -131,6 +131,10 @@ static const char* temperature_sensors_sets[][36] = { /* Set 14: iMac 6,1 */ { "TA0P", "TC0D", "TC0H", "TC0P", "TG0D", "TG0H", "TG0P", "TH0P", "TO0P", "Tp0P", NULL }, +/* Set 15: MacBook Air 2,1 */ + { "TB0T", "TB1S", "TB1T", "TB2S", "TB2T", "TC0D", "TN0D", "TTF0", + "TV0P", "TVFP", "TW0P", "Th0P", "Tp0P", "Tp1P", "TpFP", "Ts0P", + "Ts0S", NULL }, }; /* List of keys used to read/write fan speeds */ @@ -1301,11 +1305,17 @@ static __initdata struct dmi_match_data applesmc_dmi_data[] = { { .accelerometer = 0, .light = 0, .temperature_set = 13 }, /* iMac 6: light sensor only, temperature set 14 */ { .accelerometer = 0, .light = 0, .temperature_set = 14 }, +/* MacBook Air 2,1: accelerometer, backlight and temperature set 15 */ + { .accelerometer = 1, .light = 1, .temperature_set = 15 }, }; /* Note that DMI_MATCH(...,"MacBook") will match "MacBookPro1,1". * So we need to put "Apple MacBook Pro" before "Apple MacBook". */ static __initdata struct dmi_system_id applesmc_whitelist[] = { + { applesmc_dmi_match, "Apple MacBook Air 2", { + DMI_MATCH(DMI_BOARD_VENDOR, "Apple"), + DMI_MATCH(DMI_PRODUCT_NAME, "MacBookAir2") }, + &applesmc_dmi_data[15]}, { applesmc_dmi_match, "Apple MacBook Air", { DMI_MATCH(DMI_BOARD_VENDOR, "Apple"), DMI_MATCH(DMI_PRODUCT_NAME, "MacBookAir") }, diff --git a/drivers/hwmon/ibmpex.c b/drivers/hwmon/ibmpex.c index 537d9fb2ff88..a36363312f2f 100644 --- a/drivers/hwmon/ibmpex.c +++ b/drivers/hwmon/ibmpex.c @@ -40,7 +40,7 @@ static inline u16 extract_value(const char *data, int offset) { - return be16_to_cpup((u16 *)&data[offset]); + return be16_to_cpup((__be16 *)&data[offset]); } #define TEMP_SENSOR 1 diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig index 4ee85fcf9aaf..3f9503867e6b 100644 --- a/drivers/ide/Kconfig +++ b/drivers/ide/Kconfig @@ -511,6 +511,13 @@ config BLK_DEV_PIIX This allows the kernel to change PIO, DMA and UDMA speeds and to configure the chip to optimum performance. +config BLK_DEV_IT8172 + tristate "IT8172 IDE support" + select BLK_DEV_IDEDMA_PCI + help + This driver adds support for the IDE controller on the + IT8172 System Controller. + config BLK_DEV_IT8213 tristate "IT8213 IDE support" select BLK_DEV_IDEDMA_PCI diff --git a/drivers/ide/Makefile b/drivers/ide/Makefile index 410728992e6a..c2b9c93f0095 100644 --- a/drivers/ide/Makefile +++ b/drivers/ide/Makefile @@ -47,6 +47,7 @@ obj-$(CONFIG_BLK_DEV_SC1200) += sc1200.o obj-$(CONFIG_BLK_DEV_CY82C693) += cy82c693.o obj-$(CONFIG_BLK_DEV_DELKIN) += delkin_cb.o obj-$(CONFIG_BLK_DEV_HPT366) += hpt366.o +obj-$(CONFIG_BLK_DEV_IT8172) += it8172.o obj-$(CONFIG_BLK_DEV_IT8213) += it8213.o obj-$(CONFIG_BLK_DEV_IT821X) += it821x.o obj-$(CONFIG_BLK_DEV_JMICRON) += jmicron.o diff --git a/drivers/ide/aec62xx.c b/drivers/ide/aec62xx.c index 4142c698e0d3..4485b9c6f0e6 100644 --- a/drivers/ide/aec62xx.c +++ b/drivers/ide/aec62xx.c @@ -83,7 +83,7 @@ static u8 pci_bus_clock_list_ultra (u8 speed, struct chipset_bus_clock_list_entr static void aec6210_set_mode(ide_drive_t *drive, const u8 speed) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); struct ide_host *host = pci_get_drvdata(dev); struct chipset_bus_clock_list_entry *bus_clock = host->host_priv; @@ -111,7 +111,7 @@ static void aec6210_set_mode(ide_drive_t *drive, const u8 speed) static void aec6260_set_mode(ide_drive_t *drive, const u8 speed) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); struct ide_host *host = pci_get_drvdata(dev); struct chipset_bus_clock_list_entry *bus_clock = host->host_priv; diff --git a/drivers/ide/alim15x3.c b/drivers/ide/alim15x3.c index 45d2356bb725..66f43083408b 100644 --- a/drivers/ide/alim15x3.c +++ b/drivers/ide/alim15x3.c @@ -68,7 +68,7 @@ static struct pci_dev *isa_dev; static void ali_set_pio_mode(ide_drive_t *drive, const u8 pio) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); struct ide_timing *t = ide_timing_find_mode(XFER_PIO_0 + pio); int s_time = t->setup, a_time = t->active, c_time = t->cycle; @@ -150,7 +150,7 @@ static u8 ali_udma_filter(ide_drive_t *drive) static void ali_set_dma_mode(ide_drive_t *drive, const u8 speed) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); u8 speed1 = speed; u8 unit = drive->dn & 1; @@ -198,7 +198,7 @@ static void ali_set_dma_mode(ide_drive_t *drive, const u8 speed) static int ali15x3_dma_setup(ide_drive_t *drive) { if (m5229_revision < 0xC2 && drive->media != ide_disk) { - if (rq_data_dir(drive->hwif->hwgroup->rq)) + if (rq_data_dir(drive->hwif->rq)) return 1; /* try PIO instead of DMA */ } return ide_dma_setup(drive); @@ -490,8 +490,6 @@ static int __devinit init_dma_ali15x3(ide_hwif_t *hwif, if (ide_allocate_dma_engine(hwif)) return -1; - hwif->dma_ops = &sff_dma_ops; - return 0; } @@ -511,6 +509,7 @@ static const struct ide_dma_ops ali_dma_ops = { .dma_test_irq = ide_dma_test_irq, .dma_lost_irq = ide_dma_lost_irq, .dma_timeout = ide_dma_timeout, + .dma_sff_read_status = ide_dma_sff_read_status, }; static const struct ide_port_info ali15x3_chipset __devinitdata = { @@ -519,6 +518,7 @@ static const struct ide_port_info ali15x3_chipset __devinitdata = { .init_hwif = init_hwif_ali15x3, .init_dma = init_dma_ali15x3, .port_ops = &ali_port_ops, + .dma_ops = &sff_dma_ops, .pio_mask = ATA_PIO5, .swdma_mask = ATA_SWDMA2, .mwdma_mask = ATA_MWDMA2, diff --git a/drivers/ide/amd74xx.c b/drivers/ide/amd74xx.c index c6bcd3014a29..69660a431cd9 100644 --- a/drivers/ide/amd74xx.c +++ b/drivers/ide/amd74xx.c @@ -82,7 +82,7 @@ static void amd_set_drive(ide_drive_t *drive, const u8 speed) { ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); - ide_drive_t *peer = hwif->drives + (~drive->dn & 1); + ide_drive_t *peer = ide_get_pair_dev(drive); struct ide_timing t, p; int T, UT; u8 udma_mask = hwif->ultra_mask; @@ -92,7 +92,7 @@ static void amd_set_drive(ide_drive_t *drive, const u8 speed) ide_timing_compute(drive, speed, &t, T, UT); - if (peer->dev_flags & IDE_DFLAG_PRESENT) { + if (peer) { ide_timing_compute(peer, peer->current_speed, &p, T, UT); ide_timing_merge(&p, &t, &t, IDE_TIMING_8BIT); } diff --git a/drivers/ide/au1xxx-ide.c b/drivers/ide/au1xxx-ide.c index 0ec8fd1e4dcb..79a2dfed8eb7 100644 --- a/drivers/ide/au1xxx-ide.c +++ b/drivers/ide/au1xxx-ide.c @@ -212,8 +212,8 @@ static void auide_set_dma_mode(ide_drive_t *drive, const u8 speed) static int auide_build_dmatable(ide_drive_t *drive) { int i, iswrite, count = 0; - ide_hwif_t *hwif = HWIF(drive); - struct request *rq = HWGROUP(drive)->rq; + ide_hwif_t *hwif = drive->hwif; + struct request *rq = hwif->rq; _auide_hwif *ahwif = &auide_hwif; struct scatterlist *sg; @@ -286,7 +286,7 @@ static int auide_build_dmatable(ide_drive_t *drive) static int auide_dma_end(ide_drive_t *drive) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; if (hwif->sg_nents) { ide_destroy_dmatable(drive); @@ -309,8 +309,8 @@ static void auide_dma_exec_cmd(ide_drive_t *drive, u8 command) } static int auide_dma_setup(ide_drive_t *drive) -{ - struct request *rq = HWGROUP(drive)->rq; +{ + struct request *rq = drive->hwif->rq; if (!auide_build_dmatable(drive)) { ide_map_sg(drive, rq); @@ -502,7 +502,6 @@ static const struct ide_tp_ops au1xxx_tp_ops = { .exec_command = ide_exec_command, .read_status = ide_read_status, .read_altstatus = ide_read_altstatus, - .read_sff_dma_status = ide_read_sff_dma_status, .set_irq = ide_set_irq, diff --git a/drivers/ide/cmd640.c b/drivers/ide/cmd640.c index e4306647d00d..8890276fef7f 100644 --- a/drivers/ide/cmd640.c +++ b/drivers/ide/cmd640.c @@ -467,11 +467,10 @@ static void program_drive_counts(ide_drive_t *drive, unsigned int index) * so we merge the timings, using the slowest value for each timing. */ if (index > 1) { - ide_hwif_t *hwif = drive->hwif; - ide_drive_t *peer = &hwif->drives[!(drive->dn & 1)]; + ide_drive_t *peer = ide_get_pair_dev(drive); unsigned int mate = index ^ 1; - if (peer->dev_flags & IDE_DFLAG_PRESENT) { + if (peer) { if (setup_count < setup_counts[mate]) setup_count = setup_counts[mate]; if (active_count < active_counts[mate]) diff --git a/drivers/ide/cmd64x.c b/drivers/ide/cmd64x.c index 3623bf013bcf..2f9688d87ecd 100644 --- a/drivers/ide/cmd64x.c +++ b/drivers/ide/cmd64x.c @@ -115,7 +115,7 @@ static void program_cycle_times (ide_drive_t *drive, int cycle_time, int active_ */ static void cmd64x_tune_pio(ide_drive_t *drive, const u8 pio) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); struct ide_timing *t = ide_timing_find_mode(XFER_PIO_0 + pio); unsigned int cycle_time; @@ -138,10 +138,12 @@ static void cmd64x_tune_pio(ide_drive_t *drive, const u8 pio) * the slowest address setup timing ourselves. */ if (hwif->channel) { - ide_drive_t *drives = hwif->drives; + ide_drive_t *pair = ide_get_pair_dev(drive); drive->drive_data = setup_count; - setup_count = max(drives[0].drive_data, drives[1].drive_data); + + if (pair) + setup_count = max_t(u8, setup_count, pair->drive_data); } if (setup_count > 5) /* shouldn't actually happen... */ @@ -180,7 +182,7 @@ static void cmd64x_set_pio_mode(ide_drive_t *drive, const u8 pio) static void cmd64x_set_dma_mode(ide_drive_t *drive, const u8 speed) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); u8 unit = drive->dn & 0x01; u8 regU = 0, pciU = hwif->channel ? UDIDETCR1 : UDIDETCR0; @@ -226,7 +228,7 @@ static void cmd64x_set_dma_mode(ide_drive_t *drive, const u8 speed) static int cmd648_dma_end(ide_drive_t *drive) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; unsigned long base = hwif->dma_base - (hwif->channel * 8); int err = ide_dma_end(drive); u8 irq_mask = hwif->channel ? MRDMODE_INTR_CH1 : @@ -242,7 +244,7 @@ static int cmd648_dma_end(ide_drive_t *drive) static int cmd64x_dma_end(ide_drive_t *drive) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); int irq_reg = hwif->channel ? ARTTIM23 : CFR; u8 irq_mask = hwif->channel ? ARTTIM23_INTR_CH1 : @@ -259,7 +261,7 @@ static int cmd64x_dma_end(ide_drive_t *drive) static int cmd648_dma_test_irq(ide_drive_t *drive) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; unsigned long base = hwif->dma_base - (hwif->channel * 8); u8 irq_mask = hwif->channel ? MRDMODE_INTR_CH1 : MRDMODE_INTR_CH0; @@ -282,7 +284,7 @@ static int cmd648_dma_test_irq(ide_drive_t *drive) static int cmd64x_dma_test_irq(ide_drive_t *drive) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); int irq_reg = hwif->channel ? ARTTIM23 : CFR; u8 irq_mask = hwif->channel ? ARTTIM23_INTR_CH1 : @@ -313,7 +315,7 @@ static int cmd64x_dma_test_irq(ide_drive_t *drive) static int cmd646_1_dma_end(ide_drive_t *drive) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; u8 dma_stat = 0, dma_cmd = 0; drive->waiting_for_dma = 0; @@ -383,6 +385,7 @@ static const struct ide_dma_ops cmd64x_dma_ops = { .dma_test_irq = cmd64x_dma_test_irq, .dma_lost_irq = ide_dma_lost_irq, .dma_timeout = ide_dma_timeout, + .dma_sff_read_status = ide_dma_sff_read_status, }; static const struct ide_dma_ops cmd646_rev1_dma_ops = { @@ -394,6 +397,7 @@ static const struct ide_dma_ops cmd646_rev1_dma_ops = { .dma_test_irq = ide_dma_test_irq, .dma_lost_irq = ide_dma_lost_irq, .dma_timeout = ide_dma_timeout, + .dma_sff_read_status = ide_dma_sff_read_status, }; static const struct ide_dma_ops cmd648_dma_ops = { @@ -405,6 +409,7 @@ static const struct ide_dma_ops cmd648_dma_ops = { .dma_test_irq = cmd648_dma_test_irq, .dma_lost_irq = ide_dma_lost_irq, .dma_timeout = ide_dma_timeout, + .dma_sff_read_status = ide_dma_sff_read_status, }; static const struct ide_port_info cmd64x_chipsets[] __devinitdata = { diff --git a/drivers/ide/cs5520.c b/drivers/ide/cs5520.c index 5efb467f8fa0..d003bec56ff9 100644 --- a/drivers/ide/cs5520.c +++ b/drivers/ide/cs5520.c @@ -59,7 +59,7 @@ static struct pio_clocks cs5520_pio_clocks[]={ static void cs5520_set_pio_mode(ide_drive_t *drive, const u8 pio) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; struct pci_dev *pdev = to_pci_dev(hwif->dev); int controller = drive->dn > 1 ? 1 : 0; diff --git a/drivers/ide/cy82c693.c b/drivers/ide/cy82c693.c index d37baf8ecc5f..74fc5401f407 100644 --- a/drivers/ide/cy82c693.c +++ b/drivers/ide/cy82c693.c @@ -203,7 +203,7 @@ static void cy82c693_set_dma_mode(ide_drive_t *drive, const u8 mode) static void cy82c693_set_pio_mode(ide_drive_t *drive, const u8 pio) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); pio_clocks_t pclk; unsigned int addrCtrl; diff --git a/drivers/ide/falconide.c b/drivers/ide/falconide.c index 39d500d84b07..a5ba820d69bb 100644 --- a/drivers/ide/falconide.c +++ b/drivers/ide/falconide.c @@ -70,7 +70,6 @@ static const struct ide_tp_ops falconide_tp_ops = { .exec_command = ide_exec_command, .read_status = ide_read_status, .read_altstatus = ide_read_altstatus, - .read_sff_dma_status = ide_read_sff_dma_status, .set_irq = ide_set_irq, diff --git a/drivers/ide/hpt366.c b/drivers/ide/hpt366.c index b18e10d99d2e..3eb9b5c63a0f 100644 --- a/drivers/ide/hpt366.c +++ b/drivers/ide/hpt366.c @@ -626,7 +626,7 @@ static struct hpt_info *hpt3xx_get_info(struct device *dev) static u8 hpt3xx_udma_filter(ide_drive_t *drive) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; struct hpt_info *info = hpt3xx_get_info(hwif->dev); u8 mask = hwif->ultra_mask; @@ -665,7 +665,7 @@ static u8 hpt3xx_udma_filter(ide_drive_t *drive) static u8 hpt3xx_mdma_filter(ide_drive_t *drive) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; struct hpt_info *info = hpt3xx_get_info(hwif->dev); switch (info->chip_type) { @@ -743,7 +743,7 @@ static void hpt3xx_quirkproc(ide_drive_t *drive) static void hpt3xx_maskproc(ide_drive_t *drive, int mask) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); struct hpt_info *info = hpt3xx_get_info(hwif->dev); @@ -788,7 +788,7 @@ static void hpt366_dma_lost_irq(ide_drive_t *drive) static void hpt370_clear_engine(ide_drive_t *drive) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); pci_write_config_byte(dev, hwif->select_data, 0x37); @@ -797,7 +797,7 @@ static void hpt370_clear_engine(ide_drive_t *drive) static void hpt370_irq_timeout(ide_drive_t *drive) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); u16 bfifo = 0; u8 dma_cmd; @@ -822,7 +822,7 @@ static void hpt370_dma_start(ide_drive_t *drive) static int hpt370_dma_end(ide_drive_t *drive) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; u8 dma_stat = inb(hwif->dma_base + ATA_DMA_STATUS); if (dma_stat & 0x01) { @@ -844,7 +844,7 @@ static void hpt370_dma_timeout(ide_drive_t *drive) /* returns 1 if DMA IRQ issued, 0 otherwise */ static int hpt374_dma_test_irq(ide_drive_t *drive) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); u16 bfifo = 0; u8 dma_stat; @@ -865,7 +865,7 @@ static int hpt374_dma_test_irq(ide_drive_t *drive) static int hpt374_dma_end(ide_drive_t *drive) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); u8 mcr = 0, mcr_addr = hwif->select_data; u8 bwsr = 0, mask = hwif->channel ? 0x02 : 0x01; @@ -927,7 +927,7 @@ static void hpt3xxn_set_clock(ide_hwif_t *hwif, u8 mode) static void hpt3xxn_rw_disk(ide_drive_t *drive, struct request *rq) { - hpt3xxn_set_clock(HWIF(drive), rq_data_dir(rq) ? 0x23 : 0x21); + hpt3xxn_set_clock(drive->hwif, rq_data_dir(rq) ? 0x23 : 0x21); } /** @@ -1349,8 +1349,6 @@ static int __devinit init_dma_hpt366(ide_hwif_t *hwif, if (ide_allocate_dma_engine(hwif)) return -1; - hwif->dma_ops = &sff_dma_ops; - return 0; } @@ -1426,6 +1424,7 @@ static const struct ide_dma_ops hpt37x_dma_ops = { .dma_test_irq = hpt374_dma_test_irq, .dma_lost_irq = ide_dma_lost_irq, .dma_timeout = ide_dma_timeout, + .dma_sff_read_status = ide_dma_sff_read_status, }; static const struct ide_dma_ops hpt370_dma_ops = { @@ -1437,6 +1436,7 @@ static const struct ide_dma_ops hpt370_dma_ops = { .dma_test_irq = ide_dma_test_irq, .dma_lost_irq = ide_dma_lost_irq, .dma_timeout = hpt370_dma_timeout, + .dma_sff_read_status = ide_dma_sff_read_status, }; static const struct ide_dma_ops hpt36x_dma_ops = { @@ -1448,6 +1448,7 @@ static const struct ide_dma_ops hpt36x_dma_ops = { .dma_test_irq = ide_dma_test_irq, .dma_lost_irq = hpt366_dma_lost_irq, .dma_timeout = ide_dma_timeout, + .dma_sff_read_status = ide_dma_sff_read_status, }; static const struct ide_port_info hpt366_chipsets[] __devinitdata = { diff --git a/drivers/ide/icside.c b/drivers/ide/icside.c index 81f70caeb40f..97a35c667aee 100644 --- a/drivers/ide/icside.c +++ b/drivers/ide/icside.c @@ -166,7 +166,7 @@ static const expansioncard_ops_t icside_ops_arcin_v6 = { */ static void icside_maskproc(ide_drive_t *drive, int mask) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; struct expansion_card *ec = ECARD_DEV(hwif->dev); struct icside_state *state = ecard_get_drvdata(ec); unsigned long flags; @@ -284,7 +284,7 @@ static void icside_dma_host_set(ide_drive_t *drive, int on) static int icside_dma_end(ide_drive_t *drive) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; struct expansion_card *ec = ECARD_DEV(hwif->dev); drive->waiting_for_dma = 0; @@ -299,7 +299,7 @@ static int icside_dma_end(ide_drive_t *drive) static void icside_dma_start(ide_drive_t *drive) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; struct expansion_card *ec = ECARD_DEV(hwif->dev); /* We can not enable DMA on both channels simultaneously. */ @@ -309,10 +309,10 @@ static void icside_dma_start(ide_drive_t *drive) static int icside_dma_setup(ide_drive_t *drive) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; struct expansion_card *ec = ECARD_DEV(hwif->dev); struct icside_state *state = ecard_get_drvdata(ec); - struct request *rq = hwif->hwgroup->rq; + struct request *rq = hwif->rq; unsigned int dma_mode; if (rq_data_dir(rq)) @@ -362,7 +362,7 @@ static void icside_dma_exec_cmd(ide_drive_t *drive, u8 cmd) static int icside_dma_test_irq(ide_drive_t *drive) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; struct expansion_card *ec = ECARD_DEV(hwif->dev); struct icside_state *state = ecard_get_drvdata(ec); diff --git a/drivers/ide/ide-acpi.c b/drivers/ide/ide-acpi.c index fd4a36433050..2f9e941968d6 100644 --- a/drivers/ide/ide-acpi.c +++ b/drivers/ide/ide-acpi.c @@ -218,7 +218,7 @@ static acpi_handle ide_acpi_hwif_get_handle(ide_hwif_t *hwif) */ static acpi_handle ide_acpi_drive_get_handle(ide_drive_t *drive) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; int port; acpi_handle drive_handle; @@ -263,7 +263,7 @@ static int do_drive_get_GTF(ide_drive_t *drive, acpi_status status; struct acpi_buffer output; union acpi_object *out_obj; - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; struct device *dev = hwif->gendev.parent; int err = -ENODEV; int port; @@ -641,7 +641,8 @@ void ide_acpi_push_timing(ide_hwif_t *hwif) */ void ide_acpi_set_state(ide_hwif_t *hwif, int on) { - int unit; + ide_drive_t *drive; + int i; if (ide_noacpi || ide_noacpi_psx) return; @@ -655,9 +656,8 @@ void ide_acpi_set_state(ide_hwif_t *hwif, int on) /* channel first and then drives for power on and verse versa for power off */ if (on) acpi_bus_set_power(hwif->acpidata->obj_handle, ACPI_STATE_D0); - for (unit = 0; unit < MAX_DRIVES; ++unit) { - ide_drive_t *drive = &hwif->drives[unit]; + ide_port_for_each_dev(i, drive, hwif) { if (!drive->acpidata->obj_handle) drive->acpidata->obj_handle = ide_acpi_drive_get_handle(drive); @@ -711,15 +711,13 @@ void ide_acpi_port_init_devices(ide_hwif_t *hwif) * for both drives, regardless whether they are connected * or not. */ - hwif->drives[0].acpidata = &hwif->acpidata->master; - hwif->drives[1].acpidata = &hwif->acpidata->slave; + hwif->devices[0]->acpidata = &hwif->acpidata->master; + hwif->devices[1]->acpidata = &hwif->acpidata->slave; /* * Send IDENTIFY for each drive */ - for (i = 0; i < MAX_DRIVES; i++) { - drive = &hwif->drives[i]; - + ide_port_for_each_dev(i, drive, hwif) { memset(drive->acpidata, 0, sizeof(*drive->acpidata)); if ((drive->dev_flags & IDE_DFLAG_PRESENT) == 0) @@ -744,9 +742,7 @@ void ide_acpi_port_init_devices(ide_hwif_t *hwif) ide_acpi_get_timing(hwif); ide_acpi_push_timing(hwif); - for (i = 0; i < MAX_DRIVES; i++) { - drive = &hwif->drives[i]; - + ide_port_for_each_dev(i, drive, hwif) { if (drive->dev_flags & IDE_DFLAG_PRESENT) /* Execute ACPI startup code */ ide_acpi_exec_tfs(drive); diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c index e8688c0f8645..e96c01260598 100644 --- a/drivers/ide/ide-atapi.c +++ b/drivers/ide/ide-atapi.c @@ -243,7 +243,7 @@ EXPORT_SYMBOL_GPL(ide_retry_pc); int ide_cd_expiry(ide_drive_t *drive) { - struct request *rq = HWGROUP(drive)->rq; + struct request *rq = drive->hwif->rq; unsigned long wait = 0; debug_log("%s: rq->cmd[0]: 0x%x\n", __func__, rq->cmd[0]); @@ -294,7 +294,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) { struct ide_atapi_pc *pc = drive->pc; ide_hwif_t *hwif = drive->hwif; - struct request *rq = hwif->hwgroup->rq; + struct request *rq = hwif->rq; const struct ide_tp_ops *tp_ops = hwif->tp_ops; xfer_func_t *xferfunc; unsigned int timeout, temp; @@ -491,7 +491,7 @@ static ide_startstop_t ide_transfer_pc(ide_drive_t *drive) { struct ide_atapi_pc *uninitialized_var(pc); ide_hwif_t *hwif = drive->hwif; - struct request *rq = hwif->hwgroup->rq; + struct request *rq = hwif->rq; ide_expiry_t *expiry; unsigned int timeout; int cmd_len; @@ -549,7 +549,10 @@ static ide_startstop_t ide_transfer_pc(ide_drive_t *drive) } /* Set the interrupt routine */ - ide_set_handler(drive, ide_pc_intr, timeout, expiry); + ide_set_handler(drive, + (dev_is_idecd(drive) ? drive->irq_handler + : ide_pc_intr), + timeout, expiry); /* Begin DMA, if necessary */ if (dev_is_idecd(drive)) { @@ -580,7 +583,7 @@ ide_startstop_t ide_issue_pc(ide_drive_t *drive) if (dev_is_idecd(drive)) { tf_flags = IDE_TFLAG_OUT_NSECT | IDE_TFLAG_OUT_LBAL; - bcount = ide_cd_get_xferlen(hwif->hwgroup->rq); + bcount = ide_cd_get_xferlen(hwif->rq); expiry = ide_cd_expiry; timeout = ATAPI_WAIT_PC; diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 1a7410f88249..cae69372cf45 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -239,7 +239,7 @@ static void cdrom_queue_request_sense(ide_drive_t *drive, void *sense, static void cdrom_end_request(ide_drive_t *drive, int uptodate) { - struct request *rq = HWGROUP(drive)->rq; + struct request *rq = drive->hwif->rq; int nsectors = rq->hard_cur_sectors; ide_debug_log(IDE_DBG_FUNC, "Call %s, cmd: 0x%x, uptodate: 0x%x, " @@ -306,8 +306,7 @@ static void ide_dump_status_no_sense(ide_drive_t *drive, const char *msg, u8 st) static int cdrom_decode_status(ide_drive_t *drive, int good_stat, int *stat_ret) { ide_hwif_t *hwif = drive->hwif; - ide_hwgroup_t *hwgroup = hwif->hwgroup; - struct request *rq = hwgroup->rq; + struct request *rq = hwif->rq; int stat, err, sense_key; /* check for errors */ @@ -502,7 +501,7 @@ end_request: blkdev_dequeue_request(rq); spin_unlock_irqrestore(q->queue_lock, flags); - hwgroup->rq = NULL; + hwif->rq = NULL; cdrom_queue_request_sense(drive, rq->sense, rq); } else @@ -511,106 +510,6 @@ end_request: return 1; } -static ide_startstop_t cdrom_transfer_packet_command(ide_drive_t *); -static ide_startstop_t cdrom_newpc_intr(ide_drive_t *); - -/* - * Set up the device registers for transferring a packet command on DEV, - * expecting to later transfer XFERLEN bytes. HANDLER is the routine - * which actually transfers the command to the drive. If this is a - * drq_interrupt device, this routine will arrange for HANDLER to be - * called when the interrupt from the drive arrives. Otherwise, HANDLER - * will be called immediately after the drive is prepared for the transfer. - */ -static ide_startstop_t cdrom_start_packet_command(ide_drive_t *drive) -{ - ide_hwif_t *hwif = drive->hwif; - struct request *rq = hwif->hwgroup->rq; - int xferlen; - - xferlen = ide_cd_get_xferlen(rq); - - ide_debug_log(IDE_DBG_PC, "Call %s, xferlen: %d\n", __func__, xferlen); - - /* FIXME: for Virtual DMA we must check harder */ - if (drive->dma) - drive->dma = !hwif->dma_ops->dma_setup(drive); - - /* set up the controller registers */ - ide_pktcmd_tf_load(drive, IDE_TFLAG_OUT_NSECT | IDE_TFLAG_OUT_LBAL, - xferlen, drive->dma); - - if (drive->atapi_flags & IDE_AFLAG_DRQ_INTERRUPT) { - /* waiting for CDB interrupt, not DMA yet. */ - if (drive->dma) - drive->waiting_for_dma = 0; - - /* packet command */ - ide_execute_command(drive, ATA_CMD_PACKET, - cdrom_transfer_packet_command, - ATAPI_WAIT_PC, ide_cd_expiry); - return ide_started; - } else { - ide_execute_pkt_cmd(drive); - - return cdrom_transfer_packet_command(drive); - } -} - -/* - * Send a packet command to DRIVE described by CMD_BUF and CMD_LEN. The device - * registers must have already been prepared by cdrom_start_packet_command. - * HANDLER is the interrupt handler to call when the command completes or - * there's data ready. - */ -#define ATAPI_MIN_CDB_BYTES 12 -static ide_startstop_t cdrom_transfer_packet_command(ide_drive_t *drive) -{ - ide_hwif_t *hwif = drive->hwif; - struct request *rq = hwif->hwgroup->rq; - int cmd_len; - ide_startstop_t startstop; - - ide_debug_log(IDE_DBG_PC, "Call %s\n", __func__); - - if (drive->atapi_flags & IDE_AFLAG_DRQ_INTERRUPT) { - /* - * Here we should have been called after receiving an interrupt - * from the device. DRQ should how be set. - */ - - /* check for errors */ - if (cdrom_decode_status(drive, ATA_DRQ, NULL)) - return ide_stopped; - - /* ok, next interrupt will be DMA interrupt */ - if (drive->dma) - drive->waiting_for_dma = 1; - } else { - /* otherwise, we must wait for DRQ to get set */ - if (ide_wait_stat(&startstop, drive, ATA_DRQ, - ATA_BUSY, WAIT_READY)) - return startstop; - } - - /* arm the interrupt handler */ - ide_set_handler(drive, cdrom_newpc_intr, rq->timeout, ide_cd_expiry); - - /* ATAPI commands get padded out to 12 bytes minimum */ - cmd_len = COMMAND_SIZE(rq->cmd[0]); - if (cmd_len < ATAPI_MIN_CDB_BYTES) - cmd_len = ATAPI_MIN_CDB_BYTES; - - /* send the command to the device */ - hwif->tp_ops->output_data(drive, NULL, rq->cmd, cmd_len); - - /* start the DMA if need be */ - if (drive->dma) - hwif->dma_ops->dma_start(drive); - - return ide_started; -} - /* * Check the contents of the interrupt reason register from the cdrom * and attempt to recover if there are problems. Returns 0 if everything's @@ -854,8 +753,7 @@ static int cdrom_newpc_intr_dummy_cb(struct request *rq) static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive) { ide_hwif_t *hwif = drive->hwif; - ide_hwgroup_t *hwgroup = hwif->hwgroup; - struct request *rq = hwgroup->rq; + struct request *rq = hwif->rq; xfer_func_t *xferfunc; ide_expiry_t *expiry = NULL; int dma_error = 0, dma, stat, thislen, uptodate = 0; @@ -1061,7 +959,7 @@ end_request: if (blk_end_request(rq, 0, dlen)) BUG(); - hwgroup->rq = NULL; + hwif->rq = NULL; } else { if (!uptodate) rq->cmd_flags |= REQ_FAILED; @@ -1183,7 +1081,7 @@ static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq, return ide_stopped; } - return cdrom_start_packet_command(drive); + return ide_issue_pc(drive); } /* @@ -1916,7 +1814,7 @@ static void ide_cd_release(struct kref *kref) static int ide_cd_probe(ide_drive_t *); -static ide_driver_t ide_cdrom_driver = { +static struct ide_driver ide_cdrom_driver = { .gen_driver = { .owner = THIS_MODULE, .name = "ide-cdrom", @@ -1927,7 +1825,6 @@ static ide_driver_t ide_cdrom_driver = { .version = IDECD_VERSION, .do_request = ide_cd_do_request, .end_request = ide_end_request, - .error = __ide_error, #ifdef CONFIG_IDE_PROC_FS .proc_entries = ide_cd_proc_entries, .proc_devsets = ide_cd_proc_devsets, @@ -2082,6 +1979,7 @@ static int ide_cd_probe(ide_drive_t *drive) } drive->debug_mask = debug_mask; + drive->irq_handler = cdrom_newpc_intr; info = kzalloc(sizeof(struct cdrom_info), GFP_KERNEL); if (info == NULL) { diff --git a/drivers/ide/ide-cd.h b/drivers/ide/ide-cd.h index bf676b262181..ac40d6cb90a2 100644 --- a/drivers/ide/ide-cd.h +++ b/drivers/ide/ide-cd.h @@ -33,33 +33,33 @@ /* Structure of a MSF cdrom address. */ struct atapi_msf { - byte reserved; - byte minute; - byte second; - byte frame; + u8 reserved; + u8 minute; + u8 second; + u8 frame; }; /* Space to hold the disk TOC. */ #define MAX_TRACKS 99 struct atapi_toc_header { unsigned short toc_length; - byte first_track; - byte last_track; + u8 first_track; + u8 last_track; }; struct atapi_toc_entry { - byte reserved1; + u8 reserved1; #if defined(__BIG_ENDIAN_BITFIELD) - __u8 adr : 4; - __u8 control : 4; + u8 adr : 4; + u8 control : 4; #elif defined(__LITTLE_ENDIAN_BITFIELD) - __u8 control : 4; - __u8 adr : 4; + u8 control : 4; + u8 adr : 4; #else #error "Please fix <asm/byteorder.h>" #endif - byte track; - byte reserved2; + u8 track; + u8 reserved2; union { unsigned lba; struct atapi_msf msf; @@ -77,10 +77,10 @@ struct atapi_toc { /* Extra per-device info for cdrom drives. */ struct cdrom_info { - ide_drive_t *drive; - ide_driver_t *driver; - struct gendisk *disk; - struct kref kref; + ide_drive_t *drive; + struct ide_driver *driver; + struct gendisk *disk; + struct kref kref; /* Buffer for table of contents. NULL if we haven't allocated a TOC buffer for this device yet. */ diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c index eb9fac4d0f0c..4088a622873e 100644 --- a/drivers/ide/ide-disk.c +++ b/drivers/ide/ide-disk.c @@ -89,7 +89,7 @@ static void ide_tf_set_cmd(ide_drive_t *drive, ide_task_t *task, u8 dma) static ide_startstop_t __ide_do_rw_disk(ide_drive_t *drive, struct request *rq, sector_t block) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; u16 nsectors = (u16)rq->nr_sectors; u8 lba48 = !!(drive->dev_flags & IDE_DFLAG_LBA48); u8 dma = !!(drive->dev_flags & IDE_DFLAG_USING_DMA); @@ -187,7 +187,7 @@ static ide_startstop_t __ide_do_rw_disk(ide_drive_t *drive, struct request *rq, static ide_startstop_t ide_do_rw_disk(ide_drive_t *drive, struct request *rq, sector_t block) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; BUG_ON(drive->dev_flags & IDE_DFLAG_BLOCKED); diff --git a/drivers/ide/ide-dma-sff.c b/drivers/ide/ide-dma-sff.c index f6d2d44d8a9a..123d393658af 100644 --- a/drivers/ide/ide-dma-sff.c +++ b/drivers/ide/ide-dma-sff.c @@ -50,6 +50,27 @@ int config_drive_for_dma(ide_drive_t *drive) return 0; } +u8 ide_dma_sff_read_status(ide_hwif_t *hwif) +{ + unsigned long addr = hwif->dma_base + ATA_DMA_STATUS; + + if (hwif->host_flags & IDE_HFLAG_MMIO) + return readb((void __iomem *)addr); + else + return inb(addr); +} +EXPORT_SYMBOL_GPL(ide_dma_sff_read_status); + +static void ide_dma_sff_write_status(ide_hwif_t *hwif, u8 val) +{ + unsigned long addr = hwif->dma_base + ATA_DMA_STATUS; + + if (hwif->host_flags & IDE_HFLAG_MMIO) + writeb(val, (void __iomem *)addr); + else + outb(val, addr); +} + /** * ide_dma_host_set - Enable/disable DMA on a host * @drive: drive to control @@ -62,18 +83,14 @@ void ide_dma_host_set(ide_drive_t *drive, int on) { ide_hwif_t *hwif = drive->hwif; u8 unit = drive->dn & 1; - u8 dma_stat = hwif->tp_ops->read_sff_dma_status(hwif); + u8 dma_stat = hwif->dma_ops->dma_sff_read_status(hwif); if (on) dma_stat |= (1 << (5 + unit)); else dma_stat &= ~(1 << (5 + unit)); - if (hwif->host_flags & IDE_HFLAG_MMIO) - writeb(dma_stat, - (void __iomem *)(hwif->dma_base + ATA_DMA_STATUS)); - else - outb(dma_stat, hwif->dma_base + ATA_DMA_STATUS); + ide_dma_sff_write_status(hwif, dma_stat); } EXPORT_SYMBOL_GPL(ide_dma_host_set); @@ -175,7 +192,7 @@ EXPORT_SYMBOL_GPL(ide_build_dmatable); int ide_dma_setup(ide_drive_t *drive) { ide_hwif_t *hwif = drive->hwif; - struct request *rq = hwif->hwgroup->rq; + struct request *rq = hwif->rq; unsigned int reading = rq_data_dir(rq) ? 0 : ATA_DMA_WR; u8 mmio = (hwif->host_flags & IDE_HFLAG_MMIO) ? 1 : 0; u8 dma_stat; @@ -187,7 +204,7 @@ int ide_dma_setup(ide_drive_t *drive) } /* PRD table */ - if (hwif->host_flags & IDE_HFLAG_MMIO) + if (mmio) writel(hwif->dmatable_dma, (void __iomem *)(hwif->dma_base + ATA_DMA_TABLE_OFS)); else @@ -200,15 +217,10 @@ int ide_dma_setup(ide_drive_t *drive) outb(reading, hwif->dma_base + ATA_DMA_CMD); /* read DMA status for INTR & ERROR flags */ - dma_stat = hwif->tp_ops->read_sff_dma_status(hwif); + dma_stat = hwif->dma_ops->dma_sff_read_status(hwif); /* clear INTR & ERROR flags */ - if (mmio) - writeb(dma_stat | ATA_DMA_ERR | ATA_DMA_INTR, - (void __iomem *)(hwif->dma_base + ATA_DMA_STATUS)); - else - outb(dma_stat | ATA_DMA_ERR | ATA_DMA_INTR, - hwif->dma_base + ATA_DMA_STATUS); + ide_dma_sff_write_status(hwif, dma_stat | ATA_DMA_ERR | ATA_DMA_INTR); drive->waiting_for_dma = 1; return 0; @@ -232,7 +244,7 @@ EXPORT_SYMBOL_GPL(ide_dma_setup); static int dma_timer_expiry(ide_drive_t *drive) { ide_hwif_t *hwif = drive->hwif; - u8 dma_stat = hwif->tp_ops->read_sff_dma_status(hwif); + u8 dma_stat = hwif->dma_ops->dma_sff_read_status(hwif); printk(KERN_WARNING "%s: %s: DMA status (0x%02x)\n", drive->name, __func__, dma_stat); @@ -240,7 +252,7 @@ static int dma_timer_expiry(ide_drive_t *drive) if ((dma_stat & 0x18) == 0x18) /* BUSY Stupid Early Timer !! */ return WAIT_CMD; - hwif->hwgroup->expiry = NULL; /* one free ride for now */ + hwif->expiry = NULL; /* one free ride for now */ if (dma_stat & ATA_DMA_ERR) /* ERROR */ return -1; @@ -289,13 +301,12 @@ EXPORT_SYMBOL_GPL(ide_dma_start); int ide_dma_end(ide_drive_t *drive) { ide_hwif_t *hwif = drive->hwif; - u8 mmio = (hwif->host_flags & IDE_HFLAG_MMIO) ? 1 : 0; u8 dma_stat = 0, dma_cmd = 0, mask; drive->waiting_for_dma = 0; /* stop DMA */ - if (mmio) { + if (hwif->host_flags & IDE_HFLAG_MMIO) { dma_cmd = readb((void __iomem *)(hwif->dma_base + ATA_DMA_CMD)); writeb(dma_cmd & ~ATA_DMA_START, (void __iomem *)(hwif->dma_base + ATA_DMA_CMD)); @@ -305,15 +316,10 @@ int ide_dma_end(ide_drive_t *drive) } /* get DMA status */ - dma_stat = hwif->tp_ops->read_sff_dma_status(hwif); + dma_stat = hwif->dma_ops->dma_sff_read_status(hwif); - if (mmio) - /* clear the INTR & ERROR bits */ - writeb(dma_stat | ATA_DMA_ERR | ATA_DMA_INTR, - (void __iomem *)(hwif->dma_base + ATA_DMA_STATUS)); - else - outb(dma_stat | ATA_DMA_ERR | ATA_DMA_INTR, - hwif->dma_base + ATA_DMA_STATUS); + /* clear INTR & ERROR bits */ + ide_dma_sff_write_status(hwif, dma_stat | ATA_DMA_ERR | ATA_DMA_INTR); /* purge DMA mappings */ ide_destroy_dmatable(drive); @@ -331,7 +337,7 @@ EXPORT_SYMBOL_GPL(ide_dma_end); int ide_dma_test_irq(ide_drive_t *drive) { ide_hwif_t *hwif = drive->hwif; - u8 dma_stat = hwif->tp_ops->read_sff_dma_status(hwif); + u8 dma_stat = hwif->dma_ops->dma_sff_read_status(hwif); return (dma_stat & ATA_DMA_INTR) ? 1 : 0; } @@ -346,5 +352,6 @@ const struct ide_dma_ops sff_dma_ops = { .dma_test_irq = ide_dma_test_irq, .dma_timeout = ide_dma_timeout, .dma_lost_irq = ide_dma_lost_irq, + .dma_sff_read_status = ide_dma_sff_read_status, }; EXPORT_SYMBOL_GPL(sff_dma_ops); diff --git a/drivers/ide/ide-dma.c b/drivers/ide/ide-dma.c index fffd11717b2d..72ebab0bc755 100644 --- a/drivers/ide/ide-dma.c +++ b/drivers/ide/ide-dma.c @@ -96,7 +96,7 @@ ide_startstop_t ide_dma_intr(ide_drive_t *drive) if (OK_STAT(stat, DRIVE_READY, drive->bad_wstat | ATA_DRQ)) { if (!dma_stat) { - struct request *rq = hwif->hwgroup->rq; + struct request *rq = hwif->rq; task_end_request(drive, rq, stat); return ide_stopped; diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c index 0a48e2dc53a2..3eab1c6c9b31 100644 --- a/drivers/ide/ide-floppy.c +++ b/drivers/ide/ide-floppy.c @@ -71,7 +71,7 @@ static int ide_floppy_end_request(ide_drive_t *drive, int uptodate, int nsecs) { struct ide_disk_obj *floppy = drive->driver_data; - struct request *rq = HWGROUP(drive)->rq; + struct request *rq = drive->hwif->rq; int error; ide_debug_log(IDE_DBG_FUNC, "Call %s\n", __func__); diff --git a/drivers/ide/ide-gd.c b/drivers/ide/ide-gd.c index b8078b3231f7..7857b209c6df 100644 --- a/drivers/ide/ide-gd.c +++ b/drivers/ide/ide-gd.c @@ -149,7 +149,7 @@ static int ide_gd_end_request(ide_drive_t *drive, int uptodate, int nrsecs) return drive->disk_ops->end_request(drive, uptodate, nrsecs); } -static ide_driver_t ide_gd_driver = { +static struct ide_driver ide_gd_driver = { .gen_driver = { .owner = THIS_MODULE, .name = "ide-gd", @@ -162,7 +162,6 @@ static ide_driver_t ide_gd_driver = { .version = IDE_GD_VERSION, .do_request = ide_gd_do_request, .end_request = ide_gd_end_request, - .error = __ide_error, #ifdef CONFIG_IDE_PROC_FS .proc_entries = ide_disk_proc_entries, .proc_devsets = ide_disk_proc_devsets, diff --git a/drivers/ide/ide-gd.h b/drivers/ide/ide-gd.h index 7d3d101713e0..a86779f0756b 100644 --- a/drivers/ide/ide-gd.h +++ b/drivers/ide/ide-gd.h @@ -14,11 +14,11 @@ #endif struct ide_disk_obj { - ide_drive_t *drive; - ide_driver_t *driver; - struct gendisk *disk; - struct kref kref; - unsigned int openers; /* protected by BKL for now */ + ide_drive_t *drive; + struct ide_driver *driver; + struct gendisk *disk; + struct kref kref; + unsigned int openers; /* protected by BKL for now */ /* Last failed packet command */ struct ide_atapi_pc *failed_pc; diff --git a/drivers/ide/ide-h8300.c b/drivers/ide/ide-h8300.c index e2cdd2e9cdec..9270d3255ee0 100644 --- a/drivers/ide/ide-h8300.c +++ b/drivers/ide/ide-h8300.c @@ -159,7 +159,6 @@ static const struct ide_tp_ops h8300_tp_ops = { .exec_command = ide_exec_command, .read_status = ide_read_status, .read_altstatus = ide_read_altstatus, - .read_sff_dma_status = ide_read_sff_dma_status, .set_irq = ide_set_irq, diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index 1c36a8e83d36..cc163319dfbd 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -88,7 +88,7 @@ static int __ide_end_request(ide_drive_t *drive, struct request *rq, ret = 0; if (ret == 0 && dequeue) - drive->hwif->hwgroup->rq = NULL; + drive->hwif->rq = NULL; return ret; } @@ -107,7 +107,7 @@ static int __ide_end_request(ide_drive_t *drive, struct request *rq, int ide_end_request (ide_drive_t *drive, int uptodate, int nr_sectors) { unsigned int nr_bytes = nr_sectors << 9; - struct request *rq = drive->hwif->hwgroup->rq; + struct request *rq = drive->hwif->rq; if (!nr_bytes) { if (blk_pc_request(rq)) @@ -160,8 +160,8 @@ EXPORT_SYMBOL_GPL(ide_end_dequeued_request); void ide_end_drive_cmd (ide_drive_t *drive, u8 stat, u8 err) { - ide_hwgroup_t *hwgroup = drive->hwif->hwgroup; - struct request *rq = hwgroup->rq; + ide_hwif_t *hwif = drive->hwif; + struct request *rq = hwif->rq; if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE) { ide_task_t *task = (ide_task_t *)rq->special; @@ -186,7 +186,7 @@ void ide_end_drive_cmd (ide_drive_t *drive, u8 stat, u8 err) return; } - hwgroup->rq = NULL; + hwif->rq = NULL; rq->errors = err; @@ -199,9 +199,9 @@ EXPORT_SYMBOL(ide_end_drive_cmd); static void ide_kill_rq(ide_drive_t *drive, struct request *rq) { if (rq->rq_disk) { - ide_driver_t *drv; + struct ide_driver *drv; - drv = *(ide_driver_t **)rq->rq_disk->private_data; + drv = *(struct ide_driver **)rq->rq_disk->private_data; drv->end_request(drive, 0, 0); } else ide_end_request(drive, 0, 0); @@ -291,7 +291,7 @@ static ide_startstop_t ide_atapi_error(ide_drive_t *drive, struct request *rq, u return ide_stopped; } -ide_startstop_t +static ide_startstop_t __ide_error(ide_drive_t *drive, struct request *rq, u8 stat, u8 err) { if (drive->media == ide_disk) @@ -299,8 +299,6 @@ __ide_error(ide_drive_t *drive, struct request *rq, u8 stat, u8 err) return ide_atapi_error(drive, rq, stat, err); } -EXPORT_SYMBOL_GPL(__ide_error); - /** * ide_error - handle an error on the IDE * @drive: drive the error occurred on @@ -321,7 +319,8 @@ ide_startstop_t ide_error (ide_drive_t *drive, const char *msg, u8 stat) err = ide_dump_status(drive, msg, stat); - if ((rq = HWGROUP(drive)->rq) == NULL) + rq = drive->hwif->rq; + if (rq == NULL) return ide_stopped; /* retry only "normal" I/O: */ @@ -331,15 +330,8 @@ ide_startstop_t ide_error (ide_drive_t *drive, const char *msg, u8 stat) return ide_stopped; } - if (rq->rq_disk) { - ide_driver_t *drv; - - drv = *(ide_driver_t **)rq->rq_disk->private_data; - return drv->error(drive, rq, stat, err); - } else - return __ide_error(drive, rq, stat, err); + return __ide_error(drive, rq, stat, err); } - EXPORT_SYMBOL_GPL(ide_error); static void ide_tf_set_specify_cmd(ide_drive_t *drive, struct ide_taskfile *tf) @@ -462,7 +454,7 @@ EXPORT_SYMBOL_GPL(ide_init_sg_cmd); static ide_startstop_t execute_drive_cmd (ide_drive_t *drive, struct request *rq) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; ide_task_t *task = rq->special; if (task) { @@ -586,7 +578,7 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq) #ifdef DEBUG printk("%s: start_request: current=0x%08lx\n", - HWIF(drive)->name, (unsigned long) rq); + drive->hwif->name, (unsigned long) rq); #endif /* bail early if we've exceeded max_failures */ @@ -605,7 +597,7 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq) return startstop; } if (!drive->special.all) { - ide_driver_t *drv; + struct ide_driver *drv; /* * We reset the drive so we need to issue a SETFEATURES. @@ -638,7 +630,7 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq) */ return ide_special_rq(drive, rq); - drv = *(ide_driver_t **)rq->rq_disk->private_data; + drv = *(struct ide_driver **)rq->rq_disk->private_data; return drv->do_request(drive, rq, rq->sector); } @@ -654,7 +646,7 @@ kill_rq: * @timeout: time to stall for (jiffies) * * ide_stall_queue() can be used by a drive to give excess bandwidth back - * to the hwgroup by sleeping for timeout jiffies. + * to the port by sleeping for timeout jiffies. */ void ide_stall_queue (ide_drive_t *drive, unsigned long timeout) @@ -666,45 +658,53 @@ void ide_stall_queue (ide_drive_t *drive, unsigned long timeout) } EXPORT_SYMBOL(ide_stall_queue); +static inline int ide_lock_port(ide_hwif_t *hwif) +{ + if (hwif->busy) + return 1; + + hwif->busy = 1; + + return 0; +} + +static inline void ide_unlock_port(ide_hwif_t *hwif) +{ + hwif->busy = 0; +} + +static inline int ide_lock_host(struct ide_host *host, ide_hwif_t *hwif) +{ + int rc = 0; + + if (host->host_flags & IDE_HFLAG_SERIALIZE) { + rc = test_and_set_bit_lock(IDE_HOST_BUSY, &host->host_busy); + if (rc == 0) { + /* for atari only */ + ide_get_lock(ide_intr, hwif); + } + } + return rc; +} + +static inline void ide_unlock_host(struct ide_host *host) +{ + if (host->host_flags & IDE_HFLAG_SERIALIZE) { + /* for atari only */ + ide_release_lock(); + clear_bit_unlock(IDE_HOST_BUSY, &host->host_busy); + } +} + /* - * Issue a new request to a drive from hwgroup - * - * A hwgroup is a serialized group of IDE interfaces. Usually there is - * exactly one hwif (interface) per hwgroup, but buggy controllers (eg. CMD640) - * may have both interfaces in a single hwgroup to "serialize" access. - * Or possibly multiple ISA interfaces can share a common IRQ by being grouped - * together into one hwgroup for serialized access. - * - * Note also that several hwgroups can end up sharing a single IRQ, - * possibly along with many other devices. This is especially common in - * PCI-based systems with off-board IDE controller cards. - * - * The IDE driver uses a per-hwgroup lock to protect the hwgroup->busy flag. - * - * The first thread into the driver for a particular hwgroup sets the - * hwgroup->busy flag to indicate that this hwgroup is now active, - * and then initiates processing of the top request from the request queue. - * - * Other threads attempting entry notice the busy setting, and will simply - * queue their new requests and exit immediately. Note that hwgroup->busy - * remains set even when the driver is merely awaiting the next interrupt. - * Thus, the meaning is "this hwgroup is busy processing a request". - * - * When processing of a request completes, the completing thread or IRQ-handler - * will start the next request from the queue. If no more work remains, - * the driver will clear the hwgroup->busy flag and exit. - * - * The per-hwgroup spinlock is used to protect all access to the - * hwgroup->busy flag, but is otherwise not needed for most processing in - * the driver. This makes the driver much more friendlier to shared IRQs - * than previous designs, while remaining 100% (?) SMP safe and capable. + * Issue a new request to a device. */ void do_ide_request(struct request_queue *q) { ide_drive_t *drive = q->queuedata; ide_hwif_t *hwif = drive->hwif; - ide_hwgroup_t *hwgroup = hwif->hwgroup; - struct request *rq; + struct ide_host *host = hwif->host; + struct request *rq = NULL; ide_startstop_t startstop; /* @@ -721,32 +721,40 @@ void do_ide_request(struct request_queue *q) blk_remove_plug(q); spin_unlock_irq(q->queue_lock); - spin_lock_irq(&hwgroup->lock); - if (!ide_lock_hwgroup(hwgroup)) { + if (ide_lock_host(host, hwif)) + goto plug_device_2; + + spin_lock_irq(&hwif->lock); + + if (!ide_lock_port(hwif)) { + ide_hwif_t *prev_port; repeat: - hwgroup->rq = NULL; + prev_port = hwif->host->cur_port; + hwif->rq = NULL; if (drive->dev_flags & IDE_DFLAG_SLEEPING) { if (time_before(drive->sleep, jiffies)) { - ide_unlock_hwgroup(hwgroup); + ide_unlock_port(hwif); goto plug_device; } } - if (hwif != hwgroup->hwif) { + if ((hwif->host->host_flags & IDE_HFLAG_SERIALIZE) && + hwif != prev_port) { /* - * set nIEN for previous hwif, drives in the + * set nIEN for previous port, drives in the * quirk_list may not like intr setups/cleanups */ - if (drive->quirk_list == 0) - hwif->tp_ops->set_irq(hwif, 0); + if (prev_port && prev_port->cur_dev->quirk_list == 0) + prev_port->tp_ops->set_irq(prev_port, 0); + + hwif->host->cur_port = hwif; } - hwgroup->hwif = hwif; - hwgroup->drive = drive; + hwif->cur_dev = drive; drive->dev_flags &= ~(IDE_DFLAG_SLEEPING | IDE_DFLAG_PARKED); - spin_unlock_irq(&hwgroup->lock); + spin_unlock_irq(&hwif->lock); spin_lock_irq(q->queue_lock); /* * we know that the queue isn't empty, but this can happen @@ -754,10 +762,10 @@ repeat: */ rq = elv_next_request(drive->queue); spin_unlock_irq(q->queue_lock); - spin_lock_irq(&hwgroup->lock); + spin_lock_irq(&hwif->lock); if (!rq) { - ide_unlock_hwgroup(hwgroup); + ide_unlock_port(hwif); goto out; } @@ -778,27 +786,31 @@ repeat: blk_pm_request(rq) == 0 && (rq->cmd_flags & REQ_PREEMPT) == 0) { /* there should be no pending command at this point */ - ide_unlock_hwgroup(hwgroup); + ide_unlock_port(hwif); goto plug_device; } - hwgroup->rq = rq; + hwif->rq = rq; - spin_unlock_irq(&hwgroup->lock); + spin_unlock_irq(&hwif->lock); startstop = start_request(drive, rq); - spin_lock_irq(&hwgroup->lock); + spin_lock_irq(&hwif->lock); if (startstop == ide_stopped) goto repeat; } else goto plug_device; out: - spin_unlock_irq(&hwgroup->lock); + spin_unlock_irq(&hwif->lock); + if (rq == NULL) + ide_unlock_host(host); spin_lock_irq(q->queue_lock); return; plug_device: - spin_unlock_irq(&hwgroup->lock); + spin_unlock_irq(&hwif->lock); + ide_unlock_host(host); +plug_device_2: spin_lock_irq(q->queue_lock); if (!elv_queue_empty(q)) @@ -806,13 +818,13 @@ plug_device: } /* - * un-busy the hwgroup etc, and clear any pending DMA status. we want to + * un-busy the port etc, and clear any pending DMA status. we want to * retry the current request in pio mode instead of risking tossing it * all away */ static ide_startstop_t ide_dma_timeout_retry(ide_drive_t *drive, int error) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; struct request *rq; ide_startstop_t ret = ide_stopped; @@ -840,15 +852,14 @@ static ide_startstop_t ide_dma_timeout_retry(ide_drive_t *drive, int error) ide_dma_off_quietly(drive); /* - * un-busy drive etc (hwgroup->busy is cleared on return) and - * make sure request is sane + * un-busy drive etc and make sure request is sane */ - rq = HWGROUP(drive)->rq; + rq = hwif->rq; if (!rq) goto out; - HWGROUP(drive)->rq = NULL; + hwif->rq = NULL; rq->errors = 0; @@ -876,7 +887,7 @@ static void ide_plug_device(ide_drive_t *drive) /** * ide_timer_expiry - handle lack of an IDE interrupt - * @data: timer callback magic (hwgroup) + * @data: timer callback magic (hwif) * * An IDE command has timed out before the expected drive return * occurred. At this point we attempt to clean up the current @@ -890,18 +901,18 @@ static void ide_plug_device(ide_drive_t *drive) void ide_timer_expiry (unsigned long data) { - ide_hwgroup_t *hwgroup = (ide_hwgroup_t *) data; + ide_hwif_t *hwif = (ide_hwif_t *)data; ide_drive_t *uninitialized_var(drive); ide_handler_t *handler; - ide_expiry_t *expiry; unsigned long flags; unsigned long wait = -1; int plug_device = 0; - spin_lock_irqsave(&hwgroup->lock, flags); + spin_lock_irqsave(&hwif->lock, flags); - if (((handler = hwgroup->handler) == NULL) || - (hwgroup->req_gen != hwgroup->req_gen_timer)) { + handler = hwif->handler; + + if (handler == NULL || hwif->req_gen != hwif->req_gen_timer) { /* * Either a marginal timeout occurred * (got the interrupt just as timer expired), @@ -909,72 +920,68 @@ void ide_timer_expiry (unsigned long data) * Either way, we don't really want to complain about anything. */ } else { - drive = hwgroup->drive; - if (!drive) { - printk(KERN_ERR "ide_timer_expiry: hwgroup->drive was NULL\n"); - hwgroup->handler = NULL; - } else { - ide_hwif_t *hwif; - ide_startstop_t startstop = ide_stopped; - - if ((expiry = hwgroup->expiry) != NULL) { - /* continue */ - if ((wait = expiry(drive)) > 0) { - /* reset timer */ - hwgroup->timer.expires = jiffies + wait; - hwgroup->req_gen_timer = hwgroup->req_gen; - add_timer(&hwgroup->timer); - spin_unlock_irqrestore(&hwgroup->lock, flags); - return; - } - } - hwgroup->handler = NULL; - /* - * We need to simulate a real interrupt when invoking - * the handler() function, which means we need to - * globally mask the specific IRQ: - */ - spin_unlock(&hwgroup->lock); - hwif = HWIF(drive); - /* disable_irq_nosync ?? */ - disable_irq(hwif->irq); - /* local CPU only, - * as if we were handling an interrupt */ - local_irq_disable(); - if (hwgroup->polling) { - startstop = handler(drive); - } else if (drive_is_ready(drive)) { - if (drive->waiting_for_dma) - hwif->dma_ops->dma_lost_irq(drive); - (void)ide_ack_intr(hwif); - printk(KERN_WARNING "%s: lost interrupt\n", drive->name); - startstop = handler(drive); - } else { - if (drive->waiting_for_dma) { - startstop = ide_dma_timeout_retry(drive, wait); - } else - startstop = - ide_error(drive, "irq timeout", - hwif->tp_ops->read_status(hwif)); - } - spin_lock_irq(&hwgroup->lock); - enable_irq(hwif->irq); - if (startstop == ide_stopped) { - ide_unlock_hwgroup(hwgroup); - plug_device = 1; + ide_expiry_t *expiry = hwif->expiry; + ide_startstop_t startstop = ide_stopped; + + drive = hwif->cur_dev; + + if (expiry) { + wait = expiry(drive); + if (wait > 0) { /* continue */ + /* reset timer */ + hwif->timer.expires = jiffies + wait; + hwif->req_gen_timer = hwif->req_gen; + add_timer(&hwif->timer); + spin_unlock_irqrestore(&hwif->lock, flags); + return; } } + hwif->handler = NULL; + /* + * We need to simulate a real interrupt when invoking + * the handler() function, which means we need to + * globally mask the specific IRQ: + */ + spin_unlock(&hwif->lock); + /* disable_irq_nosync ?? */ + disable_irq(hwif->irq); + /* local CPU only, as if we were handling an interrupt */ + local_irq_disable(); + if (hwif->polling) { + startstop = handler(drive); + } else if (drive_is_ready(drive)) { + if (drive->waiting_for_dma) + hwif->dma_ops->dma_lost_irq(drive); + (void)ide_ack_intr(hwif); + printk(KERN_WARNING "%s: lost interrupt\n", + drive->name); + startstop = handler(drive); + } else { + if (drive->waiting_for_dma) + startstop = ide_dma_timeout_retry(drive, wait); + else + startstop = ide_error(drive, "irq timeout", + hwif->tp_ops->read_status(hwif)); + } + spin_lock_irq(&hwif->lock); + enable_irq(hwif->irq); + if (startstop == ide_stopped) { + ide_unlock_port(hwif); + plug_device = 1; + } } - spin_unlock_irqrestore(&hwgroup->lock, flags); + spin_unlock_irqrestore(&hwif->lock, flags); - if (plug_device) + if (plug_device) { + ide_unlock_host(hwif->host); ide_plug_device(drive); + } } /** * unexpected_intr - handle an unexpected IDE interrupt * @irq: interrupt line - * @hwgroup: hwgroup being processed + * @hwif: port being processed * * There's nothing really useful we can do with an unexpected interrupt, * other than reading the status register (to clear it), and logging it. @@ -998,52 +1005,38 @@ void ide_timer_expiry (unsigned long data) * before completing the issuance of any new drive command, so we will not * be accidentally invoked as a result of any valid command completion * interrupt. - * - * Note that we must walk the entire hwgroup here. We know which hwif - * is doing the current command, but we don't know which hwif burped - * mysteriously. */ - -static void unexpected_intr (int irq, ide_hwgroup_t *hwgroup) -{ - u8 stat; - ide_hwif_t *hwif = hwgroup->hwif; - /* - * handle the unexpected interrupt - */ - do { - if (hwif->irq == irq) { - stat = hwif->tp_ops->read_status(hwif); - - if (!OK_STAT(stat, ATA_DRDY, BAD_STAT)) { - /* Try to not flood the console with msgs */ - static unsigned long last_msgtime, count; - ++count; - if (time_after(jiffies, last_msgtime + HZ)) { - last_msgtime = jiffies; - printk(KERN_ERR "%s%s: unexpected interrupt, " - "status=0x%02x, count=%ld\n", - hwif->name, - (hwif->next==hwgroup->hwif) ? "" : "(?)", stat, count); - } - } +static void unexpected_intr(int irq, ide_hwif_t *hwif) +{ + u8 stat = hwif->tp_ops->read_status(hwif); + + if (!OK_STAT(stat, ATA_DRDY, BAD_STAT)) { + /* Try to not flood the console with msgs */ + static unsigned long last_msgtime, count; + ++count; + + if (time_after(jiffies, last_msgtime + HZ)) { + last_msgtime = jiffies; + printk(KERN_ERR "%s: unexpected interrupt, " + "status=0x%02x, count=%ld\n", + hwif->name, stat, count); } - } while ((hwif = hwif->next) != hwgroup->hwif); + } } /** * ide_intr - default IDE interrupt handler * @irq: interrupt number - * @dev_id: hwif group + * @dev_id: hwif * @regs: unused weirdness from the kernel irq layer * * This is the default IRQ handler for the IDE layer. You should * not need to override it. If you do be aware it is subtle in * places * - * hwgroup->hwif is the interface in the group currently performing - * a command. hwgroup->drive is the drive and hwgroup->handler is + * hwif is the interface in the group currently performing + * a command. hwif->cur_dev is the drive and hwif->handler is * the IRQ handler to call. As we issue a command the handlers * step through multiple states, reassigning the handler to the * next step in the process. Unlike a smart SCSI controller IDE @@ -1054,26 +1047,32 @@ static void unexpected_intr (int irq, ide_hwgroup_t *hwgroup) * * The handler eventually returns ide_stopped to indicate the * request completed. At this point we issue the next request - * on the hwgroup and the process begins again. + * on the port and the process begins again. */ - + irqreturn_t ide_intr (int irq, void *dev_id) { - unsigned long flags; - ide_hwgroup_t *hwgroup = (ide_hwgroup_t *)dev_id; - ide_hwif_t *hwif = hwgroup->hwif; + ide_hwif_t *hwif = (ide_hwif_t *)dev_id; ide_drive_t *uninitialized_var(drive); ide_handler_t *handler; + unsigned long flags; ide_startstop_t startstop; irqreturn_t irq_ret = IRQ_NONE; int plug_device = 0; - spin_lock_irqsave(&hwgroup->lock, flags); + if (hwif->host->host_flags & IDE_HFLAG_SERIALIZE) { + if (hwif != hwif->host->cur_port) + goto out_early; + } + + spin_lock_irqsave(&hwif->lock, flags); if (!ide_ack_intr(hwif)) goto out; - if ((handler = hwgroup->handler) == NULL || hwgroup->polling) { + handler = hwif->handler; + + if (handler == NULL || hwif->polling) { /* * Not expecting an interrupt from this drive. * That means this could be: @@ -1097,7 +1096,7 @@ irqreturn_t ide_intr (int irq, void *dev_id) * Probably not a shared PCI interrupt, * so we can safely try to do something about it: */ - unexpected_intr(irq, hwgroup); + unexpected_intr(irq, hwif); #ifdef CONFIG_BLK_DEV_IDEPCI } else { /* @@ -1110,16 +1109,7 @@ irqreturn_t ide_intr (int irq, void *dev_id) goto out; } - drive = hwgroup->drive; - if (!drive) { - /* - * This should NEVER happen, and there isn't much - * we could do about it here. - * - * [Note - this can occur if the drive is hot unplugged] - */ - goto out_handled; - } + drive = hwif->cur_dev; if (!drive_is_ready(drive)) /* @@ -1131,10 +1121,10 @@ irqreturn_t ide_intr (int irq, void *dev_id) */ goto out; - hwgroup->handler = NULL; - hwgroup->req_gen++; - del_timer(&hwgroup->timer); - spin_unlock(&hwgroup->lock); + hwif->handler = NULL; + hwif->req_gen++; + del_timer(&hwif->timer); + spin_unlock(&hwif->lock); if (hwif->port_ops && hwif->port_ops->clear_irq) hwif->port_ops->clear_irq(drive); @@ -1145,7 +1135,7 @@ irqreturn_t ide_intr (int irq, void *dev_id) /* service this interrupt, may set handler for next interrupt */ startstop = handler(drive); - spin_lock_irq(&hwgroup->lock); + spin_lock_irq(&hwif->lock); /* * Note that handler() may have set things up for another * interrupt to occur soon, but it cannot happen until @@ -1154,20 +1144,18 @@ irqreturn_t ide_intr (int irq, void *dev_id) * won't allow another of the same (on any CPU) until we return. */ if (startstop == ide_stopped) { - if (hwgroup->handler == NULL) { /* paranoia */ - ide_unlock_hwgroup(hwgroup); - plug_device = 1; - } else - printk(KERN_ERR "%s: %s: huh? expected NULL handler " - "on exit\n", __func__, drive->name); + BUG_ON(hwif->handler); + ide_unlock_port(hwif); + plug_device = 1; } -out_handled: irq_ret = IRQ_HANDLED; out: - spin_unlock_irqrestore(&hwgroup->lock, flags); - - if (plug_device) + spin_unlock_irqrestore(&hwif->lock, flags); +out_early: + if (plug_device) { + ide_unlock_host(hwif->host); ide_plug_device(drive); + } return irq_ret; } @@ -1189,15 +1177,13 @@ out: void ide_do_drive_cmd(ide_drive_t *drive, struct request *rq) { - ide_hwgroup_t *hwgroup = drive->hwif->hwgroup; struct request_queue *q = drive->queue; unsigned long flags; - hwgroup->rq = NULL; + drive->hwif->rq = NULL; spin_lock_irqsave(q->queue_lock, flags); __elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 0); - blk_start_queueing(q); spin_unlock_irqrestore(q->queue_lock, flags); } EXPORT_SYMBOL(ide_do_drive_cmd); diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c index ad8bd6539283..e728cfe7273f 100644 --- a/drivers/ide/ide-iops.c +++ b/drivers/ide/ide-iops.c @@ -105,15 +105,6 @@ u8 ide_read_altstatus(ide_hwif_t *hwif) } EXPORT_SYMBOL_GPL(ide_read_altstatus); -u8 ide_read_sff_dma_status(ide_hwif_t *hwif) -{ - if (hwif->host_flags & IDE_HFLAG_MMIO) - return readb((void __iomem *)(hwif->dma_base + ATA_DMA_STATUS)); - else - return inb(hwif->dma_base + ATA_DMA_STATUS); -} -EXPORT_SYMBOL_GPL(ide_read_sff_dma_status); - void ide_set_irq(ide_hwif_t *hwif, int on) { u8 ctl = ATA_DEVCTL_OBS; @@ -388,7 +379,6 @@ const struct ide_tp_ops default_tp_ops = { .exec_command = ide_exec_command, .read_status = ide_read_status, .read_altstatus = ide_read_altstatus, - .read_sff_dma_status = ide_read_sff_dma_status, .set_irq = ide_set_irq, @@ -451,7 +441,7 @@ EXPORT_SYMBOL(ide_fixstring); */ int drive_is_ready (ide_drive_t *drive) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; u8 stat = 0; if (drive->waiting_for_dma) @@ -503,7 +493,8 @@ static int __ide_wait_stat(ide_drive_t *drive, u8 good, u8 bad, unsigned long ti stat = tp_ops->read_status(hwif); if (stat & ATA_BUSY) { - local_irq_set(flags); + local_irq_save(flags); + local_irq_enable_in_hardirq(); timeout += jiffies; while ((stat = tp_ops->read_status(hwif)) & ATA_BUSY) { if (time_after(jiffies, timeout)) { @@ -822,25 +813,25 @@ int ide_config_drive_speed(ide_drive_t *drive, u8 speed) static void __ide_set_handler (ide_drive_t *drive, ide_handler_t *handler, unsigned int timeout, ide_expiry_t *expiry) { - ide_hwgroup_t *hwgroup = HWGROUP(drive); - - BUG_ON(hwgroup->handler); - hwgroup->handler = handler; - hwgroup->expiry = expiry; - hwgroup->timer.expires = jiffies + timeout; - hwgroup->req_gen_timer = hwgroup->req_gen; - add_timer(&hwgroup->timer); + ide_hwif_t *hwif = drive->hwif; + + BUG_ON(hwif->handler); + hwif->handler = handler; + hwif->expiry = expiry; + hwif->timer.expires = jiffies + timeout; + hwif->req_gen_timer = hwif->req_gen; + add_timer(&hwif->timer); } void ide_set_handler (ide_drive_t *drive, ide_handler_t *handler, unsigned int timeout, ide_expiry_t *expiry) { - ide_hwgroup_t *hwgroup = drive->hwif->hwgroup; + ide_hwif_t *hwif = drive->hwif; unsigned long flags; - spin_lock_irqsave(&hwgroup->lock, flags); + spin_lock_irqsave(&hwif->lock, flags); __ide_set_handler(drive, handler, timeout, expiry); - spin_unlock_irqrestore(&hwgroup->lock, flags); + spin_unlock_irqrestore(&hwif->lock, flags); } EXPORT_SYMBOL(ide_set_handler); @@ -863,10 +854,9 @@ void ide_execute_command(ide_drive_t *drive, u8 cmd, ide_handler_t *handler, unsigned timeout, ide_expiry_t *expiry) { ide_hwif_t *hwif = drive->hwif; - ide_hwgroup_t *hwgroup = hwif->hwgroup; unsigned long flags; - spin_lock_irqsave(&hwgroup->lock, flags); + spin_lock_irqsave(&hwif->lock, flags); __ide_set_handler(drive, handler, timeout, expiry); hwif->tp_ops->exec_command(hwif, cmd); /* @@ -876,26 +866,25 @@ void ide_execute_command(ide_drive_t *drive, u8 cmd, ide_handler_t *handler, * FIXME: we could skip this delay with care on non shared devices */ ndelay(400); - spin_unlock_irqrestore(&hwgroup->lock, flags); + spin_unlock_irqrestore(&hwif->lock, flags); } EXPORT_SYMBOL(ide_execute_command); void ide_execute_pkt_cmd(ide_drive_t *drive) { ide_hwif_t *hwif = drive->hwif; - ide_hwgroup_t *hwgroup = hwif->hwgroup; unsigned long flags; - spin_lock_irqsave(&hwgroup->lock, flags); + spin_lock_irqsave(&hwif->lock, flags); hwif->tp_ops->exec_command(hwif, ATA_CMD_PACKET); ndelay(400); - spin_unlock_irqrestore(&hwgroup->lock, flags); + spin_unlock_irqrestore(&hwif->lock, flags); } EXPORT_SYMBOL_GPL(ide_execute_pkt_cmd); static inline void ide_complete_drive_reset(ide_drive_t *drive, int err) { - struct request *rq = drive->hwif->hwgroup->rq; + struct request *rq = drive->hwif->rq; if (rq && blk_special_request(rq) && rq->cmd[0] == REQ_DRIVE_RESET) ide_end_request(drive, err ? err : 1, 0); @@ -913,7 +902,6 @@ static ide_startstop_t do_reset1 (ide_drive_t *, int); static ide_startstop_t atapi_reset_pollfunc (ide_drive_t *drive) { ide_hwif_t *hwif = drive->hwif; - ide_hwgroup_t *hwgroup = hwif->hwgroup; u8 stat; SELECT_DRIVE(drive); @@ -923,20 +911,20 @@ static ide_startstop_t atapi_reset_pollfunc (ide_drive_t *drive) if (OK_STAT(stat, 0, ATA_BUSY)) printk("%s: ATAPI reset complete\n", drive->name); else { - if (time_before(jiffies, hwgroup->poll_timeout)) { + if (time_before(jiffies, hwif->poll_timeout)) { ide_set_handler(drive, &atapi_reset_pollfunc, HZ/20, NULL); /* continue polling */ return ide_started; } /* end of polling */ - hwgroup->polling = 0; + hwif->polling = 0; printk("%s: ATAPI reset timed-out, status=0x%02x\n", drive->name, stat); /* do it the old fashioned way */ return do_reset1(drive, 1); } /* done polling */ - hwgroup->polling = 0; + hwif->polling = 0; ide_complete_drive_reset(drive, 0); return ide_stopped; } @@ -968,8 +956,7 @@ static void ide_reset_report_error(ide_hwif_t *hwif, u8 err) */ static ide_startstop_t reset_pollfunc (ide_drive_t *drive) { - ide_hwgroup_t *hwgroup = HWGROUP(drive); - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; const struct ide_port_ops *port_ops = hwif->port_ops; u8 tmp; int err = 0; @@ -986,7 +973,7 @@ static ide_startstop_t reset_pollfunc (ide_drive_t *drive) tmp = hwif->tp_ops->read_status(hwif); if (!OK_STAT(tmp, 0, ATA_BUSY)) { - if (time_before(jiffies, hwgroup->poll_timeout)) { + if (time_before(jiffies, hwif->poll_timeout)) { ide_set_handler(drive, &reset_pollfunc, HZ/20, NULL); /* continue polling */ return ide_started; @@ -1007,7 +994,7 @@ static ide_startstop_t reset_pollfunc (ide_drive_t *drive) } } out: - hwgroup->polling = 0; /* done polling */ + hwif->polling = 0; /* done polling */ ide_complete_drive_reset(drive, err); return ide_stopped; } @@ -1081,18 +1068,18 @@ static void pre_reset(ide_drive_t *drive) static ide_startstop_t do_reset1 (ide_drive_t *drive, int do_not_try_atapi) { ide_hwif_t *hwif = drive->hwif; - ide_hwgroup_t *hwgroup = hwif->hwgroup; struct ide_io_ports *io_ports = &hwif->io_ports; const struct ide_tp_ops *tp_ops = hwif->tp_ops; const struct ide_port_ops *port_ops; + ide_drive_t *tdrive; unsigned long flags, timeout; - unsigned int unit; + int i; DEFINE_WAIT(wait); - spin_lock_irqsave(&hwgroup->lock, flags); + spin_lock_irqsave(&hwif->lock, flags); /* We must not reset with running handlers */ - BUG_ON(hwgroup->handler != NULL); + BUG_ON(hwif->handler != NULL); /* For an ATAPI device, first try an ATAPI SRST. */ if (drive->media != ide_disk && !do_not_try_atapi) { @@ -1101,10 +1088,10 @@ static ide_startstop_t do_reset1 (ide_drive_t *drive, int do_not_try_atapi) udelay (20); tp_ops->exec_command(hwif, ATA_CMD_DEV_RESET); ndelay(400); - hwgroup->poll_timeout = jiffies + WAIT_WORSTCASE; - hwgroup->polling = 1; + hwif->poll_timeout = jiffies + WAIT_WORSTCASE; + hwif->polling = 1; __ide_set_handler(drive, &atapi_reset_pollfunc, HZ/20, NULL); - spin_unlock_irqrestore(&hwgroup->lock, flags); + spin_unlock_irqrestore(&hwif->lock, flags); return ide_started; } @@ -1114,9 +1101,7 @@ static ide_startstop_t do_reset1 (ide_drive_t *drive, int do_not_try_atapi) prepare_to_wait(&ide_park_wq, &wait, TASK_UNINTERRUPTIBLE); timeout = jiffies; - for (unit = 0; unit < MAX_DRIVES; unit++) { - ide_drive_t *tdrive = &hwif->drives[unit]; - + ide_port_for_each_dev(i, tdrive, hwif) { if (tdrive->dev_flags & IDE_DFLAG_PRESENT && tdrive->dev_flags & IDE_DFLAG_PARKED && time_after(tdrive->sleep, timeout)) @@ -1127,9 +1112,9 @@ static ide_startstop_t do_reset1 (ide_drive_t *drive, int do_not_try_atapi) if (time_before_eq(timeout, now)) break; - spin_unlock_irqrestore(&hwgroup->lock, flags); + spin_unlock_irqrestore(&hwif->lock, flags); timeout = schedule_timeout_uninterruptible(timeout - now); - spin_lock_irqsave(&hwgroup->lock, flags); + spin_lock_irqsave(&hwif->lock, flags); } while (timeout); finish_wait(&ide_park_wq, &wait); @@ -1137,11 +1122,11 @@ static ide_startstop_t do_reset1 (ide_drive_t *drive, int do_not_try_atapi) * First, reset any device state data we were maintaining * for any of the drives on this interface. */ - for (unit = 0; unit < MAX_DRIVES; ++unit) - pre_reset(&hwif->drives[unit]); + ide_port_for_each_dev(i, tdrive, hwif) + pre_reset(tdrive); if (io_ports->ctl_addr == 0) { - spin_unlock_irqrestore(&hwgroup->lock, flags); + spin_unlock_irqrestore(&hwif->lock, flags); ide_complete_drive_reset(drive, -ENXIO); return ide_stopped; } @@ -1164,8 +1149,8 @@ static ide_startstop_t do_reset1 (ide_drive_t *drive, int do_not_try_atapi) tp_ops->set_irq(hwif, drive->quirk_list == 2); /* more than enough time */ udelay(10); - hwgroup->poll_timeout = jiffies + WAIT_WORSTCASE; - hwgroup->polling = 1; + hwif->poll_timeout = jiffies + WAIT_WORSTCASE; + hwif->polling = 1; __ide_set_handler(drive, &reset_pollfunc, HZ/20, NULL); /* @@ -1177,7 +1162,7 @@ static ide_startstop_t do_reset1 (ide_drive_t *drive, int do_not_try_atapi) if (port_ops && port_ops->resetproc) port_ops->resetproc(drive); - spin_unlock_irqrestore(&hwgroup->lock, flags); + spin_unlock_irqrestore(&hwif->lock, flags); return ide_started; } @@ -1221,6 +1206,3 @@ int ide_wait_not_busy(ide_hwif_t *hwif, unsigned long timeout) } return -EBUSY; } - -EXPORT_SYMBOL_GPL(ide_wait_not_busy); - diff --git a/drivers/ide/ide-lib.c b/drivers/ide/ide-lib.c index 9f6e33d8a8b2..09526a0de734 100644 --- a/drivers/ide/ide-lib.c +++ b/drivers/ide/ide-lib.c @@ -273,7 +273,7 @@ int ide_set_xfer_rate(ide_drive_t *drive, u8 rate) static void ide_dump_opcode(ide_drive_t *drive) { - struct request *rq = drive->hwif->hwgroup->rq; + struct request *rq = drive->hwif->rq; ide_task_t *task = NULL; if (!rq) @@ -346,10 +346,13 @@ static void ide_dump_ata_error(ide_drive_t *drive, u8 err) printk(KERN_CONT "}"); if ((err & (ATA_BBK | ATA_ABORTED)) == ATA_BBK || (err & (ATA_UNC | ATA_IDNF | ATA_AMNF))) { + struct request *rq = drive->hwif->rq; + ide_dump_sector(drive); - if (HWGROUP(drive) && HWGROUP(drive)->rq) + + if (rq) printk(KERN_CONT ", sector=%llu", - (unsigned long long)HWGROUP(drive)->rq->sector); + (unsigned long long)rq->sector); } printk(KERN_CONT "\n"); } diff --git a/drivers/ide/ide-park.c b/drivers/ide/ide-park.c index 678454ac2483..c875a957596c 100644 --- a/drivers/ide/ide-park.c +++ b/drivers/ide/ide-park.c @@ -7,22 +7,22 @@ DECLARE_WAIT_QUEUE_HEAD(ide_park_wq); static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout) { - ide_hwgroup_t *hwgroup = drive->hwif->hwgroup; + ide_hwif_t *hwif = drive->hwif; struct request_queue *q = drive->queue; struct request *rq; int rc; timeout += jiffies; - spin_lock_irq(&hwgroup->lock); + spin_lock_irq(&hwif->lock); if (drive->dev_flags & IDE_DFLAG_PARKED) { int reset_timer = time_before(timeout, drive->sleep); int start_queue = 0; drive->sleep = timeout; wake_up_all(&ide_park_wq); - if (reset_timer && del_timer(&hwgroup->timer)) + if (reset_timer && del_timer(&hwif->timer)) start_queue = 1; - spin_unlock_irq(&hwgroup->lock); + spin_unlock_irq(&hwif->lock); if (start_queue) { spin_lock_irq(q->queue_lock); @@ -31,7 +31,7 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout) } return; } - spin_unlock_irq(&hwgroup->lock); + spin_unlock_irq(&hwif->lock); rq = blk_get_request(q, READ, __GFP_WAIT); rq->cmd[0] = REQ_PARK_HEADS; @@ -64,21 +64,21 @@ ssize_t ide_park_show(struct device *dev, struct device_attribute *attr, char *buf) { ide_drive_t *drive = to_ide_device(dev); - ide_hwgroup_t *hwgroup = drive->hwif->hwgroup; + ide_hwif_t *hwif = drive->hwif; unsigned long now; unsigned int msecs; if (drive->dev_flags & IDE_DFLAG_NO_UNLOAD) return -EOPNOTSUPP; - spin_lock_irq(&hwgroup->lock); + spin_lock_irq(&hwif->lock); now = jiffies; if (drive->dev_flags & IDE_DFLAG_PARKED && time_after(drive->sleep, now)) msecs = jiffies_to_msecs(drive->sleep - now); else msecs = 0; - spin_unlock_irq(&hwgroup->lock); + spin_unlock_irq(&hwif->lock); return snprintf(buf, 20, "%u\n", msecs); } diff --git a/drivers/ide/ide-pm.c b/drivers/ide/ide-pm.c index 8282c6086e6a..4b3bf6a06b70 100644 --- a/drivers/ide/ide-pm.c +++ b/drivers/ide/ide-pm.c @@ -5,7 +5,7 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg) { ide_drive_t *drive = dev->driver_data, *pair = ide_get_pair_dev(drive); - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; struct request *rq; struct request_pm_state rqpm; ide_task_t args; @@ -39,7 +39,7 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg) int generic_ide_resume(struct device *dev) { ide_drive_t *drive = dev->driver_data, *pair = ide_get_pair_dev(drive); - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; struct request *rq; struct request_pm_state rqpm; ide_task_t args; @@ -67,7 +67,7 @@ int generic_ide_resume(struct device *dev) blk_put_request(rq); if (err == 0 && dev->driver) { - ide_driver_t *drv = to_ide_driver(dev->driver); + struct ide_driver *drv = to_ide_driver(dev->driver); if (drv->resume) drv->resume(drive); @@ -194,7 +194,7 @@ void ide_complete_pm_request(ide_drive_t *drive, struct request *rq) } spin_unlock_irqrestore(q->queue_lock, flags); - drive->hwif->hwgroup->rq = NULL; + drive->hwif->rq = NULL; if (blk_end_request(rq, 0, 0)) BUG(); diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index c5adb7b9c5b5..0ccbb4459fb9 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -189,7 +189,7 @@ static void ide_classify_atapi_dev(ide_drive_t *drive) static void do_identify(ide_drive_t *drive, u8 cmd) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; u16 *id = drive->id; char *m = (char *)&id[ATA_ID_PROD]; unsigned long flags; @@ -266,7 +266,7 @@ err_misc: static int actual_try_to_identify (ide_drive_t *drive, u8 cmd) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; struct ide_io_ports *io_ports = &hwif->io_ports; const struct ide_tp_ops *tp_ops = hwif->tp_ops; int use_altstatus = 0, rc; @@ -341,7 +341,7 @@ static int actual_try_to_identify (ide_drive_t *drive, u8 cmd) static int try_to_identify (ide_drive_t *drive, u8 cmd) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; const struct ide_tp_ops *tp_ops = hwif->tp_ops; int retval; int autoprobe = 0; @@ -438,7 +438,7 @@ static u8 ide_read_device(ide_drive_t *drive) static int do_probe (ide_drive_t *drive, u8 cmd) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; const struct ide_tp_ops *tp_ops = hwif->tp_ops; int rc; u8 present = !!(drive->dev_flags & IDE_DFLAG_PRESENT), stat; @@ -463,7 +463,7 @@ static int do_probe (ide_drive_t *drive, u8 cmd) if (ide_read_device(drive) != drive->select && present == 0) { if (drive->dn & 1) { /* exit with drive0 selected */ - SELECT_DRIVE(&hwif->drives[0]); + SELECT_DRIVE(hwif->devices[0]); /* allow ATA_BUSY to assert & clear */ msleep(50); } @@ -509,7 +509,7 @@ static int do_probe (ide_drive_t *drive, u8 cmd) } if (drive->dn & 1) { /* exit with drive0 selected */ - SELECT_DRIVE(&hwif->drives[0]); + SELECT_DRIVE(hwif->devices[0]); msleep(50); /* ensure drive irq is clear */ (void)tp_ops->read_status(hwif); @@ -522,7 +522,7 @@ static int do_probe (ide_drive_t *drive, u8 cmd) */ static void enable_nest (ide_drive_t *drive) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; const struct ide_tp_ops *tp_ops = hwif->tp_ops; u8 stat; @@ -697,7 +697,8 @@ out: static int ide_port_wait_ready(ide_hwif_t *hwif) { - int unit, rc; + ide_drive_t *drive; + int i, rc; printk(KERN_DEBUG "Probing IDE interface %s...\n", hwif->name); @@ -714,9 +715,7 @@ static int ide_port_wait_ready(ide_hwif_t *hwif) return rc; /* Now make sure both master & slave are ready */ - for (unit = 0; unit < MAX_DRIVES; unit++) { - ide_drive_t *drive = &hwif->drives[unit]; - + ide_port_for_each_dev(i, drive, hwif) { /* Ignore disks that we will not probe for later. */ if ((drive->dev_flags & IDE_DFLAG_NOPROBE) == 0 || (drive->dev_flags & IDE_DFLAG_PRESENT)) { @@ -732,8 +731,8 @@ static int ide_port_wait_ready(ide_hwif_t *hwif) } out: /* Exit function with master reselected (let's be sane) */ - if (unit) - SELECT_DRIVE(&hwif->drives[0]); + if (i) + SELECT_DRIVE(hwif->devices[0]); return rc; } @@ -749,7 +748,7 @@ out: void ide_undecoded_slave(ide_drive_t *dev1) { - ide_drive_t *dev0 = &dev1->hwif->drives[0]; + ide_drive_t *dev0 = dev1->hwif->devices[0]; if ((dev1->dn & 1) == 0 || (dev0->dev_flags & IDE_DFLAG_PRESENT) == 0) return; @@ -778,14 +777,15 @@ EXPORT_SYMBOL_GPL(ide_undecoded_slave); static int ide_probe_port(ide_hwif_t *hwif) { + ide_drive_t *drive; unsigned long flags; unsigned int irqd; - int unit, rc = -ENODEV; + int i, rc = -ENODEV; BUG_ON(hwif->present); - if ((hwif->drives[0].dev_flags & IDE_DFLAG_NOPROBE) && - (hwif->drives[1].dev_flags & IDE_DFLAG_NOPROBE)) + if ((hwif->devices[0]->dev_flags & IDE_DFLAG_NOPROBE) && + (hwif->devices[1]->dev_flags & IDE_DFLAG_NOPROBE)) return -EACCES; /* @@ -796,7 +796,8 @@ static int ide_probe_port(ide_hwif_t *hwif) if (irqd) disable_irq(hwif->irq); - local_irq_set(flags); + local_irq_save(flags); + local_irq_enable_in_hardirq(); if (ide_port_wait_ready(hwif) == -EBUSY) printk(KERN_DEBUG "%s: Wait for ready failed before probe !\n", hwif->name); @@ -805,9 +806,7 @@ static int ide_probe_port(ide_hwif_t *hwif) * Second drive should only exist if first drive was found, * but a lot of cdrom drives are configured as single slaves. */ - for (unit = 0; unit < MAX_DRIVES; ++unit) { - ide_drive_t *drive = &hwif->drives[unit]; - + ide_port_for_each_dev(i, drive, hwif) { (void) probe_for_drive(drive); if (drive->dev_flags & IDE_DFLAG_PRESENT) rc = 0; @@ -828,20 +827,17 @@ static int ide_probe_port(ide_hwif_t *hwif) static void ide_port_tune_devices(ide_hwif_t *hwif) { const struct ide_port_ops *port_ops = hwif->port_ops; - int unit; - - for (unit = 0; unit < MAX_DRIVES; unit++) { - ide_drive_t *drive = &hwif->drives[unit]; + ide_drive_t *drive; + int i; + ide_port_for_each_dev(i, drive, hwif) { if (drive->dev_flags & IDE_DFLAG_PRESENT) { if (port_ops && port_ops->quirkproc) port_ops->quirkproc(drive); } } - for (unit = 0; unit < MAX_DRIVES; ++unit) { - ide_drive_t *drive = &hwif->drives[unit]; - + ide_port_for_each_dev(i, drive, hwif) { if (drive->dev_flags & IDE_DFLAG_PRESENT) { ide_set_max_pio(drive); @@ -852,11 +848,8 @@ static void ide_port_tune_devices(ide_hwif_t *hwif) } } - for (unit = 0; unit < MAX_DRIVES; ++unit) { - ide_drive_t *drive = &hwif->drives[unit]; - - if ((hwif->host_flags & IDE_HFLAG_NO_IO_32BIT) || - drive->id[ATA_ID_DWORD_IO]) + ide_port_for_each_dev(i, drive, hwif) { + if (hwif->host_flags & IDE_HFLAG_NO_IO_32BIT) drive->dev_flags |= IDE_DFLAG_NO_IO_32BIT; else drive->dev_flags &= ~IDE_DFLAG_NO_IO_32BIT; @@ -869,7 +862,7 @@ static void ide_port_tune_devices(ide_hwif_t *hwif) static int ide_init_queue(ide_drive_t *drive) { struct request_queue *q; - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; int max_sectors = 256; int max_sg_entries = PRD_ENTRIES; @@ -918,36 +911,19 @@ static int ide_init_queue(ide_drive_t *drive) return 0; } -static void ide_add_drive_to_hwgroup(ide_drive_t *drive) -{ - ide_hwgroup_t *hwgroup = drive->hwif->hwgroup; - - spin_lock_irq(&hwgroup->lock); - if (!hwgroup->drive) { - /* first drive for hwgroup. */ - drive->next = drive; - hwgroup->drive = drive; - hwgroup->hwif = HWIF(hwgroup->drive); - } else { - drive->next = hwgroup->drive->next; - hwgroup->drive->next = drive; - } - spin_unlock_irq(&hwgroup->lock); -} +static DEFINE_MUTEX(ide_cfg_mtx); /* * For any present drive: * - allocate the block device queue - * - link drive into the hwgroup */ static int ide_port_setup_devices(ide_hwif_t *hwif) { + ide_drive_t *drive; int i, j = 0; mutex_lock(&ide_cfg_mtx); - for (i = 0; i < MAX_DRIVES; i++) { - ide_drive_t *drive = &hwif->drives[i]; - + ide_port_for_each_dev(i, drive, hwif) { if ((drive->dev_flags & IDE_DFLAG_PRESENT) == 0) continue; @@ -961,139 +937,39 @@ static int ide_port_setup_devices(ide_hwif_t *hwif) } j++; - - ide_add_drive_to_hwgroup(drive); } mutex_unlock(&ide_cfg_mtx); return j; } -static ide_hwif_t *ide_ports[MAX_HWIFS]; - -void ide_remove_port_from_hwgroup(ide_hwif_t *hwif) -{ - ide_hwgroup_t *hwgroup = hwif->hwgroup; - - ide_ports[hwif->index] = NULL; - - spin_lock_irq(&hwgroup->lock); - /* - * Remove us from the hwgroup, and free - * the hwgroup if we were the only member - */ - if (hwif->next == hwif) { - BUG_ON(hwgroup->hwif != hwif); - kfree(hwgroup); - } else { - /* There is another interface in hwgroup. - * Unlink us, and set hwgroup->drive and ->hwif to - * something sane. - */ - ide_hwif_t *g = hwgroup->hwif; - - while (g->next != hwif) - g = g->next; - g->next = hwif->next; - if (hwgroup->hwif == hwif) { - /* Chose a random hwif for hwgroup->hwif. - * It's guaranteed that there are no drives - * left in the hwgroup. - */ - BUG_ON(hwgroup->drive != NULL); - hwgroup->hwif = g; - } - BUG_ON(hwgroup->hwif == hwif); - } - spin_unlock_irq(&hwgroup->lock); -} - /* - * This routine sets up the irq for an ide interface, and creates a new - * hwgroup for the irq/hwif if none was previously assigned. - * - * Much of the code is for correctly detecting/handling irq sharing - * and irq serialization situations. This is somewhat complex because - * it handles static as well as dynamic (PCMCIA) IDE interfaces. + * This routine sets up the IRQ for an IDE interface. */ static int init_irq (ide_hwif_t *hwif) { struct ide_io_ports *io_ports = &hwif->io_ports; - unsigned int index; - ide_hwgroup_t *hwgroup; - ide_hwif_t *match = NULL; + int sa = 0; mutex_lock(&ide_cfg_mtx); - hwif->hwgroup = NULL; + spin_lock_init(&hwif->lock); - for (index = 0; index < MAX_HWIFS; index++) { - ide_hwif_t *h = ide_ports[index]; + init_timer(&hwif->timer); + hwif->timer.function = &ide_timer_expiry; + hwif->timer.data = (unsigned long)hwif; - if (h && h->hwgroup) { /* scan only initialized ports */ - if (hwif->host->host_flags & IDE_HFLAG_SERIALIZE) { - if (hwif->host == h->host) - match = h; - } - } - } - - /* - * If we are still without a hwgroup, then form a new one - */ - if (match) { - hwgroup = match->hwgroup; - hwif->hwgroup = hwgroup; - /* - * Link us into the hwgroup. - * This must be done early, do ensure that unexpected_intr - * can find the hwif and prevent irq storms. - * No drives are attached to the new hwif, choose_drive - * can't do anything stupid (yet). - * Add ourself as the 2nd entry to the hwgroup->hwif - * linked list, the first entry is the hwif that owns - * hwgroup->handler - do not change that. - */ - spin_lock_irq(&hwgroup->lock); - hwif->next = hwgroup->hwif->next; - hwgroup->hwif->next = hwif; - BUG_ON(hwif->next == hwif); - spin_unlock_irq(&hwgroup->lock); - } else { - hwgroup = kmalloc_node(sizeof(*hwgroup), GFP_KERNEL|__GFP_ZERO, - hwif_to_node(hwif)); - if (hwgroup == NULL) - goto out_up; - - spin_lock_init(&hwgroup->lock); - - hwif->hwgroup = hwgroup; - hwgroup->hwif = hwif->next = hwif; - - init_timer(&hwgroup->timer); - hwgroup->timer.function = &ide_timer_expiry; - hwgroup->timer.data = (unsigned long) hwgroup; - } - - ide_ports[hwif->index] = hwif; - - /* - * Allocate the irq, if not already obtained for another hwif - */ - if (!match || match->irq != hwif->irq) { - int sa = 0; #if defined(__mc68000__) - sa = IRQF_SHARED; + sa = IRQF_SHARED; #endif /* __mc68000__ */ - if (hwif->chipset == ide_pci) - sa = IRQF_SHARED; + if (hwif->chipset == ide_pci) + sa = IRQF_SHARED; - if (io_ports->ctl_addr) - hwif->tp_ops->set_irq(hwif, 1); + if (io_ports->ctl_addr) + hwif->tp_ops->set_irq(hwif, 1); - if (request_irq(hwif->irq,&ide_intr,sa,hwif->name,hwgroup)) - goto out_unlink; - } + if (request_irq(hwif->irq, &ide_intr, sa, hwif->name, hwif)) + goto out_up; if (!hwif->rqsize) { if ((hwif->host_flags & IDE_HFLAG_NO_LBA48) || @@ -1111,14 +987,12 @@ static int init_irq (ide_hwif_t *hwif) printk(KERN_INFO "%s at 0x%08lx on irq %d", hwif->name, io_ports->data_addr, hwif->irq); #endif /* __mc68000__ */ - if (match) - printk(KERN_CONT " (serialized with %s)", match->name); + if (hwif->host->host_flags & IDE_HFLAG_SERIALIZE) + printk(KERN_CONT " (serialized)"); printk(KERN_CONT "\n"); mutex_unlock(&ide_cfg_mtx); return 0; -out_unlink: - ide_remove_port_from_hwgroup(hwif); out_up: mutex_unlock(&ide_cfg_mtx); return 1; @@ -1134,7 +1008,7 @@ static struct kobject *ata_probe(dev_t dev, int *part, void *data) { ide_hwif_t *hwif = data; int unit = *part >> PARTN_BITS; - ide_drive_t *drive = &hwif->drives[unit]; + ide_drive_t *drive = hwif->devices[unit]; if ((drive->dev_flags & IDE_DFLAG_PRESENT) == 0) return NULL; @@ -1196,47 +1070,23 @@ void ide_init_disk(struct gendisk *disk, ide_drive_t *drive) EXPORT_SYMBOL_GPL(ide_init_disk); -static void ide_remove_drive_from_hwgroup(ide_drive_t *drive) -{ - ide_hwgroup_t *hwgroup = drive->hwif->hwgroup; - - if (drive == drive->next) { - /* special case: last drive from hwgroup. */ - BUG_ON(hwgroup->drive != drive); - hwgroup->drive = NULL; - } else { - ide_drive_t *walk; - - walk = hwgroup->drive; - while (walk->next != drive) - walk = walk->next; - walk->next = drive->next; - if (hwgroup->drive == drive) { - hwgroup->drive = drive->next; - hwgroup->hwif = hwgroup->drive->hwif; - } - } - BUG_ON(hwgroup->drive == drive); -} - static void drive_release_dev (struct device *dev) { ide_drive_t *drive = container_of(dev, ide_drive_t, gendev); - ide_hwgroup_t *hwgroup = drive->hwif->hwgroup; + ide_hwif_t *hwif = drive->hwif; ide_proc_unregister_device(drive); - spin_lock_irq(&hwgroup->lock); - ide_remove_drive_from_hwgroup(drive); + spin_lock_irq(&hwif->lock); kfree(drive->id); drive->id = NULL; drive->dev_flags &= ~IDE_DFLAG_PRESENT; /* Messed up locking ... */ - spin_unlock_irq(&hwgroup->lock); + spin_unlock_irq(&hwif->lock); blk_cleanup_queue(drive->queue); - spin_lock_irq(&hwgroup->lock); + spin_lock_irq(&hwif->lock); drive->queue = NULL; - spin_unlock_irq(&hwgroup->lock); + spin_unlock_irq(&hwif->lock); complete(&drive->gendev_rel_comp); } @@ -1302,10 +1152,10 @@ out: static void hwif_register_devices(ide_hwif_t *hwif) { + ide_drive_t *drive; unsigned int i; - for (i = 0; i < MAX_DRIVES; i++) { - ide_drive_t *drive = &hwif->drives[i]; + ide_port_for_each_dev(i, drive, hwif) { struct device *dev = &drive->gendev; int ret; @@ -1328,11 +1178,10 @@ static void hwif_register_devices(ide_hwif_t *hwif) static void ide_port_init_devices(ide_hwif_t *hwif) { const struct ide_port_ops *port_ops = hwif->port_ops; + ide_drive_t *drive; int i; - for (i = 0; i < MAX_DRIVES; i++) { - ide_drive_t *drive = &hwif->drives[i]; - + ide_port_for_each_dev(i, drive, hwif) { drive->dn = i + hwif->channel * 2; if (hwif->host_flags & IDE_HFLAG_IO_32BIT) @@ -1380,6 +1229,8 @@ static void ide_init_port(ide_hwif_t *hwif, unsigned int port, if ((d->host_flags & IDE_HFLAG_NO_DMA) == 0) { int rc; + hwif->dma_ops = d->dma_ops; + if (d->init_dma) rc = d->init_dma(hwif, d); else @@ -1387,12 +1238,13 @@ static void ide_init_port(ide_hwif_t *hwif, unsigned int port, if (rc < 0) { printk(KERN_INFO "%s: DMA disabled\n", hwif->name); + + hwif->dma_ops = NULL; hwif->dma_base = 0; hwif->swdma_mask = 0; hwif->mwdma_mask = 0; hwif->ultra_mask = 0; - } else if (d->dma_ops) - hwif->dma_ops = d->dma_ops; + } } if ((d->host_flags & IDE_HFLAG_SERIALIZE) || @@ -1417,6 +1269,66 @@ static void ide_port_cable_detect(ide_hwif_t *hwif) } } +static const u8 ide_hwif_to_major[] = + { IDE0_MAJOR, IDE1_MAJOR, IDE2_MAJOR, IDE3_MAJOR, IDE4_MAJOR, + IDE5_MAJOR, IDE6_MAJOR, IDE7_MAJOR, IDE8_MAJOR, IDE9_MAJOR }; + +static void ide_port_init_devices_data(ide_hwif_t *hwif) +{ + ide_drive_t *drive; + int i; + + ide_port_for_each_dev(i, drive, hwif) { + u8 j = (hwif->index * MAX_DRIVES) + i; + + memset(drive, 0, sizeof(*drive)); + + drive->media = ide_disk; + drive->select = (i << 4) | ATA_DEVICE_OBS; + drive->hwif = hwif; + drive->ready_stat = ATA_DRDY; + drive->bad_wstat = BAD_W_STAT; + drive->special.b.recalibrate = 1; + drive->special.b.set_geometry = 1; + drive->name[0] = 'h'; + drive->name[1] = 'd'; + drive->name[2] = 'a' + j; + drive->max_failures = IDE_DEFAULT_MAX_FAILURES; + + INIT_LIST_HEAD(&drive->list); + init_completion(&drive->gendev_rel_comp); + } +} + +static void ide_init_port_data(ide_hwif_t *hwif, unsigned int index) +{ + /* fill in any non-zero initial values */ + hwif->index = index; + hwif->major = ide_hwif_to_major[index]; + + hwif->name[0] = 'i'; + hwif->name[1] = 'd'; + hwif->name[2] = 'e'; + hwif->name[3] = '0' + index; + + init_completion(&hwif->gendev_rel_comp); + + hwif->tp_ops = &default_tp_ops; + + ide_port_init_devices_data(hwif); +} + +static void ide_init_port_hw(ide_hwif_t *hwif, hw_regs_t *hw) +{ + memcpy(&hwif->io_ports, &hw->io_ports, sizeof(hwif->io_ports)); + hwif->irq = hw->irq; + hwif->chipset = hw->chipset; + hwif->dev = hw->dev; + hwif->gendev.parent = hw->parent ? hw->parent : hw->dev; + hwif->ack_intr = hw->ack_intr; + hwif->config_data = hw->config; +} + static unsigned int ide_indexes; /** @@ -1466,12 +1378,43 @@ static void ide_free_port_slot(int idx) mutex_unlock(&ide_cfg_mtx); } +static void ide_port_free_devices(ide_hwif_t *hwif) +{ + ide_drive_t *drive; + int i; + + ide_port_for_each_dev(i, drive, hwif) + kfree(drive); +} + +static int ide_port_alloc_devices(ide_hwif_t *hwif, int node) +{ + int i; + + for (i = 0; i < MAX_DRIVES; i++) { + ide_drive_t *drive; + + drive = kzalloc_node(sizeof(*drive), GFP_KERNEL, node); + if (drive == NULL) + goto out_nomem; + + hwif->devices[i] = drive; + } + return 0; + +out_nomem: + ide_port_free_devices(hwif); + return -ENOMEM; +} + struct ide_host *ide_host_alloc(const struct ide_port_info *d, hw_regs_t **hws) { struct ide_host *host; + struct device *dev = hws[0] ? hws[0]->dev : NULL; + int node = dev ? dev_to_node(dev) : -1; int i; - host = kzalloc(sizeof(*host), GFP_KERNEL); + host = kzalloc_node(sizeof(*host), GFP_KERNEL, node); if (host == NULL) return NULL; @@ -1482,10 +1425,15 @@ struct ide_host *ide_host_alloc(const struct ide_port_info *d, hw_regs_t **hws) if (hws[i] == NULL) continue; - hwif = kzalloc(sizeof(*hwif), GFP_KERNEL); + hwif = kzalloc_node(sizeof(*hwif), GFP_KERNEL, node); if (hwif == NULL) continue; + if (ide_port_alloc_devices(hwif, node) < 0) { + kfree(hwif); + continue; + } + idx = ide_find_port_slot(d); if (idx < 0) { printk(KERN_ERR "%s: no free slot for interface\n", @@ -1507,8 +1455,7 @@ struct ide_host *ide_host_alloc(const struct ide_port_info *d, hw_regs_t **hws) return NULL; } - if (hws[0]) - host->dev[0] = hws[0]->dev; + host->dev[0] = dev; if (d) { host->init_chipset = d->init_chipset; @@ -1525,9 +1472,7 @@ int ide_host_register(struct ide_host *host, const struct ide_port_info *d, ide_hwif_t *hwif, *mate = NULL; int i, j = 0; - for (i = 0; i < MAX_HOST_PORTS; i++) { - hwif = host->ports[i]; - + ide_host_for_each_port(i, hwif, host) { if (hwif == NULL) { mate = NULL; continue; @@ -1553,9 +1498,7 @@ int ide_host_register(struct ide_host *host, const struct ide_port_info *d, ide_port_init_devices(hwif); } - for (i = 0; i < MAX_HOST_PORTS; i++) { - hwif = host->ports[i]; - + ide_host_for_each_port(i, hwif, host) { if (hwif == NULL) continue; @@ -1570,9 +1513,7 @@ int ide_host_register(struct ide_host *host, const struct ide_port_info *d, ide_port_tune_devices(hwif); } - for (i = 0; i < MAX_HOST_PORTS; i++) { - hwif = host->ports[i]; - + ide_host_for_each_port(i, hwif, host) { if (hwif == NULL) continue; @@ -1597,9 +1538,7 @@ int ide_host_register(struct ide_host *host, const struct ide_port_info *d, ide_acpi_port_init_devices(hwif); } - for (i = 0; i < MAX_HOST_PORTS; i++) { - hwif = host->ports[i]; - + ide_host_for_each_port(i, hwif, host) { if (hwif == NULL) continue; @@ -1607,9 +1546,7 @@ int ide_host_register(struct ide_host *host, const struct ide_port_info *d, hwif_register_devices(hwif); } - for (i = 0; i < MAX_HOST_PORTS; i++) { - hwif = host->ports[i]; - + ide_host_for_each_port(i, hwif, host) { if (hwif == NULL) continue; @@ -1647,17 +1584,85 @@ int ide_host_add(const struct ide_port_info *d, hw_regs_t **hws, } EXPORT_SYMBOL_GPL(ide_host_add); +static void __ide_port_unregister_devices(ide_hwif_t *hwif) +{ + ide_drive_t *drive; + int i; + + ide_port_for_each_dev(i, drive, hwif) { + if (drive->dev_flags & IDE_DFLAG_PRESENT) { + device_unregister(&drive->gendev); + wait_for_completion(&drive->gendev_rel_comp); + } + } +} + +void ide_port_unregister_devices(ide_hwif_t *hwif) +{ + mutex_lock(&ide_cfg_mtx); + __ide_port_unregister_devices(hwif); + hwif->present = 0; + ide_port_init_devices_data(hwif); + mutex_unlock(&ide_cfg_mtx); +} +EXPORT_SYMBOL_GPL(ide_port_unregister_devices); + +/** + * ide_unregister - free an IDE interface + * @hwif: IDE interface + * + * Perform the final unregister of an IDE interface. + * + * Locking: + * The caller must not hold the IDE locks. + * + * It is up to the caller to be sure there is no pending I/O here, + * and that the interface will not be reopened (present/vanishing + * locking isn't yet done BTW). + */ + +static void ide_unregister(ide_hwif_t *hwif) +{ + BUG_ON(in_interrupt()); + BUG_ON(irqs_disabled()); + + mutex_lock(&ide_cfg_mtx); + + if (hwif->present) { + __ide_port_unregister_devices(hwif); + hwif->present = 0; + } + + ide_proc_unregister_port(hwif); + + free_irq(hwif->irq, hwif); + + device_unregister(hwif->portdev); + device_unregister(&hwif->gendev); + wait_for_completion(&hwif->gendev_rel_comp); + + /* + * Remove us from the kernel's knowledge + */ + blk_unregister_region(MKDEV(hwif->major, 0), MAX_DRIVES<<PARTN_BITS); + kfree(hwif->sg_table); + unregister_blkdev(hwif->major, hwif->name); + + ide_release_dma_engine(hwif); + + mutex_unlock(&ide_cfg_mtx); +} + void ide_host_free(struct ide_host *host) { ide_hwif_t *hwif; int i; - for (i = 0; i < MAX_HOST_PORTS; i++) { - hwif = host->ports[i]; - + ide_host_for_each_port(i, hwif, host) { if (hwif == NULL) continue; + ide_port_free_devices(hwif); ide_free_port_slot(hwif->index); kfree(hwif); } @@ -1668,11 +1673,12 @@ EXPORT_SYMBOL_GPL(ide_host_free); void ide_host_remove(struct ide_host *host) { + ide_hwif_t *hwif; int i; - for (i = 0; i < MAX_HOST_PORTS; i++) { - if (host->ports[i]) - ide_unregister(host->ports[i]); + ide_host_for_each_port(i, hwif, host) { + if (hwif) + ide_unregister(hwif); } ide_host_free(host); @@ -1691,8 +1697,8 @@ void ide_port_scan(ide_hwif_t *hwif) hwif->present = 1; ide_port_tune_devices(hwif); - ide_acpi_port_init_devices(hwif); ide_port_setup_devices(hwif); + ide_acpi_port_init_devices(hwif); hwif_register_devices(hwif); ide_proc_port_register_devices(hwif); } diff --git a/drivers/ide/ide-proc.c b/drivers/ide/ide-proc.c index a14e2938e4f3..1d8978b3314a 100644 --- a/drivers/ide/ide-proc.c +++ b/drivers/ide/ide-proc.c @@ -439,13 +439,13 @@ static int proc_ide_read_dmodel static int proc_ide_read_driver (char *page, char **start, off_t off, int count, int *eof, void *data) { - ide_drive_t *drive = (ide_drive_t *) data; - struct device *dev = &drive->gendev; - ide_driver_t *ide_drv; - int len; + ide_drive_t *drive = (ide_drive_t *)data; + struct device *dev = &drive->gendev; + struct ide_driver *ide_drv; + int len; if (dev->driver) { - ide_drv = container_of(dev->driver, ide_driver_t, gen_driver); + ide_drv = to_ide_driver(dev->driver); len = sprintf(page, "%s version %s\n", dev->driver->name, ide_drv->version); } else @@ -555,7 +555,7 @@ static void ide_remove_proc_entries(struct proc_dir_entry *dir, ide_proc_entry_t } } -void ide_proc_register_driver(ide_drive_t *drive, ide_driver_t *driver) +void ide_proc_register_driver(ide_drive_t *drive, struct ide_driver *driver) { mutex_lock(&ide_setting_mtx); drive->settings = driver->proc_devsets(drive); @@ -577,7 +577,7 @@ EXPORT_SYMBOL(ide_proc_register_driver); * Takes ide_setting_mtx. */ -void ide_proc_unregister_driver(ide_drive_t *drive, ide_driver_t *driver) +void ide_proc_unregister_driver(ide_drive_t *drive, struct ide_driver *driver) { ide_remove_proc_entries(drive->proc, driver->proc_entries(drive)); @@ -593,14 +593,13 @@ EXPORT_SYMBOL(ide_proc_unregister_driver); void ide_proc_port_register_devices(ide_hwif_t *hwif) { - int d; struct proc_dir_entry *ent; struct proc_dir_entry *parent = hwif->proc; + ide_drive_t *drive; char name[64]; + int i; - for (d = 0; d < MAX_DRIVES; d++) { - ide_drive_t *drive = &hwif->drives[d]; - + ide_port_for_each_dev(i, drive, hwif) { if ((drive->dev_flags & IDE_DFLAG_PRESENT) == 0 || drive->proc) continue; @@ -653,7 +652,7 @@ void ide_proc_unregister_port(ide_hwif_t *hwif) static int proc_print_driver(struct device_driver *drv, void *data) { - ide_driver_t *ide_drv = container_of(drv, ide_driver_t, gen_driver); + struct ide_driver *ide_drv = to_ide_driver(drv); struct seq_file *s = data; seq_printf(s, "%s version %s\n", drv->name, ide_drv->version); diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index 5d2aa22cd6e4..d7ecd3c79757 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -166,10 +166,10 @@ struct idetape_bh { * to an interrupt or a timer event is stored in the struct defined below. */ typedef struct ide_tape_obj { - ide_drive_t *drive; - ide_driver_t *driver; - struct gendisk *disk; - struct kref kref; + ide_drive_t *drive; + struct ide_driver *driver; + struct gendisk *disk; + struct kref kref; /* * failed_pc points to the last failed packet command, or contains @@ -479,7 +479,7 @@ static void ide_tape_kfree_buffer(idetape_tape_t *tape) static int idetape_end_request(ide_drive_t *drive, int uptodate, int nr_sects) { - struct request *rq = HWGROUP(drive)->rq; + struct request *rq = drive->hwif->rq; idetape_tape_t *tape = drive->driver_data; unsigned long flags; int error; @@ -531,7 +531,7 @@ static void ide_tape_callback(ide_drive_t *drive, int dsc) printk(KERN_ERR "ide-tape: Error in REQUEST SENSE " "itself - Aborting request!\n"); } else if (pc->c[0] == READ_6 || pc->c[0] == WRITE_6) { - struct request *rq = drive->hwif->hwgroup->rq; + struct request *rq = drive->hwif->rq; int blocks = pc->xferred / tape->blk_size; tape->avg_size += blocks * tape->blk_size; @@ -576,7 +576,7 @@ static void ide_tape_callback(ide_drive_t *drive, int dsc) /* * Postpone the current request so that ide.c will be able to service requests - * from another device on the same hwgroup while we are polling for DSC. + * from another device on the same port while we are polling for DSC. */ static void idetape_postpone_request(ide_drive_t *drive) { @@ -584,7 +584,8 @@ static void idetape_postpone_request(ide_drive_t *drive) debug_log(DBG_PROCS, "Enter %s\n", __func__); - tape->postponed_rq = HWGROUP(drive)->rq; + tape->postponed_rq = drive->hwif->rq; + ide_stall_queue(drive, tape->dsc_poll_freq); } @@ -2312,7 +2313,7 @@ static const struct ide_proc_devset *ide_tape_proc_devsets(ide_drive_t *drive) static int ide_tape_probe(ide_drive_t *); -static ide_driver_t idetape_driver = { +static struct ide_driver idetape_driver = { .gen_driver = { .owner = THIS_MODULE, .name = "ide-tape", @@ -2323,7 +2324,6 @@ static ide_driver_t idetape_driver = { .version = IDETAPE_VERSION, .do_request = idetape_do_request, .end_request = idetape_end_request, - .error = __ide_error, #ifdef CONFIG_IDE_PROC_FS .proc_entries = ide_tape_proc_entries, .proc_devsets = ide_tape_proc_devsets, diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c index bf4fb9d8d176..16138bce84a7 100644 --- a/drivers/ide/ide-taskfile.c +++ b/drivers/ide/ide-taskfile.c @@ -58,7 +58,7 @@ static ide_startstop_t task_in_intr(ide_drive_t *); ide_startstop_t do_rw_taskfile (ide_drive_t *drive, ide_task_t *task) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; struct ide_taskfile *tf = &task->tf; ide_handler_t *handler = NULL; const struct ide_tp_ops *tp_ops = hwif->tp_ops; @@ -309,9 +309,9 @@ static ide_startstop_t task_error(ide_drive_t *drive, struct request *rq, } if (sectors > 0) { - ide_driver_t *drv; + struct ide_driver *drv; - drv = *(ide_driver_t **)rq->rq_disk->private_data; + drv = *(struct ide_driver **)rq->rq_disk->private_data; drv->end_request(drive, 1, sectors); } } @@ -328,9 +328,9 @@ void task_end_request(ide_drive_t *drive, struct request *rq, u8 stat) } if (rq->rq_disk) { - ide_driver_t *drv; + struct ide_driver *drv; - drv = *(ide_driver_t **)rq->rq_disk->private_data;; + drv = *(struct ide_driver **)rq->rq_disk->private_data;; drv->end_request(drive, 1, rq->nr_sectors); } else ide_end_request(drive, 1, rq->nr_sectors); @@ -361,7 +361,7 @@ static ide_startstop_t task_in_unexpected(ide_drive_t *drive, struct request *rq static ide_startstop_t task_in_intr(ide_drive_t *drive) { ide_hwif_t *hwif = drive->hwif; - struct request *rq = hwif->hwgroup->rq; + struct request *rq = hwif->rq; u8 stat = hwif->tp_ops->read_status(hwif); /* Error? */ @@ -395,7 +395,7 @@ static ide_startstop_t task_in_intr(ide_drive_t *drive) static ide_startstop_t task_out_intr (ide_drive_t *drive) { ide_hwif_t *hwif = drive->hwif; - struct request *rq = HWGROUP(drive)->rq; + struct request *rq = hwif->rq; u8 stat = hwif->tp_ops->read_status(hwif); if (!OK_STAT(stat, DRIVE_READY, drive->bad_wstat)) diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c index 46a2d4ca812b..258805da15c3 100644 --- a/drivers/ide/ide.c +++ b/drivers/ide/ide.c @@ -60,179 +60,8 @@ #include <linux/completion.h> #include <linux/device.h> - -/* default maximum number of failures */ -#define IDE_DEFAULT_MAX_FAILURES 1 - struct class *ide_port_class; -static const u8 ide_hwif_to_major[] = { IDE0_MAJOR, IDE1_MAJOR, - IDE2_MAJOR, IDE3_MAJOR, - IDE4_MAJOR, IDE5_MAJOR, - IDE6_MAJOR, IDE7_MAJOR, - IDE8_MAJOR, IDE9_MAJOR }; - -DEFINE_MUTEX(ide_cfg_mtx); - -static void ide_port_init_devices_data(ide_hwif_t *); - -/* - * Do not even *think* about calling this! - */ -void ide_init_port_data(ide_hwif_t *hwif, unsigned int index) -{ - /* bulk initialize hwif & drive info with zeros */ - memset(hwif, 0, sizeof(ide_hwif_t)); - - /* fill in any non-zero initial values */ - hwif->index = index; - hwif->major = ide_hwif_to_major[index]; - - hwif->name[0] = 'i'; - hwif->name[1] = 'd'; - hwif->name[2] = 'e'; - hwif->name[3] = '0' + index; - - init_completion(&hwif->gendev_rel_comp); - - hwif->tp_ops = &default_tp_ops; - - ide_port_init_devices_data(hwif); -} - -static void ide_port_init_devices_data(ide_hwif_t *hwif) -{ - int unit; - - for (unit = 0; unit < MAX_DRIVES; ++unit) { - ide_drive_t *drive = &hwif->drives[unit]; - u8 j = (hwif->index * MAX_DRIVES) + unit; - - memset(drive, 0, sizeof(*drive)); - - drive->media = ide_disk; - drive->select = (unit << 4) | ATA_DEVICE_OBS; - drive->hwif = hwif; - drive->ready_stat = ATA_DRDY; - drive->bad_wstat = BAD_W_STAT; - drive->special.b.recalibrate = 1; - drive->special.b.set_geometry = 1; - drive->name[0] = 'h'; - drive->name[1] = 'd'; - drive->name[2] = 'a' + j; - drive->max_failures = IDE_DEFAULT_MAX_FAILURES; - - INIT_LIST_HEAD(&drive->list); - init_completion(&drive->gendev_rel_comp); - } -} - -static void __ide_port_unregister_devices(ide_hwif_t *hwif) -{ - int i; - - for (i = 0; i < MAX_DRIVES; i++) { - ide_drive_t *drive = &hwif->drives[i]; - - if (drive->dev_flags & IDE_DFLAG_PRESENT) { - device_unregister(&drive->gendev); - wait_for_completion(&drive->gendev_rel_comp); - } - } -} - -void ide_port_unregister_devices(ide_hwif_t *hwif) -{ - mutex_lock(&ide_cfg_mtx); - __ide_port_unregister_devices(hwif); - hwif->present = 0; - ide_port_init_devices_data(hwif); - mutex_unlock(&ide_cfg_mtx); -} -EXPORT_SYMBOL_GPL(ide_port_unregister_devices); - -/** - * ide_unregister - free an IDE interface - * @hwif: IDE interface - * - * Perform the final unregister of an IDE interface. At the moment - * we don't refcount interfaces so this will also get split up. - * - * Locking: - * The caller must not hold the IDE locks - * The drive present/vanishing is not yet properly locked - * Take care with the callbacks. These have been split to avoid - * deadlocking the IDE layer. The shutdown callback is called - * before we take the lock and free resources. It is up to the - * caller to be sure there is no pending I/O here, and that - * the interface will not be reopened (present/vanishing locking - * isn't yet done BTW). After we commit to the final kill we - * call the cleanup callback with the ide locks held. - * - * Unregister restores the hwif structures to the default state. - * This is raving bonkers. - */ - -void ide_unregister(ide_hwif_t *hwif) -{ - ide_hwif_t *g; - ide_hwgroup_t *hwgroup; - int irq_count = 0; - - BUG_ON(in_interrupt()); - BUG_ON(irqs_disabled()); - - mutex_lock(&ide_cfg_mtx); - - if (hwif->present) { - __ide_port_unregister_devices(hwif); - hwif->present = 0; - } - - ide_proc_unregister_port(hwif); - - hwgroup = hwif->hwgroup; - /* - * free the irq if we were the only hwif using it - */ - g = hwgroup->hwif; - do { - if (g->irq == hwif->irq) - ++irq_count; - g = g->next; - } while (g != hwgroup->hwif); - if (irq_count == 1) - free_irq(hwif->irq, hwgroup); - - ide_remove_port_from_hwgroup(hwif); - - device_unregister(hwif->portdev); - device_unregister(&hwif->gendev); - wait_for_completion(&hwif->gendev_rel_comp); - - /* - * Remove us from the kernel's knowledge - */ - blk_unregister_region(MKDEV(hwif->major, 0), MAX_DRIVES<<PARTN_BITS); - kfree(hwif->sg_table); - unregister_blkdev(hwif->major, hwif->name); - - ide_release_dma_engine(hwif); - - mutex_unlock(&ide_cfg_mtx); -} - -void ide_init_port_hw(ide_hwif_t *hwif, hw_regs_t *hw) -{ - memcpy(&hwif->io_ports, &hw->io_ports, sizeof(hwif->io_ports)); - hwif->irq = hw->irq; - hwif->chipset = hw->chipset; - hwif->dev = hw->dev; - hwif->gendev.parent = hw->parent ? hw->parent : hw->dev; - hwif->ack_intr = hw->ack_intr; - hwif->config_data = hw->config; -} - /* * Locks for IDE setting functionality */ @@ -330,7 +159,6 @@ static int set_pio_mode_abuse(ide_hwif_t *hwif, u8 req_pio) static int set_pio_mode(ide_drive_t *drive, int arg) { ide_hwif_t *hwif = drive->hwif; - ide_hwgroup_t *hwgroup = hwif->hwgroup; const struct ide_port_ops *port_ops = hwif->port_ops; if (arg < 0 || arg > 255) @@ -345,9 +173,9 @@ static int set_pio_mode(ide_drive_t *drive, int arg) unsigned long flags; /* take lock for IDE_DFLAG_[NO_]UNMASK/[NO_]IO_32BIT */ - spin_lock_irqsave(&hwgroup->lock, flags); + spin_lock_irqsave(&hwif->lock, flags); port_ops->set_pio_mode(drive, arg); - spin_unlock_irqrestore(&hwgroup->lock, flags); + spin_unlock_irqrestore(&hwif->lock, flags); } else port_ops->set_pio_mode(drive, arg); } else { @@ -453,7 +281,7 @@ static int ide_uevent(struct device *dev, struct kobj_uevent_env *env) static int generic_ide_probe(struct device *dev) { ide_drive_t *drive = to_ide_device(dev); - ide_driver_t *drv = to_ide_driver(dev->driver); + struct ide_driver *drv = to_ide_driver(dev->driver); return drv->probe ? drv->probe(drive) : -ENODEV; } @@ -461,7 +289,7 @@ static int generic_ide_probe(struct device *dev) static int generic_ide_remove(struct device *dev) { ide_drive_t *drive = to_ide_device(dev); - ide_driver_t *drv = to_ide_driver(dev->driver); + struct ide_driver *drv = to_ide_driver(dev->driver); if (drv->remove) drv->remove(drive); @@ -472,7 +300,7 @@ static int generic_ide_remove(struct device *dev) static void generic_ide_shutdown(struct device *dev) { ide_drive_t *drive = to_ide_device(dev); - ide_driver_t *drv = to_ide_driver(dev->driver); + struct ide_driver *drv = to_ide_driver(dev->driver); if (dev->driver && drv->shutdown) drv->shutdown(drive); @@ -660,6 +488,7 @@ MODULE_PARM_DESC(ignore_cable, "ignore cable detection"); void ide_port_apply_params(ide_hwif_t *hwif) { + ide_drive_t *drive; int i; if (ide_ignore_cable & (1 << hwif->index)) { @@ -668,8 +497,8 @@ void ide_port_apply_params(ide_hwif_t *hwif) hwif->cbl = ATA_CBL_PATA40_SHORT; } - for (i = 0; i < MAX_DRIVES; i++) - ide_dev_apply_params(&hwif->drives[i], i); + ide_port_for_each_dev(i, drive, hwif) + ide_dev_apply_params(drive, i); } /* diff --git a/drivers/ide/it8172.c b/drivers/ide/it8172.c new file mode 100644 index 000000000000..e021078cd06b --- /dev/null +++ b/drivers/ide/it8172.c @@ -0,0 +1,166 @@ +/* + * + * BRIEF MODULE DESCRIPTION + * IT8172 IDE controller support + * + * Copyright (C) 2000 MontaVista Software Inc. + * Copyright (C) 2008 Shane McDonald + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN + * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <linux/module.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/ioport.h> +#include <linux/pci.h> +#include <linux/ide.h> +#include <linux/init.h> + +#define DRV_NAME "IT8172" + +static void it8172_set_pio_mode(ide_drive_t *drive, const u8 pio) +{ + ide_hwif_t *hwif = drive->hwif; + struct pci_dev *dev = to_pci_dev(hwif->dev); + u16 drive_enables; + u32 drive_timing; + + /* + * The highest value of DIOR/DIOW pulse width and recovery time + * that can be set in the IT8172 is 8 PCI clock cycles. As a result, + * it cannot be configured for PIO mode 0. This table sets these + * parameters to the maximum supported by the IT8172. + */ + static const u8 timings[] = { 0x3f, 0x3c, 0x1b, 0x12, 0x0a }; + + pci_read_config_word(dev, 0x40, &drive_enables); + pci_read_config_dword(dev, 0x44, &drive_timing); + + /* + * Enable port 0x44. The IT8172 spec is confused; it calls + * this register the "Slave IDE Timing Register", but in fact, + * it controls timing for both master and slave drives. + */ + drive_enables |= 0x4000; + + drive_enables &= drive->dn ? 0xc006 : 0xc060; + if (drive->media == ide_disk) + /* enable prefetch */ + drive_enables |= 0x0004 << (drive->dn * 4); + if (ata_id_has_iordy(drive->id)) + /* enable IORDY sample-point */ + drive_enables |= 0x0002 << (drive->dn * 4); + + drive_timing &= drive->dn ? 0x00003f00 : 0x000fc000; + drive_timing |= timings[pio] << (drive->dn * 6 + 8); + + pci_write_config_word(dev, 0x40, drive_enables); + pci_write_config_dword(dev, 0x44, drive_timing); +} + +static void it8172_set_dma_mode(ide_drive_t *drive, const u8 speed) +{ + ide_hwif_t *hwif = drive->hwif; + struct pci_dev *dev = to_pci_dev(hwif->dev); + int a_speed = 3 << (drive->dn * 4); + int u_flag = 1 << drive->dn; + int u_speed = 0; + u8 reg48, reg4a; + + pci_read_config_byte(dev, 0x48, ®48); + pci_read_config_byte(dev, 0x4a, ®4a); + + if (speed >= XFER_UDMA_0) { + u8 udma = speed - XFER_UDMA_0; + u_speed = udma << (drive->dn * 4); + + pci_write_config_byte(dev, 0x48, reg48 | u_flag); + reg4a &= ~a_speed; + pci_write_config_byte(dev, 0x4a, reg4a | u_speed); + } else { + const u8 mwdma_to_pio[] = { 0, 3, 4 }; + u8 pio; + + pci_write_config_byte(dev, 0x48, reg48 & ~u_flag); + pci_write_config_byte(dev, 0x4a, reg4a & ~a_speed); + + pio = mwdma_to_pio[speed - XFER_MW_DMA_0]; + + it8172_set_pio_mode(drive, pio); + } +} + + +static const struct ide_port_ops it8172_port_ops = { + .set_pio_mode = it8172_set_pio_mode, + .set_dma_mode = it8172_set_dma_mode, +}; + +static const struct ide_port_info it8172_port_info __devinitdata = { + .name = DRV_NAME, + .port_ops = &it8172_port_ops, + .enablebits = { {0x41, 0x80, 0x80}, {0x00, 0x00, 0x00} }, + .host_flags = IDE_HFLAG_SINGLE, + .pio_mask = ATA_PIO4 & ~ATA_PIO0, + .mwdma_mask = ATA_MWDMA2, + .udma_mask = ATA_UDMA2, +}; + +static int __devinit it8172_init_one(struct pci_dev *dev, + const struct pci_device_id *id) +{ + if ((dev->class >> 8) != PCI_CLASS_STORAGE_IDE) + return -ENODEV; /* IT8172 is more than an IDE controller */ + return ide_pci_init_one(dev, &it8172_port_info, NULL); +} + +static struct pci_device_id it8172_pci_tbl[] = { + { PCI_VDEVICE(ITE, PCI_DEVICE_ID_ITE_8172), 0 }, + { 0, }, +}; +MODULE_DEVICE_TABLE(pci, it8172_pci_tbl); + +static struct pci_driver it8172_pci_driver = { + .name = "IT8172_IDE", + .id_table = it8172_pci_tbl, + .probe = it8172_init_one, + .remove = ide_pci_remove, + .suspend = ide_pci_suspend, + .resume = ide_pci_resume, +}; + +static int __init it8172_ide_init(void) +{ + return ide_pci_register_driver(&it8172_pci_driver); +} + +static void __exit it8172_ide_exit(void) +{ + pci_unregister_driver(&it8172_pci_driver); +} + +module_init(it8172_ide_init); +module_exit(it8172_ide_exit); + +MODULE_AUTHOR("Steve Longerbeam"); +MODULE_DESCRIPTION("PCI driver module for ITE 8172 IDE"); +MODULE_LICENSE("GPL"); diff --git a/drivers/ide/it8213.c b/drivers/ide/it8213.c index 7c2feeb3c5ec..d7969b6d139e 100644 --- a/drivers/ide/it8213.c +++ b/drivers/ide/it8213.c @@ -25,7 +25,7 @@ static void it8213_set_pio_mode(ide_drive_t *drive, const u8 pio) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); int is_slave = drive->dn & 1; int master_port = 0x40; @@ -82,7 +82,7 @@ static void it8213_set_pio_mode(ide_drive_t *drive, const u8 pio) static void it8213_set_dma_mode(ide_drive_t *drive, const u8 speed) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); u8 maslave = 0x40; int a_speed = 3 << (drive->dn * 4); diff --git a/drivers/ide/it821x.c b/drivers/ide/it821x.c index ef004089761b..0be27ac1f077 100644 --- a/drivers/ide/it821x.c +++ b/drivers/ide/it821x.c @@ -167,12 +167,10 @@ static void it821x_clock_strategy(ide_drive_t *drive) ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); struct it821x_dev *itdev = ide_get_hwifdata(hwif); - ide_drive_t *pair; + ide_drive_t *pair = ide_get_pair_dev(drive); int clock, altclock, sel = 0; u8 unit = drive->dn & 1, v; - pair = &hwif->drives[1 - unit]; - if(itdev->want[0][0] > itdev->want[1][0]) { clock = itdev->want[0][1]; altclock = itdev->want[1][1]; @@ -239,15 +237,13 @@ static void it821x_set_pio_mode(ide_drive_t *drive, const u8 pio) { ide_hwif_t *hwif = drive->hwif; struct it821x_dev *itdev = ide_get_hwifdata(hwif); - ide_drive_t *pair; + ide_drive_t *pair = ide_get_pair_dev(drive); u8 unit = drive->dn & 1, set_pio = pio; /* Spec says 89 ref driver uses 88 */ static u16 pio_timings[]= { 0xAA88, 0xA382, 0xA181, 0x3332, 0x3121 }; static u8 pio_want[] = { ATA_66, ATA_66, ATA_66, ATA_66, ATA_ANY }; - pair = &hwif->drives[1 - unit]; - /* * Compute the best PIO mode we can for a given device. We must * pick a speed that does not cause problems with the other device @@ -279,7 +275,7 @@ static void it821x_set_pio_mode(ide_drive_t *drive, const u8 pio) * the shared MWDMA/PIO timing register. */ -static void it821x_tune_mwdma (ide_drive_t *drive, byte mode_wanted) +static void it821x_tune_mwdma(ide_drive_t *drive, u8 mode_wanted) { ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); @@ -316,7 +312,7 @@ static void it821x_tune_mwdma (ide_drive_t *drive, byte mode_wanted) * controller when doing UDMA modes in pass through. */ -static void it821x_tune_udma (ide_drive_t *drive, byte mode_wanted) +static void it821x_tune_udma(ide_drive_t *drive, u8 mode_wanted) { ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); @@ -516,6 +512,7 @@ static struct ide_dma_ops it821x_pass_through_dma_ops = { .dma_test_irq = ide_dma_test_irq, .dma_timeout = ide_dma_timeout, .dma_lost_irq = ide_dma_lost_irq, + .dma_sff_read_status = ide_dma_sff_read_status, }; /** diff --git a/drivers/ide/ns87415.c b/drivers/ide/ns87415.c index 13789060f407..83643ed9a426 100644 --- a/drivers/ide/ns87415.c +++ b/drivers/ide/ns87415.c @@ -56,7 +56,7 @@ static u8 superio_read_status(ide_hwif_t *hwif) return superio_ide_inb(hwif->io_ports.status_addr); } -static u8 superio_read_sff_dma_status(ide_hwif_t *hwif) +static u8 superio_dma_sff_read_status(ide_hwif_t *hwif) { return superio_ide_inb(hwif->dma_base + ATA_DMA_STATUS); } @@ -109,7 +109,6 @@ static const struct ide_tp_ops superio_tp_ops = { .exec_command = ide_exec_command, .read_status = superio_read_status, .read_altstatus = ide_read_altstatus, - .read_sff_dma_status = superio_read_sff_dma_status, .set_irq = ide_set_irq, @@ -132,18 +131,20 @@ static void __devinit superio_init_iops(struct hwif_s *hwif) tmp = superio_ide_inb(dma_stat); outb(tmp | 0x66, dma_stat); } +#else +#define superio_dma_sff_read_status ide_dma_sff_read_status #endif static unsigned int ns87415_count = 0, ns87415_control[MAX_HWIFS] = { 0 }; /* * This routine either enables/disables (according to IDE_DFLAG_PRESENT) - * the IRQ associated with the port (HWIF(drive)), + * the IRQ associated with the port, * and selects either PIO or DMA handshaking for the next I/O operation. */ static void ns87415_prepare_drive (ide_drive_t *drive, unsigned int use_dma) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); unsigned int bit, other, new, *old = (unsigned int *) hwif->select_data; unsigned long flags; @@ -197,11 +198,11 @@ static void ns87415_selectproc (ide_drive_t *drive) static int ns87415_dma_end(ide_drive_t *drive) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; u8 dma_stat = 0, dma_cmd = 0; drive->waiting_for_dma = 0; - dma_stat = hwif->tp_ops->read_sff_dma_status(hwif); + dma_stat = hwif->dma_ops->dma_sff_read_status(hwif); /* get DMA command mode */ dma_cmd = inb(hwif->dma_base + ATA_DMA_CMD); /* stop DMA */ @@ -308,6 +309,7 @@ static const struct ide_dma_ops ns87415_dma_ops = { .dma_test_irq = ide_dma_test_irq, .dma_lost_irq = ide_dma_lost_irq, .dma_timeout = ide_dma_timeout, + .dma_sff_read_status = superio_dma_sff_read_status, }; static const struct ide_port_info ns87415_chipset __devinitdata = { diff --git a/drivers/ide/palm_bk3710.c b/drivers/ide/palm_bk3710.c index 122ed3c072fd..a7ac490c9ae3 100644 --- a/drivers/ide/palm_bk3710.c +++ b/drivers/ide/palm_bk3710.c @@ -324,8 +324,6 @@ static int __devinit palm_bk3710_init_dma(ide_hwif_t *hwif, hwif->dma_base = hwif->io_ports.data_addr - IDE_PALM_ATA_PRI_REG_OFFSET; - hwif->dma_ops = &sff_dma_ops; - return 0; } @@ -338,6 +336,7 @@ static const struct ide_port_ops palm_bk3710_ports_ops = { static struct ide_port_info __devinitdata palm_bk3710_port_info = { .init_dma = palm_bk3710_init_dma, .port_ops = &palm_bk3710_ports_ops, + .dma_ops = &sff_dma_ops, .host_flags = IDE_HFLAG_MMIO, .pio_mask = ATA_PIO4, .mwdma_mask = ATA_MWDMA2, diff --git a/drivers/ide/pdc202xx_new.c b/drivers/ide/pdc202xx_new.c index 211ae46e3e0c..f21290c4b447 100644 --- a/drivers/ide/pdc202xx_new.c +++ b/drivers/ide/pdc202xx_new.c @@ -143,7 +143,7 @@ static struct udma_timing { static void pdcnew_set_dma_mode(ide_drive_t *drive, const u8 speed) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); u8 adj = (drive->dn & 1) ? 0x08 : 0x00; @@ -219,7 +219,7 @@ static void pdcnew_reset(ide_drive_t *drive) * Deleted this because it is redundant from the caller. */ printk(KERN_WARNING "pdc202xx_new: %s channel reset.\n", - HWIF(drive)->channel ? "Secondary" : "Primary"); + drive->hwif->channel ? "Secondary" : "Primary"); } /** diff --git a/drivers/ide/pdc202xx_old.c b/drivers/ide/pdc202xx_old.c index 624e62e5cc9a..97193323aebf 100644 --- a/drivers/ide/pdc202xx_old.c +++ b/drivers/ide/pdc202xx_old.c @@ -39,7 +39,7 @@ static void pdc_old_disable_66MHz_clock(ide_hwif_t *); static void pdc202xx_set_mode(ide_drive_t *drive, const u8 speed) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); u8 drive_pci = 0x60 + (drive->dn << 2); @@ -169,8 +169,8 @@ static void pdc202xx_dma_start(ide_drive_t *drive) if (drive->current_speed > XFER_UDMA_2) pdc_old_enable_66MHz_clock(drive->hwif); if (drive->media != ide_disk || (drive->dev_flags & IDE_DFLAG_LBA48)) { - struct request *rq = HWGROUP(drive)->rq; - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; + struct request *rq = hwif->rq; unsigned long high_16 = hwif->extra_base - 16; unsigned long atapi_reg = high_16 + (hwif->channel ? 0x24 : 0x20); u32 word_count = 0; @@ -189,7 +189,7 @@ static void pdc202xx_dma_start(ide_drive_t *drive) static int pdc202xx_dma_end(ide_drive_t *drive) { if (drive->media != ide_disk || (drive->dev_flags & IDE_DFLAG_LBA48)) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; unsigned long high_16 = hwif->extra_base - 16; unsigned long atapi_reg = high_16 + (hwif->channel ? 0x24 : 0x20); u8 clock = 0; @@ -205,7 +205,7 @@ static int pdc202xx_dma_end(ide_drive_t *drive) static int pdc202xx_dma_test_irq(ide_drive_t *drive) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; unsigned long high_16 = hwif->extra_base - 16; u8 dma_stat = inb(hwif->dma_base + ATA_DMA_STATUS); u8 sc1d = inb(high_16 + 0x001d); @@ -243,7 +243,7 @@ static void pdc202xx_reset_host (ide_hwif_t *hwif) static void pdc202xx_reset (ide_drive_t *drive) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; ide_hwif_t *mate = hwif->mate; pdc202xx_reset_host(hwif); @@ -337,6 +337,7 @@ static const struct ide_dma_ops pdc20246_dma_ops = { .dma_test_irq = pdc202xx_dma_test_irq, .dma_lost_irq = pdc202xx_dma_lost_irq, .dma_timeout = pdc202xx_dma_timeout, + .dma_sff_read_status = ide_dma_sff_read_status, }; static const struct ide_dma_ops pdc2026x_dma_ops = { @@ -348,6 +349,7 @@ static const struct ide_dma_ops pdc2026x_dma_ops = { .dma_test_irq = pdc202xx_dma_test_irq, .dma_lost_irq = pdc202xx_dma_lost_irq, .dma_timeout = pdc202xx_dma_timeout, + .dma_sff_read_status = ide_dma_sff_read_status, }; #define DECLARE_PDC2026X_DEV(udma, sectors) \ diff --git a/drivers/ide/piix.c b/drivers/ide/piix.c index 61d2d920a5cd..f1e2e4ef0d71 100644 --- a/drivers/ide/piix.c +++ b/drivers/ide/piix.c @@ -67,7 +67,7 @@ static int no_piix_dma; static void piix_set_pio_mode(ide_drive_t *drive, const u8 pio) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); int is_slave = drive->dn & 1; int master_port = hwif->channel ? 0x42 : 0x40; @@ -136,7 +136,7 @@ static void piix_set_pio_mode(ide_drive_t *drive, const u8 pio) static void piix_set_dma_mode(ide_drive_t *drive, const u8 speed) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); u8 maslave = hwif->channel ? 0x42 : 0x40; int a_speed = 3 << (drive->dn * 4); @@ -224,7 +224,7 @@ static unsigned int init_chipset_ich(struct pci_dev *dev) */ static void ich_clear_irq(ide_drive_t *drive) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; u8 dma_stat; /* @@ -260,6 +260,8 @@ static const struct ich_laptop ich_laptop[] = { { 0x27DF, 0x103C, 0x30A1 }, /* ICH7 on HP Compaq nc2400 */ { 0x27DF, 0x1071, 0xD221 }, /* ICH7 on Hercules EC-900 */ { 0x24CA, 0x1025, 0x0061 }, /* ICH4 on Acer Aspire 2023WLMi */ + { 0x24CA, 0x1025, 0x003d }, /* ICH4 on ACER TM290 */ + { 0x266F, 0x1025, 0x0066 }, /* ICH6 on ACER Aspire 1694WLMi */ { 0x2653, 0x1043, 0x82D8 }, /* ICH6M on Asus Eee 701 */ /* end marker */ { 0, } diff --git a/drivers/ide/pmac.c b/drivers/ide/pmac.c index 7c481bb56fab..74625e821a43 100644 --- a/drivers/ide/pmac.c +++ b/drivers/ide/pmac.c @@ -955,7 +955,6 @@ static const struct ide_tp_ops pmac_tp_ops = { .exec_command = pmac_exec_command, .read_status = ide_read_status, .read_altstatus = ide_read_altstatus, - .read_sff_dma_status = ide_read_sff_dma_status, .set_irq = pmac_set_irq, @@ -1513,10 +1512,10 @@ use_pio_instead: static int pmac_ide_dma_setup(ide_drive_t *drive) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; pmac_ide_hwif_t *pmif = (pmac_ide_hwif_t *)dev_get_drvdata(hwif->gendev.parent); - struct request *rq = HWGROUP(drive)->rq; + struct request *rq = hwif->rq; u8 unit = drive->dn & 1, ata4 = (pmif->kind == controller_kl_ata4); if (!pmac_ide_build_dmatable(drive, rq)) { @@ -1637,7 +1636,7 @@ pmac_ide_dma_test_irq (ide_drive_t *drive) break; if (++timeout > 100) { printk(KERN_WARNING "ide%d, ide_dma_test_irq \ - timeout flushing channel\n", HWIF(drive)->index); + timeout flushing channel\n", hwif->index); break; } } diff --git a/drivers/ide/q40ide.c b/drivers/ide/q40ide.c index 4af4a8ce4cdf..9f9c0b3cc3a3 100644 --- a/drivers/ide/q40ide.c +++ b/drivers/ide/q40ide.c @@ -99,7 +99,6 @@ static const struct ide_tp_ops q40ide_tp_ops = { .exec_command = ide_exec_command, .read_status = ide_read_status, .read_altstatus = ide_read_altstatus, - .read_sff_dma_status = ide_read_sff_dma_status, .set_irq = ide_set_irq, diff --git a/drivers/ide/qd65xx.c b/drivers/ide/qd65xx.c index bc27c7aba936..5b2e3af43c4b 100644 --- a/drivers/ide/qd65xx.c +++ b/drivers/ide/qd65xx.c @@ -202,7 +202,8 @@ static void qd6500_set_pio_mode(ide_drive_t *drive, const u8 pio) recovery_time = drive->id[ATA_ID_EIDE_PIO] - 120; } - qd_set_timing(drive, qd6500_compute_timing(HWIF(drive), active_time, recovery_time)); + qd_set_timing(drive, qd6500_compute_timing(drive->hwif, + active_time, recovery_time)); } static void qd6580_set_pio_mode(ide_drive_t *drive, const u8 pio) @@ -245,11 +246,11 @@ static void qd6580_set_pio_mode(ide_drive_t *drive, const u8 pio) printk(KERN_INFO "%s: PIO mode%d\n", drive->name,pio); } - if (!HWIF(drive)->channel && drive->media != ide_disk) { + if (!hwif->channel && drive->media != ide_disk) { outb(0x5f, QD_CONTROL_PORT); printk(KERN_WARNING "%s: ATAPI: disabled read-ahead FIFO " "and post-write buffer on %s.\n", - drive->name, HWIF(drive)->name); + drive->name, hwif->name); } qd_set_timing(drive, qd6580_compute_timing(active_time, recovery_time)); diff --git a/drivers/ide/qd65xx.h b/drivers/ide/qd65xx.h index c83dea85e621..6636f9665d16 100644 --- a/drivers/ide/qd65xx.h +++ b/drivers/ide/qd65xx.h @@ -31,8 +31,8 @@ #define QD_CONFIG(hwif) ((hwif)->config_data & 0x00ff) -#define QD_TIMING(drive) (byte)(((drive)->drive_data) & 0x00ff) -#define QD_TIMREG(drive) (byte)((((drive)->drive_data) & 0xff00) >> 8) +#define QD_TIMING(drive) (u8)(((drive)->drive_data) & 0x00ff) +#define QD_TIMREG(drive) (u8)((((drive)->drive_data) & 0xff00) >> 8) #define QD6500_DEF_DATA ((QD_TIM1_PORT<<8) | (QD_ID3 ? 0x0c : 0x08)) #define QD6580_DEF_DATA ((QD_TIM1_PORT<<8) | (QD_ID3 ? 0x0a : 0x00)) diff --git a/drivers/ide/sc1200.c b/drivers/ide/sc1200.c index ec7f766ef5e4..dbdd2985a0d8 100644 --- a/drivers/ide/sc1200.c +++ b/drivers/ide/sc1200.c @@ -125,7 +125,7 @@ out: static void sc1200_set_dma_mode(ide_drive_t *drive, const u8 mode) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); unsigned int reg, timings; unsigned short pci_clock; @@ -170,9 +170,9 @@ static void sc1200_set_dma_mode(ide_drive_t *drive, const u8 mode) */ static int sc1200_dma_end(ide_drive_t *drive) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; unsigned long dma_base = hwif->dma_base; - byte dma_stat; + u8 dma_stat; dma_stat = inb(dma_base+2); /* get DMA status */ @@ -199,7 +199,7 @@ static int sc1200_dma_end(ide_drive_t *drive) static void sc1200_set_pio_mode(ide_drive_t *drive, const u8 pio) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; int mode = -1; /* @@ -292,6 +292,7 @@ static const struct ide_dma_ops sc1200_dma_ops = { .dma_test_irq = ide_dma_test_irq, .dma_lost_irq = ide_dma_lost_irq, .dma_timeout = ide_dma_timeout, + .dma_sff_read_status = ide_dma_sff_read_status, }; static const struct ide_port_info sc1200_chipset __devinitdata = { diff --git a/drivers/ide/scc_pata.c b/drivers/ide/scc_pata.c index 0f48f9dacfa5..8d2314b6327c 100644 --- a/drivers/ide/scc_pata.c +++ b/drivers/ide/scc_pata.c @@ -143,7 +143,7 @@ static u8 scc_read_altstatus(ide_hwif_t *hwif) return (u8)in_be32((void *)hwif->io_ports.ctl_addr); } -static u8 scc_read_sff_dma_status(ide_hwif_t *hwif) +static u8 scc_dma_sff_read_status(ide_hwif_t *hwif) { return (u8)in_be32((void *)(hwif->dma_base + 4)); } @@ -217,7 +217,7 @@ scc_ide_outsl(unsigned long port, void *addr, u32 count) static void scc_set_pio_mode(ide_drive_t *drive, const u8 pio) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; struct scc_ports *ports = ide_get_hwifdata(hwif); unsigned long ctl_base = ports->ctl; unsigned long cckctrl_port = ctl_base + 0xff0; @@ -249,7 +249,7 @@ static void scc_set_pio_mode(ide_drive_t *drive, const u8 pio) static void scc_set_dma_mode(ide_drive_t *drive, const u8 speed) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; struct scc_ports *ports = ide_get_hwifdata(hwif); unsigned long ctl_base = ports->ctl; unsigned long cckctrl_port = ctl_base + 0xff0; @@ -259,7 +259,7 @@ static void scc_set_dma_mode(ide_drive_t *drive, const u8 speed) unsigned long scrcst_port = ctl_base + 0x014; unsigned long udenvt_port = ctl_base + 0x018; unsigned long tdvhsel_port = ctl_base + 0x020; - int is_slave = (&hwif->drives[1] == drive); + int is_slave = drive->dn & 1; int offset, idx; unsigned long reg; unsigned long jcactsel; @@ -292,7 +292,7 @@ static void scc_dma_host_set(ide_drive_t *drive, int on) { ide_hwif_t *hwif = drive->hwif; u8 unit = drive->dn & 1; - u8 dma_stat = scc_ide_inb(hwif->dma_base + 4); + u8 dma_stat = scc_dma_sff_read_status(hwif); if (on) dma_stat |= (1 << (5 + unit)); @@ -316,7 +316,7 @@ static void scc_dma_host_set(ide_drive_t *drive, int on) static int scc_dma_setup(ide_drive_t *drive) { ide_hwif_t *hwif = drive->hwif; - struct request *rq = HWGROUP(drive)->rq; + struct request *rq = hwif->rq; unsigned int reading; u8 dma_stat; @@ -338,7 +338,7 @@ static int scc_dma_setup(ide_drive_t *drive) out_be32((void __iomem *)hwif->dma_base, reading); /* read DMA status for INTR & ERROR flags */ - dma_stat = in_be32((void __iomem *)(hwif->dma_base + 4)); + dma_stat = scc_dma_sff_read_status(hwif); /* clear INTR & ERROR flags */ out_be32((void __iomem *)(hwif->dma_base + 4), dma_stat | 6); @@ -367,7 +367,7 @@ static int __scc_dma_end(ide_drive_t *drive) /* stop DMA */ scc_ide_outb(dma_cmd & ~1, hwif->dma_base); /* get DMA status */ - dma_stat = scc_ide_inb(hwif->dma_base + 4); + dma_stat = scc_dma_sff_read_status(hwif); /* clear the INTR & ERROR bits */ scc_ide_outb(dma_stat | 6, hwif->dma_base + 4); /* purge DMA mappings */ @@ -387,7 +387,7 @@ static int __scc_dma_end(ide_drive_t *drive) static int scc_dma_end(ide_drive_t *drive) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; void __iomem *dma_base = (void __iomem *)hwif->dma_base; unsigned long intsts_port = hwif->dma_base + 0x014; u32 reg; @@ -405,17 +405,18 @@ static int scc_dma_end(ide_drive_t *drive) drive->name); data_loss = 1; if (retry++) { - struct request *rq = HWGROUP(drive)->rq; - int unit; + struct request *rq = hwif->rq; + ide_drive_t *drive; + int i; + /* ERROR_RESET and drive->crc_count are needed * to reduce DMA transfer mode in retry process. */ if (rq) rq->errors |= ERROR_RESET; - for (unit = 0; unit < MAX_DRIVES; unit++) { - ide_drive_t *drive = &hwif->drives[unit]; + + ide_port_for_each_dev(i, drive, hwif) drive->crc_count++; - } } } } @@ -496,7 +497,7 @@ static int scc_dma_end(ide_drive_t *drive) /* returns 1 if dma irq issued, 0 otherwise */ static int scc_dma_test_irq(ide_drive_t *drive) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; u32 int_stat = in_be32((void __iomem *)hwif->dma_base + 0x014); /* SCC errata A252,A308 workaround: Step4 */ @@ -852,7 +853,6 @@ static const struct ide_tp_ops scc_tp_ops = { .exec_command = scc_exec_command, .read_status = scc_read_status, .read_altstatus = scc_read_altstatus, - .read_sff_dma_status = scc_read_sff_dma_status, .set_irq = scc_set_irq, @@ -879,6 +879,7 @@ static const struct ide_dma_ops scc_dma_ops = { .dma_test_irq = scc_dma_test_irq, .dma_lost_irq = ide_dma_lost_irq, .dma_timeout = ide_dma_timeout, + .dma_sff_read_status = scc_dma_sff_read_status, }; #define DECLARE_SCC_DEV(name_str) \ diff --git a/drivers/ide/serverworks.c b/drivers/ide/serverworks.c index 437bc919dafd..382102ba467b 100644 --- a/drivers/ide/serverworks.c +++ b/drivers/ide/serverworks.c @@ -151,7 +151,7 @@ static void svwks_set_dma_mode(ide_drive_t *drive, const u8 speed) static const u8 dma_modes[] = { 0x77, 0x21, 0x20 }; static const u8 drive_pci2[] = { 0x45, 0x44, 0x47, 0x46 }; - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); u8 unit = drive->dn & 1; diff --git a/drivers/ide/setup-pci.c b/drivers/ide/setup-pci.c index 9f1f9163a136..e85d1ed29c2a 100644 --- a/drivers/ide/setup-pci.c +++ b/drivers/ide/setup-pci.c @@ -130,7 +130,7 @@ int ide_pci_check_simplex(ide_hwif_t *hwif, const struct ide_port_info *d) * we tune the drive then try to grab DMA ownership if we want to be * the DMA end. This has to be become dynamic to handle hot-plug. */ - dma_stat = hwif->tp_ops->read_sff_dma_status(hwif); + dma_stat = hwif->dma_ops->dma_sff_read_status(hwif); if ((dma_stat & 0x80) && hwif->mate && hwif->mate->dma_base) { printk(KERN_INFO "%s %s: simplex device: DMA disabled\n", d->name, pci_name(dev)); @@ -377,6 +377,9 @@ int ide_hwif_setup_dma(ide_hwif_t *hwif, const struct ide_port_info *d) hwif->dma_base = base; + if (hwif->dma_ops == NULL) + hwif->dma_ops = &sff_dma_ops; + if (ide_pci_check_simplex(hwif, d) < 0) return -1; @@ -393,8 +396,6 @@ int ide_hwif_setup_dma(ide_hwif_t *hwif, const struct ide_port_info *d) if (ide_allocate_dma_engine(hwif)) return -1; - - hwif->dma_ops = &sff_dma_ops; } return 0; @@ -471,7 +472,7 @@ void ide_pci_setup_ports(struct pci_dev *dev, const struct ide_port_info *d, */ for (port = 0; port < channels; ++port) { - const ide_pci_enablebit_t *e = &(d->enablebits[port]); + const struct ide_pci_enablebit *e = &d->enablebits[port]; if (e->reg && (pci_read_config_byte(dev, e->reg, &tmp) || (tmp & e->mask) != e->val)) { @@ -519,8 +520,7 @@ static int do_ide_setup_pci_device(struct pci_dev *dev, if (ret < 0) goto out; - /* Is it an "IDE storage" device in non-PCI mode? */ - if ((dev->class >> 8) == PCI_CLASS_STORAGE_IDE && (dev->class & 5) != 5) { + if (ide_pci_is_in_compatibility_mode(dev)) { if (noisy) printk(KERN_INFO "%s %s: not 100%% native mode: will " "probe irqs later\n", d->name, pci_name(dev)); diff --git a/drivers/ide/sgiioc4.c b/drivers/ide/sgiioc4.c index a687a7dfea6f..fdb9d7037694 100644 --- a/drivers/ide/sgiioc4.c +++ b/drivers/ide/sgiioc4.c @@ -123,7 +123,7 @@ static int sgiioc4_clearirq(ide_drive_t * drive) { u32 intr_reg; - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; struct ide_io_ports *io_ports = &hwif->io_ports; unsigned long other_ir = io_ports->irq_addr + (IOC4_INTR_REG << 2); @@ -181,7 +181,7 @@ sgiioc4_clearirq(ide_drive_t * drive) static void sgiioc4_dma_start(ide_drive_t *drive) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; unsigned long ioc4_dma_addr = hwif->dma_base + IOC4_DMA_CTRL * 4; unsigned int reg = readl((void __iomem *)ioc4_dma_addr); unsigned int temp_reg = reg | IOC4_S_DMA_START; @@ -209,7 +209,7 @@ sgiioc4_ide_dma_stop(ide_hwif_t *hwif, u64 dma_base) static int sgiioc4_dma_end(ide_drive_t *drive) { u32 ioc4_dma, bc_dev, bc_mem, num, valid = 0, cnt = 0; - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; unsigned long dma_base = hwif->dma_base; int dma_stat = 0; unsigned long *ending_dma = ide_get_hwifdata(hwif); @@ -271,7 +271,7 @@ static void sgiioc4_set_dma_mode(ide_drive_t *drive, const u8 speed) /* returns 1 if dma irq issued, 0 otherwise */ static int sgiioc4_dma_test_irq(ide_drive_t *drive) { - return sgiioc4_checkirq(HWIF(drive)); + return sgiioc4_checkirq(drive->hwif); } static void sgiioc4_dma_host_set(ide_drive_t *drive, int on) @@ -367,7 +367,7 @@ static void sgiioc4_configure_for_dma(int dma_direction, ide_drive_t * drive) { u32 ioc4_dma; - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; unsigned long dma_base = hwif->dma_base; unsigned long ioc4_dma_addr = dma_base + IOC4_DMA_CTRL * 4; u32 dma_addr, ending_dma_addr; @@ -427,7 +427,7 @@ sgiioc4_configure_for_dma(int dma_direction, ide_drive_t * drive) static unsigned int sgiioc4_build_dma_table(ide_drive_t * drive, struct request *rq, int ddir) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; unsigned int *table = hwif->dmatable_cpu; unsigned int count = 0, i = 1; struct scatterlist *sg; @@ -492,7 +492,7 @@ use_pio_instead: static int sgiioc4_dma_setup(ide_drive_t *drive) { - struct request *rq = HWGROUP(drive)->rq; + struct request *rq = drive->hwif->rq; unsigned int count = 0; int ddir; @@ -523,7 +523,6 @@ static const struct ide_tp_ops sgiioc4_tp_ops = { .exec_command = ide_exec_command, .read_status = sgiioc4_read_status, .read_altstatus = ide_read_altstatus, - .read_sff_dma_status = ide_read_sff_dma_status, .set_irq = ide_set_irq, diff --git a/drivers/ide/siimage.c b/drivers/ide/siimage.c index 7d622d20bc4c..cb2b352b876b 100644 --- a/drivers/ide/siimage.c +++ b/drivers/ide/siimage.c @@ -114,7 +114,7 @@ static unsigned long siimage_selreg(ide_hwif_t *hwif, int r) static inline unsigned long siimage_seldev(ide_drive_t *drive, int r) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; unsigned long base = (unsigned long)hwif->hwif_data; u8 unit = drive->dn & 1; @@ -243,7 +243,7 @@ static void sil_set_pio_mode(ide_drive_t *drive, u8 pio) static const u16 tf_speed[] = { 0x328a, 0x2283, 0x1281, 0x10c3, 0x10c1 }; static const u16 data_speed[] = { 0x328a, 0x2283, 0x1104, 0x10c3, 0x10c1 }; - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); ide_drive_t *pair = ide_get_pair_dev(drive); u32 speedt = 0; @@ -300,7 +300,7 @@ static void sil_set_dma_mode(ide_drive_t *drive, const u8 speed) static const u8 ultra5[] = { 0x0C, 0x07, 0x05, 0x04, 0x02, 0x01 }; static const u16 dma[] = { 0x2208, 0x10C2, 0x10C1 }; - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); unsigned long base = (unsigned long)hwif->hwif_data; u16 ultra = 0, multi = 0; @@ -340,7 +340,7 @@ static void sil_set_dma_mode(ide_drive_t *drive, const u8 speed) /* returns 1 if dma irq issued, 0 otherwise */ static int siimage_io_dma_test_irq(ide_drive_t *drive) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); u8 dma_altstat = 0; unsigned long addr = siimage_selreg(hwif, 1); @@ -367,7 +367,7 @@ static int siimage_io_dma_test_irq(ide_drive_t *drive) static int siimage_mmio_dma_test_irq(ide_drive_t *drive) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; unsigned long addr = siimage_selreg(hwif, 0x1); void __iomem *sata_error_addr = (void __iomem *)hwif->sata_scr[SATA_ERROR_OFFSET]; @@ -717,6 +717,7 @@ static const struct ide_dma_ops sil_dma_ops = { .dma_test_irq = siimage_dma_test_irq, .dma_timeout = ide_dma_timeout, .dma_lost_irq = ide_dma_lost_irq, + .dma_sff_read_status = ide_dma_sff_read_status, }; #define DECLARE_SII_DEV(p_ops) \ diff --git a/drivers/ide/sis5513.c b/drivers/ide/sis5513.c index ad32e18c5ba3..9ec1a4a4432c 100644 --- a/drivers/ide/sis5513.c +++ b/drivers/ide/sis5513.c @@ -274,7 +274,7 @@ static void sis_program_timings(ide_drive_t *drive, const u8 mode) static void config_drive_art_rwp(ide_drive_t *drive) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); u8 reg4bh = 0; u8 rw_prefetch = 0; diff --git a/drivers/ide/sl82c105.c b/drivers/ide/sl82c105.c index 84dc33602ff8..48cc748c5043 100644 --- a/drivers/ide/sl82c105.c +++ b/drivers/ide/sl82c105.c @@ -140,7 +140,7 @@ static inline void sl82c105_reset_host(struct pci_dev *dev) */ static void sl82c105_dma_lost_irq(ide_drive_t *drive) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); u32 val, mask = hwif->channel ? CTRL_IDE_IRQB : CTRL_IDE_IRQA; u8 dma_cmd; @@ -177,7 +177,7 @@ static void sl82c105_dma_lost_irq(ide_drive_t *drive) */ static void sl82c105_dma_start(ide_drive_t *drive) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); int reg = 0x44 + drive->dn * 4; @@ -299,6 +299,7 @@ static const struct ide_dma_ops sl82c105_dma_ops = { .dma_test_irq = ide_dma_test_irq, .dma_lost_irq = sl82c105_dma_lost_irq, .dma_timeout = sl82c105_dma_timeout, + .dma_sff_read_status = ide_dma_sff_read_status, }; static const struct ide_port_info sl82c105_chipset __devinitdata = { diff --git a/drivers/ide/slc90e66.c b/drivers/ide/slc90e66.c index 0f759e4ed779..40b4b94a4288 100644 --- a/drivers/ide/slc90e66.c +++ b/drivers/ide/slc90e66.c @@ -20,7 +20,7 @@ static DEFINE_SPINLOCK(slc90e66_lock); static void slc90e66_set_pio_mode(ide_drive_t *drive, const u8 pio) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); int is_slave = drive->dn & 1; int master_port = hwif->channel ? 0x42 : 0x40; @@ -73,7 +73,7 @@ static void slc90e66_set_pio_mode(ide_drive_t *drive, const u8 pio) static void slc90e66_set_dma_mode(ide_drive_t *drive, const u8 speed) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); u8 maslave = hwif->channel ? 0x42 : 0x40; int sitre = 0, a_speed = 7 << (drive->dn * 4); diff --git a/drivers/ide/tc86c001.c b/drivers/ide/tc86c001.c index 93e2cce4b296..84109f5a1632 100644 --- a/drivers/ide/tc86c001.c +++ b/drivers/ide/tc86c001.c @@ -15,7 +15,7 @@ static void tc86c001_set_mode(ide_drive_t *drive, const u8 speed) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; unsigned long scr_port = hwif->config_data + (drive->dn ? 0x02 : 0x00); u16 mode, scr = inw(scr_port); @@ -62,13 +62,12 @@ static void tc86c001_set_pio_mode(ide_drive_t *drive, const u8 pio) */ static int tc86c001_timer_expiry(ide_drive_t *drive) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; ide_expiry_t *expiry = ide_get_hwifdata(hwif); - ide_hwgroup_t *hwgroup = HWGROUP(drive); u8 dma_stat = inb(hwif->dma_base + ATA_DMA_STATUS); /* Restore a higher level driver's expiry handler first. */ - hwgroup->expiry = expiry; + hwif->expiry = expiry; if ((dma_stat & 5) == 1) { /* DMA active and no interrupt */ unsigned long sc_base = hwif->config_data; @@ -110,11 +109,10 @@ static int tc86c001_timer_expiry(ide_drive_t *drive) static void tc86c001_dma_start(ide_drive_t *drive) { - ide_hwif_t *hwif = HWIF(drive); - ide_hwgroup_t *hwgroup = HWGROUP(drive); + ide_hwif_t *hwif = drive->hwif; unsigned long sc_base = hwif->config_data; unsigned long twcr_port = sc_base + (drive->dn ? 0x06 : 0x04); - unsigned long nsectors = hwgroup->rq->nr_sectors; + unsigned long nsectors = hwif->rq->nr_sectors; /* * We have to manually load the sector count and size into @@ -125,8 +123,8 @@ static void tc86c001_dma_start(ide_drive_t *drive) outw(SECTOR_SIZE / 2, twcr_port); /* Transfer Word Count 1/2 */ /* Install our timeout expiry hook, saving the current handler... */ - ide_set_hwifdata(hwif, hwgroup->expiry); - hwgroup->expiry = &tc86c001_timer_expiry; + ide_set_hwifdata(hwif, hwif->expiry); + hwif->expiry = &tc86c001_timer_expiry; ide_dma_start(drive); } @@ -190,6 +188,7 @@ static const struct ide_dma_ops tc86c001_dma_ops = { .dma_test_irq = ide_dma_test_irq, .dma_lost_irq = ide_dma_lost_irq, .dma_timeout = ide_dma_timeout, + .dma_sff_read_status = ide_dma_sff_read_status, }; static const struct ide_port_info tc86c001_chipset __devinitdata = { diff --git a/drivers/ide/triflex.c b/drivers/ide/triflex.c index b6ff40336aa9..8773c3ba7462 100644 --- a/drivers/ide/triflex.c +++ b/drivers/ide/triflex.c @@ -36,7 +36,7 @@ static void triflex_set_mode(ide_drive_t *drive, const u8 speed) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); u32 triflex_timings = 0; u16 timing = 0; diff --git a/drivers/ide/trm290.c b/drivers/ide/trm290.c index 2a5ea90cf8b8..b6a1285a4021 100644 --- a/drivers/ide/trm290.c +++ b/drivers/ide/trm290.c @@ -144,7 +144,7 @@ static void trm290_prepare_drive (ide_drive_t *drive, unsigned int use_dma) { - ide_hwif_t *hwif = HWIF(drive); + ide_hwif_t *hwif = drive->hwif; u16 reg = 0; unsigned long flags; @@ -184,7 +184,7 @@ static void trm290_dma_exec_cmd(ide_drive_t *drive, u8 command) static int trm290_dma_setup(ide_drive_t *drive) { ide_hwif_t *hwif = drive->hwif; - struct request *rq = hwif->hwgroup->rq; + struct request *rq = hwif->rq; unsigned int count, rw; if (rq_data_dir(rq)) { @@ -222,15 +222,15 @@ static int trm290_dma_end(ide_drive_t *drive) drive->waiting_for_dma = 0; /* purge DMA mappings */ ide_destroy_dmatable(drive); - status = inw(HWIF(drive)->dma_base + 2); + status = inw(drive->hwif->dma_base + 2); + return status != 0x00ff; } static int trm290_dma_test_irq(ide_drive_t *drive) { - u16 status; + u16 status = inw(drive->hwif->dma_base + 2); - status = inw(HWIF(drive)->dma_base + 2); return status == 0x00ff; } diff --git a/drivers/ide/tx4939ide.c b/drivers/ide/tx4939ide.c index 4a8c5a21bd4c..882f6f07c476 100644 --- a/drivers/ide/tx4939ide.c +++ b/drivers/ide/tx4939ide.c @@ -293,7 +293,7 @@ static int tx4939ide_dma_setup(ide_drive_t *drive) { ide_hwif_t *hwif = drive->hwif; void __iomem *base = TX4939IDE_BASE(hwif); - struct request *rq = hwif->hwgroup->rq; + struct request *rq = hwif->rq; u8 reading; int nent; @@ -397,6 +397,17 @@ static int tx4939ide_dma_test_irq(ide_drive_t *drive) return found; } +#ifdef __BIG_ENDIAN +static u8 tx4939ide_dma_sff_read_status(ide_hwif_t *hwif) +{ + void __iomem *base = TX4939IDE_BASE(hwif); + + return tx4939ide_readb(base, TX4939IDE_DMA_Stat); +} +#else +#define tx4939ide_dma_sff_read_status ide_dma_sff_read_status +#endif + static void tx4939ide_init_hwif(ide_hwif_t *hwif) { void __iomem *base = TX4939IDE_BASE(hwif); @@ -443,13 +454,6 @@ static void tx4939ide_tf_load_fixup(ide_drive_t *drive, ide_task_t *task) #ifdef __BIG_ENDIAN -static u8 tx4939ide_read_sff_dma_status(ide_hwif_t *hwif) -{ - void __iomem *base = TX4939IDE_BASE(hwif); - - return tx4939ide_readb(base, TX4939IDE_DMA_Stat); -} - /* custom iops (independent from SWAP_IO_SPACE) */ static u8 tx4939ide_inb(unsigned long port) { @@ -585,7 +589,6 @@ static const struct ide_tp_ops tx4939ide_tp_ops = { .exec_command = ide_exec_command, .read_status = ide_read_status, .read_altstatus = ide_read_altstatus, - .read_sff_dma_status = tx4939ide_read_sff_dma_status, .set_irq = ide_set_irq, @@ -609,7 +612,6 @@ static const struct ide_tp_ops tx4939ide_tp_ops = { .exec_command = ide_exec_command, .read_status = ide_read_status, .read_altstatus = ide_read_altstatus, - .read_sff_dma_status = ide_read_sff_dma_status, .set_irq = ide_set_irq, @@ -638,6 +640,7 @@ static const struct ide_dma_ops tx4939ide_dma_ops = { .dma_test_irq = tx4939ide_dma_test_irq, .dma_lost_irq = ide_dma_lost_irq, .dma_timeout = ide_dma_timeout, + .dma_sff_read_status = tx4939ide_dma_sff_read_status, }; static const struct ide_port_info tx4939ide_port_info __initdata = { diff --git a/drivers/ide/umc8672.c b/drivers/ide/umc8672.c index e29978cf6197..0608d41fb6d0 100644 --- a/drivers/ide/umc8672.c +++ b/drivers/ide/umc8672.c @@ -106,22 +106,21 @@ static void umc_set_speeds(u8 speeds[]) static void umc_set_pio_mode(ide_drive_t *drive, const u8 pio) { - ide_hwif_t *hwif = drive->hwif; - ide_hwgroup_t *mate_hwgroup = hwif->mate ? hwif->mate->hwgroup : NULL; + ide_hwif_t *hwif = drive->hwif, *mate = hwif->mate; unsigned long uninitialized_var(flags); printk("%s: setting umc8672 to PIO mode%d (speed %d)\n", drive->name, pio, pio_to_umc[pio]); - if (mate_hwgroup) - spin_lock_irqsave(&mate_hwgroup->lock, flags); - if (mate_hwgroup && mate_hwgroup->handler) { + if (mate) + spin_lock_irqsave(&mate->lock, flags); + if (mate && mate->handler) { printk(KERN_ERR "umc8672: other interface is busy: exiting tune_umc()\n"); } else { current_speeds[drive->name[2] - 'a'] = pio_to_umc[pio]; umc_set_speeds(current_speeds); } - if (mate_hwgroup) - spin_unlock_irqrestore(&mate_hwgroup->lock, flags); + if (mate) + spin_unlock_irqrestore(&mate->lock, flags); } static const struct ide_port_ops umc8672_port_ops = { diff --git a/drivers/ide/via82cxxx.c b/drivers/ide/via82cxxx.c index 2a812d3207e9..fecc0e03c3fc 100644 --- a/drivers/ide/via82cxxx.c +++ b/drivers/ide/via82cxxx.c @@ -178,7 +178,7 @@ static void via_set_drive(ide_drive_t *drive, const u8 speed) ide_timing_merge(&p, &t, &t, IDE_TIMING_8BIT); } - via_set_speed(HWIF(drive), drive->dn, &t); + via_set_speed(hwif, drive->dn, &t); } /** diff --git a/drivers/macintosh/therm_adt746x.c b/drivers/macintosh/therm_adt746x.c index 22bf981d393b..82607add69a9 100644 --- a/drivers/macintosh/therm_adt746x.c +++ b/drivers/macintosh/therm_adt746x.c @@ -554,7 +554,7 @@ thermostat_init(void) const u32 *prop; int i = 0, offset = 0; int err; - + np = of_find_node_by_name(NULL, "fan"); if (!np) return -ENODEV; @@ -613,13 +613,13 @@ thermostat_init(void) } of_dev = of_platform_device_create(np, "temperatures", NULL); - + of_node_put(np); + if (of_dev == NULL) { printk(KERN_ERR "Can't register temperatures device !\n"); - of_node_put(np); return -ENODEV; } - + err = device_create_file(&of_dev->dev, &dev_attr_sensor1_temperature); err |= device_create_file(&of_dev->dev, &dev_attr_sensor2_temperature); err |= device_create_file(&of_dev->dev, &dev_attr_sensor1_limit); diff --git a/drivers/media/dvb/dvb-core/dvbdev.c b/drivers/media/dvb/dvb-core/dvbdev.c index 65d69665f1fc..6a32680dbb1b 100644 --- a/drivers/media/dvb/dvb-core/dvbdev.c +++ b/drivers/media/dvb/dvb-core/dvbdev.c @@ -79,6 +79,10 @@ static int dvb_device_open(struct inode *inode, struct file *file) file->private_data = dvbdev; old_fops = file->f_op; file->f_op = fops_get(dvbdev->fops); + if (file->f_op == NULL) { + file->f_op = old_fops; + goto fail; + } if(file->f_op->open) err = file->f_op->open(inode,file); if (err) { @@ -90,6 +94,7 @@ static int dvb_device_open(struct inode *inode, struct file *file) unlock_kernel(); return err; } +fail: up_read(&minor_rwsem); unlock_kernel(); return -ENODEV; diff --git a/drivers/media/video/v4l1-compat.c b/drivers/media/video/v4l1-compat.c index d450cab20be4..b617bf05e2d7 100644 --- a/drivers/media/video/v4l1-compat.c +++ b/drivers/media/video/v4l1-compat.c @@ -203,7 +203,6 @@ static int poll_one(struct file *file, struct poll_wqueues *pwq) table = &pwq->pt; for (;;) { int mask; - set_current_state(TASK_INTERRUPTIBLE); mask = file->f_op->poll(file, table); if (mask & POLLIN) break; @@ -212,9 +211,8 @@ static int poll_one(struct file *file, struct poll_wqueues *pwq) retval = -ERESTARTSYS; break; } - schedule(); + poll_schedule(pwq, TASK_INTERRUPTIBLE); } - set_current_state(TASK_RUNNING); poll_freewait(pwq); return retval; } diff --git a/drivers/message/i2o/device.c b/drivers/message/i2o/device.c index a7dd03e8d332..0ee4264f5db7 100644 --- a/drivers/message/i2o/device.c +++ b/drivers/message/i2o/device.c @@ -52,7 +52,6 @@ static inline int i2o_device_issue_claim(struct i2o_device *dev, u32 cmd, /** * i2o_device_claim - claim a device for use by an OSM * @dev: I2O device to claim - * @drv: I2O driver which wants to claim the device * * Do the leg work to assign a device to a given OSM. If the claim succeeds, * the owner is the primary. If the attempt fails a negative errno code @@ -80,7 +79,6 @@ int i2o_device_claim(struct i2o_device *dev) /** * i2o_device_claim_release - release a device that the OSM is using * @dev: device to release - * @drv: driver which claimed the device * * Drop a claim by an OSM on a given I2O device. * diff --git a/drivers/message/i2o/driver.c b/drivers/message/i2o/driver.c index e0d474b17433..a0421efe04ca 100644 --- a/drivers/message/i2o/driver.c +++ b/drivers/message/i2o/driver.c @@ -173,7 +173,6 @@ void i2o_driver_unregister(struct i2o_driver *drv) * i2o_driver_dispatch - dispatch an I2O reply message * @c: I2O controller of the message * @m: I2O message number - * @msg: I2O message to be delivered * * The reply is delivered to the driver from which the original message * was. This function is only called from interrupt context. diff --git a/drivers/misc/ibmasm/module.c b/drivers/misc/ibmasm/module.c index b5f6add34b0b..dc14b0b9cbfa 100644 --- a/drivers/misc/ibmasm/module.c +++ b/drivers/misc/ibmasm/module.c @@ -104,8 +104,7 @@ static int __devinit ibmasm_init_one(struct pci_dev *pdev, const struct pci_devi } sp->irq = pdev->irq; - sp->base_address = ioremap(pci_resource_start(pdev, 0), - pci_resource_len(pdev, 0)); + sp->base_address = pci_ioremap_bar(pdev, 0); if (!sp->base_address) { dev_err(sp->dev, "Failed to ioremap pci memory\n"); result = -ENODEV; diff --git a/drivers/misc/ioc4.c b/drivers/misc/ioc4.c index 6f76573e7c8a..60b0b1a4fb3a 100644 --- a/drivers/misc/ioc4.c +++ b/drivers/misc/ioc4.c @@ -269,6 +269,16 @@ ioc4_variant(struct ioc4_driver_data *idd) return IOC4_VARIANT_PCI_RT; } +static void +ioc4_load_modules(struct work_struct *work) +{ + /* arg just has to be freed */ + + request_module("sgiioc4"); + + kfree(work); +} + /* Adds a new instance of an IOC4 card */ static int ioc4_probe(struct pci_dev *pdev, const struct pci_device_id *pci_id) @@ -378,6 +388,30 @@ ioc4_probe(struct pci_dev *pdev, const struct pci_device_id *pci_id) } mutex_unlock(&ioc4_mutex); + /* Request sgiioc4 IDE driver on boards that bring that functionality + * off of IOC4. The root filesystem may be hosted on a drive connected + * to IOC4, so we need to make sure the sgiioc4 driver is loaded as it + * won't be picked up by modprobes due to the ioc4 module owning the + * PCI device. + */ + if (idd->idd_variant != IOC4_VARIANT_PCI_RT) { + struct work_struct *work; + work = kzalloc(sizeof(struct work_struct), GFP_KERNEL); + if (!work) { + printk(KERN_WARNING + "%s: IOC4 unable to allocate memory for " + "load of sub-modules.\n", __func__); + } else { + /* Request the module from a work procedure as the + * modprobe goes out to a userland helper and that + * will hang if done directly from ioc4_probe(). + */ + printk(KERN_INFO "IOC4 loading sgiioc4 submodule\n"); + INIT_WORK(work, ioc4_load_modules); + schedule_work(work); + } + } + return 0; out_misc_region: @@ -462,6 +496,8 @@ ioc4_init(void) static void __devexit ioc4_exit(void) { + /* Ensure ioc4_load_modules() has completed before exiting */ + flush_scheduled_work(); pci_unregister_driver(&ioc4_driver); } diff --git a/drivers/misc/tifm_7xx1.c b/drivers/misc/tifm_7xx1.c index e71eba31decb..be5672a98702 100644 --- a/drivers/misc/tifm_7xx1.c +++ b/drivers/misc/tifm_7xx1.c @@ -354,8 +354,7 @@ static int tifm_7xx1_probe(struct pci_dev *dev, fm->has_ms_pif = tifm_7xx1_has_ms_pif; pci_set_drvdata(dev, fm); - fm->addr = ioremap(pci_resource_start(dev, 0), - pci_resource_len(dev, 0)); + fm->addr = pci_ioremap_bar(dev, 0); if (!fm->addr) goto err_out_free; diff --git a/drivers/parport/ieee1284.c b/drivers/parport/ieee1284.c index ac2a805ac7ea..8901ecf6e037 100644 --- a/drivers/parport/ieee1284.c +++ b/drivers/parport/ieee1284.c @@ -84,7 +84,7 @@ int parport_wait_event (struct parport *port, signed long timeout) add_timer (&timer); ret = down_interruptible (&port->physport->ieee1284.irq); - if (!del_timer (&timer) && !ret) + if (!del_timer_sync(&timer) && !ret) /* Timed out. */ ret = 1; diff --git a/drivers/rapidio/rio-driver.c b/drivers/rapidio/rio-driver.c index 956d3e79f6aa..addb87cf44d9 100644 --- a/drivers/rapidio/rio-driver.c +++ b/drivers/rapidio/rio-driver.c @@ -79,7 +79,6 @@ void rio_dev_put(struct rio_dev *rdev) /** * rio_device_probe - Tell if a RIO device structure has a matching RIO device id structure - * @id: the RIO device id structure to match against * @dev: the RIO device structure to match against * * return 0 and set rio_dev->driver when drv claims rio_dev, else error diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index 165a81843357..4ad831de41ad 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -35,8 +35,8 @@ config RTC_HCTOSYS_DEVICE default "rtc0" help The RTC device that will be used to (re)initialize the system - clock, usually rtc0. Initialization is done when the system - starts up, and when it resumes from a low power state. This + clock, usually rtc0. Initialization is done when the system + starts up, and when it resumes from a low power state. This device should record time in UTC, since the kernel won't do timezone correction. @@ -44,7 +44,7 @@ config RTC_HCTOSYS_DEVICE functions run, so it must usually be statically linked. This clock should be battery-backed, so that it reads the correct - time when the system boots from a power-off state. Otherwise, your + time when the system boots from a power-off state. Otherwise, your system will need an external clock source (like an NTP server). If the clock you specify here is not battery backed, it may still @@ -69,8 +69,7 @@ config RTC_INTF_SYSFS Say yes here if you want to use your RTCs using sysfs interfaces, /sys/class/rtc/rtc0 through /sys/.../rtcN. - This driver can also be built as a module. If so, the module - will be called rtc-sysfs. + If unsure, say Y. config RTC_INTF_PROC boolean "/proc/driver/rtc (procfs for rtc0)" @@ -78,11 +77,10 @@ config RTC_INTF_PROC default RTC_CLASS help Say yes here if you want to use your first RTC through the proc - interface, /proc/driver/rtc. Other RTCs will not be available + interface, /proc/driver/rtc. Other RTCs will not be available through that API. - This driver can also be built as a module. If so, the module - will be called rtc-proc. + If unsure, say Y. config RTC_INTF_DEV boolean "/dev/rtcN (character devices)" @@ -90,12 +88,14 @@ config RTC_INTF_DEV help Say yes here if you want to use your RTCs using the /dev interfaces, which "udev" sets up as /dev/rtc0 through - /dev/rtcN. You may want to set up a symbolic link so one - of these can be accessed as /dev/rtc, which is a name - expected by "hwclock" and some other programs. + /dev/rtcN. - This driver can also be built as a module. If so, the module - will be called rtc-dev. + You may want to set up a symbolic link so one of these + can be accessed as /dev/rtc, which is a name + expected by "hwclock" and some other programs. Recent + versions of "udev" are known to set up the symlink for you. + + If unsure, say Y. config RTC_INTF_DEV_UIE_EMUL bool "RTC UIE emulation on dev interface" @@ -132,14 +132,14 @@ config RTC_DRV_DS1307 tristate "Dallas/Maxim DS1307/37/38/39/40, ST M41T00" help If you say yes here you get support for various compatible RTC - chips (often with battery backup) connected with I2C. This driver + chips (often with battery backup) connected with I2C. This driver should handle DS1307, DS1337, DS1338, DS1339, DS1340, ST M41T00, - and probably other chips. In some cases the RTC must already + and probably other chips. In some cases the RTC must already have been initialized (by manufacturing or a bootloader). The first seven registers on these chips hold an RTC, and other registers may add features such as NVRAM, a trickle charger for - the RTC/NVRAM backup power, and alarms. NVRAM is visible in + the RTC/NVRAM backup power, and alarms. NVRAM is visible in sysfs, but other chip features may not be available. This driver can also be built as a module. If so, the module @@ -150,10 +150,10 @@ config RTC_DRV_DS1374 depends on RTC_CLASS && I2C help If you say yes here you get support for Dallas Semiconductor - DS1374 real-time clock chips. If an interrupt is associated + DS1374 real-time clock chips. If an interrupt is associated with the device, the alarm functionality is supported. - This driver can also be built as a module. If so, the module + This driver can also be built as a module. If so, the module will be called rtc-ds1374. config RTC_DRV_DS1672 @@ -247,7 +247,7 @@ config RTC_DRV_TWL92330 help If you say yes here you get support for the RTC on the TWL92330 "Menelaus" power management chip, used with OMAP2 - platforms. The support is integrated with the rest of + platforms. The support is integrated with the rest of the Menelaus driver; it's not separate module. config RTC_DRV_TWL4030 @@ -308,7 +308,7 @@ config RTC_DRV_DS1305 tristate "Dallas/Maxim DS1305/DS1306" help Select this driver to get support for the Dallas/Maxim DS1305 - and DS1306 real time clock chips. These support a trickle + and DS1306 real time clock chips. These support a trickle charger, alarms, and NVRAM in addition to the clock. This driver can also be built as a module. If so, the module @@ -317,7 +317,8 @@ config RTC_DRV_DS1305 config RTC_DRV_DS1390 tristate "Dallas/Maxim DS1390/93/94" help - If you say yes here you get support for the DS1390/93/94 chips. + If you say yes here you get support for the + Dallas/Maxim DS1390/93/94 chips. This driver only supports the RTC feature, and not other chip features such as alarms and trickle charging. @@ -381,7 +382,7 @@ config RTC_DRV_CMOS or LPC bus chips, and so on. Your system will need to define the platform device used by - this driver, otherwise it won't be accessible. This means + this driver, otherwise it won't be accessible. This means you can safely enable this driver if you don't know whether or not your board has this kind of hardware. @@ -598,7 +599,7 @@ config RTC_DRV_AT91RM9200 depends on ARCH_AT91RM9200 || ARCH_AT91SAM9RL help Driver for the internal RTC (Realtime Clock) module found on - Atmel AT91RM9200's and AT91SAM9RL chips. On SAM9RL chips + Atmel AT91RM9200's and AT91SAM9RL chips. On SAM9RL chips this is powered by the backup power supply. config RTC_DRV_AT91SAM9 @@ -620,8 +621,8 @@ config RTC_DRV_AT91SAM9_RTT prompt "RTT module Number" if ARCH_AT91SAM9263 depends on RTC_DRV_AT91SAM9 help - More than one RTT module is available. You can choose which - one will be used as an RTC. The default of zero is normally + More than one RTT module is available. You can choose which + one will be used as an RTC. The default of zero is normally OK to use, though some systems use that for non-RTC purposes. config RTC_DRV_AT91SAM9_GPBR @@ -633,10 +634,20 @@ config RTC_DRV_AT91SAM9_GPBR depends on RTC_DRV_AT91SAM9 help The RTC driver needs to use one of the General Purpose Backup - Registers (GPBRs) as well as the RTT. You can choose which one - will be used. The default of zero is normally OK to use, but + Registers (GPBRs) as well as the RTT. You can choose which one + will be used. The default of zero is normally OK to use, but on some systems other software needs to use that register. +config RTC_DRV_AU1XXX + tristate "Au1xxx Counter0 RTC support" + depends on SOC_AU1X00 + help + This is a driver for the Au1xxx on-chip Counter0 (Time-Of-Year + counter) to be used as a RTC. + + This driver can also be built as a module. If so, the module + will be called rtc-au1xxx. + config RTC_DRV_BFIN tristate "Blackfin On-Chip RTC" depends on BLACKFIN && !BF561 @@ -669,6 +680,17 @@ config RTC_DRV_PPC the RTC. This exposes that functionality through the generic RTC class. +config RTC_DRV_PXA + tristate "PXA27x/PXA3xx" + depends on ARCH_PXA + help + If you say Y here you will get access to the real time clock + built into your PXA27x or PXA3xx CPU. + + This RTC driver uses PXA RTC registers available since pxa27x + series (RDxR, RYxR) instead of legacy RCNR, RTAR. + + config RTC_DRV_SUN4V bool "SUN4V Hypervisor RTC" depends on SPARC64 @@ -683,4 +705,22 @@ config RTC_DRV_STARFIRE If you say Y here you will get support for the RTC found on Starfire systems. +config RTC_DRV_TX4939 + tristate "TX4939 SoC" + depends on SOC_TX4939 + help + Driver for the internal RTC (Realtime Clock) module found on + Toshiba TX4939 SoC. + +config RTC_DRV_MV + tristate "Marvell SoC RTC" + depends on ARCH_KIRKWOOD + help + If you say yes here you will get support for the in-chip RTC + that can be found in some of Marvell's SoC devices, such as + the Kirkwood 88F6281 and 88F6192. + + This driver can also be built as a module. If so, the module + will be called rtc-mv. + endif # RTC_CLASS diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile index 6e79c912bf9e..9a4340d48f26 100644 --- a/drivers/rtc/Makefile +++ b/drivers/rtc/Makefile @@ -20,6 +20,7 @@ rtc-core-$(CONFIG_RTC_INTF_SYSFS) += rtc-sysfs.o obj-$(CONFIG_RTC_DRV_AT32AP700X)+= rtc-at32ap700x.o obj-$(CONFIG_RTC_DRV_AT91RM9200)+= rtc-at91rm9200.o obj-$(CONFIG_RTC_DRV_AT91SAM9) += rtc-at91sam9.o +obj-$(CONFIG_RTC_DRV_AU1XXX) += rtc-au1xxx.o obj-$(CONFIG_RTC_DRV_BFIN) += rtc-bfin.o obj-$(CONFIG_RTC_DRV_CMOS) += rtc-cmos.o obj-$(CONFIG_RTC_DRV_DS1216) += rtc-ds1216.o @@ -47,6 +48,7 @@ obj-$(CONFIG_RTC_DRV_SUN4V) += rtc-sun4v.o obj-$(CONFIG_RTC_DRV_STARFIRE) += rtc-starfire.o obj-$(CONFIG_RTC_DRV_MAX6900) += rtc-max6900.o obj-$(CONFIG_RTC_DRV_MAX6902) += rtc-max6902.o +obj-$(CONFIG_RTC_DRV_MV) += rtc-mv.o obj-$(CONFIG_RTC_DRV_OMAP) += rtc-omap.o obj-$(CONFIG_RTC_DRV_PCF8563) += rtc-pcf8563.o obj-$(CONFIG_RTC_DRV_PCF8583) += rtc-pcf8583.o @@ -54,6 +56,7 @@ obj-$(CONFIG_RTC_DRV_PL030) += rtc-pl030.o obj-$(CONFIG_RTC_DRV_PL031) += rtc-pl031.o obj-$(CONFIG_RTC_DRV_PARISC) += rtc-parisc.o obj-$(CONFIG_RTC_DRV_PPC) += rtc-ppc.o +obj-$(CONFIG_RTC_DRV_PXA) += rtc-pxa.o obj-$(CONFIG_RTC_DRV_R9701) += rtc-r9701.o obj-$(CONFIG_RTC_DRV_RS5C313) += rtc-rs5c313.o obj-$(CONFIG_RTC_DRV_RS5C348) += rtc-rs5c348.o @@ -66,6 +69,7 @@ obj-$(CONFIG_RTC_DRV_SH) += rtc-sh.o obj-$(CONFIG_RTC_DRV_STK17TA8) += rtc-stk17ta8.o obj-$(CONFIG_RTC_DRV_TEST) += rtc-test.o obj-$(CONFIG_RTC_DRV_TWL4030) += rtc-twl4030.o +obj-$(CONFIG_RTC_DRV_TX4939) += rtc-tx4939.o obj-$(CONFIG_RTC_DRV_V3020) += rtc-v3020.o obj-$(CONFIG_RTC_DRV_VR41XX) += rtc-vr41xx.o obj-$(CONFIG_RTC_DRV_WM8350) += rtc-wm8350.o diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c index 4dfdf019fccc..be5a6b73e601 100644 --- a/drivers/rtc/class.c +++ b/drivers/rtc/class.c @@ -48,9 +48,7 @@ static int rtc_suspend(struct device *dev, pm_message_t mesg) struct rtc_time tm; struct timespec ts = current_kernel_time(); - if (strncmp(rtc->dev.bus_id, - CONFIG_RTC_HCTOSYS_DEVICE, - BUS_ID_SIZE) != 0) + if (strcmp(dev_name(&rtc->dev), CONFIG_RTC_HCTOSYS_DEVICE) != 0) return 0; rtc_read_time(rtc, &tm); @@ -71,20 +69,18 @@ static int rtc_resume(struct device *dev) time_t newtime; struct timespec time; - if (strncmp(rtc->dev.bus_id, - CONFIG_RTC_HCTOSYS_DEVICE, - BUS_ID_SIZE) != 0) + if (strcmp(dev_name(&rtc->dev), CONFIG_RTC_HCTOSYS_DEVICE) != 0) return 0; rtc_read_time(rtc, &tm); if (rtc_valid_tm(&tm) != 0) { - pr_debug("%s: bogus resume time\n", rtc->dev.bus_id); + pr_debug("%s: bogus resume time\n", dev_name(&rtc->dev)); return 0; } rtc_tm_to_time(&tm, &newtime); if (newtime <= oldtime) { if (newtime < oldtime) - pr_debug("%s: time travel!\n", rtc->dev.bus_id); + pr_debug("%s: time travel!\n", dev_name(&rtc->dev)); return 0; } @@ -156,7 +152,7 @@ struct rtc_device *rtc_device_register(const char *name, struct device *dev, init_waitqueue_head(&rtc->irq_queue); strlcpy(rtc->name, name, RTC_DEVICE_NAME_SIZE); - snprintf(rtc->dev.bus_id, BUS_ID_SIZE, "rtc%d", id); + dev_set_name(&rtc->dev, "rtc%d", id); rtc_dev_prepare(rtc); @@ -169,7 +165,7 @@ struct rtc_device *rtc_device_register(const char *name, struct device *dev, rtc_proc_add_device(rtc); dev_info(dev, "rtc core: registered %s as %s\n", - rtc->name, rtc->dev.bus_id); + rtc->name, dev_name(&rtc->dev)); return rtc; diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c index fd2c652504ff..4348c4b0d453 100644 --- a/drivers/rtc/interface.c +++ b/drivers/rtc/interface.c @@ -50,10 +50,15 @@ int rtc_set_time(struct rtc_device *rtc, struct rtc_time *tm) if (!rtc->ops) err = -ENODEV; - else if (!rtc->ops->set_time) - err = -EINVAL; - else + else if (rtc->ops->set_time) err = rtc->ops->set_time(rtc->dev.parent, tm); + else if (rtc->ops->set_mmss) { + unsigned long secs; + err = rtc_tm_to_time(tm, &secs); + if (err == 0) + err = rtc->ops->set_mmss(rtc->dev.parent, secs); + } else + err = -EINVAL; mutex_unlock(&rtc->ops_lock); return err; @@ -389,7 +394,7 @@ static int __rtc_match(struct device *dev, void *data) { char *name = (char *)data; - if (strncmp(dev->bus_id, name, BUS_ID_SIZE) == 0) + if (strcmp(dev_name(dev), name) == 0) return 1; return 0; } @@ -504,9 +509,6 @@ int rtc_irq_set_freq(struct rtc_device *rtc, struct rtc_task *task, int freq) if (rtc->ops->irq_set_freq == NULL) return -ENXIO; - if (!is_power_of_2(freq)) - return -EINVAL; - spin_lock_irqsave(&rtc->irq_task_lock, flags); if (rtc->irq_task != NULL && task == NULL) err = -EBUSY; diff --git a/drivers/rtc/rtc-at32ap700x.c b/drivers/rtc/rtc-at32ap700x.c index 90b9a6503e15..e1ec33e40e38 100644 --- a/drivers/rtc/rtc-at32ap700x.c +++ b/drivers/rtc/rtc-at32ap700x.c @@ -205,7 +205,7 @@ static int __init at32_rtc_probe(struct platform_device *pdev) { struct resource *regs; struct rtc_at32ap700x *rtc; - int irq = -1; + int irq; int ret; rtc = kzalloc(sizeof(struct rtc_at32ap700x), GFP_KERNEL); @@ -222,7 +222,7 @@ static int __init at32_rtc_probe(struct platform_device *pdev) } irq = platform_get_irq(pdev, 0); - if (irq < 0) { + if (irq <= 0) { dev_dbg(&pdev->dev, "could not get irq\n"); ret = -ENXIO; goto out; diff --git a/drivers/rtc/rtc-au1xxx.c b/drivers/rtc/rtc-au1xxx.c new file mode 100644 index 000000000000..8906a688e6a6 --- /dev/null +++ b/drivers/rtc/rtc-au1xxx.c @@ -0,0 +1,153 @@ +/* + * Au1xxx counter0 (aka Time-Of-Year counter) RTC interface driver. + * + * Copyright (C) 2008 Manuel Lauss <mano@roarinelk.homelinux.net> + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ + +/* All current Au1xxx SoCs have 2 counters fed by an external 32.768 kHz + * crystal. Counter 0, which keeps counting during sleep/powerdown, is + * used to count seconds since the beginning of the unix epoch. + * + * The counters must be configured and enabled by bootloader/board code; + * no checks as to whether they really get a proper 32.768kHz clock are + * made as this would take far too long. + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/rtc.h> +#include <linux/init.h> +#include <linux/platform_device.h> +#include <linux/io.h> +#include <asm/mach-au1x00/au1000.h> + +/* 32kHz clock enabled and detected */ +#define CNTR_OK (SYS_CNTRL_E0 | SYS_CNTRL_32S) + +static int au1xtoy_rtc_read_time(struct device *dev, struct rtc_time *tm) +{ + unsigned long t; + + t = au_readl(SYS_TOYREAD); + + rtc_time_to_tm(t, tm); + + return rtc_valid_tm(tm); +} + +static int au1xtoy_rtc_set_time(struct device *dev, struct rtc_time *tm) +{ + unsigned long t; + + rtc_tm_to_time(tm, &t); + + au_writel(t, SYS_TOYWRITE); + au_sync(); + + /* wait for the pending register write to succeed. This can + * take up to 6 seconds... + */ + while (au_readl(SYS_COUNTER_CNTRL) & SYS_CNTRL_C0S) + msleep(1); + + return 0; +} + +static struct rtc_class_ops au1xtoy_rtc_ops = { + .read_time = au1xtoy_rtc_read_time, + .set_time = au1xtoy_rtc_set_time, +}; + +static int __devinit au1xtoy_rtc_probe(struct platform_device *pdev) +{ + struct rtc_device *rtcdev; + unsigned long t; + int ret; + + t = au_readl(SYS_COUNTER_CNTRL); + if (!(t & CNTR_OK)) { + dev_err(&pdev->dev, "counters not working; aborting.\n"); + ret = -ENODEV; + goto out_err; + } + + ret = -ETIMEDOUT; + + /* set counter0 tickrate to 1Hz if necessary */ + if (au_readl(SYS_TOYTRIM) != 32767) { + /* wait until hardware gives access to TRIM register */ + t = 0x00100000; + while ((au_readl(SYS_COUNTER_CNTRL) & SYS_CNTRL_T0S) && t--) + msleep(1); + + if (!t) { + /* timed out waiting for register access; assume + * counters are unusable. + */ + dev_err(&pdev->dev, "timeout waiting for access\n"); + goto out_err; + } + + /* set 1Hz TOY tick rate */ + au_writel(32767, SYS_TOYTRIM); + au_sync(); + } + + /* wait until the hardware allows writes to the counter reg */ + while (au_readl(SYS_COUNTER_CNTRL) & SYS_CNTRL_C0S) + msleep(1); + + rtcdev = rtc_device_register("rtc-au1xxx", &pdev->dev, + &au1xtoy_rtc_ops, THIS_MODULE); + if (IS_ERR(rtcdev)) { + ret = PTR_ERR(rtcdev); + goto out_err; + } + + platform_set_drvdata(pdev, rtcdev); + + return 0; + +out_err: + return ret; +} + +static int __devexit au1xtoy_rtc_remove(struct platform_device *pdev) +{ + struct rtc_device *rtcdev = platform_get_drvdata(pdev); + + rtc_device_unregister(rtcdev); + platform_set_drvdata(pdev, NULL); + + return 0; +} + +static struct platform_driver au1xrtc_driver = { + .driver = { + .name = "rtc-au1xxx", + .owner = THIS_MODULE, + }, + .remove = __devexit_p(au1xtoy_rtc_remove), +}; + +static int __init au1xtoy_rtc_init(void) +{ + return platform_driver_probe(&au1xrtc_driver, au1xtoy_rtc_probe); +} + +static void __exit au1xtoy_rtc_exit(void) +{ + platform_driver_unregister(&au1xrtc_driver); +} + +module_init(au1xtoy_rtc_init); +module_exit(au1xtoy_rtc_exit); + +MODULE_DESCRIPTION("Au1xxx TOY-counter-based RTC driver"); +MODULE_AUTHOR("Manuel Lauss <manuel.lauss@gmail.com>"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:rtc-au1xxx"); diff --git a/drivers/rtc/rtc-bfin.c b/drivers/rtc/rtc-bfin.c index 34439ce3967e..aafd3e6ebb0d 100644 --- a/drivers/rtc/rtc-bfin.c +++ b/drivers/rtc/rtc-bfin.c @@ -390,7 +390,7 @@ static int __devinit bfin_rtc_probe(struct platform_device *pdev) /* Register our RTC with the RTC framework */ rtc->rtc_dev = rtc_device_register(pdev->name, dev, &bfin_rtc_ops, THIS_MODULE); - if (unlikely(IS_ERR(rtc))) { + if (unlikely(IS_ERR(rtc->rtc_dev))) { ret = PTR_ERR(rtc->rtc_dev); goto err_irq; } diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index 6cf8e282338f..b6d35f50e404 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -35,6 +35,7 @@ #include <linux/spinlock.h> #include <linux/platform_device.h> #include <linux/mod_devicetable.h> +#include <linux/log2.h> /* this is for "generic access to PC-style RTC" using CMOS_READ/CMOS_WRITE */ #include <asm-generic/rtc.h> @@ -58,7 +59,7 @@ struct cmos_rtc { }; /* both platform and pnp busses use negative numbers for invalid irqs */ -#define is_valid_irq(n) ((n) >= 0) +#define is_valid_irq(n) ((n) > 0) static const char driver_name[] = "rtc_cmos"; @@ -384,6 +385,8 @@ static int cmos_irq_set_freq(struct device *dev, int freq) if (!is_valid_irq(cmos->irq)) return -ENXIO; + if (!is_power_of_2(freq)) + return -EINVAL; /* 0 = no irqs; 1 = 2^15 Hz ... 15 = 2^0 Hz */ f = ffs(freq); if (f-- > 16) @@ -729,7 +732,7 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq) cmos_rtc.dev = dev; dev_set_drvdata(dev, &cmos_rtc); - rename_region(ports, cmos_rtc.rtc->dev.bus_id); + rename_region(ports, dev_name(&cmos_rtc.rtc->dev)); spin_lock_irq(&rtc_lock); @@ -777,7 +780,7 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq) rtc_cmos_int_handler = cmos_interrupt; retval = request_irq(rtc_irq, rtc_cmos_int_handler, - IRQF_DISABLED, cmos_rtc.rtc->dev.bus_id, + IRQF_DISABLED, dev_name(&cmos_rtc.rtc->dev), cmos_rtc.rtc); if (retval < 0) { dev_dbg(dev, "IRQ %d is already in use\n", rtc_irq); @@ -795,7 +798,7 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq) } pr_info("%s: alarms up to one %s%s, %zd bytes nvram%s\n", - cmos_rtc.rtc->dev.bus_id, + dev_name(&cmos_rtc.rtc->dev), is_valid_irq(rtc_irq) ? (cmos_rtc.mon_alrm ? "year" @@ -885,7 +888,7 @@ static int cmos_suspend(struct device *dev, pm_message_t mesg) } pr_debug("%s: suspend%s, ctrl %02x\n", - cmos_rtc.rtc->dev.bus_id, + dev_name(&cmos_rtc.rtc->dev), (tmp & RTC_AIE) ? ", alarm may wake" : "", tmp); @@ -941,7 +944,7 @@ static int cmos_resume(struct device *dev) } pr_debug("%s: resume, ctrl %02x\n", - cmos_rtc.rtc->dev.bus_id, + dev_name(&cmos_rtc.rtc->dev), tmp); return 0; diff --git a/drivers/rtc/rtc-ds1216.c b/drivers/rtc/rtc-ds1216.c index 9a234a4ec06d..4aedc705518c 100644 --- a/drivers/rtc/rtc-ds1216.c +++ b/drivers/rtc/rtc-ds1216.c @@ -10,7 +10,7 @@ #include <linux/platform_device.h> #include <linux/bcd.h> -#define DRV_VERSION "0.1" +#define DRV_VERSION "0.2" struct ds1216_regs { u8 tsec; @@ -101,7 +101,8 @@ static int ds1216_rtc_read_time(struct device *dev, struct rtc_time *tm) tm->tm_year = bcd2bin(regs.year); if (tm->tm_year < 70) tm->tm_year += 100; - return 0; + + return rtc_valid_tm(tm); } static int ds1216_rtc_set_time(struct device *dev, struct rtc_time *tm) @@ -138,9 +139,8 @@ static const struct rtc_class_ops ds1216_rtc_ops = { .set_time = ds1216_rtc_set_time, }; -static int __devinit ds1216_rtc_probe(struct platform_device *pdev) +static int __init ds1216_rtc_probe(struct platform_device *pdev) { - struct rtc_device *rtc; struct resource *res; struct ds1216_priv *priv; int ret = 0; @@ -152,7 +152,10 @@ static int __devinit ds1216_rtc_probe(struct platform_device *pdev) priv = kzalloc(sizeof *priv, GFP_KERNEL); if (!priv) return -ENOMEM; - priv->size = res->end - res->start + 1; + + platform_set_drvdata(pdev, priv); + + priv->size = resource_size(res); if (!request_mem_region(res->start, priv->size, pdev->name)) { ret = -EBUSY; goto out; @@ -163,22 +166,18 @@ static int __devinit ds1216_rtc_probe(struct platform_device *pdev) ret = -ENOMEM; goto out; } - rtc = rtc_device_register("ds1216", &pdev->dev, + priv->rtc = rtc_device_register("ds1216", &pdev->dev, &ds1216_rtc_ops, THIS_MODULE); - if (IS_ERR(rtc)) { - ret = PTR_ERR(rtc); + if (IS_ERR(priv->rtc)) { + ret = PTR_ERR(priv->rtc); goto out; } - priv->rtc = rtc; - platform_set_drvdata(pdev, priv); /* dummy read to get clock into a known state */ ds1216_read(priv->ioaddr, dummy); return 0; out: - if (priv->rtc) - rtc_device_unregister(priv->rtc); if (priv->ioaddr) iounmap(priv->ioaddr); if (priv->baseaddr) @@ -187,7 +186,7 @@ out: return ret; } -static int __devexit ds1216_rtc_remove(struct platform_device *pdev) +static int __exit ds1216_rtc_remove(struct platform_device *pdev) { struct ds1216_priv *priv = platform_get_drvdata(pdev); @@ -203,13 +202,12 @@ static struct platform_driver ds1216_rtc_platform_driver = { .name = "rtc-ds1216", .owner = THIS_MODULE, }, - .probe = ds1216_rtc_probe, - .remove = __devexit_p(ds1216_rtc_remove), + .remove = __exit_p(ds1216_rtc_remove), }; static int __init ds1216_rtc_init(void) { - return platform_driver_register(&ds1216_rtc_platform_driver); + return platform_driver_probe(&ds1216_rtc_platform_driver, ds1216_rtc_probe); } static void __exit ds1216_rtc_exit(void) diff --git a/drivers/rtc/rtc-ds1390.c b/drivers/rtc/rtc-ds1390.c index 599e976bf014..e54b5c619bdf 100644 --- a/drivers/rtc/rtc-ds1390.c +++ b/drivers/rtc/rtc-ds1390.c @@ -1,5 +1,5 @@ /* - * rtc-ds1390.c -- driver for DS1390/93/94 + * rtc-ds1390.c -- driver for the Dallas/Maxim DS1390/93/94 SPI RTC * * Copyright (C) 2008 Mercury IMC Ltd * Written by Mark Jackson <mpfj@mimc.co.uk> @@ -8,11 +8,13 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. * - * NOTE : Currently this driver only supports the bare minimum for read - * and write the RTC. The extra features provided by the chip family + * NOTE: Currently this driver only supports the bare minimum for read + * and write the RTC. The extra features provided by the chip family * (alarms, trickle charger, different control registers) are unavailable. */ +#include <linux/init.h> +#include <linux/module.h> #include <linux/platform_device.h> #include <linux/rtc.h> #include <linux/spi/spi.h> @@ -42,20 +44,6 @@ struct ds1390 { u8 txrx_buf[9]; /* cmd + 8 registers */ }; -static void ds1390_set_reg(struct device *dev, unsigned char address, - unsigned char data) -{ - struct spi_device *spi = to_spi_device(dev); - struct ds1390 *chip = dev_get_drvdata(dev); - - /* Set MSB to indicate write */ - chip->txrx_buf[0] = address | 0x80; - chip->txrx_buf[1] = data; - - /* do the i/o */ - spi_write_then_read(spi, chip->txrx_buf, 2, NULL, 0); -} - static int ds1390_get_reg(struct device *dev, unsigned char address, unsigned char *data) { @@ -78,7 +66,7 @@ static int ds1390_get_reg(struct device *dev, unsigned char address, return 0; } -static int ds1390_get_datetime(struct device *dev, struct rtc_time *dt) +static int ds1390_read_time(struct device *dev, struct rtc_time *dt) { struct spi_device *spi = to_spi_device(dev); struct ds1390 *chip = dev_get_drvdata(dev); @@ -107,7 +95,7 @@ static int ds1390_get_datetime(struct device *dev, struct rtc_time *dt) return rtc_valid_tm(dt); } -static int ds1390_set_datetime(struct device *dev, struct rtc_time *dt) +static int ds1390_set_time(struct device *dev, struct rtc_time *dt) { struct spi_device *spi = to_spi_device(dev); struct ds1390 *chip = dev_get_drvdata(dev); @@ -127,16 +115,6 @@ static int ds1390_set_datetime(struct device *dev, struct rtc_time *dt) return spi_write_then_read(spi, chip->txrx_buf, 8, NULL, 0); } -static int ds1390_read_time(struct device *dev, struct rtc_time *tm) -{ - return ds1390_get_datetime(dev, tm); -} - -static int ds1390_set_time(struct device *dev, struct rtc_time *tm) -{ - return ds1390_set_datetime(dev, tm); -} - static const struct rtc_class_ops ds1390_rtc_ops = { .read_time = ds1390_read_time, .set_time = ds1390_set_time, @@ -149,46 +127,40 @@ static int __devinit ds1390_probe(struct spi_device *spi) struct ds1390 *chip; int res; - printk(KERN_DEBUG "DS1390 SPI RTC driver\n"); - - rtc = rtc_device_register("ds1390", - &spi->dev, &ds1390_rtc_ops, THIS_MODULE); - if (IS_ERR(rtc)) { - printk(KERN_ALERT "RTC : unable to register device\n"); - return PTR_ERR(rtc); - } - spi->mode = SPI_MODE_3; spi->bits_per_word = 8; spi_setup(spi); chip = kzalloc(sizeof *chip, GFP_KERNEL); if (!chip) { - printk(KERN_ALERT "RTC : unable to allocate device memory\n"); - rtc_device_unregister(rtc); + dev_err(&spi->dev, "unable to allocate device memory\n"); return -ENOMEM; } - chip->rtc = rtc; dev_set_drvdata(&spi->dev, chip); res = ds1390_get_reg(&spi->dev, DS1390_REG_SECONDS, &tmp); - if (res) { - printk(KERN_ALERT "RTC : unable to read device\n"); - rtc_device_unregister(rtc); + if (res != 0) { + dev_err(&spi->dev, "unable to read device\n"); + kfree(chip); return res; } - return 0; + chip->rtc = rtc_device_register("ds1390", + &spi->dev, &ds1390_rtc_ops, THIS_MODULE); + if (IS_ERR(chip->rtc)) { + dev_err(&spi->dev, "unable to register device\n"); + res = PTR_ERR(chip->rtc); + kfree(chip); + } + + return res; } static int __devexit ds1390_remove(struct spi_device *spi) { struct ds1390 *chip = platform_get_drvdata(spi); - struct rtc_device *rtc = chip->rtc; - - if (rtc) - rtc_device_unregister(rtc); + rtc_device_unregister(chip->rtc); kfree(chip); return 0; @@ -215,6 +187,6 @@ static __exit void ds1390_exit(void) } module_exit(ds1390_exit); -MODULE_DESCRIPTION("DS1390/93/94 SPI RTC driver"); +MODULE_DESCRIPTION("Dallas/Maxim DS1390/93/94 SPI RTC driver"); MODULE_AUTHOR("Mark Jackson <mpfj@mimc.co.uk>"); MODULE_LICENSE("GPL"); diff --git a/drivers/rtc/rtc-ds1511.c b/drivers/rtc/rtc-ds1511.c index 25caada78398..23a07fe15a2c 100644 --- a/drivers/rtc/rtc-ds1511.c +++ b/drivers/rtc/rtc-ds1511.c @@ -326,9 +326,9 @@ ds1511_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) struct platform_device *pdev = to_platform_device(dev); struct rtc_plat_data *pdata = platform_get_drvdata(pdev); - if (pdata->irq < 0) { + if (pdata->irq <= 0) return -EINVAL; - } + pdata->alrm_mday = alrm->time.tm_mday; pdata->alrm_hour = alrm->time.tm_hour; pdata->alrm_min = alrm->time.tm_min; @@ -346,9 +346,9 @@ ds1511_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) struct platform_device *pdev = to_platform_device(dev); struct rtc_plat_data *pdata = platform_get_drvdata(pdev); - if (pdata->irq < 0) { + if (pdata->irq <= 0) return -EINVAL; - } + alrm->time.tm_mday = pdata->alrm_mday < 0 ? 0 : pdata->alrm_mday; alrm->time.tm_hour = pdata->alrm_hour < 0 ? 0 : pdata->alrm_hour; alrm->time.tm_min = pdata->alrm_min < 0 ? 0 : pdata->alrm_min; @@ -385,7 +385,7 @@ ds1511_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg) struct platform_device *pdev = to_platform_device(dev); struct rtc_plat_data *pdata = platform_get_drvdata(pdev); - if (pdata->irq < 0) { + if (pdata->irq <= 0) { return -ENOIOCTLCMD; /* fall back into rtc-dev's emulation */ } switch (cmd) { @@ -503,7 +503,6 @@ ds1511_rtc_probe(struct platform_device *pdev) if (!pdata) { return -ENOMEM; } - pdata->irq = -1; pdata->size = res->end - res->start + 1; if (!request_mem_region(res->start, pdata->size, pdev->name)) { ret = -EBUSY; @@ -545,13 +544,13 @@ ds1511_rtc_probe(struct platform_device *pdev) * if the platform has an interrupt in mind for this device, * then by all means, set it */ - if (pdata->irq >= 0) { + if (pdata->irq > 0) { rtc_read(RTC_CMD1); if (request_irq(pdata->irq, ds1511_interrupt, IRQF_DISABLED | IRQF_SHARED, pdev->name, pdev) < 0) { dev_warn(&pdev->dev, "interrupt not available.\n"); - pdata->irq = -1; + pdata->irq = 0; } } @@ -572,7 +571,7 @@ ds1511_rtc_probe(struct platform_device *pdev) if (pdata->rtc) { rtc_device_unregister(pdata->rtc); } - if (pdata->irq >= 0) { + if (pdata->irq > 0) { free_irq(pdata->irq, pdev); } if (ds1511_base) { @@ -595,7 +594,7 @@ ds1511_rtc_remove(struct platform_device *pdev) sysfs_remove_bin_file(&pdev->dev.kobj, &ds1511_nvram_attr); rtc_device_unregister(pdata->rtc); pdata->rtc = NULL; - if (pdata->irq >= 0) { + if (pdata->irq > 0) { /* * disable the alarm interrupt */ diff --git a/drivers/rtc/rtc-ds1553.c b/drivers/rtc/rtc-ds1553.c index b9475cd20210..38d472b63406 100644 --- a/drivers/rtc/rtc-ds1553.c +++ b/drivers/rtc/rtc-ds1553.c @@ -162,7 +162,7 @@ static int ds1553_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) struct platform_device *pdev = to_platform_device(dev); struct rtc_plat_data *pdata = platform_get_drvdata(pdev); - if (pdata->irq < 0) + if (pdata->irq <= 0) return -EINVAL; pdata->alrm_mday = alrm->time.tm_mday; pdata->alrm_hour = alrm->time.tm_hour; @@ -179,7 +179,7 @@ static int ds1553_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) struct platform_device *pdev = to_platform_device(dev); struct rtc_plat_data *pdata = platform_get_drvdata(pdev); - if (pdata->irq < 0) + if (pdata->irq <= 0) return -EINVAL; alrm->time.tm_mday = pdata->alrm_mday < 0 ? 0 : pdata->alrm_mday; alrm->time.tm_hour = pdata->alrm_hour < 0 ? 0 : pdata->alrm_hour; @@ -213,7 +213,7 @@ static int ds1553_rtc_ioctl(struct device *dev, unsigned int cmd, struct platform_device *pdev = to_platform_device(dev); struct rtc_plat_data *pdata = platform_get_drvdata(pdev); - if (pdata->irq < 0) + if (pdata->irq <= 0) return -ENOIOCTLCMD; /* fall back into rtc-dev's emulation */ switch (cmd) { case RTC_AIE_OFF: @@ -301,7 +301,6 @@ static int __devinit ds1553_rtc_probe(struct platform_device *pdev) pdata = kzalloc(sizeof(*pdata), GFP_KERNEL); if (!pdata) return -ENOMEM; - pdata->irq = -1; if (!request_mem_region(res->start, RTC_REG_SIZE, pdev->name)) { ret = -EBUSY; goto out; @@ -327,13 +326,13 @@ static int __devinit ds1553_rtc_probe(struct platform_device *pdev) if (readb(ioaddr + RTC_FLAGS) & RTC_FLAGS_BLF) dev_warn(&pdev->dev, "voltage-low detected.\n"); - if (pdata->irq >= 0) { + if (pdata->irq > 0) { writeb(0, ioaddr + RTC_INTERRUPTS); if (request_irq(pdata->irq, ds1553_rtc_interrupt, IRQF_DISABLED | IRQF_SHARED, pdev->name, pdev) < 0) { dev_warn(&pdev->dev, "interrupt not available.\n"); - pdata->irq = -1; + pdata->irq = 0; } } @@ -353,7 +352,7 @@ static int __devinit ds1553_rtc_probe(struct platform_device *pdev) out: if (pdata->rtc) rtc_device_unregister(pdata->rtc); - if (pdata->irq >= 0) + if (pdata->irq > 0) free_irq(pdata->irq, pdev); if (ioaddr) iounmap(ioaddr); @@ -369,7 +368,7 @@ static int __devexit ds1553_rtc_remove(struct platform_device *pdev) sysfs_remove_bin_file(&pdev->dev.kobj, &ds1553_nvram_attr); rtc_device_unregister(pdata->rtc); - if (pdata->irq >= 0) { + if (pdata->irq > 0) { writeb(0, pdata->ioaddr + RTC_INTERRUPTS); free_irq(pdata->irq, pdev); } diff --git a/drivers/rtc/rtc-ds1672.c b/drivers/rtc/rtc-ds1672.c index 4e91419e8911..06dfb54f99b6 100644 --- a/drivers/rtc/rtc-ds1672.c +++ b/drivers/rtc/rtc-ds1672.c @@ -83,32 +83,11 @@ static int ds1672_set_mmss(struct i2c_client *client, unsigned long secs) return 0; } -static int ds1672_set_datetime(struct i2c_client *client, struct rtc_time *tm) -{ - unsigned long secs; - - dev_dbg(&client->dev, - "%s: secs=%d, mins=%d, hours=%d, " - "mday=%d, mon=%d, year=%d, wday=%d\n", - __func__, - tm->tm_sec, tm->tm_min, tm->tm_hour, - tm->tm_mday, tm->tm_mon, tm->tm_year, tm->tm_wday); - - rtc_tm_to_time(tm, &secs); - - return ds1672_set_mmss(client, secs); -} - static int ds1672_rtc_read_time(struct device *dev, struct rtc_time *tm) { return ds1672_get_datetime(to_i2c_client(dev), tm); } -static int ds1672_rtc_set_time(struct device *dev, struct rtc_time *tm) -{ - return ds1672_set_datetime(to_i2c_client(dev), tm); -} - static int ds1672_rtc_set_mmss(struct device *dev, unsigned long secs) { return ds1672_set_mmss(to_i2c_client(dev), secs); @@ -152,7 +131,6 @@ static DEVICE_ATTR(control, S_IRUGO, show_control, NULL); static const struct rtc_class_ops ds1672_rtc_ops = { .read_time = ds1672_rtc_read_time, - .set_time = ds1672_rtc_set_time, .set_mmss = ds1672_rtc_set_mmss, }; diff --git a/drivers/rtc/rtc-ds3234.c b/drivers/rtc/rtc-ds3234.c index 45e5b106af73..c51589ede5b7 100644 --- a/drivers/rtc/rtc-ds3234.c +++ b/drivers/rtc/rtc-ds3234.c @@ -1,4 +1,4 @@ -/* drivers/rtc/rtc-ds3234.c +/* rtc-ds3234.c * * Driver for Dallas Semiconductor (DS3234) SPI RTC with Integrated Crystal * and SRAM. @@ -9,13 +9,10 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. * - * Changelog: - * - * 07-May-2008: Dennis Aberilla <denzzzhome@yahoo.com> - * - Created based on the max6902 code. Only implements the - * date/time keeping functions; no SRAM yet. */ +#include <linux/init.h> +#include <linux/module.h> #include <linux/device.h> #include <linux/platform_device.h> #include <linux/rtc.h> @@ -34,16 +31,7 @@ #define DS3234_REG_CONTROL 0x0E #define DS3234_REG_CONT_STAT 0x0F -#undef DS3234_DEBUG - -struct ds3234 { - struct rtc_device *rtc; - u8 buf[8]; /* Burst read: addr + 7 regs */ - u8 tx_buf[2]; - u8 rx_buf[2]; -}; - -static void ds3234_set_reg(struct device *dev, unsigned char address, +static int ds3234_set_reg(struct device *dev, unsigned char address, unsigned char data) { struct spi_device *spi = to_spi_device(dev); @@ -53,107 +41,45 @@ static void ds3234_set_reg(struct device *dev, unsigned char address, buf[0] = address | 0x80; buf[1] = data; - spi_write(spi, buf, 2); + return spi_write_then_read(spi, buf, 2, NULL, 0); } static int ds3234_get_reg(struct device *dev, unsigned char address, unsigned char *data) { struct spi_device *spi = to_spi_device(dev); - struct ds3234 *chip = dev_get_drvdata(dev); - struct spi_message message; - struct spi_transfer xfer; - int status; - - if (!data) - return -EINVAL; - - /* Build our spi message */ - spi_message_init(&message); - memset(&xfer, 0, sizeof(xfer)); - - /* Address + dummy tx byte */ - xfer.len = 2; - xfer.tx_buf = chip->tx_buf; - xfer.rx_buf = chip->rx_buf; - - chip->tx_buf[0] = address; - chip->tx_buf[1] = 0xff; - spi_message_add_tail(&xfer, &message); + *data = address & 0x7f; - /* do the i/o */ - status = spi_sync(spi, &message); - if (status == 0) - status = message.status; - else - return status; - - *data = chip->rx_buf[1]; - - return status; + return spi_write_then_read(spi, data, 1, data, 1); } -static int ds3234_get_datetime(struct device *dev, struct rtc_time *dt) +static int ds3234_read_time(struct device *dev, struct rtc_time *dt) { + int err; + unsigned char buf[8]; struct spi_device *spi = to_spi_device(dev); - struct ds3234 *chip = dev_get_drvdata(dev); - struct spi_message message; - struct spi_transfer xfer; - int status; - - /* build the message */ - spi_message_init(&message); - memset(&xfer, 0, sizeof(xfer)); - xfer.len = 1 + 7; /* Addr + 7 registers */ - xfer.tx_buf = chip->buf; - xfer.rx_buf = chip->buf; - chip->buf[0] = 0x00; /* Start address */ - spi_message_add_tail(&xfer, &message); - - /* do the i/o */ - status = spi_sync(spi, &message); - if (status == 0) - status = message.status; - else - return status; - /* Seconds, Minutes, Hours, Day, Date, Month, Year */ - dt->tm_sec = bcd2bin(chip->buf[1]); - dt->tm_min = bcd2bin(chip->buf[2]); - dt->tm_hour = bcd2bin(chip->buf[3] & 0x3f); - dt->tm_wday = bcd2bin(chip->buf[4]) - 1; /* 0 = Sun */ - dt->tm_mday = bcd2bin(chip->buf[5]); - dt->tm_mon = bcd2bin(chip->buf[6] & 0x1f) - 1; /* 0 = Jan */ - dt->tm_year = bcd2bin(chip->buf[7] & 0xff) + 100; /* Assume 20YY */ - -#ifdef DS3234_DEBUG - dev_dbg(dev, "\n%s : Read RTC values\n", __func__); - dev_dbg(dev, "tm_hour: %i\n", dt->tm_hour); - dev_dbg(dev, "tm_min : %i\n", dt->tm_min); - dev_dbg(dev, "tm_sec : %i\n", dt->tm_sec); - dev_dbg(dev, "tm_wday: %i\n", dt->tm_wday); - dev_dbg(dev, "tm_mday: %i\n", dt->tm_mday); - dev_dbg(dev, "tm_mon : %i\n", dt->tm_mon); - dev_dbg(dev, "tm_year: %i\n", dt->tm_year); -#endif + buf[0] = 0x00; /* Start address */ - return 0; + err = spi_write_then_read(spi, buf, 1, buf, 8); + if (err != 0) + return err; + + /* Seconds, Minutes, Hours, Day, Date, Month, Year */ + dt->tm_sec = bcd2bin(buf[0]); + dt->tm_min = bcd2bin(buf[1]); + dt->tm_hour = bcd2bin(buf[2] & 0x3f); + dt->tm_wday = bcd2bin(buf[3]) - 1; /* 0 = Sun */ + dt->tm_mday = bcd2bin(buf[4]); + dt->tm_mon = bcd2bin(buf[5] & 0x1f) - 1; /* 0 = Jan */ + dt->tm_year = bcd2bin(buf[6] & 0xff) + 100; /* Assume 20YY */ + + return rtc_valid_tm(dt); } -static int ds3234_set_datetime(struct device *dev, struct rtc_time *dt) +static int ds3234_set_time(struct device *dev, struct rtc_time *dt) { -#ifdef DS3234_DEBUG - dev_dbg(dev, "\n%s : Setting RTC values\n", __func__); - dev_dbg(dev, "tm_sec : %i\n", dt->tm_sec); - dev_dbg(dev, "tm_min : %i\n", dt->tm_min); - dev_dbg(dev, "tm_hour: %i\n", dt->tm_hour); - dev_dbg(dev, "tm_wday: %i\n", dt->tm_wday); - dev_dbg(dev, "tm_mday: %i\n", dt->tm_mday); - dev_dbg(dev, "tm_mon : %i\n", dt->tm_mon); - dev_dbg(dev, "tm_year: %i\n", dt->tm_year); -#endif - ds3234_set_reg(dev, DS3234_REG_SECONDS, bin2bcd(dt->tm_sec)); ds3234_set_reg(dev, DS3234_REG_MINUTES, bin2bcd(dt->tm_min)); ds3234_set_reg(dev, DS3234_REG_HOURS, bin2bcd(dt->tm_hour) & 0x3f); @@ -174,16 +100,6 @@ static int ds3234_set_datetime(struct device *dev, struct rtc_time *dt) return 0; } -static int ds3234_read_time(struct device *dev, struct rtc_time *tm) -{ - return ds3234_get_datetime(dev, tm); -} - -static int ds3234_set_time(struct device *dev, struct rtc_time *tm) -{ - return ds3234_set_datetime(dev, tm); -} - static const struct rtc_class_ops ds3234_rtc_ops = { .read_time = ds3234_read_time, .set_time = ds3234_set_time, @@ -193,31 +109,15 @@ static int __devinit ds3234_probe(struct spi_device *spi) { struct rtc_device *rtc; unsigned char tmp; - struct ds3234 *chip; int res; - rtc = rtc_device_register("ds3234", - &spi->dev, &ds3234_rtc_ops, THIS_MODULE); - if (IS_ERR(rtc)) - return PTR_ERR(rtc); - spi->mode = SPI_MODE_3; spi->bits_per_word = 8; spi_setup(spi); - chip = kzalloc(sizeof(struct ds3234), GFP_KERNEL); - if (!chip) { - rtc_device_unregister(rtc); - return -ENOMEM; - } - chip->rtc = rtc; - dev_set_drvdata(&spi->dev, chip); - res = ds3234_get_reg(&spi->dev, DS3234_REG_SECONDS, &tmp); - if (res) { - rtc_device_unregister(rtc); + if (res != 0) return res; - } /* Control settings * @@ -246,26 +146,27 @@ static int __devinit ds3234_probe(struct spi_device *spi) ds3234_get_reg(&spi->dev, DS3234_REG_CONT_STAT, &tmp); dev_info(&spi->dev, "Ctrl/Stat Reg: 0x%02x\n", tmp); + rtc = rtc_device_register("ds3234", + &spi->dev, &ds3234_rtc_ops, THIS_MODULE); + if (IS_ERR(rtc)) + return PTR_ERR(rtc); + + dev_set_drvdata(&spi->dev, rtc); + return 0; } static int __devexit ds3234_remove(struct spi_device *spi) { - struct ds3234 *chip = platform_get_drvdata(spi); - struct rtc_device *rtc = chip->rtc; - - if (rtc) - rtc_device_unregister(rtc); - - kfree(chip); + struct rtc_device *rtc = platform_get_drvdata(spi); + rtc_device_unregister(rtc); return 0; } static struct spi_driver ds3234_driver = { .driver = { .name = "ds3234", - .bus = &spi_bus_type, .owner = THIS_MODULE, }, .probe = ds3234_probe, @@ -274,7 +175,6 @@ static struct spi_driver ds3234_driver = { static __init int ds3234_init(void) { - printk(KERN_INFO "DS3234 SPI RTC Driver\n"); return spi_register_driver(&ds3234_driver); } module_init(ds3234_init); diff --git a/drivers/rtc/rtc-ep93xx.c b/drivers/rtc/rtc-ep93xx.c index 36e4ac0bd69c..f7a3283dd029 100644 --- a/drivers/rtc/rtc-ep93xx.c +++ b/drivers/rtc/rtc-ep93xx.c @@ -49,18 +49,6 @@ static int ep93xx_rtc_set_mmss(struct device *dev, unsigned long secs) return 0; } -static int ep93xx_rtc_set_time(struct device *dev, struct rtc_time *tm) -{ - int err; - unsigned long secs; - - err = rtc_tm_to_time(tm, &secs); - if (err != 0) - return err; - - return ep93xx_rtc_set_mmss(dev, secs); -} - static int ep93xx_rtc_proc(struct device *dev, struct seq_file *seq) { unsigned short preload, delete; @@ -75,7 +63,6 @@ static int ep93xx_rtc_proc(struct device *dev, struct seq_file *seq) static const struct rtc_class_ops ep93xx_rtc_ops = { .read_time = ep93xx_rtc_read_time, - .set_time = ep93xx_rtc_set_time, .set_mmss = ep93xx_rtc_set_mmss, .proc = ep93xx_rtc_proc, }; diff --git a/drivers/rtc/rtc-m48t59.c b/drivers/rtc/rtc-m48t59.c index 43afb7ab5289..33921a6b1707 100644 --- a/drivers/rtc/rtc-m48t59.c +++ b/drivers/rtc/rtc-m48t59.c @@ -450,7 +450,7 @@ static int __devinit m48t59_rtc_probe(struct platform_device *pdev) * the mode without IRQ. */ m48t59->irq = platform_get_irq(pdev, 0); - if (m48t59->irq < 0) + if (m48t59->irq <= 0) m48t59->irq = NO_IRQ; if (m48t59->irq != NO_IRQ) { diff --git a/drivers/rtc/rtc-max6902.c b/drivers/rtc/rtc-max6902.c index 2f6507df7b49..36a8ea9ed8ba 100644 --- a/drivers/rtc/rtc-max6902.c +++ b/drivers/rtc/rtc-max6902.c @@ -9,14 +9,6 @@ * * Driver for MAX6902 spi RTC * - * Changelog: - * - * 24-May-2006: Raphael Assenat <raph@8d.com> - * - Major rework - * Converted to rtc_device and uses the SPI layer. - * - * ??-???-2005: Someone at Compulab - * - Initial driver creation. */ #include <linux/module.h> @@ -26,7 +18,6 @@ #include <linux/rtc.h> #include <linux/spi/spi.h> #include <linux/bcd.h> -#include <linux/delay.h> #define MAX6902_REG_SECONDS 0x01 #define MAX6902_REG_MINUTES 0x03 @@ -38,16 +29,7 @@ #define MAX6902_REG_CONTROL 0x0F #define MAX6902_REG_CENTURY 0x13 -#undef MAX6902_DEBUG - -struct max6902 { - struct rtc_device *rtc; - u8 buf[9]; /* Burst read cmd + 8 registers */ - u8 tx_buf[2]; - u8 rx_buf[2]; -}; - -static void max6902_set_reg(struct device *dev, unsigned char address, +static int max6902_set_reg(struct device *dev, unsigned char address, unsigned char data) { struct spi_device *spi = to_spi_device(dev); @@ -57,113 +39,58 @@ static void max6902_set_reg(struct device *dev, unsigned char address, buf[0] = address & 0x7f; buf[1] = data; - spi_write(spi, buf, 2); + return spi_write_then_read(spi, buf, 2, NULL, 0); } static int max6902_get_reg(struct device *dev, unsigned char address, unsigned char *data) { struct spi_device *spi = to_spi_device(dev); - struct max6902 *chip = dev_get_drvdata(dev); - struct spi_message message; - struct spi_transfer xfer; - int status; - - if (!data) - return -EINVAL; - - /* Build our spi message */ - spi_message_init(&message); - memset(&xfer, 0, sizeof(xfer)); - xfer.len = 2; - /* Can tx_buf and rx_buf be equal? The doc in spi.h is not sure... */ - xfer.tx_buf = chip->tx_buf; - xfer.rx_buf = chip->rx_buf; /* Set MSB to indicate read */ - chip->tx_buf[0] = address | 0x80; - - spi_message_add_tail(&xfer, &message); + *data = address | 0x80; - /* do the i/o */ - status = spi_sync(spi, &message); - - if (status == 0) - *data = chip->rx_buf[1]; - return status; + return spi_write_then_read(spi, data, 1, data, 1); } -static int max6902_get_datetime(struct device *dev, struct rtc_time *dt) +static int max6902_read_time(struct device *dev, struct rtc_time *dt) { - unsigned char tmp; - int century; - int err; + int err, century; struct spi_device *spi = to_spi_device(dev); - struct max6902 *chip = dev_get_drvdata(dev); - struct spi_message message; - struct spi_transfer xfer; - int status; + unsigned char buf[8]; - err = max6902_get_reg(dev, MAX6902_REG_CENTURY, &tmp); - if (err) - return err; - - /* build the message */ - spi_message_init(&message); - memset(&xfer, 0, sizeof(xfer)); - xfer.len = 1 + 7; /* Burst read command + 7 registers */ - xfer.tx_buf = chip->buf; - xfer.rx_buf = chip->buf; - chip->buf[0] = 0xbf; /* Burst read */ - spi_message_add_tail(&xfer, &message); + buf[0] = 0xbf; /* Burst read */ - /* do the i/o */ - status = spi_sync(spi, &message); - if (status) - return status; + err = spi_write_then_read(spi, buf, 1, buf, 8); + if (err != 0) + return err; /* The chip sends data in this order: * Seconds, Minutes, Hours, Date, Month, Day, Year */ - dt->tm_sec = bcd2bin(chip->buf[1]); - dt->tm_min = bcd2bin(chip->buf[2]); - dt->tm_hour = bcd2bin(chip->buf[3]); - dt->tm_mday = bcd2bin(chip->buf[4]); - dt->tm_mon = bcd2bin(chip->buf[5]) - 1; - dt->tm_wday = bcd2bin(chip->buf[6]); - dt->tm_year = bcd2bin(chip->buf[7]); + dt->tm_sec = bcd2bin(buf[0]); + dt->tm_min = bcd2bin(buf[1]); + dt->tm_hour = bcd2bin(buf[2]); + dt->tm_mday = bcd2bin(buf[3]); + dt->tm_mon = bcd2bin(buf[4]) - 1; + dt->tm_wday = bcd2bin(buf[5]); + dt->tm_year = bcd2bin(buf[6]); + + /* Read century */ + err = max6902_get_reg(dev, MAX6902_REG_CENTURY, &buf[0]); + if (err != 0) + return err; - century = bcd2bin(tmp) * 100; + century = bcd2bin(buf[0]) * 100; dt->tm_year += century; dt->tm_year -= 1900; -#ifdef MAX6902_DEBUG - printk("\n%s : Read RTC values\n",__func__); - printk("tm_hour: %i\n",dt->tm_hour); - printk("tm_min : %i\n",dt->tm_min); - printk("tm_sec : %i\n",dt->tm_sec); - printk("tm_year: %i\n",dt->tm_year); - printk("tm_mon : %i\n",dt->tm_mon); - printk("tm_mday: %i\n",dt->tm_mday); - printk("tm_wday: %i\n",dt->tm_wday); -#endif - - return 0; + return rtc_valid_tm(dt); } -static int max6902_set_datetime(struct device *dev, struct rtc_time *dt) +static int max6902_set_time(struct device *dev, struct rtc_time *dt) { - dt->tm_year = dt->tm_year+1900; - -#ifdef MAX6902_DEBUG - printk("\n%s : Setting RTC values\n",__func__); - printk("tm_sec : %i\n",dt->tm_sec); - printk("tm_min : %i\n",dt->tm_min); - printk("tm_hour: %i\n",dt->tm_hour); - printk("tm_mday: %i\n",dt->tm_mday); - printk("tm_wday: %i\n",dt->tm_wday); - printk("tm_year: %i\n",dt->tm_year); -#endif + dt->tm_year = dt->tm_year + 1900; /* Remove write protection */ max6902_set_reg(dev, 0xF, 0); @@ -173,10 +100,10 @@ static int max6902_set_datetime(struct device *dev, struct rtc_time *dt) max6902_set_reg(dev, 0x05, bin2bcd(dt->tm_hour)); max6902_set_reg(dev, 0x07, bin2bcd(dt->tm_mday)); - max6902_set_reg(dev, 0x09, bin2bcd(dt->tm_mon+1)); + max6902_set_reg(dev, 0x09, bin2bcd(dt->tm_mon + 1)); max6902_set_reg(dev, 0x0B, bin2bcd(dt->tm_wday)); - max6902_set_reg(dev, 0x0D, bin2bcd(dt->tm_year%100)); - max6902_set_reg(dev, 0x13, bin2bcd(dt->tm_year/100)); + max6902_set_reg(dev, 0x0D, bin2bcd(dt->tm_year % 100)); + max6902_set_reg(dev, 0x13, bin2bcd(dt->tm_year / 100)); /* Compulab used a delay here. However, the datasheet * does not mention a delay being required anywhere... */ @@ -188,16 +115,6 @@ static int max6902_set_datetime(struct device *dev, struct rtc_time *dt) return 0; } -static int max6902_read_time(struct device *dev, struct rtc_time *tm) -{ - return max6902_get_datetime(dev, tm); -} - -static int max6902_set_time(struct device *dev, struct rtc_time *tm) -{ - return max6902_set_datetime(dev, tm); -} - static const struct rtc_class_ops max6902_rtc_ops = { .read_time = max6902_read_time, .set_time = max6902_set_time, @@ -207,45 +124,29 @@ static int __devinit max6902_probe(struct spi_device *spi) { struct rtc_device *rtc; unsigned char tmp; - struct max6902 *chip; int res; - rtc = rtc_device_register("max6902", - &spi->dev, &max6902_rtc_ops, THIS_MODULE); - if (IS_ERR(rtc)) - return PTR_ERR(rtc); - spi->mode = SPI_MODE_3; spi->bits_per_word = 8; spi_setup(spi); - chip = kzalloc(sizeof *chip, GFP_KERNEL); - if (!chip) { - rtc_device_unregister(rtc); - return -ENOMEM; - } - chip->rtc = rtc; - dev_set_drvdata(&spi->dev, chip); - res = max6902_get_reg(&spi->dev, MAX6902_REG_SECONDS, &tmp); - if (res) { - rtc_device_unregister(rtc); + if (res != 0) return res; - } + + rtc = rtc_device_register("max6902", + &spi->dev, &max6902_rtc_ops, THIS_MODULE); + if (IS_ERR(rtc)) + return PTR_ERR(rtc); return 0; } static int __devexit max6902_remove(struct spi_device *spi) { - struct max6902 *chip = platform_get_drvdata(spi); - struct rtc_device *rtc = chip->rtc; - - if (rtc) - rtc_device_unregister(rtc); - - kfree(chip); + struct rtc_device *rtc = platform_get_drvdata(spi); + rtc_device_unregister(rtc); return 0; } @@ -261,7 +162,6 @@ static struct spi_driver max6902_driver = { static __init int max6902_init(void) { - printk("max6902 spi driver\n"); return spi_register_driver(&max6902_driver); } module_init(max6902_init); diff --git a/drivers/rtc/rtc-mv.c b/drivers/rtc/rtc-mv.c new file mode 100644 index 000000000000..45f12dcd3716 --- /dev/null +++ b/drivers/rtc/rtc-mv.c @@ -0,0 +1,163 @@ +/* + * Driver for the RTC in Marvell SoCs. + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + */ + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/rtc.h> +#include <linux/bcd.h> +#include <linux/io.h> +#include <linux/platform_device.h> + + +#define RTC_TIME_REG_OFFS 0 +#define RTC_SECONDS_OFFS 0 +#define RTC_MINUTES_OFFS 8 +#define RTC_HOURS_OFFS 16 +#define RTC_WDAY_OFFS 24 +#define RTC_HOURS_12H_MODE (1 << 22) /* 12 hours mode */ + +#define RTC_DATE_REG_OFFS 4 +#define RTC_MDAY_OFFS 0 +#define RTC_MONTH_OFFS 8 +#define RTC_YEAR_OFFS 16 + + +struct rtc_plat_data { + struct rtc_device *rtc; + void __iomem *ioaddr; +}; + +static int mv_rtc_set_time(struct device *dev, struct rtc_time *tm) +{ + struct rtc_plat_data *pdata = dev_get_drvdata(dev); + void __iomem *ioaddr = pdata->ioaddr; + u32 rtc_reg; + + rtc_reg = (bin2bcd(tm->tm_sec) << RTC_SECONDS_OFFS) | + (bin2bcd(tm->tm_min) << RTC_MINUTES_OFFS) | + (bin2bcd(tm->tm_hour) << RTC_HOURS_OFFS) | + (bin2bcd(tm->tm_wday) << RTC_WDAY_OFFS); + writel(rtc_reg, ioaddr + RTC_TIME_REG_OFFS); + + rtc_reg = (bin2bcd(tm->tm_mday) << RTC_MDAY_OFFS) | + (bin2bcd(tm->tm_mon + 1) << RTC_MONTH_OFFS) | + (bin2bcd(tm->tm_year % 100) << RTC_YEAR_OFFS); + writel(rtc_reg, ioaddr + RTC_DATE_REG_OFFS); + + return 0; +} + +static int mv_rtc_read_time(struct device *dev, struct rtc_time *tm) +{ + struct rtc_plat_data *pdata = dev_get_drvdata(dev); + void __iomem *ioaddr = pdata->ioaddr; + u32 rtc_time, rtc_date; + unsigned int year, month, day, hour, minute, second, wday; + + rtc_time = readl(ioaddr + RTC_TIME_REG_OFFS); + rtc_date = readl(ioaddr + RTC_DATE_REG_OFFS); + + second = rtc_time & 0x7f; + minute = (rtc_time >> RTC_MINUTES_OFFS) & 0x7f; + hour = (rtc_time >> RTC_HOURS_OFFS) & 0x3f; /* assume 24 hours mode */ + wday = (rtc_time >> RTC_WDAY_OFFS) & 0x7; + + day = rtc_date & 0x3f; + month = (rtc_date >> RTC_MONTH_OFFS) & 0x3f; + year = (rtc_date >> RTC_YEAR_OFFS) & 0xff; + + tm->tm_sec = bcd2bin(second); + tm->tm_min = bcd2bin(minute); + tm->tm_hour = bcd2bin(hour); + tm->tm_mday = bcd2bin(day); + tm->tm_wday = bcd2bin(wday); + tm->tm_mon = bcd2bin(month) - 1; + /* hw counts from year 2000, but tm_year is relative to 1900 */ + tm->tm_year = bcd2bin(year) + 100; + + return rtc_valid_tm(tm); +} + +static const struct rtc_class_ops mv_rtc_ops = { + .read_time = mv_rtc_read_time, + .set_time = mv_rtc_set_time, +}; + +static int __init mv_rtc_probe(struct platform_device *pdev) +{ + struct resource *res; + struct rtc_plat_data *pdata; + resource_size_t size; + u32 rtc_time; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + return -ENODEV; + + pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL); + if (!pdata) + return -ENOMEM; + + size = resource_size(res); + if (!devm_request_mem_region(&pdev->dev, res->start, size, + pdev->name)) + return -EBUSY; + + pdata->ioaddr = devm_ioremap(&pdev->dev, res->start, size); + if (!pdata->ioaddr) + return -ENOMEM; + + /* make sure the 24 hours mode is enabled */ + rtc_time = readl(pdata->ioaddr + RTC_TIME_REG_OFFS); + if (rtc_time & RTC_HOURS_12H_MODE) { + dev_err(&pdev->dev, "24 Hours mode not supported.\n"); + return -EINVAL; + } + + platform_set_drvdata(pdev, pdata); + pdata->rtc = rtc_device_register(pdev->name, &pdev->dev, + &mv_rtc_ops, THIS_MODULE); + if (IS_ERR(pdata->rtc)) + return PTR_ERR(pdata->rtc); + + return 0; +} + +static int __exit mv_rtc_remove(struct platform_device *pdev) +{ + struct rtc_plat_data *pdata = platform_get_drvdata(pdev); + + rtc_device_unregister(pdata->rtc); + return 0; +} + +static struct platform_driver mv_rtc_driver = { + .remove = __exit_p(mv_rtc_remove), + .driver = { + .name = "rtc-mv", + .owner = THIS_MODULE, + }, +}; + +static __init int mv_init(void) +{ + return platform_driver_probe(&mv_rtc_driver, mv_rtc_probe); +} + +static __exit void mv_exit(void) +{ + platform_driver_unregister(&mv_rtc_driver); +} + +module_init(mv_init); +module_exit(mv_exit); + +MODULE_AUTHOR("Saeed Bishara <saeed@marvell.com>"); +MODULE_DESCRIPTION("Marvell RTC driver"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:rtc-mv"); diff --git a/drivers/rtc/rtc-pxa.c b/drivers/rtc/rtc-pxa.c new file mode 100644 index 000000000000..cc7eb8767b82 --- /dev/null +++ b/drivers/rtc/rtc-pxa.c @@ -0,0 +1,489 @@ +/* + * Real Time Clock interface for XScale PXA27x and PXA3xx + * + * Copyright (C) 2008 Robert Jarzmik + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include <linux/init.h> +#include <linux/platform_device.h> +#include <linux/module.h> +#include <linux/rtc.h> +#include <linux/seq_file.h> +#include <linux/interrupt.h> +#include <linux/io.h> + +#define TIMER_FREQ CLOCK_TICK_RATE +#define RTC_DEF_DIVIDER (32768 - 1) +#define RTC_DEF_TRIM 0 +#define MAXFREQ_PERIODIC 1000 + +/* + * PXA Registers and bits definitions + */ +#define RTSR_PICE (1 << 15) /* Periodic interrupt count enable */ +#define RTSR_PIALE (1 << 14) /* Periodic interrupt Alarm enable */ +#define RTSR_PIAL (1 << 13) /* Periodic interrupt detected */ +#define RTSR_SWALE2 (1 << 11) /* RTC stopwatch alarm2 enable */ +#define RTSR_SWAL2 (1 << 10) /* RTC stopwatch alarm2 detected */ +#define RTSR_SWALE1 (1 << 9) /* RTC stopwatch alarm1 enable */ +#define RTSR_SWAL1 (1 << 8) /* RTC stopwatch alarm1 detected */ +#define RTSR_RDALE2 (1 << 7) /* RTC alarm2 enable */ +#define RTSR_RDAL2 (1 << 6) /* RTC alarm2 detected */ +#define RTSR_RDALE1 (1 << 5) /* RTC alarm1 enable */ +#define RTSR_RDAL1 (1 << 4) /* RTC alarm1 detected */ +#define RTSR_HZE (1 << 3) /* HZ interrupt enable */ +#define RTSR_ALE (1 << 2) /* RTC alarm interrupt enable */ +#define RTSR_HZ (1 << 1) /* HZ rising-edge detected */ +#define RTSR_AL (1 << 0) /* RTC alarm detected */ +#define RTSR_TRIG_MASK (RTSR_AL | RTSR_HZ | RTSR_RDAL1 | RTSR_RDAL2\ + | RTSR_SWAL1 | RTSR_SWAL2) +#define RYxR_YEAR_S 9 +#define RYxR_YEAR_MASK (0xfff << RYxR_YEAR_S) +#define RYxR_MONTH_S 5 +#define RYxR_MONTH_MASK (0xf << RYxR_MONTH_S) +#define RYxR_DAY_MASK 0x1f +#define RDxR_HOUR_S 12 +#define RDxR_HOUR_MASK (0x1f << RDxR_HOUR_S) +#define RDxR_MIN_S 6 +#define RDxR_MIN_MASK (0x3f << RDxR_MIN_S) +#define RDxR_SEC_MASK 0x3f + +#define RTSR 0x08 +#define RTTR 0x0c +#define RDCR 0x10 +#define RYCR 0x14 +#define RDAR1 0x18 +#define RYAR1 0x1c +#define RTCPICR 0x34 +#define PIAR 0x38 + +#define rtc_readl(pxa_rtc, reg) \ + __raw_readl((pxa_rtc)->base + (reg)) +#define rtc_writel(pxa_rtc, reg, value) \ + __raw_writel((value), (pxa_rtc)->base + (reg)) + +struct pxa_rtc { + struct resource *ress; + void __iomem *base; + int irq_1Hz; + int irq_Alrm; + struct rtc_device *rtc; + spinlock_t lock; /* Protects this structure */ + struct rtc_time rtc_alarm; +}; + +static u32 ryxr_calc(struct rtc_time *tm) +{ + return ((tm->tm_year + 1900) << RYxR_YEAR_S) + | ((tm->tm_mon + 1) << RYxR_MONTH_S) + | tm->tm_mday; +} + +static u32 rdxr_calc(struct rtc_time *tm) +{ + return (tm->tm_hour << RDxR_HOUR_S) | (tm->tm_min << RDxR_MIN_S) + | tm->tm_sec; +} + +static void tm_calc(u32 rycr, u32 rdcr, struct rtc_time *tm) +{ + tm->tm_year = ((rycr & RYxR_YEAR_MASK) >> RYxR_YEAR_S) - 1900; + tm->tm_mon = (((rycr & RYxR_MONTH_MASK) >> RYxR_MONTH_S)) - 1; + tm->tm_mday = (rycr & RYxR_DAY_MASK); + tm->tm_hour = (rdcr & RDxR_HOUR_MASK) >> RDxR_HOUR_S; + tm->tm_min = (rdcr & RDxR_MIN_MASK) >> RDxR_MIN_S; + tm->tm_sec = rdcr & RDxR_SEC_MASK; +} + +static void rtsr_clear_bits(struct pxa_rtc *pxa_rtc, u32 mask) +{ + u32 rtsr; + + rtsr = rtc_readl(pxa_rtc, RTSR); + rtsr &= ~RTSR_TRIG_MASK; + rtsr &= ~mask; + rtc_writel(pxa_rtc, RTSR, rtsr); +} + +static void rtsr_set_bits(struct pxa_rtc *pxa_rtc, u32 mask) +{ + u32 rtsr; + + rtsr = rtc_readl(pxa_rtc, RTSR); + rtsr &= ~RTSR_TRIG_MASK; + rtsr |= mask; + rtc_writel(pxa_rtc, RTSR, rtsr); +} + +static irqreturn_t pxa_rtc_irq(int irq, void *dev_id) +{ + struct platform_device *pdev = to_platform_device(dev_id); + struct pxa_rtc *pxa_rtc = platform_get_drvdata(pdev); + u32 rtsr; + unsigned long events = 0; + + spin_lock(&pxa_rtc->lock); + + /* clear interrupt sources */ + rtsr = rtc_readl(pxa_rtc, RTSR); + rtc_writel(pxa_rtc, RTSR, rtsr); + + /* temporary disable rtc interrupts */ + rtsr_clear_bits(pxa_rtc, RTSR_RDALE1 | RTSR_PIALE | RTSR_HZE); + + /* clear alarm interrupt if it has occurred */ + if (rtsr & RTSR_RDAL1) + rtsr &= ~RTSR_RDALE1; + + /* update irq data & counter */ + if (rtsr & RTSR_RDAL1) + events |= RTC_AF | RTC_IRQF; + if (rtsr & RTSR_HZ) + events |= RTC_UF | RTC_IRQF; + if (rtsr & RTSR_PIAL) + events |= RTC_PF | RTC_IRQF; + + rtc_update_irq(pxa_rtc->rtc, 1, events); + + /* enable back rtc interrupts */ + rtc_writel(pxa_rtc, RTSR, rtsr & ~RTSR_TRIG_MASK); + + spin_unlock(&pxa_rtc->lock); + return IRQ_HANDLED; +} + +static int pxa_rtc_open(struct device *dev) +{ + struct pxa_rtc *pxa_rtc = dev_get_drvdata(dev); + int ret; + + ret = request_irq(pxa_rtc->irq_1Hz, pxa_rtc_irq, IRQF_DISABLED, + "rtc 1Hz", dev); + if (ret < 0) { + dev_err(dev, "can't get irq %i, err %d\n", pxa_rtc->irq_1Hz, + ret); + goto err_irq_1Hz; + } + ret = request_irq(pxa_rtc->irq_Alrm, pxa_rtc_irq, IRQF_DISABLED, + "rtc Alrm", dev); + if (ret < 0) { + dev_err(dev, "can't get irq %i, err %d\n", pxa_rtc->irq_Alrm, + ret); + goto err_irq_Alrm; + } + + return 0; + +err_irq_Alrm: + free_irq(pxa_rtc->irq_1Hz, dev); +err_irq_1Hz: + return ret; +} + +static void pxa_rtc_release(struct device *dev) +{ + struct pxa_rtc *pxa_rtc = dev_get_drvdata(dev); + + spin_lock_irq(&pxa_rtc->lock); + rtsr_clear_bits(pxa_rtc, RTSR_PIALE | RTSR_RDALE1 | RTSR_HZE); + spin_unlock_irq(&pxa_rtc->lock); + + free_irq(pxa_rtc->irq_Alrm, dev); + free_irq(pxa_rtc->irq_1Hz, dev); +} + +static int pxa_periodic_irq_set_freq(struct device *dev, int freq) +{ + struct pxa_rtc *pxa_rtc = dev_get_drvdata(dev); + int period_ms; + + if (freq < 1 || freq > MAXFREQ_PERIODIC) + return -EINVAL; + + period_ms = 1000 / freq; + rtc_writel(pxa_rtc, PIAR, period_ms); + + return 0; +} + +static int pxa_periodic_irq_set_state(struct device *dev, int enabled) +{ + struct pxa_rtc *pxa_rtc = dev_get_drvdata(dev); + + if (enabled) + rtsr_set_bits(pxa_rtc, RTSR_PIALE | RTSR_PICE); + else + rtsr_clear_bits(pxa_rtc, RTSR_PIALE | RTSR_PICE); + + return 0; +} + +static int pxa_rtc_ioctl(struct device *dev, unsigned int cmd, + unsigned long arg) +{ + struct pxa_rtc *pxa_rtc = dev_get_drvdata(dev); + int ret = 0; + + spin_lock_irq(&pxa_rtc->lock); + switch (cmd) { + case RTC_AIE_OFF: + rtsr_clear_bits(pxa_rtc, RTSR_RDALE1); + break; + case RTC_AIE_ON: + rtsr_set_bits(pxa_rtc, RTSR_RDALE1); + break; + case RTC_UIE_OFF: + rtsr_clear_bits(pxa_rtc, RTSR_HZE); + break; + case RTC_UIE_ON: + rtsr_set_bits(pxa_rtc, RTSR_HZE); + break; + default: + ret = -ENOIOCTLCMD; + } + + spin_unlock_irq(&pxa_rtc->lock); + return ret; +} + +static int pxa_rtc_read_time(struct device *dev, struct rtc_time *tm) +{ + struct pxa_rtc *pxa_rtc = dev_get_drvdata(dev); + u32 rycr, rdcr; + + rycr = rtc_readl(pxa_rtc, RYCR); + rdcr = rtc_readl(pxa_rtc, RDCR); + + tm_calc(rycr, rdcr, tm); + return 0; +} + +static int pxa_rtc_set_time(struct device *dev, struct rtc_time *tm) +{ + struct pxa_rtc *pxa_rtc = dev_get_drvdata(dev); + + rtc_writel(pxa_rtc, RYCR, ryxr_calc(tm)); + rtc_writel(pxa_rtc, RDCR, rdxr_calc(tm)); + + return 0; +} + +static int pxa_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) +{ + struct pxa_rtc *pxa_rtc = dev_get_drvdata(dev); + u32 rtsr, ryar, rdar; + + ryar = rtc_readl(pxa_rtc, RYAR1); + rdar = rtc_readl(pxa_rtc, RDAR1); + tm_calc(ryar, rdar, &alrm->time); + + rtsr = rtc_readl(pxa_rtc, RTSR); + alrm->enabled = (rtsr & RTSR_RDALE1) ? 1 : 0; + alrm->pending = (rtsr & RTSR_RDAL1) ? 1 : 0; + return 0; +} + +static int pxa_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) +{ + struct pxa_rtc *pxa_rtc = dev_get_drvdata(dev); + u32 rtsr; + + spin_lock_irq(&pxa_rtc->lock); + + rtc_writel(pxa_rtc, RYAR1, ryxr_calc(&alrm->time)); + rtc_writel(pxa_rtc, RDAR1, rdxr_calc(&alrm->time)); + + rtsr = rtc_readl(pxa_rtc, RTSR); + if (alrm->enabled) + rtsr |= RTSR_RDALE1; + else + rtsr &= ~RTSR_RDALE1; + rtc_writel(pxa_rtc, RTSR, rtsr); + + spin_unlock_irq(&pxa_rtc->lock); + + return 0; +} + +static int pxa_rtc_proc(struct device *dev, struct seq_file *seq) +{ + struct pxa_rtc *pxa_rtc = dev_get_drvdata(dev); + + seq_printf(seq, "trim/divider\t: 0x%08x\n", rtc_readl(pxa_rtc, RTTR)); + seq_printf(seq, "update_IRQ\t: %s\n", + (rtc_readl(pxa_rtc, RTSR) & RTSR_HZE) ? "yes" : "no"); + seq_printf(seq, "periodic_IRQ\t: %s\n", + (rtc_readl(pxa_rtc, RTSR) & RTSR_PIALE) ? "yes" : "no"); + seq_printf(seq, "periodic_freq\t: %u\n", rtc_readl(pxa_rtc, PIAR)); + + return 0; +} + +static const struct rtc_class_ops pxa_rtc_ops = { + .open = pxa_rtc_open, + .release = pxa_rtc_release, + .ioctl = pxa_rtc_ioctl, + .read_time = pxa_rtc_read_time, + .set_time = pxa_rtc_set_time, + .read_alarm = pxa_rtc_read_alarm, + .set_alarm = pxa_rtc_set_alarm, + .proc = pxa_rtc_proc, + .irq_set_state = pxa_periodic_irq_set_state, + .irq_set_freq = pxa_periodic_irq_set_freq, +}; + +static int __init pxa_rtc_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct pxa_rtc *pxa_rtc; + int ret; + u32 rttr; + + pxa_rtc = kzalloc(sizeof(struct pxa_rtc), GFP_KERNEL); + if (!pxa_rtc) + return -ENOMEM; + + spin_lock_init(&pxa_rtc->lock); + platform_set_drvdata(pdev, pxa_rtc); + + ret = -ENXIO; + pxa_rtc->ress = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!pxa_rtc->ress) { + dev_err(dev, "No I/O memory resource defined\n"); + goto err_ress; + } + + pxa_rtc->irq_1Hz = platform_get_irq(pdev, 0); + if (pxa_rtc->irq_1Hz < 0) { + dev_err(dev, "No 1Hz IRQ resource defined\n"); + goto err_ress; + } + pxa_rtc->irq_Alrm = platform_get_irq(pdev, 1); + if (pxa_rtc->irq_Alrm < 0) { + dev_err(dev, "No alarm IRQ resource defined\n"); + goto err_ress; + } + + ret = -ENOMEM; + pxa_rtc->base = ioremap(pxa_rtc->ress->start, + resource_size(pxa_rtc->ress)); + if (!pxa_rtc->base) { + dev_err(&pdev->dev, "Unable to map pxa RTC I/O memory\n"); + goto err_map; + } + + /* + * If the clock divider is uninitialized then reset it to the + * default value to get the 1Hz clock. + */ + if (rtc_readl(pxa_rtc, RTTR) == 0) { + rttr = RTC_DEF_DIVIDER + (RTC_DEF_TRIM << 16); + rtc_writel(pxa_rtc, RTTR, rttr); + dev_warn(dev, "warning: initializing default clock" + " divider/trim value\n"); + } + + rtsr_clear_bits(pxa_rtc, RTSR_PIALE | RTSR_RDALE1 | RTSR_HZE); + + pxa_rtc->rtc = rtc_device_register("pxa-rtc", &pdev->dev, &pxa_rtc_ops, + THIS_MODULE); + ret = PTR_ERR(pxa_rtc->rtc); + if (IS_ERR(pxa_rtc->rtc)) { + dev_err(dev, "Failed to register RTC device -> %d\n", ret); + goto err_rtc_reg; + } + + device_init_wakeup(dev, 1); + + return 0; + +err_rtc_reg: + iounmap(pxa_rtc->base); +err_ress: +err_map: + kfree(pxa_rtc); + return ret; +} + +static int __exit pxa_rtc_remove(struct platform_device *pdev) +{ + struct pxa_rtc *pxa_rtc = platform_get_drvdata(pdev); + + rtc_device_unregister(pxa_rtc->rtc); + + spin_lock_irq(&pxa_rtc->lock); + iounmap(pxa_rtc->base); + spin_unlock_irq(&pxa_rtc->lock); + + kfree(pxa_rtc); + + return 0; +} + +#ifdef CONFIG_PM +static int pxa_rtc_suspend(struct platform_device *pdev, pm_message_t state) +{ + struct pxa_rtc *pxa_rtc = platform_get_drvdata(pdev); + + if (device_may_wakeup(&pdev->dev)) + enable_irq_wake(pxa_rtc->irq_Alrm); + return 0; +} + +static int pxa_rtc_resume(struct platform_device *pdev) +{ + struct pxa_rtc *pxa_rtc = platform_get_drvdata(pdev); + + if (device_may_wakeup(&pdev->dev)) + disable_irq_wake(pxa_rtc->irq_Alrm); + return 0; +} +#else +#define pxa_rtc_suspend NULL +#define pxa_rtc_resume NULL +#endif + +static struct platform_driver pxa_rtc_driver = { + .remove = __exit_p(pxa_rtc_remove), + .suspend = pxa_rtc_suspend, + .resume = pxa_rtc_resume, + .driver = { + .name = "pxa-rtc", + }, +}; + +static int __init pxa_rtc_init(void) +{ + if (cpu_is_pxa27x() || cpu_is_pxa3xx()) + return platform_driver_probe(&pxa_rtc_driver, pxa_rtc_probe); + + return -ENODEV; +} + +static void __exit pxa_rtc_exit(void) +{ + platform_driver_unregister(&pxa_rtc_driver); +} + +module_init(pxa_rtc_init); +module_exit(pxa_rtc_exit); + +MODULE_AUTHOR("Robert Jarzmik"); +MODULE_DESCRIPTION("PXA27x/PXA3xx Realtime Clock Driver (RTC)"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:pxa-rtc"); diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c index 7a568beba3f0..e0d7b9991505 100644 --- a/drivers/rtc/rtc-s3c.c +++ b/drivers/rtc/rtc-s3c.c @@ -94,6 +94,9 @@ static int s3c_rtc_setfreq(struct device *dev, int freq) { unsigned int tmp; + if (!is_power_of_2(freq)) + return -EINVAL; + spin_lock_irq(&s3c_rtc_pie_lock); tmp = readb(s3c_rtc_base + S3C2410_TICNT) & S3C2410_TICNT_ENABLE; diff --git a/drivers/rtc/rtc-sh.c b/drivers/rtc/rtc-sh.c index aaf9d6a337cc..1c3fc6b428e9 100644 --- a/drivers/rtc/rtc-sh.c +++ b/drivers/rtc/rtc-sh.c @@ -24,6 +24,7 @@ #include <linux/interrupt.h> #include <linux/spinlock.h> #include <linux/io.h> +#include <linux/log2.h> #include <asm/rtc.h> #define DRV_NAME "sh-rtc" @@ -89,7 +90,9 @@ struct sh_rtc { void __iomem *regbase; unsigned long regsize; struct resource *res; - unsigned int alarm_irq, periodic_irq, carry_irq; + int alarm_irq; + int periodic_irq; + int carry_irq; struct rtc_device *rtc_dev; spinlock_t lock; unsigned long capabilities; /* See asm-sh/rtc.h for cap bits */ @@ -549,6 +552,8 @@ static int sh_rtc_irq_set_state(struct device *dev, int enabled) static int sh_rtc_irq_set_freq(struct device *dev, int freq) { + if (!is_power_of_2(freq)) + return -EINVAL; return sh_rtc_ioctl(dev, RTC_IRQP_SET, freq); } @@ -578,7 +583,7 @@ static int __devinit sh_rtc_probe(struct platform_device *pdev) /* get periodic/carry/alarm irqs */ ret = platform_get_irq(pdev, 0); - if (unlikely(ret < 0)) { + if (unlikely(ret <= 0)) { ret = -ENOENT; dev_err(&pdev->dev, "No IRQ for period\n"); goto err_badres; @@ -586,7 +591,7 @@ static int __devinit sh_rtc_probe(struct platform_device *pdev) rtc->periodic_irq = ret; ret = platform_get_irq(pdev, 1); - if (unlikely(ret < 0)) { + if (unlikely(ret <= 0)) { ret = -ENOENT; dev_err(&pdev->dev, "No IRQ for carry\n"); goto err_badres; @@ -594,7 +599,7 @@ static int __devinit sh_rtc_probe(struct platform_device *pdev) rtc->carry_irq = ret; ret = platform_get_irq(pdev, 2); - if (unlikely(ret < 0)) { + if (unlikely(ret <= 0)) { ret = -ENOENT; dev_err(&pdev->dev, "No IRQ for alarm\n"); goto err_badres; diff --git a/drivers/rtc/rtc-stk17ta8.c b/drivers/rtc/rtc-stk17ta8.c index f4cd46e15af9..dc0b6224ad9b 100644 --- a/drivers/rtc/rtc-stk17ta8.c +++ b/drivers/rtc/rtc-stk17ta8.c @@ -170,7 +170,7 @@ static int stk17ta8_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) struct platform_device *pdev = to_platform_device(dev); struct rtc_plat_data *pdata = platform_get_drvdata(pdev); - if (pdata->irq < 0) + if (pdata->irq <= 0) return -EINVAL; pdata->alrm_mday = alrm->time.tm_mday; pdata->alrm_hour = alrm->time.tm_hour; @@ -187,7 +187,7 @@ static int stk17ta8_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) struct platform_device *pdev = to_platform_device(dev); struct rtc_plat_data *pdata = platform_get_drvdata(pdev); - if (pdata->irq < 0) + if (pdata->irq <= 0) return -EINVAL; alrm->time.tm_mday = pdata->alrm_mday < 0 ? 0 : pdata->alrm_mday; alrm->time.tm_hour = pdata->alrm_hour < 0 ? 0 : pdata->alrm_hour; @@ -221,7 +221,7 @@ static int stk17ta8_rtc_ioctl(struct device *dev, unsigned int cmd, struct platform_device *pdev = to_platform_device(dev); struct rtc_plat_data *pdata = platform_get_drvdata(pdev); - if (pdata->irq < 0) + if (pdata->irq <= 0) return -ENOIOCTLCMD; /* fall back into rtc-dev's emulation */ switch (cmd) { case RTC_AIE_OFF: @@ -303,7 +303,6 @@ static int __init stk17ta8_rtc_probe(struct platform_device *pdev) pdata = kzalloc(sizeof(*pdata), GFP_KERNEL); if (!pdata) return -ENOMEM; - pdata->irq = -1; if (!request_mem_region(res->start, RTC_REG_SIZE, pdev->name)) { ret = -EBUSY; goto out; @@ -329,13 +328,13 @@ static int __init stk17ta8_rtc_probe(struct platform_device *pdev) if (readb(ioaddr + RTC_FLAGS) & RTC_FLAGS_PF) dev_warn(&pdev->dev, "voltage-low detected.\n"); - if (pdata->irq >= 0) { + if (pdata->irq > 0) { writeb(0, ioaddr + RTC_INTERRUPTS); if (request_irq(pdata->irq, stk17ta8_rtc_interrupt, IRQF_DISABLED | IRQF_SHARED, pdev->name, pdev) < 0) { dev_warn(&pdev->dev, "interrupt not available.\n"); - pdata->irq = -1; + pdata->irq = 0; } } @@ -355,7 +354,7 @@ static int __init stk17ta8_rtc_probe(struct platform_device *pdev) out: if (pdata->rtc) rtc_device_unregister(pdata->rtc); - if (pdata->irq >= 0) + if (pdata->irq > 0) free_irq(pdata->irq, pdev); if (ioaddr) iounmap(ioaddr); @@ -371,7 +370,7 @@ static int __devexit stk17ta8_rtc_remove(struct platform_device *pdev) sysfs_remove_bin_file(&pdev->dev.kobj, &stk17ta8_nvram_attr); rtc_device_unregister(pdata->rtc); - if (pdata->irq >= 0) { + if (pdata->irq > 0) { writeb(0, pdata->ioaddr + RTC_INTERRUPTS); free_irq(pdata->irq, pdev); } diff --git a/drivers/rtc/rtc-test.c b/drivers/rtc/rtc-test.c index bc930022004a..e478280ff628 100644 --- a/drivers/rtc/rtc-test.c +++ b/drivers/rtc/rtc-test.c @@ -34,14 +34,9 @@ static int test_rtc_read_time(struct device *dev, return 0; } -static int test_rtc_set_time(struct device *dev, - struct rtc_time *tm) -{ - return 0; -} - static int test_rtc_set_mmss(struct device *dev, unsigned long secs) { + dev_info(dev, "%s, secs = %lu\n", __func__, secs); return 0; } @@ -78,7 +73,6 @@ static int test_rtc_ioctl(struct device *dev, unsigned int cmd, static const struct rtc_class_ops test_rtc_ops = { .proc = test_rtc_proc, .read_time = test_rtc_read_time, - .set_time = test_rtc_set_time, .read_alarm = test_rtc_read_alarm, .set_alarm = test_rtc_set_alarm, .set_mmss = test_rtc_set_mmss, diff --git a/drivers/rtc/rtc-twl4030.c b/drivers/rtc/rtc-twl4030.c index 01d8da9afdc8..8ce5f74ee45b 100644 --- a/drivers/rtc/rtc-twl4030.c +++ b/drivers/rtc/rtc-twl4030.c @@ -19,6 +19,7 @@ */ #include <linux/kernel.h> +#include <linux/errno.h> #include <linux/init.h> #include <linux/module.h> #include <linux/types.h> @@ -415,8 +416,8 @@ static int __devinit twl4030_rtc_probe(struct platform_device *pdev) int irq = platform_get_irq(pdev, 0); u8 rd_reg; - if (irq < 0) - return irq; + if (irq <= 0) + return -EINVAL; rtc = rtc_device_register(pdev->name, &pdev->dev, &twl4030_rtc_ops, THIS_MODULE); diff --git a/drivers/rtc/rtc-tx4939.c b/drivers/rtc/rtc-tx4939.c new file mode 100644 index 000000000000..4ee4857ff207 --- /dev/null +++ b/drivers/rtc/rtc-tx4939.c @@ -0,0 +1,317 @@ +/* + * TX4939 internal RTC driver + * Based on RBTX49xx patch from CELF patch archive. + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * (C) Copyright TOSHIBA CORPORATION 2005-2007 + */ +#include <linux/rtc.h> +#include <linux/platform_device.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <asm/txx9/tx4939.h> + +struct tx4939rtc_plat_data { + struct rtc_device *rtc; + struct tx4939_rtc_reg __iomem *rtcreg; +}; + +static struct tx4939rtc_plat_data *get_tx4939rtc_plat_data(struct device *dev) +{ + return platform_get_drvdata(to_platform_device(dev)); +} + +static int tx4939_rtc_cmd(struct tx4939_rtc_reg __iomem *rtcreg, int cmd) +{ + int i = 0; + + __raw_writel(cmd, &rtcreg->ctl); + /* This might take 30us (next 32.768KHz clock) */ + while (__raw_readl(&rtcreg->ctl) & TX4939_RTCCTL_BUSY) { + /* timeout on approx. 100us (@ GBUS200MHz) */ + if (i++ > 200 * 100) + return -EBUSY; + cpu_relax(); + } + return 0; +} + +static int tx4939_rtc_set_mmss(struct device *dev, unsigned long secs) +{ + struct tx4939rtc_plat_data *pdata = get_tx4939rtc_plat_data(dev); + struct tx4939_rtc_reg __iomem *rtcreg = pdata->rtcreg; + int i, ret; + unsigned char buf[6]; + + buf[0] = 0; + buf[1] = 0; + buf[2] = secs; + buf[3] = secs >> 8; + buf[4] = secs >> 16; + buf[5] = secs >> 24; + spin_lock_irq(&pdata->rtc->irq_lock); + __raw_writel(0, &rtcreg->adr); + for (i = 0; i < 6; i++) + __raw_writel(buf[i], &rtcreg->dat); + ret = tx4939_rtc_cmd(rtcreg, + TX4939_RTCCTL_COMMAND_SETTIME | + (__raw_readl(&rtcreg->ctl) & TX4939_RTCCTL_ALME)); + spin_unlock_irq(&pdata->rtc->irq_lock); + return ret; +} + +static int tx4939_rtc_read_time(struct device *dev, struct rtc_time *tm) +{ + struct tx4939rtc_plat_data *pdata = get_tx4939rtc_plat_data(dev); + struct tx4939_rtc_reg __iomem *rtcreg = pdata->rtcreg; + int i, ret; + unsigned long sec; + unsigned char buf[6]; + + spin_lock_irq(&pdata->rtc->irq_lock); + ret = tx4939_rtc_cmd(rtcreg, + TX4939_RTCCTL_COMMAND_GETTIME | + (__raw_readl(&rtcreg->ctl) & TX4939_RTCCTL_ALME)); + if (ret) { + spin_unlock_irq(&pdata->rtc->irq_lock); + return ret; + } + __raw_writel(2, &rtcreg->adr); + for (i = 2; i < 6; i++) + buf[i] = __raw_readl(&rtcreg->dat); + spin_unlock_irq(&pdata->rtc->irq_lock); + sec = (buf[5] << 24) | (buf[4] << 16) | (buf[3] << 8) | buf[2]; + rtc_time_to_tm(sec, tm); + return rtc_valid_tm(tm); +} + +static int tx4939_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) +{ + struct tx4939rtc_plat_data *pdata = get_tx4939rtc_plat_data(dev); + struct tx4939_rtc_reg __iomem *rtcreg = pdata->rtcreg; + int i, ret; + unsigned long sec; + unsigned char buf[6]; + + if (alrm->time.tm_sec < 0 || + alrm->time.tm_min < 0 || + alrm->time.tm_hour < 0 || + alrm->time.tm_mday < 0 || + alrm->time.tm_mon < 0 || + alrm->time.tm_year < 0) + return -EINVAL; + rtc_tm_to_time(&alrm->time, &sec); + buf[0] = 0; + buf[1] = 0; + buf[2] = sec; + buf[3] = sec >> 8; + buf[4] = sec >> 16; + buf[5] = sec >> 24; + spin_lock_irq(&pdata->rtc->irq_lock); + __raw_writel(0, &rtcreg->adr); + for (i = 0; i < 6; i++) + __raw_writel(buf[i], &rtcreg->dat); + ret = tx4939_rtc_cmd(rtcreg, TX4939_RTCCTL_COMMAND_SETALARM | + (alrm->enabled ? TX4939_RTCCTL_ALME : 0)); + spin_unlock_irq(&pdata->rtc->irq_lock); + return ret; +} + +static int tx4939_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) +{ + struct tx4939rtc_plat_data *pdata = get_tx4939rtc_plat_data(dev); + struct tx4939_rtc_reg __iomem *rtcreg = pdata->rtcreg; + int i, ret; + unsigned long sec; + unsigned char buf[6]; + u32 ctl; + + spin_lock_irq(&pdata->rtc->irq_lock); + ret = tx4939_rtc_cmd(rtcreg, + TX4939_RTCCTL_COMMAND_GETALARM | + (__raw_readl(&rtcreg->ctl) & TX4939_RTCCTL_ALME)); + if (ret) { + spin_unlock_irq(&pdata->rtc->irq_lock); + return ret; + } + __raw_writel(2, &rtcreg->adr); + for (i = 2; i < 6; i++) + buf[i] = __raw_readl(&rtcreg->dat); + ctl = __raw_readl(&rtcreg->ctl); + alrm->enabled = (ctl & TX4939_RTCCTL_ALME) ? 1 : 0; + alrm->pending = (ctl & TX4939_RTCCTL_ALMD) ? 1 : 0; + spin_unlock_irq(&pdata->rtc->irq_lock); + sec = (buf[5] << 24) | (buf[4] << 16) | (buf[3] << 8) | buf[2]; + rtc_time_to_tm(sec, &alrm->time); + return rtc_valid_tm(&alrm->time); +} + +static int tx4939_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled) +{ + struct tx4939rtc_plat_data *pdata = get_tx4939rtc_plat_data(dev); + + spin_lock_irq(&pdata->rtc->irq_lock); + tx4939_rtc_cmd(pdata->rtcreg, + TX4939_RTCCTL_COMMAND_NOP | + (enabled ? TX4939_RTCCTL_ALME : 0)); + spin_unlock_irq(&pdata->rtc->irq_lock); + return 0; +} + +static irqreturn_t tx4939_rtc_interrupt(int irq, void *dev_id) +{ + struct tx4939rtc_plat_data *pdata = get_tx4939rtc_plat_data(dev_id); + struct tx4939_rtc_reg __iomem *rtcreg = pdata->rtcreg; + unsigned long events = RTC_IRQF; + + spin_lock(&pdata->rtc->irq_lock); + if (__raw_readl(&rtcreg->ctl) & TX4939_RTCCTL_ALMD) { + events |= RTC_AF; + tx4939_rtc_cmd(rtcreg, TX4939_RTCCTL_COMMAND_NOP); + } + spin_unlock(&pdata->rtc->irq_lock); + rtc_update_irq(pdata->rtc, 1, events); + return IRQ_HANDLED; +} + +static const struct rtc_class_ops tx4939_rtc_ops = { + .read_time = tx4939_rtc_read_time, + .read_alarm = tx4939_rtc_read_alarm, + .set_alarm = tx4939_rtc_set_alarm, + .set_mmss = tx4939_rtc_set_mmss, + .alarm_irq_enable = tx4939_rtc_alarm_irq_enable, +}; + +static ssize_t tx4939_rtc_nvram_read(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t pos, size_t size) +{ + struct device *dev = container_of(kobj, struct device, kobj); + struct tx4939rtc_plat_data *pdata = get_tx4939rtc_plat_data(dev); + struct tx4939_rtc_reg __iomem *rtcreg = pdata->rtcreg; + ssize_t count; + + spin_lock_irq(&pdata->rtc->irq_lock); + for (count = 0; size > 0 && pos < TX4939_RTC_REG_RAMSIZE; + count++, size--) { + __raw_writel(pos++, &rtcreg->adr); + *buf++ = __raw_readl(&rtcreg->dat); + } + spin_unlock_irq(&pdata->rtc->irq_lock); + return count; +} + +static ssize_t tx4939_rtc_nvram_write(struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t pos, size_t size) +{ + struct device *dev = container_of(kobj, struct device, kobj); + struct tx4939rtc_plat_data *pdata = get_tx4939rtc_plat_data(dev); + struct tx4939_rtc_reg __iomem *rtcreg = pdata->rtcreg; + ssize_t count; + + spin_lock_irq(&pdata->rtc->irq_lock); + for (count = 0; size > 0 && pos < TX4939_RTC_REG_RAMSIZE; + count++, size--) { + __raw_writel(pos++, &rtcreg->adr); + __raw_writel(*buf++, &rtcreg->dat); + } + spin_unlock_irq(&pdata->rtc->irq_lock); + return count; +} + +static struct bin_attribute tx4939_rtc_nvram_attr = { + .attr = { + .name = "nvram", + .mode = S_IRUGO | S_IWUSR, + }, + .size = TX4939_RTC_REG_RAMSIZE, + .read = tx4939_rtc_nvram_read, + .write = tx4939_rtc_nvram_write, +}; + +static int __init tx4939_rtc_probe(struct platform_device *pdev) +{ + struct rtc_device *rtc; + struct tx4939rtc_plat_data *pdata; + struct resource *res; + int irq, ret; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + return -ENODEV; + irq = platform_get_irq(pdev, 0); + if (irq < 0) + return -ENODEV; + pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL); + if (!pdata) + return -ENOMEM; + platform_set_drvdata(pdev, pdata); + + if (!devm_request_mem_region(&pdev->dev, res->start, + resource_size(res), pdev->name)) + return -EBUSY; + pdata->rtcreg = devm_ioremap(&pdev->dev, res->start, + resource_size(res)); + if (!pdata->rtcreg) + return -EBUSY; + + tx4939_rtc_cmd(pdata->rtcreg, TX4939_RTCCTL_COMMAND_NOP); + if (devm_request_irq(&pdev->dev, irq, tx4939_rtc_interrupt, + IRQF_DISABLED | IRQF_SHARED, + pdev->name, &pdev->dev) < 0) { + return -EBUSY; + } + rtc = rtc_device_register(pdev->name, &pdev->dev, + &tx4939_rtc_ops, THIS_MODULE); + if (IS_ERR(rtc)) + return PTR_ERR(rtc); + pdata->rtc = rtc; + ret = sysfs_create_bin_file(&pdev->dev.kobj, &tx4939_rtc_nvram_attr); + if (ret) + rtc_device_unregister(rtc); + return ret; +} + +static int __exit tx4939_rtc_remove(struct platform_device *pdev) +{ + struct tx4939rtc_plat_data *pdata = platform_get_drvdata(pdev); + struct rtc_device *rtc = pdata->rtc; + + spin_lock_irq(&rtc->irq_lock); + tx4939_rtc_cmd(pdata->rtcreg, TX4939_RTCCTL_COMMAND_NOP); + spin_unlock_irq(&rtc->irq_lock); + sysfs_remove_bin_file(&pdev->dev.kobj, &tx4939_rtc_nvram_attr); + rtc_device_unregister(rtc); + platform_set_drvdata(pdev, NULL); + return 0; +} + +static struct platform_driver tx4939_rtc_driver = { + .remove = __exit_p(tx4939_rtc_remove), + .driver = { + .name = "tx4939rtc", + .owner = THIS_MODULE, + }, +}; + +static int __init tx4939rtc_init(void) +{ + return platform_driver_probe(&tx4939_rtc_driver, tx4939_rtc_probe); +} + +static void __exit tx4939rtc_exit(void) +{ + platform_driver_unregister(&tx4939_rtc_driver); +} + +module_init(tx4939rtc_init); +module_exit(tx4939rtc_exit); + +MODULE_AUTHOR("Atsushi Nemoto <anemo@mba.ocn.ne.jp>"); +MODULE_DESCRIPTION("TX4939 internal RTC driver"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:tx4939rtc"); diff --git a/drivers/rtc/rtc-vr41xx.c b/drivers/rtc/rtc-vr41xx.c index 834dcc6d785f..f11297aff854 100644 --- a/drivers/rtc/rtc-vr41xx.c +++ b/drivers/rtc/rtc-vr41xx.c @@ -27,6 +27,7 @@ #include <linux/rtc.h> #include <linux/spinlock.h> #include <linux/types.h> +#include <linux/log2.h> #include <asm/div64.h> #include <asm/io.h> @@ -84,8 +85,8 @@ static DEFINE_SPINLOCK(rtc_lock); static char rtc_name[] = "RTC"; static unsigned long periodic_count; static unsigned int alarm_enabled; -static int aie_irq = -1; -static int pie_irq = -1; +static int aie_irq; +static int pie_irq; static inline unsigned long read_elapsed_second(void) { @@ -210,6 +211,8 @@ static int vr41xx_rtc_irq_set_freq(struct device *dev, int freq) { unsigned long count; + if (!is_power_of_2(freq)) + return -EINVAL; count = RTC_FREQUENCY; do_div(count, freq); @@ -360,7 +363,7 @@ static int __devinit rtc_probe(struct platform_device *pdev) spin_unlock_irq(&rtc_lock); aie_irq = platform_get_irq(pdev, 0); - if (aie_irq < 0 || aie_irq >= nr_irqs) { + if (aie_irq <= 0) { retval = -EBUSY; goto err_device_unregister; } @@ -371,7 +374,7 @@ static int __devinit rtc_probe(struct platform_device *pdev) goto err_device_unregister; pie_irq = platform_get_irq(pdev, 1); - if (pie_irq < 0 || pie_irq >= nr_irqs) + if (pie_irq <= 0) goto err_free_irq; retval = request_irq(pie_irq, rtclong1_interrupt, IRQF_DISABLED, diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig index b9d0efb6803f..4a6fe01831a8 100644 --- a/drivers/spi/Kconfig +++ b/drivers/spi/Kconfig @@ -78,7 +78,7 @@ config SPI_AU1550 will be called au1550_spi. config SPI_BITBANG - tristate "Bitbanging SPI master" + tristate "Utilities for Bitbanging SPI masters" help With a few GPIO pins, your system can bitbang the SPI protocol. Select this to get SPI support through I/O pins (GPIO, parallel @@ -100,6 +100,22 @@ config SPI_BUTTERFLY inexpensive battery powered microcontroller evaluation board. This same cable can be used to flash new firmware. +config SPI_GPIO + tristate "GPIO-based bitbanging SPI Master" + depends on GENERIC_GPIO + select SPI_BITBANG + help + This simple GPIO bitbanging SPI master uses the arch-neutral GPIO + interface to manage MOSI, MISO, SCK, and chipselect signals. SPI + slaves connected to a bus using this driver are configured as usual, + except that the spi_board_info.controller_data holds the GPIO number + for the chipselect used by this controller driver. + + Note that this driver often won't achieve even 1 Mbit/sec speeds, + making it unusually slow for SPI. If your platform can inline + GPIO operations, you should be able to leverage that for better + speed with a custom version of this driver; see the source code. + config SPI_IMX tristate "Freescale iMX SPI controller" depends on ARCH_IMX && EXPERIMENTAL diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile index ccf18de34e1e..5e9f521b8844 100644 --- a/drivers/spi/Makefile +++ b/drivers/spi/Makefile @@ -16,6 +16,7 @@ obj-$(CONFIG_SPI_BFIN) += spi_bfin5xx.o obj-$(CONFIG_SPI_BITBANG) += spi_bitbang.o obj-$(CONFIG_SPI_AU1550) += au1550_spi.o obj-$(CONFIG_SPI_BUTTERFLY) += spi_butterfly.o +obj-$(CONFIG_SPI_GPIO) += spi_gpio.o obj-$(CONFIG_SPI_IMX) += spi_imx.o obj-$(CONFIG_SPI_LM70_LLP) += spi_lm70llp.o obj-$(CONFIG_SPI_PXA2XX) += pxa2xx_spi.o diff --git a/drivers/spi/atmel_spi.c b/drivers/spi/atmel_spi.c index 8abae4ad0fa5..5e39bac9c51b 100644 --- a/drivers/spi/atmel_spi.c +++ b/drivers/spi/atmel_spi.c @@ -30,13 +30,6 @@ * The core SPI transfer engine just talks to a register bank to set up * DMA transfers; transfer queue progress is driven by IRQs. The clock * framework provides the base clock, subdivided for each spi_device. - * - * Newer controllers, marked with "new_1" flag, have: - * - CR.LASTXFER - * - SPI_MR.DIV32 may become FDIV or must-be-zero (here: always zero) - * - SPI_SR.TXEMPTY, SPI_SR.NSSR (and corresponding irqs) - * - SPI_CSRx.CSAAT - * - SPI_CSRx.SBCR allows faster clocking */ struct atmel_spi { spinlock_t lock; @@ -45,7 +38,6 @@ struct atmel_spi { int irq; struct clk *clk; struct platform_device *pdev; - unsigned new_1:1; struct spi_device *stay; u8 stopping; @@ -59,10 +51,33 @@ struct atmel_spi { dma_addr_t buffer_dma; }; +/* Controller-specific per-slave state */ +struct atmel_spi_device { + unsigned int npcs_pin; + u32 csr; +}; + #define BUFFER_SIZE PAGE_SIZE #define INVALID_DMA_ADDRESS 0xffffffff /* + * Version 2 of the SPI controller has + * - CR.LASTXFER + * - SPI_MR.DIV32 may become FDIV or must-be-zero (here: always zero) + * - SPI_SR.TXEMPTY, SPI_SR.NSSR (and corresponding irqs) + * - SPI_CSRx.CSAAT + * - SPI_CSRx.SBCR allows faster clocking + * + * We can determine the controller version by reading the VERSION + * register, but I haven't checked that it exists on all chips, and + * this is cheaper anyway. + */ +static bool atmel_spi_is_v2(void) +{ + return !cpu_is_at91rm9200(); +} + +/* * Earlier SPI controllers (e.g. on at91rm9200) have a design bug whereby * they assume that spi slave device state will not change on deselect, so * that automagic deselection is OK. ("NPCSx rises if no data is to be @@ -80,39 +95,58 @@ struct atmel_spi { * Master on Chip Select 0.") No workaround exists for that ... so for * nCS0 on that chip, we (a) don't use the GPIO, (b) can't support CS_HIGH, * and (c) will trigger that first erratum in some cases. + * + * TODO: Test if the atmel_spi_is_v2() branch below works on + * AT91RM9200 if we use some other register than CSR0. However, don't + * do this unconditionally since AP7000 has an errata where the BITS + * field in CSR0 overrides all other CSRs. */ static void cs_activate(struct atmel_spi *as, struct spi_device *spi) { - unsigned gpio = (unsigned) spi->controller_data; + struct atmel_spi_device *asd = spi->controller_state; unsigned active = spi->mode & SPI_CS_HIGH; u32 mr; - int i; - u32 csr; - u32 cpol = (spi->mode & SPI_CPOL) ? SPI_BIT(CPOL) : 0; - - /* Make sure clock polarity is correct */ - for (i = 0; i < spi->master->num_chipselect; i++) { - csr = spi_readl(as, CSR0 + 4 * i); - if ((csr ^ cpol) & SPI_BIT(CPOL)) - spi_writel(as, CSR0 + 4 * i, csr ^ SPI_BIT(CPOL)); - } - mr = spi_readl(as, MR); - mr = SPI_BFINS(PCS, ~(1 << spi->chip_select), mr); + if (atmel_spi_is_v2()) { + /* + * Always use CSR0. This ensures that the clock + * switches to the correct idle polarity before we + * toggle the CS. + */ + spi_writel(as, CSR0, asd->csr); + spi_writel(as, MR, SPI_BF(PCS, 0x0e) | SPI_BIT(MODFDIS) + | SPI_BIT(MSTR)); + mr = spi_readl(as, MR); + gpio_set_value(asd->npcs_pin, active); + } else { + u32 cpol = (spi->mode & SPI_CPOL) ? SPI_BIT(CPOL) : 0; + int i; + u32 csr; + + /* Make sure clock polarity is correct */ + for (i = 0; i < spi->master->num_chipselect; i++) { + csr = spi_readl(as, CSR0 + 4 * i); + if ((csr ^ cpol) & SPI_BIT(CPOL)) + spi_writel(as, CSR0 + 4 * i, + csr ^ SPI_BIT(CPOL)); + } + + mr = spi_readl(as, MR); + mr = SPI_BFINS(PCS, ~(1 << spi->chip_select), mr); + if (spi->chip_select != 0) + gpio_set_value(asd->npcs_pin, active); + spi_writel(as, MR, mr); + } dev_dbg(&spi->dev, "activate %u%s, mr %08x\n", - gpio, active ? " (high)" : "", + asd->npcs_pin, active ? " (high)" : "", mr); - - if (!(cpu_is_at91rm9200() && spi->chip_select == 0)) - gpio_set_value(gpio, active); - spi_writel(as, MR, mr); } static void cs_deactivate(struct atmel_spi *as, struct spi_device *spi) { - unsigned gpio = (unsigned) spi->controller_data; + struct atmel_spi_device *asd = spi->controller_state; unsigned active = spi->mode & SPI_CS_HIGH; u32 mr; @@ -126,11 +160,11 @@ static void cs_deactivate(struct atmel_spi *as, struct spi_device *spi) } dev_dbg(&spi->dev, "DEactivate %u%s, mr %08x\n", - gpio, active ? " (low)" : "", + asd->npcs_pin, active ? " (low)" : "", mr); - if (!(cpu_is_at91rm9200() && spi->chip_select == 0)) - gpio_set_value(gpio, !active); + if (atmel_spi_is_v2() || spi->chip_select != 0) + gpio_set_value(asd->npcs_pin, !active); } static inline int atmel_spi_xfer_is_last(struct spi_message *msg, @@ -502,6 +536,7 @@ atmel_spi_interrupt(int irq, void *dev_id) static int atmel_spi_setup(struct spi_device *spi) { struct atmel_spi *as; + struct atmel_spi_device *asd; u32 scbr, csr; unsigned int bits = spi->bits_per_word; unsigned long bus_hz; @@ -536,19 +571,16 @@ static int atmel_spi_setup(struct spi_device *spi) } /* see notes above re chipselect */ - if (cpu_is_at91rm9200() + if (!atmel_spi_is_v2() && spi->chip_select == 0 && (spi->mode & SPI_CS_HIGH)) { dev_dbg(&spi->dev, "setup: can't be active-high\n"); return -EINVAL; } - /* - * Pre-new_1 chips start out at half the peripheral - * bus speed. - */ + /* v1 chips start out at half the peripheral bus speed. */ bus_hz = clk_get_rate(as->clk); - if (!as->new_1) + if (!atmel_spi_is_v2()) bus_hz /= 2; if (spi->max_speed_hz) { @@ -589,11 +621,20 @@ static int atmel_spi_setup(struct spi_device *spi) /* chipselect must have been muxed as GPIO (e.g. in board setup) */ npcs_pin = (unsigned int)spi->controller_data; - if (!spi->controller_state) { + asd = spi->controller_state; + if (!asd) { + asd = kzalloc(sizeof(struct atmel_spi_device), GFP_KERNEL); + if (!asd) + return -ENOMEM; + ret = gpio_request(npcs_pin, spi->dev.bus_id); - if (ret) + if (ret) { + kfree(asd); return ret; - spi->controller_state = (void *)npcs_pin; + } + + asd->npcs_pin = npcs_pin; + spi->controller_state = asd; gpio_direction_output(npcs_pin, !(spi->mode & SPI_CS_HIGH)); } else { unsigned long flags; @@ -605,11 +646,14 @@ static int atmel_spi_setup(struct spi_device *spi) spin_unlock_irqrestore(&as->lock, flags); } + asd->csr = csr; + dev_dbg(&spi->dev, "setup: %lu Hz bpw %u mode 0x%x -> csr%d %08x\n", bus_hz / scbr, bits, spi->mode, spi->chip_select, csr); - spi_writel(as, CSR0 + 4 * spi->chip_select, csr); + if (!atmel_spi_is_v2()) + spi_writel(as, CSR0 + 4 * spi->chip_select, csr); return 0; } @@ -684,10 +728,11 @@ static int atmel_spi_transfer(struct spi_device *spi, struct spi_message *msg) static void atmel_spi_cleanup(struct spi_device *spi) { struct atmel_spi *as = spi_master_get_devdata(spi->master); + struct atmel_spi_device *asd = spi->controller_state; unsigned gpio = (unsigned) spi->controller_data; unsigned long flags; - if (!spi->controller_state) + if (!asd) return; spin_lock_irqsave(&as->lock, flags); @@ -697,7 +742,9 @@ static void atmel_spi_cleanup(struct spi_device *spi) } spin_unlock_irqrestore(&as->lock, flags); + spi->controller_state = NULL; gpio_free(gpio); + kfree(asd); } /*-------------------------------------------------------------------------*/ @@ -755,8 +802,6 @@ static int __init atmel_spi_probe(struct platform_device *pdev) goto out_free_buffer; as->irq = irq; as->clk = clk; - if (!cpu_is_at91rm9200()) - as->new_1 = 1; ret = request_irq(irq, atmel_spi_interrupt, 0, pdev->dev.bus_id, master); diff --git a/drivers/spi/pxa2xx_spi.c b/drivers/spi/pxa2xx_spi.c index 6104f461a3cd..d0fc4ca2f656 100644 --- a/drivers/spi/pxa2xx_spi.c +++ b/drivers/spi/pxa2xx_spi.c @@ -1561,11 +1561,12 @@ out_error_master_alloc: static int pxa2xx_spi_remove(struct platform_device *pdev) { struct driver_data *drv_data = platform_get_drvdata(pdev); - struct ssp_device *ssp = drv_data->ssp; + struct ssp_device *ssp; int status = 0; if (!drv_data) return 0; + ssp = drv_data->ssp; /* Remove the queue */ status = destroy_queue(drv_data); diff --git a/drivers/spi/spi_gpio.c b/drivers/spi/spi_gpio.c new file mode 100644 index 000000000000..49698cabc30d --- /dev/null +++ b/drivers/spi/spi_gpio.c @@ -0,0 +1,360 @@ +/* + * spi_gpio.c - SPI master driver using generic bitbanged GPIO + * + * Copyright (C) 2006,2008 David Brownell + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/platform_device.h> +#include <linux/gpio.h> + +#include <linux/spi/spi.h> +#include <linux/spi/spi_bitbang.h> +#include <linux/spi/spi_gpio.h> + + +/* + * This bitbanging SPI master driver should help make systems usable + * when a native hardware SPI engine is not available, perhaps because + * its driver isn't yet working or because the I/O pins it requires + * are used for other purposes. + * + * platform_device->driver_data ... points to spi_gpio + * + * spi->controller_state ... reserved for bitbang framework code + * spi->controller_data ... holds chipselect GPIO + * + * spi->master->dev.driver_data ... points to spi_gpio->bitbang + */ + +struct spi_gpio { + struct spi_bitbang bitbang; + struct spi_gpio_platform_data pdata; + struct platform_device *pdev; +}; + +/*----------------------------------------------------------------------*/ + +/* + * Because the overhead of going through four GPIO procedure calls + * per transferred bit can make performance a problem, this code + * is set up so that you can use it in either of two ways: + * + * - The slow generic way: set up platform_data to hold the GPIO + * numbers used for MISO/MOSI/SCK, and issue procedure calls for + * each of them. This driver can handle several such busses. + * + * - The quicker inlined way: only helps with platform GPIO code + * that inlines operations for constant GPIOs. This can give + * you tight (fast!) inner loops, but each such bus needs a + * new driver. You'll define a new C file, with Makefile and + * Kconfig support; the C code can be a total of six lines: + * + * #define DRIVER_NAME "myboard_spi2" + * #define SPI_MISO_GPIO 119 + * #define SPI_MOSI_GPIO 120 + * #define SPI_SCK_GPIO 121 + * #define SPI_N_CHIPSEL 4 + * #include "spi_gpio.c" + */ + +#ifndef DRIVER_NAME +#define DRIVER_NAME "spi_gpio" + +#define GENERIC_BITBANG /* vs tight inlines */ + +/* all functions referencing these symbols must define pdata */ +#define SPI_MISO_GPIO ((pdata)->miso) +#define SPI_MOSI_GPIO ((pdata)->mosi) +#define SPI_SCK_GPIO ((pdata)->sck) + +#define SPI_N_CHIPSEL ((pdata)->num_chipselect) + +#endif + +/*----------------------------------------------------------------------*/ + +static inline const struct spi_gpio_platform_data * __pure +spi_to_pdata(const struct spi_device *spi) +{ + const struct spi_bitbang *bang; + const struct spi_gpio *spi_gpio; + + bang = spi_master_get_devdata(spi->master); + spi_gpio = container_of(bang, struct spi_gpio, bitbang); + return &spi_gpio->pdata; +} + +/* this is #defined to avoid unused-variable warnings when inlining */ +#define pdata spi_to_pdata(spi) + +static inline void setsck(const struct spi_device *spi, int is_on) +{ + gpio_set_value(SPI_SCK_GPIO, is_on); +} + +static inline void setmosi(const struct spi_device *spi, int is_on) +{ + gpio_set_value(SPI_MOSI_GPIO, is_on); +} + +static inline int getmiso(const struct spi_device *spi) +{ + return gpio_get_value(SPI_MISO_GPIO); +} + +#undef pdata + +/* + * NOTE: this clocks "as fast as we can". It "should" be a function of the + * requested device clock. Software overhead means we usually have trouble + * reaching even one Mbit/sec (except when we can inline bitops), so for now + * we'll just assume we never need additional per-bit slowdowns. + */ +#define spidelay(nsecs) do {} while (0) + +#define EXPAND_BITBANG_TXRX +#include <linux/spi/spi_bitbang.h> + +/* + * These functions can leverage inline expansion of GPIO calls to shrink + * costs for a txrx bit, often by factors of around ten (by instruction + * count). That is particularly visible for larger word sizes, but helps + * even with default 8-bit words. + * + * REVISIT overheads calling these functions for each word also have + * significant performance costs. Having txrx_bufs() calls that inline + * the txrx_word() logic would help performance, e.g. on larger blocks + * used with flash storage or MMC/SD. There should also be ways to make + * GCC be less stupid about reloading registers inside the I/O loops, + * even without inlined GPIO calls; __attribute__((hot)) on GCC 4.3? + */ + +static u32 spi_gpio_txrx_word_mode0(struct spi_device *spi, + unsigned nsecs, u32 word, u8 bits) +{ + return bitbang_txrx_be_cpha0(spi, nsecs, 0, word, bits); +} + +static u32 spi_gpio_txrx_word_mode1(struct spi_device *spi, + unsigned nsecs, u32 word, u8 bits) +{ + return bitbang_txrx_be_cpha1(spi, nsecs, 0, word, bits); +} + +static u32 spi_gpio_txrx_word_mode2(struct spi_device *spi, + unsigned nsecs, u32 word, u8 bits) +{ + return bitbang_txrx_be_cpha0(spi, nsecs, 1, word, bits); +} + +static u32 spi_gpio_txrx_word_mode3(struct spi_device *spi, + unsigned nsecs, u32 word, u8 bits) +{ + return bitbang_txrx_be_cpha1(spi, nsecs, 1, word, bits); +} + +/*----------------------------------------------------------------------*/ + +static void spi_gpio_chipselect(struct spi_device *spi, int is_active) +{ + unsigned long cs = (unsigned long) spi->controller_data; + + /* set initial clock polarity */ + if (is_active) + setsck(spi, spi->mode & SPI_CPOL); + + /* SPI is normally active-low */ + gpio_set_value(cs, (spi->mode & SPI_CS_HIGH) ? is_active : !is_active); +} + +static int spi_gpio_setup(struct spi_device *spi) +{ + unsigned long cs = (unsigned long) spi->controller_data; + int status = 0; + + if (spi->bits_per_word > 32) + return -EINVAL; + + if (!spi->controller_state) { + status = gpio_request(cs, spi->dev.bus_id); + if (status) + return status; + status = gpio_direction_output(cs, spi->mode & SPI_CS_HIGH); + } + if (!status) + status = spi_bitbang_setup(spi); + if (status) { + if (!spi->controller_state) + gpio_free(cs); + } + return status; +} + +static void spi_gpio_cleanup(struct spi_device *spi) +{ + unsigned long cs = (unsigned long) spi->controller_data; + + gpio_free(cs); + spi_bitbang_cleanup(spi); +} + +static int __init spi_gpio_alloc(unsigned pin, const char *label, bool is_in) +{ + int value; + + value = gpio_request(pin, label); + if (value == 0) { + if (is_in) + value = gpio_direction_input(pin); + else + value = gpio_direction_output(pin, 0); + } + return value; +} + +static int __init +spi_gpio_request(struct spi_gpio_platform_data *pdata, const char *label) +{ + int value; + + /* NOTE: SPI_*_GPIO symbols may reference "pdata" */ + + value = spi_gpio_alloc(SPI_MOSI_GPIO, label, false); + if (value) + goto done; + + value = spi_gpio_alloc(SPI_MISO_GPIO, label, true); + if (value) + goto free_mosi; + + value = spi_gpio_alloc(SPI_SCK_GPIO, label, false); + if (value) + goto free_miso; + + goto done; + +free_miso: + gpio_free(SPI_MISO_GPIO); +free_mosi: + gpio_free(SPI_MOSI_GPIO); +done: + return value; +} + +static int __init spi_gpio_probe(struct platform_device *pdev) +{ + int status; + struct spi_master *master; + struct spi_gpio *spi_gpio; + struct spi_gpio_platform_data *pdata; + + pdata = pdev->dev.platform_data; +#ifdef GENERIC_BITBANG + if (!pdata || !pdata->num_chipselect) + return -ENODEV; +#endif + + status = spi_gpio_request(pdata, dev_name(&pdev->dev)); + if (status < 0) + return status; + + master = spi_alloc_master(&pdev->dev, sizeof *spi_gpio); + if (!master) { + status = -ENOMEM; + goto gpio_free; + } + spi_gpio = spi_master_get_devdata(master); + platform_set_drvdata(pdev, spi_gpio); + + spi_gpio->pdev = pdev; + if (pdata) + spi_gpio->pdata = *pdata; + + master->bus_num = pdev->id; + master->num_chipselect = SPI_N_CHIPSEL; + master->setup = spi_gpio_setup; + master->cleanup = spi_gpio_cleanup; + + spi_gpio->bitbang.master = spi_master_get(master); + spi_gpio->bitbang.chipselect = spi_gpio_chipselect; + spi_gpio->bitbang.txrx_word[SPI_MODE_0] = spi_gpio_txrx_word_mode0; + spi_gpio->bitbang.txrx_word[SPI_MODE_1] = spi_gpio_txrx_word_mode1; + spi_gpio->bitbang.txrx_word[SPI_MODE_2] = spi_gpio_txrx_word_mode2; + spi_gpio->bitbang.txrx_word[SPI_MODE_3] = spi_gpio_txrx_word_mode3; + spi_gpio->bitbang.setup_transfer = spi_bitbang_setup_transfer; + spi_gpio->bitbang.flags = SPI_CS_HIGH; + + status = spi_bitbang_start(&spi_gpio->bitbang); + if (status < 0) { + spi_master_put(spi_gpio->bitbang.master); +gpio_free: + gpio_free(SPI_MISO_GPIO); + gpio_free(SPI_MOSI_GPIO); + gpio_free(SPI_SCK_GPIO); + spi_master_put(master); + } + + return status; +} + +static int __exit spi_gpio_remove(struct platform_device *pdev) +{ + struct spi_gpio *spi_gpio; + struct spi_gpio_platform_data *pdata; + int status; + + spi_gpio = platform_get_drvdata(pdev); + pdata = pdev->dev.platform_data; + + /* stop() unregisters child devices too */ + status = spi_bitbang_stop(&spi_gpio->bitbang); + spi_master_put(spi_gpio->bitbang.master); + + platform_set_drvdata(pdev, NULL); + + gpio_free(SPI_MISO_GPIO); + gpio_free(SPI_MOSI_GPIO); + gpio_free(SPI_SCK_GPIO); + + return status; +} + +MODULE_ALIAS("platform:" DRIVER_NAME); + +static struct platform_driver spi_gpio_driver = { + .driver.name = DRIVER_NAME, + .driver.owner = THIS_MODULE, + .remove = __exit_p(spi_gpio_remove), +}; + +static int __init spi_gpio_init(void) +{ + return platform_driver_probe(&spi_gpio_driver, spi_gpio_probe); +} +module_init(spi_gpio_init); + +static void __exit spi_gpio_exit(void) +{ + platform_driver_unregister(&spi_gpio_driver); +} +module_exit(spi_gpio_exit); + + +MODULE_DESCRIPTION("SPI master driver using generic bitbanged GPIO "); +MODULE_AUTHOR("David Brownell"); +MODULE_LICENSE("GPL"); diff --git a/drivers/spi/spi_s3c24xx.c b/drivers/spi/spi_s3c24xx.c index 256d18395a23..b3ebc1d0f85f 100644 --- a/drivers/spi/spi_s3c24xx.c +++ b/drivers/spi/spi_s3c24xx.c @@ -19,6 +19,7 @@ #include <linux/err.h> #include <linux/clk.h> #include <linux/platform_device.h> +#include <linux/gpio.h> #include <linux/spi/spi.h> #include <linux/spi/spi_bitbang.h> @@ -27,7 +28,6 @@ #include <asm/dma.h> #include <mach/hardware.h> -#include <mach/regs-gpio.h> #include <plat/regs-spi.h> #include <mach/spi.h> @@ -66,7 +66,7 @@ static inline struct s3c24xx_spi *to_hw(struct spi_device *sdev) static void s3c24xx_spi_gpiocs(struct s3c2410_spi_info *spi, int cs, int pol) { - s3c2410_gpio_setpin(spi->pin_cs, pol); + gpio_set_value(spi->pin_cs, pol); } static void s3c24xx_spi_chipsel(struct spi_device *spi, int value) @@ -248,8 +248,13 @@ static void s3c24xx_spi_initialsetup(struct s3c24xx_spi *hw) writeb(SPPIN_DEFAULT, hw->regs + S3C2410_SPPIN); writeb(SPCON_DEFAULT, hw->regs + S3C2410_SPCON); - if (hw->pdata && hw->pdata->gpio_setup) - hw->pdata->gpio_setup(hw->pdata, 1); + if (hw->pdata) { + if (hw->set_cs == s3c24xx_spi_gpiocs) + gpio_direction_output(hw->pdata->pin_cs, 1); + + if (hw->pdata->gpio_setup) + hw->pdata->gpio_setup(hw->pdata, 1); + } } static int __init s3c24xx_spi_probe(struct platform_device *pdev) @@ -343,18 +348,27 @@ static int __init s3c24xx_spi_probe(struct platform_device *pdev) goto err_no_clk; } - s3c24xx_spi_initialsetup(hw); - /* setup any gpio we can */ if (!pdata->set_cs) { - hw->set_cs = s3c24xx_spi_gpiocs; + if (pdata->pin_cs < 0) { + dev_err(&pdev->dev, "No chipselect pin\n"); + goto err_register; + } - s3c2410_gpio_setpin(pdata->pin_cs, 1); - s3c2410_gpio_cfgpin(pdata->pin_cs, S3C2410_GPIO_OUTPUT); + err = gpio_request(pdata->pin_cs, dev_name(&pdev->dev)); + if (err) { + dev_err(&pdev->dev, "Failed to get gpio for cs\n"); + goto err_register; + } + + hw->set_cs = s3c24xx_spi_gpiocs; + gpio_direction_output(pdata->pin_cs, 1); } else hw->set_cs = pdata->set_cs; + s3c24xx_spi_initialsetup(hw); + /* register our spi controller */ err = spi_bitbang_start(&hw->bitbang); @@ -366,6 +380,9 @@ static int __init s3c24xx_spi_probe(struct platform_device *pdev) return 0; err_register: + if (hw->set_cs == s3c24xx_spi_gpiocs) + gpio_free(pdata->pin_cs); + clk_disable(hw->clk); clk_put(hw->clk); @@ -401,6 +418,9 @@ static int __exit s3c24xx_spi_remove(struct platform_device *dev) free_irq(hw->irq, hw); iounmap(hw->regs); + if (hw->set_cs == s3c24xx_spi_gpiocs) + gpio_free(hw->pdata->pin_cs); + release_resource(hw->ioarea); kfree(hw->ioarea); diff --git a/drivers/video/aty/aty128fb.c b/drivers/video/aty/aty128fb.c index 243ea4ab20c8..db16112cf197 100644 --- a/drivers/video/aty/aty128fb.c +++ b/drivers/video/aty/aty128fb.c @@ -2051,7 +2051,7 @@ static int __devinit aty128_probe(struct pci_dev *pdev, const struct pci_device_ /* Virtualize mmio region */ info->fix.mmio_start = reg_addr; - par->regbase = ioremap(reg_addr, pci_resource_len(pdev, 2)); + par->regbase = pci_ioremap_bar(pdev, 2); if (!par->regbase) goto err_free_info; diff --git a/drivers/video/bfin-t350mcqb-fb.c b/drivers/video/bfin-t350mcqb-fb.c index 7d1b819e501c..a9b3ada05d99 100644 --- a/drivers/video/bfin-t350mcqb-fb.c +++ b/drivers/video/bfin-t350mcqb-fb.c @@ -255,7 +255,7 @@ static int bfin_t350mcqb_fb_check_var(struct fb_var_screeninfo *var, { if (var->bits_per_pixel != LCD_BPP) { - pr_debug("%s: depth not supported: %u BPP\n", __FUNCTION__, + pr_debug("%s: depth not supported: %u BPP\n", __func__, var->bits_per_pixel); return -EINVAL; } @@ -264,7 +264,7 @@ static int bfin_t350mcqb_fb_check_var(struct fb_var_screeninfo *var, info->var.xres_virtual != var->xres_virtual || info->var.yres_virtual != var->yres_virtual) { pr_debug("%s: Resolution not supported: X%u x Y%u \n", - __FUNCTION__, var->xres, var->yres); + __func__, var->xres, var->yres); return -EINVAL; } @@ -274,7 +274,7 @@ static int bfin_t350mcqb_fb_check_var(struct fb_var_screeninfo *var, if ((info->fix.line_length * var->yres_virtual) > info->fix.smem_len) { pr_debug("%s: Memory Limit requested yres_virtual = %u\n", - __FUNCTION__, var->yres_virtual); + __func__, var->yres_virtual); return -ENOMEM; } diff --git a/drivers/video/carminefb.c b/drivers/video/carminefb.c index c9b191319a9a..c7ff3c1a266a 100644 --- a/drivers/video/carminefb.c +++ b/drivers/video/carminefb.c @@ -168,7 +168,7 @@ static int carmine_setcolreg(unsigned regno, unsigned red, unsigned green, blue >>= 8; transp >>= 8; - ((u32 *)info->pseudo_palette)[regno] = be32_to_cpu(transp << 24 | + ((__be32 *)info->pseudo_palette)[regno] = cpu_to_be32(transp << 24 | red << 0 | green << 8 | blue << 16); return 0; } diff --git a/drivers/video/cyber2000fb.c b/drivers/video/cyber2000fb.c index 39d5d643a50b..7a9e42e3a9a9 100644 --- a/drivers/video/cyber2000fb.c +++ b/drivers/video/cyber2000fb.c @@ -1583,8 +1583,7 @@ cyberpro_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) goto failed_release; cfb->dev = dev; - cfb->region = ioremap(pci_resource_start(dev, 0), - pci_resource_len(dev, 0)); + cfb->region = pci_ioremap_bar(dev, 0); if (!cfb->region) goto failed_ioremap; diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c index 3c65b0d67617..756efeb91abc 100644 --- a/drivers/video/fbmem.c +++ b/drivers/video/fbmem.c @@ -510,6 +510,10 @@ static int fb_prepare_extra_logos(struct fb_info *info, unsigned int height, fb_logo_ex_num = 0; for (i = 0; i < fb_logo_ex_num; i++) { + if (fb_logo_ex[i].logo->type != fb_logo.logo->type) { + fb_logo_ex[i].logo = NULL; + continue; + } height += fb_logo_ex[i].logo->height; if (height > yres) { height -= fb_logo_ex[i].logo->height; diff --git a/drivers/video/gbefb.c b/drivers/video/gbefb.c index f89c3cce1e0c..fe5b519860b1 100644 --- a/drivers/video/gbefb.c +++ b/drivers/video/gbefb.c @@ -912,6 +912,7 @@ static int gbefb_check_var(struct fb_var_screeninfo *var, struct fb_info *info) { unsigned int line_length; struct gbe_timing_info timing; + int ret; /* Limit bpp to 8, 16, and 32 */ if (var->bits_per_pixel <= 8) @@ -930,8 +931,10 @@ static int gbefb_check_var(struct fb_var_screeninfo *var, struct fb_info *info) var->grayscale = 0; /* No grayscale for now */ - if ((var->pixclock = compute_gbe_timing(var, &timing)) < 0) - return(-EINVAL); + ret = compute_gbe_timing(var, &timing); + var->pixclock = ret; + if (ret < 0) + return -EINVAL; /* Adjust virtual resolution, if necessary */ if (var->xres > var->xres_virtual || (!ywrap && !ypan)) diff --git a/drivers/video/geode/gx1fb_core.c b/drivers/video/geode/gx1fb_core.c index bb20a2289760..751e491ca8c8 100644 --- a/drivers/video/geode/gx1fb_core.c +++ b/drivers/video/geode/gx1fb_core.c @@ -217,8 +217,7 @@ static int __init gx1fb_map_video_memory(struct fb_info *info, struct pci_dev *d ret = pci_request_region(dev, 0, "gx1fb (video)"); if (ret < 0) return ret; - par->vid_regs = ioremap(pci_resource_start(dev, 0), - pci_resource_len(dev, 0)); + par->vid_regs = pci_ioremap_bar(dev, 0); if (!par->vid_regs) return -ENOMEM; diff --git a/drivers/video/geode/gxfb_core.c b/drivers/video/geode/gxfb_core.c index de2b8f9876a5..484118926318 100644 --- a/drivers/video/geode/gxfb_core.c +++ b/drivers/video/geode/gxfb_core.c @@ -242,23 +242,21 @@ static int __init gxfb_map_video_memory(struct fb_info *info, struct pci_dev *de ret = pci_request_region(dev, 3, "gxfb (video processor)"); if (ret < 0) return ret; - par->vid_regs = ioremap(pci_resource_start(dev, 3), - pci_resource_len(dev, 3)); + par->vid_regs = pci_ioremap_bar(dev, 3); if (!par->vid_regs) return -ENOMEM; ret = pci_request_region(dev, 2, "gxfb (display controller)"); if (ret < 0) return ret; - par->dc_regs = ioremap(pci_resource_start(dev, 2), pci_resource_len(dev, 2)); + par->dc_regs = pci_ioremap_bar(dev, 2); if (!par->dc_regs) return -ENOMEM; ret = pci_request_region(dev, 1, "gxfb (graphics processor)"); if (ret < 0) return ret; - par->gp_regs = ioremap(pci_resource_start(dev, 1), - pci_resource_len(dev, 1)); + par->gp_regs = pci_ioremap_bar(dev, 1); if (!par->gp_regs) return -ENOMEM; diff --git a/drivers/video/geode/lxfb_core.c b/drivers/video/geode/lxfb_core.c index 2cd9b74d2225..b965ecdbc604 100644 --- a/drivers/video/geode/lxfb_core.c +++ b/drivers/video/geode/lxfb_core.c @@ -379,20 +379,17 @@ static int __init lxfb_map_video_memory(struct fb_info *info, if (info->screen_base == NULL) return ret; - par->gp_regs = ioremap(pci_resource_start(dev, 1), - pci_resource_len(dev, 1)); + par->gp_regs = pci_ioremap_bar(dev, 1); if (par->gp_regs == NULL) return ret; - par->dc_regs = ioremap(pci_resource_start(dev, 2), - pci_resource_len(dev, 2)); + par->dc_regs = pci_ioremap_bar(dev, 2); if (par->dc_regs == NULL) return ret; - par->vp_regs = ioremap(pci_resource_start(dev, 3), - pci_resource_len(dev, 3)); + par->vp_regs = pci_ioremap_bar(dev, 3); if (par->vp_regs == NULL) return ret; diff --git a/drivers/video/gxt4500.c b/drivers/video/gxt4500.c index 564557792bed..896e53dea906 100644 --- a/drivers/video/gxt4500.c +++ b/drivers/video/gxt4500.c @@ -648,7 +648,7 @@ static int __devinit gxt4500_probe(struct pci_dev *pdev, info->pseudo_palette = par->pseudo_palette; info->fix.mmio_start = reg_phys; - par->regs = ioremap(reg_phys, pci_resource_len(pdev, 0)); + par->regs = pci_ioremap_bar(pdev, 0); if (!par->regs) { dev_err(&pdev->dev, "gxt4500: cannot map registers\n"); goto err_free_all; @@ -656,7 +656,7 @@ static int __devinit gxt4500_probe(struct pci_dev *pdev, info->fix.smem_start = fb_phys; info->fix.smem_len = pci_resource_len(pdev, 1); - info->screen_base = ioremap(fb_phys, pci_resource_len(pdev, 1)); + info->screen_base = pci_ioremap_bar(pdev, 1); if (!info->screen_base) { dev_err(&pdev->dev, "gxt4500: cannot map framebuffer\n"); goto err_unmap_regs; diff --git a/drivers/video/i810/i810_accel.c b/drivers/video/i810/i810_accel.c index 76764ea3486a..f5bedee4310a 100644 --- a/drivers/video/i810/i810_accel.c +++ b/drivers/video/i810/i810_accel.c @@ -301,8 +301,10 @@ void i810fb_fillrect(struct fb_info *info, const struct fb_fillrect *rect) u32 dx, dy, width, height, dest, rop = 0, color = 0; if (!info->var.accel_flags || par->dev_flags & LOCKUP || - par->depth == 4) - return cfb_fillrect(info, rect); + par->depth == 4) { + cfb_fillrect(info, rect); + return; + } if (par->depth == 1) color = rect->color; @@ -327,8 +329,10 @@ void i810fb_copyarea(struct fb_info *info, const struct fb_copyarea *region) u32 sx, sy, dx, dy, pitch, width, height, src, dest, xdir; if (!info->var.accel_flags || par->dev_flags & LOCKUP || - par->depth == 4) - return cfb_copyarea(info, region); + par->depth == 4) { + cfb_copyarea(info, region); + return; + } dx = region->dx * par->depth; sx = region->sx * par->depth; @@ -366,8 +370,10 @@ void i810fb_imageblit(struct fb_info *info, const struct fb_image *image) u32 fg = 0, bg = 0, size, dst; if (!info->var.accel_flags || par->dev_flags & LOCKUP || - par->depth == 4 || image->depth != 1) - return cfb_imageblit(info, image); + par->depth == 4 || image->depth != 1) { + cfb_imageblit(info, image); + return; + } switch (info->var.bits_per_pixel) { case 8: diff --git a/drivers/video/intelfb/intelfbdrv.c b/drivers/video/intelfb/intelfbdrv.c index a09e23649357..6d8e5415c809 100644 --- a/drivers/video/intelfb/intelfbdrv.c +++ b/drivers/video/intelfb/intelfbdrv.c @@ -1493,8 +1493,10 @@ static void intelfb_fillrect (struct fb_info *info, DBG_MSG("intelfb_fillrect\n"); #endif - if (!ACCEL(dinfo, info) || dinfo->depth == 4) - return cfb_fillrect(info, rect); + if (!ACCEL(dinfo, info) || dinfo->depth == 4) { + cfb_fillrect(info, rect); + return; + } if (rect->rop == ROP_COPY) rop = PAT_ROP_GXCOPY; @@ -1521,8 +1523,10 @@ static void intelfb_copyarea(struct fb_info *info, DBG_MSG("intelfb_copyarea\n"); #endif - if (!ACCEL(dinfo, info) || dinfo->depth == 4) - return cfb_copyarea(info, region); + if (!ACCEL(dinfo, info) || dinfo->depth == 4) { + cfb_copyarea(info, region); + return; + } intelfbhw_do_bitblt(dinfo, region->sx, region->sy, region->dx, region->dy, region->width, region->height, @@ -1540,8 +1544,10 @@ static void intelfb_imageblit(struct fb_info *info, #endif if (!ACCEL(dinfo, info) || dinfo->depth == 4 - || image->depth != 1) - return cfb_imageblit(info, image); + || image->depth != 1) { + cfb_imageblit(info, image); + return; + } if (dinfo->depth != 8) { fgcolor = dinfo->pseudo_palette[image->fg_color]; @@ -1554,8 +1560,10 @@ static void intelfb_imageblit(struct fb_info *info, if (!intelfbhw_do_drawglyph(dinfo, fgcolor, bgcolor, image->width, image->height, image->data, image->dx, image->dy, - dinfo->pitch, info->var.bits_per_pixel)) - return cfb_imageblit(info, image); + dinfo->pitch, info->var.bits_per_pixel)) { + cfb_imageblit(info, image); + return; + } } static int intelfb_cursor(struct fb_info *info, struct fb_cursor *cursor) diff --git a/drivers/video/modedb.c b/drivers/video/modedb.c index d3c3af53a290..16186240c5f2 100644 --- a/drivers/video/modedb.c +++ b/drivers/video/modedb.c @@ -329,7 +329,7 @@ const struct fb_videomode vesa_modes[] = { FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT, FB_VMODE_NONINTERLACED, FB_MODE_IS_VESA }, /* 17 1152x864-75 VESA */ - { NULL, 75, 1153, 864, 9259, 256, 64, 32, 1, 128, 3, + { NULL, 75, 1152, 864, 9259, 256, 64, 32, 1, 128, 3, FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT, FB_VMODE_NONINTERLACED, FB_MODE_IS_VESA }, /* 18 1280x960-60 VESA */ diff --git a/drivers/video/neofb.c b/drivers/video/neofb.c index bfb802d26d5a..588527a254c2 100644 --- a/drivers/video/neofb.c +++ b/drivers/video/neofb.c @@ -1453,7 +1453,8 @@ neo2200_imageblit(struct fb_info *info, const struct fb_image *image) * is less than 16 bits wide. This is due to insufficient * padding when writing the image. We need to adjust * struct fb_pixmap. Not yet done. */ - return cfb_imageblit(info, image); + cfb_imageblit(info, image); + return; } bltCntl_flags = NEO_BC0_SRC_MONO; } else if (image->depth == info->var.bits_per_pixel) { @@ -1461,7 +1462,8 @@ neo2200_imageblit(struct fb_info *info, const struct fb_image *image) } else { /* We don't currently support hardware acceleration if image * depth is different from display */ - return cfb_imageblit(info, image); + cfb_imageblit(info, image); + return; } switch (info->var.bits_per_pixel) { diff --git a/drivers/video/nvidia/nv_accel.c b/drivers/video/nvidia/nv_accel.c index fa4821c5572b..ad6472a894ea 100644 --- a/drivers/video/nvidia/nv_accel.c +++ b/drivers/video/nvidia/nv_accel.c @@ -300,8 +300,10 @@ void nvidiafb_copyarea(struct fb_info *info, const struct fb_copyarea *region) if (info->state != FBINFO_STATE_RUNNING) return; - if (par->lockup) - return cfb_copyarea(info, region); + if (par->lockup) { + cfb_copyarea(info, region); + return; + } NVDmaStart(info, par, BLIT_POINT_SRC, 3); NVDmaNext(par, (region->sy << 16) | region->sx); @@ -319,8 +321,10 @@ void nvidiafb_fillrect(struct fb_info *info, const struct fb_fillrect *rect) if (info->state != FBINFO_STATE_RUNNING) return; - if (par->lockup) - return cfb_fillrect(info, rect); + if (par->lockup) { + cfb_fillrect(info, rect); + return; + } if (info->var.bits_per_pixel == 8) color = rect->color; diff --git a/drivers/video/pm3fb.c b/drivers/video/pm3fb.c index 68089d1456c2..6666f45a2f8c 100644 --- a/drivers/video/pm3fb.c +++ b/drivers/video/pm3fb.c @@ -539,8 +539,10 @@ static void pm3fb_imageblit(struct fb_info *info, const struct fb_image *image) bgx = par->palette[image->bg_color]; break; } - if (image->depth != 1) - return cfb_imageblit(info, image); + if (image->depth != 1) { + cfb_imageblit(info, image); + return; + } if (info->var.bits_per_pixel == 8) { fgx |= fgx << 8; diff --git a/drivers/video/sm501fb.c b/drivers/video/sm501fb.c index f94ae84a58cd..dcd98793d568 100644 --- a/drivers/video/sm501fb.c +++ b/drivers/video/sm501fb.c @@ -159,6 +159,9 @@ static int sm501_alloc_mem(struct sm501fb_info *inf, struct sm501_mem *mem, break; case SM501_MEMF_PANEL: + if (size > inf->fbmem_len) + return -ENOMEM; + ptr = inf->fbmem_len - size; fbi = inf->fb[HEAD_CRT]; @@ -172,9 +175,6 @@ static int sm501_alloc_mem(struct sm501fb_info *inf, struct sm501_mem *mem, if (fbi && ptr < fbi->fix.smem_len) return -ENOMEM; - if (ptr < 0) - return -ENOMEM; - break; case SM501_MEMF_CRT: diff --git a/drivers/video/via/viafbdev.c b/drivers/video/via/viafbdev.c index e21fe5b6f9ff..37b433a08ce8 100644 --- a/drivers/video/via/viafbdev.c +++ b/drivers/video/via/viafbdev.c @@ -870,8 +870,10 @@ static void viafb_fillrect(struct fb_info *info, u32 col = 0, rop = 0; int pitch; - if (!viafb_accel) - return cfb_fillrect(info, rect); + if (!viafb_accel) { + cfb_fillrect(info, rect); + return; + } if (!rect->width || !rect->height) return; @@ -937,8 +939,10 @@ static void viafb_copyarea(struct fb_info *info, DEBUG_MSG(KERN_INFO "viafb_copyarea!!\n"); - if (!viafb_accel) - return cfb_copyarea(info, area); + if (!viafb_accel) { + cfb_copyarea(info, area); + return; + } if (!area->width || !area->height) return; @@ -994,8 +998,10 @@ static void viafb_imageblit(struct fb_info *info, int i; int pitch; - if (!viafb_accel) - return cfb_imageblit(info, image); + if (!viafb_accel) { + cfb_imageblit(info, image); + return; + } udata = (u32 *) image->data; diff --git a/firmware/dsp56k/bootstrap.asm b/firmware/dsp56k/bootstrap.asm index 10d891929cd3..a411047e6db3 100644 --- a/firmware/dsp56k/bootstrap.asm +++ b/firmware/dsp56k/bootstrap.asm @@ -51,19 +51,19 @@ start jmp <$40 ; Copy DSP program control move #real,r0 move #upload,r1 - do #upload_end-upload,<_copy - move P:(r0)+,x0 - move x0,P:(r1)+ -_copy movep #>4,X:<<M_HCR - movep #>$c00,X:<<M_IPR + do #upload_end-upload,_copy + movem P:(r0)+,x0 + movem x0,P:(r1)+ +_copy movep #4,X:<<M_HCR + movep #$c00,X:<<M_IPR and #<$fe,mr jmp upload real org P:$7ea9 upload - movep #>1,X:<<M_PBC - movep #>0,X:<<M_BCR + movep #1,X:<<M_PBC + movep #0,X:<<M_BCR next jclr #0,X:<<M_HSR,* movep X:<<M_HRX,A @@ -81,18 +81,18 @@ _get_length cmp x0,A jeq load_Y -load_P do y0,_load +load_P do y0,_load_P jclr #0,X:<<M_HSR,* movep X:<<M_HRX,P:(r0)+ -_load jmp next -load_X do y0,_load +_load_P jmp next +load_X do y0,_load_X jclr #0,X:<<M_HSR,* movep X:<<M_HRX,X:(r0)+ -_load jmp next -load_Y do y0,_load +_load_X jmp next +load_Y do y0,_load_Y jclr #0,X:<<M_HSR,* movep X:<<M_HRX,Y:(r0)+ -_load jmp next +_load_Y jmp next upload_end end diff --git a/fs/Kconfig b/fs/Kconfig index f9b6e2979aaa..32883589ee54 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -721,7 +721,20 @@ config CONFIGFS_FS endmenu -menu "Miscellaneous filesystems" +menuconfig MISC_FILESYSTEMS + bool "Miscellaneous filesystems" + default y + ---help--- + Say Y here to get to see options for various miscellaneous + filesystems, such as filesystems that came from other + operating systems. + + This option alone does not add any kernel code. + + If you say N, all options in this submenu will be skipped and + disabled; if unsure, say Y here. + +if MISC_FILESYSTEMS config ADFS_FS tristate "ADFS file system support (EXPERIMENTAL)" @@ -1091,7 +1104,7 @@ config UFS_DEBUG Y here. This will result in _many_ additional debugging messages to be written to the system log. -endmenu +endif # MISC_FILESYSTEMS menuconfig NETWORK_FILESYSTEMS bool "Network File Systems" diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index e0f16da00e54..a76803108d06 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -25,8 +25,6 @@ #define AUTOFS_DEV_IOCTL_IOC_FIRST (AUTOFS_DEV_IOCTL_VERSION) #define AUTOFS_DEV_IOCTL_IOC_COUNT (AUTOFS_IOC_COUNT - 11) -#define AUTOFS_TYPE_TRIGGER (AUTOFS_TYPE_DIRECT|AUTOFS_TYPE_OFFSET) - #include <linux/kernel.h> #include <linux/slab.h> #include <linux/time.h> diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c index 63b7c7afe8df..025e105bffea 100644 --- a/fs/autofs4/dev-ioctl.c +++ b/fs/autofs4/dev-ioctl.c @@ -124,7 +124,7 @@ static inline void free_dev_ioctl(struct autofs_dev_ioctl *param) /* * Check sanity of parameter control fields and if a path is present - * check that it has a "/" and is terminated. + * check that it is terminated and contains at least one "/". */ static int validate_dev_ioctl(int cmd, struct autofs_dev_ioctl *param) { @@ -138,15 +138,16 @@ static int validate_dev_ioctl(int cmd, struct autofs_dev_ioctl *param) } if (param->size > sizeof(*param)) { - err = check_name(param->path); + err = invalid_str(param->path, + (void *) ((size_t) param + param->size)); if (err) { - AUTOFS_WARN("invalid path supplied for cmd(0x%08x)", - cmd); + AUTOFS_WARN( + "path string terminator missing for cmd(0x%08x)", + cmd); goto out; } - err = invalid_str(param->path, - (void *) ((size_t) param + param->size)); + err = check_name(param->path); if (err) { AUTOFS_WARN("invalid path supplied for cmd(0x%08x)", cmd); @@ -180,7 +181,7 @@ static int autofs_dev_ioctl_protover(struct file *fp, struct autofs_sb_info *sbi, struct autofs_dev_ioctl *param) { - param->arg1 = sbi->version; + param->protover.version = sbi->version; return 0; } @@ -189,7 +190,7 @@ static int autofs_dev_ioctl_protosubver(struct file *fp, struct autofs_sb_info *sbi, struct autofs_dev_ioctl *param) { - param->arg1 = sbi->sub_version; + param->protosubver.sub_version = sbi->sub_version; return 0; } @@ -335,13 +336,13 @@ static int autofs_dev_ioctl_openmount(struct file *fp, int err, fd; /* param->path has already been checked */ - if (!param->arg1) + if (!param->openmount.devid) return -EINVAL; param->ioctlfd = -1; path = param->path; - devid = param->arg1; + devid = param->openmount.devid; err = 0; fd = autofs_dev_ioctl_open_mountpoint(path, devid); @@ -373,7 +374,7 @@ static int autofs_dev_ioctl_ready(struct file *fp, { autofs_wqt_t token; - token = (autofs_wqt_t) param->arg1; + token = (autofs_wqt_t) param->ready.token; return autofs4_wait_release(sbi, token, 0); } @@ -388,8 +389,8 @@ static int autofs_dev_ioctl_fail(struct file *fp, autofs_wqt_t token; int status; - token = (autofs_wqt_t) param->arg1; - status = param->arg2 ? param->arg2 : -ENOENT; + token = (autofs_wqt_t) param->fail.token; + status = param->fail.status ? param->fail.status : -ENOENT; return autofs4_wait_release(sbi, token, status); } @@ -412,10 +413,10 @@ static int autofs_dev_ioctl_setpipefd(struct file *fp, int pipefd; int err = 0; - if (param->arg1 == -1) + if (param->setpipefd.pipefd == -1) return -EINVAL; - pipefd = param->arg1; + pipefd = param->setpipefd.pipefd; mutex_lock(&sbi->wq_mutex); if (!sbi->catatonic) { @@ -457,8 +458,8 @@ static int autofs_dev_ioctl_timeout(struct file *fp, { unsigned long timeout; - timeout = param->arg1; - param->arg1 = sbi->exp_timeout / HZ; + timeout = param->timeout.timeout; + param->timeout.timeout = sbi->exp_timeout / HZ; sbi->exp_timeout = timeout * HZ; return 0; } @@ -489,7 +490,7 @@ static int autofs_dev_ioctl_requester(struct file *fp, path = param->path; devid = sbi->sb->s_dev; - param->arg1 = param->arg2 = -1; + param->requester.uid = param->requester.gid = -1; /* Get nameidata of the parent directory */ err = path_lookup(path, LOOKUP_PARENT, &nd); @@ -505,8 +506,8 @@ static int autofs_dev_ioctl_requester(struct file *fp, err = 0; autofs4_expire_wait(nd.path.dentry); spin_lock(&sbi->fs_lock); - param->arg1 = ino->uid; - param->arg2 = ino->gid; + param->requester.uid = ino->uid; + param->requester.gid = ino->gid; spin_unlock(&sbi->fs_lock); } @@ -529,10 +530,10 @@ static int autofs_dev_ioctl_expire(struct file *fp, int err = -EAGAIN; int how; - how = param->arg1; + how = param->expire.how; mnt = fp->f_path.mnt; - if (sbi->type & AUTOFS_TYPE_TRIGGER) + if (autofs_type_trigger(sbi->type)) dentry = autofs4_expire_direct(sbi->sb, mnt, sbi, how); else dentry = autofs4_expire_indirect(sbi->sb, mnt, sbi, how); @@ -565,9 +566,9 @@ static int autofs_dev_ioctl_askumount(struct file *fp, struct autofs_sb_info *sbi, struct autofs_dev_ioctl *param) { - param->arg1 = 0; + param->askumount.may_umount = 0; if (may_umount(fp->f_path.mnt)) - param->arg1 = 1; + param->askumount.may_umount = 1; return 0; } @@ -600,6 +601,7 @@ static int autofs_dev_ioctl_ismountpoint(struct file *fp, struct nameidata nd; const char *path; unsigned int type; + unsigned int devid, magic; int err = -ENOENT; if (param->size <= sizeof(*param)) { @@ -608,13 +610,13 @@ static int autofs_dev_ioctl_ismountpoint(struct file *fp, } path = param->path; - type = param->arg1; + type = param->ismountpoint.in.type; - param->arg1 = 0; - param->arg2 = 0; + param->ismountpoint.out.devid = devid = 0; + param->ismountpoint.out.magic = magic = 0; if (!fp || param->ioctlfd == -1) { - if (type == AUTOFS_TYPE_ANY) { + if (autofs_type_any(type)) { struct super_block *sb; err = path_lookup(path, LOOKUP_FOLLOW, &nd); @@ -622,7 +624,7 @@ static int autofs_dev_ioctl_ismountpoint(struct file *fp, goto out; sb = nd.path.dentry->d_sb; - param->arg1 = new_encode_dev(sb->s_dev); + devid = new_encode_dev(sb->s_dev); } else { struct autofs_info *ino; @@ -635,38 +637,41 @@ static int autofs_dev_ioctl_ismountpoint(struct file *fp, goto out_release; ino = autofs4_dentry_ino(nd.path.dentry); - param->arg1 = autofs4_get_dev(ino->sbi); + devid = autofs4_get_dev(ino->sbi); } err = 0; if (nd.path.dentry->d_inode && nd.path.mnt->mnt_root == nd.path.dentry) { err = 1; - param->arg2 = nd.path.dentry->d_inode->i_sb->s_magic; + magic = nd.path.dentry->d_inode->i_sb->s_magic; } } else { - dev_t devid = new_encode_dev(sbi->sb->s_dev); + dev_t dev = autofs4_get_dev(sbi); err = path_lookup(path, LOOKUP_PARENT, &nd); if (err) goto out; - err = autofs_dev_ioctl_find_super(&nd, devid); + err = autofs_dev_ioctl_find_super(&nd, dev); if (err) goto out_release; - param->arg1 = autofs4_get_dev(sbi); + devid = dev; err = have_submounts(nd.path.dentry); if (nd.path.mnt->mnt_mountpoint != nd.path.mnt->mnt_root) { if (follow_down(&nd.path.mnt, &nd.path.dentry)) { struct inode *inode = nd.path.dentry->d_inode; - param->arg2 = inode->i_sb->s_magic; + magic = inode->i_sb->s_magic; } } } + param->ismountpoint.out.devid = devid; + param->ismountpoint.out.magic = magic; + out_release: path_put(&nd.path); out: diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index 4b6fb3f628c0..e3bd50776f9e 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -63,7 +63,7 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry) struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); /* This is an autofs submount, we can't expire it */ - if (sbi->type == AUTOFS_TYPE_INDIRECT) + if (autofs_type_indirect(sbi->type)) goto done; /* @@ -490,7 +490,7 @@ int autofs4_expire_multi(struct super_block *sb, struct vfsmount *mnt, if (arg && get_user(do_now, arg)) return -EFAULT; - if (sbi->type & AUTOFS_TYPE_TRIGGER) + if (autofs_type_trigger(sbi->type)) dentry = autofs4_expire_direct(sb, mnt, sbi, do_now); else dentry = autofs4_expire_indirect(sb, mnt, sbi, do_now); diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index cfc23e53b6f4..716e12b627b2 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -197,9 +197,9 @@ static int autofs4_show_options(struct seq_file *m, struct vfsmount *mnt) seq_printf(m, ",minproto=%d", sbi->min_proto); seq_printf(m, ",maxproto=%d", sbi->max_proto); - if (sbi->type & AUTOFS_TYPE_OFFSET) + if (autofs_type_offset(sbi->type)) seq_printf(m, ",offset"); - else if (sbi->type & AUTOFS_TYPE_DIRECT) + else if (autofs_type_direct(sbi->type)) seq_printf(m, ",direct"); else seq_printf(m, ",indirect"); @@ -284,13 +284,13 @@ static int parse_options(char *options, int *pipefd, uid_t *uid, gid_t *gid, *maxproto = option; break; case Opt_indirect: - *type = AUTOFS_TYPE_INDIRECT; + set_autofs_type_indirect(type); break; case Opt_direct: - *type = AUTOFS_TYPE_DIRECT; + set_autofs_type_direct(type); break; case Opt_offset: - *type = AUTOFS_TYPE_OFFSET; + set_autofs_type_offset(type); break; default: return 1; @@ -338,7 +338,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) sbi->sb = s; sbi->version = 0; sbi->sub_version = 0; - sbi->type = AUTOFS_TYPE_INDIRECT; + set_autofs_type_indirect(&sbi->type); sbi->min_proto = 0; sbi->max_proto = 0; mutex_init(&sbi->wq_mutex); @@ -380,7 +380,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) } root_inode->i_fop = &autofs4_root_operations; - root_inode->i_op = sbi->type & AUTOFS_TYPE_TRIGGER ? + root_inode->i_op = autofs_type_trigger(sbi->type) ? &autofs4_direct_root_inode_operations : &autofs4_indirect_root_inode_operations; diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index e02cc8ae5eb3..eeb246845909 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -337,7 +337,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, * is very similar for indirect mounts except only dentrys * in the root of the autofs file system may be negative. */ - if (sbi->type & AUTOFS_TYPE_TRIGGER) + if (autofs_type_trigger(sbi->type)) return -ENOENT; else if (!IS_ROOT(dentry->d_parent)) return -ENOENT; @@ -348,7 +348,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, return -ENOMEM; /* If this is a direct mount request create a dummy name */ - if (IS_ROOT(dentry) && sbi->type & AUTOFS_TYPE_TRIGGER) + if (IS_ROOT(dentry) && autofs_type_trigger(sbi->type)) qstr.len = sprintf(name, "%p", dentry); else { qstr.len = autofs4_getpath(sbi, dentry, &name); @@ -406,11 +406,11 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, type = autofs_ptype_expire_multi; } else { if (notify == NFY_MOUNT) - type = (sbi->type & AUTOFS_TYPE_TRIGGER) ? + type = autofs_type_trigger(sbi->type) ? autofs_ptype_missing_direct : autofs_ptype_missing_indirect; else - type = (sbi->type & AUTOFS_TYPE_TRIGGER) ? + type = autofs_type_trigger(sbi->type) ? autofs_ptype_expire_direct : autofs_ptype_expire_indirect; } diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index 0ed57b5ee012..cc4062d12ca2 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -213,6 +213,9 @@ static void bfs_put_super(struct super_block *s) { struct bfs_sb_info *info = BFS_SB(s); + if (!info) + return; + brelse(info->si_sbh); mutex_destroy(&info->bfs_lock); kfree(info->si_imap); @@ -327,6 +330,7 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) unsigned i, imap_len; struct bfs_sb_info *info; long ret = -EINVAL; + unsigned long i_sblock, i_eblock, i_eoff, s_size; info = kzalloc(sizeof(*info), GFP_KERNEL); if (!info) @@ -350,6 +354,12 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) s->s_magic = BFS_MAGIC; info->si_sbh = bh; + + if (le32_to_cpu(bfs_sb->s_start) > le32_to_cpu(bfs_sb->s_end)) { + printf("Superblock is corrupted\n"); + goto out; + } + info->si_lasti = (le32_to_cpu(bfs_sb->s_start) - BFS_BSIZE) / sizeof(struct bfs_inode) + BFS_ROOT_INO - 1; @@ -380,6 +390,18 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) - le32_to_cpu(bfs_sb->s_start)) >> BFS_BSIZE_BITS; info->si_freei = 0; info->si_lf_eblk = 0; + + /* can we read the last block? */ + bh = sb_bread(s, info->si_blocks - 1); + if (!bh) { + printf("Last block not available: %lu\n", info->si_blocks - 1); + iput(inode); + ret = -EIO; + kfree(info->si_imap); + goto out; + } + brelse(bh); + bh = NULL; for (i = BFS_ROOT_INO; i <= info->si_lasti; i++) { struct bfs_inode *di; @@ -397,6 +419,29 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) di = (struct bfs_inode *)bh->b_data + off; + /* test if filesystem is not corrupted */ + + i_eoff = le32_to_cpu(di->i_eoffset); + i_sblock = le32_to_cpu(di->i_sblock); + i_eblock = le32_to_cpu(di->i_eblock); + s_size = le32_to_cpu(bfs_sb->s_end); + + if (i_sblock > info->si_blocks || + i_eblock > info->si_blocks || + i_sblock > i_eblock || + i_eoff > s_size || + i_sblock * BFS_BSIZE > i_eoff) { + + printf("Inode 0x%08x corrupted\n", i); + + brelse(bh); + s->s_root = NULL; + kfree(info->si_imap); + kfree(info); + s->s_fs_info = NULL; + return -EIO; + } + if (!di->i_ino) { info->si_freei++; continue; diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index e1158cb4fbd6..c4e83537ead7 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -649,7 +649,7 @@ static const struct file_operations bm_register_operations = { static ssize_t bm_status_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) { - char *s = enabled ? "enabled" : "disabled"; + char *s = enabled ? "enabled\n" : "disabled\n"; return simple_read_from_buffer(buf, nbytes, ppos, s, strlen(s)); } diff --git a/fs/block_dev.c b/fs/block_dev.c index 349a26c10001..b957717e25ab 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1262,7 +1262,7 @@ EXPORT_SYMBOL(ioctl_by_bdev); /** * lookup_bdev - lookup a struct block_device by name - * @path: special file representing the block device + * @pathname: special file representing the block device * * Get a reference to the blockdevice at @pathname in the current * namespace if possible and return it. Return ERR_PTR(error) diff --git a/fs/buffer.c b/fs/buffer.c index a13f09b696f7..c26da785938a 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2022,7 +2022,6 @@ int block_write_begin(struct file *file, struct address_space *mapping, if (pos + len > inode->i_size) vmtruncate(inode, inode->i_size); } - goto out; } out: diff --git a/fs/char_dev.c b/fs/char_dev.c index 700697a72618..38f71222a552 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c @@ -120,7 +120,7 @@ __register_chrdev_region(unsigned int major, unsigned int baseminor, cd->major = major; cd->baseminor = baseminor; cd->minorct = minorct; - strncpy(cd->name,name, 64); + strlcpy(cd->name, name, sizeof(cd->name)); i = major_to_index(major); diff --git a/fs/compat.c b/fs/compat.c index d1ece79b6411..30f2faa22f5c 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -1187,6 +1187,9 @@ compat_sys_readv(unsigned long fd, const struct compat_iovec __user *vec, unsign ret = compat_do_readv_writev(READ, file, vec, vlen, &file->f_pos); out: + if (ret > 0) + add_rchar(current, ret); + inc_syscr(current); fput(file); return ret; } @@ -1210,6 +1213,9 @@ compat_sys_writev(unsigned long fd, const struct compat_iovec __user *vec, unsig ret = compat_do_readv_writev(WRITE, file, vec, vlen, &file->f_pos); out: + if (ret > 0) + add_wchar(current, ret); + inc_syscw(current); fput(file); return ret; } diff --git a/fs/direct-io.c b/fs/direct-io.c index af0558dbe8b7..b6d43908ff7a 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -1209,6 +1209,19 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, retval = direct_io_worker(rw, iocb, inode, iov, offset, nr_segs, blkbits, get_block, end_io, dio); + /* + * In case of error extending write may have instantiated a few + * blocks outside i_size. Trim these off again for DIO_LOCKING. + * NOTE: DIO_NO_LOCK/DIO_OWN_LOCK callers have to handle this by + * it's own meaner. + */ + if (unlikely(retval < 0 && (rw & WRITE))) { + loff_t isize = i_size_read(inode); + + if (end > isize && dio_lock_type == DIO_LOCKING) + vmtruncate(inode, isize); + } + if (rw == READ && dio_lock_type == DIO_LOCKING) release_i_mutex = 0; diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index 6046239465a1..c01e043670e2 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -175,8 +175,8 @@ out: * * Returns zero on success; non-zero on error. */ -static int ecryptfs_derive_iv(char *iv, struct ecryptfs_crypt_stat *crypt_stat, - loff_t offset) +int ecryptfs_derive_iv(char *iv, struct ecryptfs_crypt_stat *crypt_stat, + loff_t offset) { int rc = 0; char dst[MD5_DIGEST_SIZE]; @@ -924,6 +924,15 @@ static void ecryptfs_copy_mount_wide_flags_to_inode_flags( crypt_stat->flags |= ECRYPTFS_METADATA_IN_XATTR; if (mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED) crypt_stat->flags |= ECRYPTFS_VIEW_AS_ENCRYPTED; + if (mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES) { + crypt_stat->flags |= ECRYPTFS_ENCRYPT_FILENAMES; + if (mount_crypt_stat->flags + & ECRYPTFS_GLOBAL_ENCFN_USE_MOUNT_FNEK) + crypt_stat->flags |= ECRYPTFS_ENCFN_USE_MOUNT_FNEK; + else if (mount_crypt_stat->flags + & ECRYPTFS_GLOBAL_ENCFN_USE_FEK) + crypt_stat->flags |= ECRYPTFS_ENCFN_USE_FEK; + } } static int ecryptfs_copy_mount_wide_sigs_to_inode_sigs( @@ -1060,7 +1069,8 @@ struct ecryptfs_flag_map_elem { static struct ecryptfs_flag_map_elem ecryptfs_flag_map[] = { {0x00000001, ECRYPTFS_ENABLE_HMAC}, {0x00000002, ECRYPTFS_ENCRYPTED}, - {0x00000004, ECRYPTFS_METADATA_IN_XATTR} + {0x00000004, ECRYPTFS_METADATA_IN_XATTR}, + {0x00000008, ECRYPTFS_ENCRYPT_FILENAMES} }; /** @@ -1149,19 +1159,20 @@ ecryptfs_cipher_code_str_map[] = { /** * ecryptfs_code_for_cipher_string - * @crypt_stat: The cryptographic context + * @cipher_name: The string alias for the cipher + * @key_bytes: Length of key in bytes; used for AES code selection * * Returns zero on no match, or the cipher code on match */ -u8 ecryptfs_code_for_cipher_string(struct ecryptfs_crypt_stat *crypt_stat) +u8 ecryptfs_code_for_cipher_string(char *cipher_name, size_t key_bytes) { int i; u8 code = 0; struct ecryptfs_cipher_code_str_map_elem *map = ecryptfs_cipher_code_str_map; - if (strcmp(crypt_stat->cipher, "aes") == 0) { - switch (crypt_stat->key_size) { + if (strcmp(cipher_name, "aes") == 0) { + switch (key_bytes) { case 16: code = RFC2440_CIPHER_AES_128; break; @@ -1173,7 +1184,7 @@ u8 ecryptfs_code_for_cipher_string(struct ecryptfs_crypt_stat *crypt_stat) } } else { for (i = 0; i < ARRAY_SIZE(ecryptfs_cipher_code_str_map); i++) - if (strcmp(crypt_stat->cipher, map[i].cipher_str) == 0){ + if (strcmp(cipher_name, map[i].cipher_str) == 0) { code = map[i].cipher_code; break; } @@ -1212,6 +1223,8 @@ int ecryptfs_read_and_validate_header_region(char *data, &(ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat); int rc; + if (crypt_stat->extent_size == 0) + crypt_stat->extent_size = ECRYPTFS_DEFAULT_EXTENT_SIZE; rc = ecryptfs_read_lower(data, 0, crypt_stat->extent_size, ecryptfs_inode); if (rc) { @@ -1221,7 +1234,6 @@ int ecryptfs_read_and_validate_header_region(char *data, } if (!contains_ecryptfs_marker(data + ECRYPTFS_FILE_SIZE_BYTES)) { rc = -EINVAL; - ecryptfs_printk(KERN_DEBUG, "Valid marker not found\n"); } out: return rc; @@ -1628,95 +1640,95 @@ out: } /** - * ecryptfs_encode_filename - converts a plaintext file name to cipher text - * @crypt_stat: The crypt_stat struct associated with the file anem to encode - * @name: The plaintext name - * @length: The length of the plaintext - * @encoded_name: The encypted name + * ecryptfs_encrypt_filename - encrypt filename * - * Encrypts and encodes a filename into something that constitutes a - * valid filename for a filesystem, with printable characters. + * CBC-encrypts the filename. We do not want to encrypt the same + * filename with the same key and IV, which may happen with hard + * links, so we prepend random bits to each filename. * - * We assume that we have a properly initialized crypto context, - * pointed to by crypt_stat->tfm. - * - * TODO: Implement filename decoding and decryption here, in place of - * memcpy. We are keeping the framework around for now to (1) - * facilitate testing of the components needed to implement filename - * encryption and (2) to provide a code base from which other - * developers in the community can easily implement this feature. - * - * Returns the length of encoded filename; negative if error + * Returns zero on success; non-zero otherwise */ -int -ecryptfs_encode_filename(struct ecryptfs_crypt_stat *crypt_stat, - const char *name, int length, char **encoded_name) +static int +ecryptfs_encrypt_filename(struct ecryptfs_filename *filename, + struct ecryptfs_crypt_stat *crypt_stat, + struct ecryptfs_mount_crypt_stat *mount_crypt_stat) { - int error = 0; + int rc = 0; - (*encoded_name) = kmalloc(length + 2, GFP_KERNEL); - if (!(*encoded_name)) { - error = -ENOMEM; + filename->encrypted_filename = NULL; + filename->encrypted_filename_size = 0; + if ((crypt_stat && (crypt_stat->flags & ECRYPTFS_ENCFN_USE_MOUNT_FNEK)) + || (mount_crypt_stat && (mount_crypt_stat->flags + & ECRYPTFS_GLOBAL_ENCFN_USE_MOUNT_FNEK))) { + size_t packet_size; + size_t remaining_bytes; + + rc = ecryptfs_write_tag_70_packet( + NULL, NULL, + &filename->encrypted_filename_size, + mount_crypt_stat, NULL, + filename->filename_size); + if (rc) { + printk(KERN_ERR "%s: Error attempting to get packet " + "size for tag 72; rc = [%d]\n", __func__, + rc); + filename->encrypted_filename_size = 0; + goto out; + } + filename->encrypted_filename = + kmalloc(filename->encrypted_filename_size, GFP_KERNEL); + if (!filename->encrypted_filename) { + printk(KERN_ERR "%s: Out of memory whilst attempting " + "to kmalloc [%zd] bytes\n", __func__, + filename->encrypted_filename_size); + rc = -ENOMEM; + goto out; + } + remaining_bytes = filename->encrypted_filename_size; + rc = ecryptfs_write_tag_70_packet(filename->encrypted_filename, + &remaining_bytes, + &packet_size, + mount_crypt_stat, + filename->filename, + filename->filename_size); + if (rc) { + printk(KERN_ERR "%s: Error attempting to generate " + "tag 70 packet; rc = [%d]\n", __func__, + rc); + kfree(filename->encrypted_filename); + filename->encrypted_filename = NULL; + filename->encrypted_filename_size = 0; + goto out; + } + filename->encrypted_filename_size = packet_size; + } else { + printk(KERN_ERR "%s: No support for requested filename " + "encryption method in this release\n", __func__); + rc = -ENOTSUPP; goto out; } - /* TODO: Filename encryption is a scheduled feature for a - * future version of eCryptfs. This function is here only for - * the purpose of providing a framework for other developers - * to easily implement filename encryption. Hint: Replace this - * memcpy() with a call to encrypt and encode the - * filename, the set the length accordingly. */ - memcpy((void *)(*encoded_name), (void *)name, length); - (*encoded_name)[length] = '\0'; - error = length + 1; out: - return error; + return rc; } -/** - * ecryptfs_decode_filename - converts the cipher text name to plaintext - * @crypt_stat: The crypt_stat struct associated with the file - * @name: The filename in cipher text - * @length: The length of the cipher text name - * @decrypted_name: The plaintext name - * - * Decodes and decrypts the filename. - * - * We assume that we have a properly initialized crypto context, - * pointed to by crypt_stat->tfm. - * - * TODO: Implement filename decoding and decryption here, in place of - * memcpy. We are keeping the framework around for now to (1) - * facilitate testing of the components needed to implement filename - * encryption and (2) to provide a code base from which other - * developers in the community can easily implement this feature. - * - * Returns the length of decoded filename; negative if error - */ -int -ecryptfs_decode_filename(struct ecryptfs_crypt_stat *crypt_stat, - const char *name, int length, char **decrypted_name) +static int ecryptfs_copy_filename(char **copied_name, size_t *copied_name_size, + const char *name, size_t name_size) { - int error = 0; + int rc = 0; - (*decrypted_name) = kmalloc(length + 2, GFP_KERNEL); - if (!(*decrypted_name)) { - error = -ENOMEM; + (*copied_name) = kmalloc((name_size + 2), GFP_KERNEL); + if (!(*copied_name)) { + rc = -ENOMEM; goto out; } - /* TODO: Filename encryption is a scheduled feature for a - * future version of eCryptfs. This function is here only for - * the purpose of providing a framework for other developers - * to easily implement filename encryption. Hint: Replace this - * memcpy() with a call to decode and decrypt the - * filename, the set the length accordingly. */ - memcpy((void *)(*decrypted_name), (void *)name, length); - (*decrypted_name)[length + 1] = '\0'; /* Only for convenience + memcpy((void *)(*copied_name), (void *)name, name_size); + (*copied_name)[(name_size)] = '\0'; /* Only for convenience * in printing out the * string in debug * messages */ - error = length; + (*copied_name_size) = (name_size + 1); out: - return error; + return rc; } /** @@ -1740,7 +1752,7 @@ ecryptfs_process_key_cipher(struct crypto_blkcipher **key_tfm, *key_tfm = NULL; if (*key_size > ECRYPTFS_MAX_KEY_BYTES) { rc = -EINVAL; - printk(KERN_ERR "Requested key size is [%Zd] bytes; maximum " + printk(KERN_ERR "Requested key size is [%zd] bytes; maximum " "allowable is [%d]\n", *key_size, ECRYPTFS_MAX_KEY_BYTES); goto out; } @@ -1765,7 +1777,7 @@ ecryptfs_process_key_cipher(struct crypto_blkcipher **key_tfm, get_random_bytes(dummy_key, *key_size); rc = crypto_blkcipher_setkey(*key_tfm, dummy_key, *key_size); if (rc) { - printk(KERN_ERR "Error attempting to set key of size [%Zd] for " + printk(KERN_ERR "Error attempting to set key of size [%zd] for " "cipher [%s]; rc = [%d]\n", *key_size, cipher_name, rc); rc = -EINVAL; goto out; @@ -1910,3 +1922,341 @@ out: mutex_unlock(&key_tfm_list_mutex); return rc; } + +/* 64 characters forming a 6-bit target field */ +static unsigned char *portable_filename_chars = ("-.0123456789ABCD" + "EFGHIJKLMNOPQRST" + "UVWXYZabcdefghij" + "klmnopqrstuvwxyz"); + +/* We could either offset on every reverse map or just pad some 0x00's + * at the front here */ +static const unsigned char filename_rev_map[] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 7 */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 15 */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 23 */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 31 */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 39 */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, /* 47 */ + 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, /* 55 */ + 0x0A, 0x0B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 63 */ + 0x00, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, /* 71 */ + 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, /* 79 */ + 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x20, 0x21, 0x22, /* 87 */ + 0x23, 0x24, 0x25, 0x00, 0x00, 0x00, 0x00, 0x00, /* 95 */ + 0x00, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, /* 103 */ + 0x2D, 0x2E, 0x2F, 0x30, 0x31, 0x32, 0x33, 0x34, /* 111 */ + 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, /* 119 */ + 0x3D, 0x3E, 0x3F +}; + +/** + * ecryptfs_encode_for_filename + * @dst: Destination location for encoded filename + * @dst_size: Size of the encoded filename in bytes + * @src: Source location for the filename to encode + * @src_size: Size of the source in bytes + */ +void ecryptfs_encode_for_filename(unsigned char *dst, size_t *dst_size, + unsigned char *src, size_t src_size) +{ + size_t num_blocks; + size_t block_num = 0; + size_t dst_offset = 0; + unsigned char last_block[3]; + + if (src_size == 0) { + (*dst_size) = 0; + goto out; + } + num_blocks = (src_size / 3); + if ((src_size % 3) == 0) { + memcpy(last_block, (&src[src_size - 3]), 3); + } else { + num_blocks++; + last_block[2] = 0x00; + switch (src_size % 3) { + case 1: + last_block[0] = src[src_size - 1]; + last_block[1] = 0x00; + break; + case 2: + last_block[0] = src[src_size - 2]; + last_block[1] = src[src_size - 1]; + } + } + (*dst_size) = (num_blocks * 4); + if (!dst) + goto out; + while (block_num < num_blocks) { + unsigned char *src_block; + unsigned char dst_block[4]; + + if (block_num == (num_blocks - 1)) + src_block = last_block; + else + src_block = &src[block_num * 3]; + dst_block[0] = ((src_block[0] >> 2) & 0x3F); + dst_block[1] = (((src_block[0] << 4) & 0x30) + | ((src_block[1] >> 4) & 0x0F)); + dst_block[2] = (((src_block[1] << 2) & 0x3C) + | ((src_block[2] >> 6) & 0x03)); + dst_block[3] = (src_block[2] & 0x3F); + dst[dst_offset++] = portable_filename_chars[dst_block[0]]; + dst[dst_offset++] = portable_filename_chars[dst_block[1]]; + dst[dst_offset++] = portable_filename_chars[dst_block[2]]; + dst[dst_offset++] = portable_filename_chars[dst_block[3]]; + block_num++; + } +out: + return; +} + +/** + * ecryptfs_decode_from_filename + * @dst: If NULL, this function only sets @dst_size and returns. If + * non-NULL, this function decodes the encoded octets in @src + * into the memory that @dst points to. + * @dst_size: Set to the size of the decoded string. + * @src: The encoded set of octets to decode. + * @src_size: The size of the encoded set of octets to decode. + */ +static void +ecryptfs_decode_from_filename(unsigned char *dst, size_t *dst_size, + const unsigned char *src, size_t src_size) +{ + u8 current_bit_offset = 0; + size_t src_byte_offset = 0; + size_t dst_byte_offset = 0; + + if (dst == NULL) { + /* Not exact; conservatively long. Every block of 4 + * encoded characters decodes into a block of 3 + * decoded characters. This segment of code provides + * the caller with the maximum amount of allocated + * space that @dst will need to point to in a + * subsequent call. */ + (*dst_size) = (((src_size + 1) * 3) / 4); + goto out; + } + while (src_byte_offset < src_size) { + unsigned char src_byte = + filename_rev_map[(int)src[src_byte_offset]]; + + switch (current_bit_offset) { + case 0: + dst[dst_byte_offset] = (src_byte << 2); + current_bit_offset = 6; + break; + case 6: + dst[dst_byte_offset++] |= (src_byte >> 4); + dst[dst_byte_offset] = ((src_byte & 0xF) + << 4); + current_bit_offset = 4; + break; + case 4: + dst[dst_byte_offset++] |= (src_byte >> 2); + dst[dst_byte_offset] = (src_byte << 6); + current_bit_offset = 2; + break; + case 2: + dst[dst_byte_offset++] |= (src_byte); + dst[dst_byte_offset] = 0; + current_bit_offset = 0; + break; + } + src_byte_offset++; + } + (*dst_size) = dst_byte_offset; +out: + return; +} + +/** + * ecryptfs_encrypt_and_encode_filename - converts a plaintext file name to cipher text + * @crypt_stat: The crypt_stat struct associated with the file anem to encode + * @name: The plaintext name + * @length: The length of the plaintext + * @encoded_name: The encypted name + * + * Encrypts and encodes a filename into something that constitutes a + * valid filename for a filesystem, with printable characters. + * + * We assume that we have a properly initialized crypto context, + * pointed to by crypt_stat->tfm. + * + * Returns zero on success; non-zero on otherwise + */ +int ecryptfs_encrypt_and_encode_filename( + char **encoded_name, + size_t *encoded_name_size, + struct ecryptfs_crypt_stat *crypt_stat, + struct ecryptfs_mount_crypt_stat *mount_crypt_stat, + const char *name, size_t name_size) +{ + size_t encoded_name_no_prefix_size; + int rc = 0; + + (*encoded_name) = NULL; + (*encoded_name_size) = 0; + if ((crypt_stat && (crypt_stat->flags & ECRYPTFS_ENCRYPT_FILENAMES)) + || (mount_crypt_stat && (mount_crypt_stat->flags + & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES))) { + struct ecryptfs_filename *filename; + + filename = kzalloc(sizeof(*filename), GFP_KERNEL); + if (!filename) { + printk(KERN_ERR "%s: Out of memory whilst attempting " + "to kzalloc [%zd] bytes\n", __func__, + sizeof(*filename)); + rc = -ENOMEM; + goto out; + } + filename->filename = (char *)name; + filename->filename_size = name_size; + rc = ecryptfs_encrypt_filename(filename, crypt_stat, + mount_crypt_stat); + if (rc) { + printk(KERN_ERR "%s: Error attempting to encrypt " + "filename; rc = [%d]\n", __func__, rc); + kfree(filename); + goto out; + } + ecryptfs_encode_for_filename( + NULL, &encoded_name_no_prefix_size, + filename->encrypted_filename, + filename->encrypted_filename_size); + if ((crypt_stat && (crypt_stat->flags + & ECRYPTFS_ENCFN_USE_MOUNT_FNEK)) + || (mount_crypt_stat + && (mount_crypt_stat->flags + & ECRYPTFS_GLOBAL_ENCFN_USE_MOUNT_FNEK))) + (*encoded_name_size) = + (ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE + + encoded_name_no_prefix_size); + else + (*encoded_name_size) = + (ECRYPTFS_FEK_ENCRYPTED_FILENAME_PREFIX_SIZE + + encoded_name_no_prefix_size); + (*encoded_name) = kmalloc((*encoded_name_size) + 1, GFP_KERNEL); + if (!(*encoded_name)) { + printk(KERN_ERR "%s: Out of memory whilst attempting " + "to kzalloc [%zd] bytes\n", __func__, + (*encoded_name_size)); + rc = -ENOMEM; + kfree(filename->encrypted_filename); + kfree(filename); + goto out; + } + if ((crypt_stat && (crypt_stat->flags + & ECRYPTFS_ENCFN_USE_MOUNT_FNEK)) + || (mount_crypt_stat + && (mount_crypt_stat->flags + & ECRYPTFS_GLOBAL_ENCFN_USE_MOUNT_FNEK))) { + memcpy((*encoded_name), + ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX, + ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE); + ecryptfs_encode_for_filename( + ((*encoded_name) + + ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE), + &encoded_name_no_prefix_size, + filename->encrypted_filename, + filename->encrypted_filename_size); + (*encoded_name_size) = + (ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE + + encoded_name_no_prefix_size); + (*encoded_name)[(*encoded_name_size)] = '\0'; + (*encoded_name_size)++; + } else { + rc = -ENOTSUPP; + } + if (rc) { + printk(KERN_ERR "%s: Error attempting to encode " + "encrypted filename; rc = [%d]\n", __func__, + rc); + kfree((*encoded_name)); + (*encoded_name) = NULL; + (*encoded_name_size) = 0; + } + kfree(filename->encrypted_filename); + kfree(filename); + } else { + rc = ecryptfs_copy_filename(encoded_name, + encoded_name_size, + name, name_size); + } +out: + return rc; +} + +/** + * ecryptfs_decode_and_decrypt_filename - converts the encoded cipher text name to decoded plaintext + * @plaintext_name: The plaintext name + * @plaintext_name_size: The plaintext name size + * @ecryptfs_dir_dentry: eCryptfs directory dentry + * @name: The filename in cipher text + * @name_size: The cipher text name size + * + * Decrypts and decodes the filename. + * + * Returns zero on error; non-zero otherwise + */ +int ecryptfs_decode_and_decrypt_filename(char **plaintext_name, + size_t *plaintext_name_size, + struct dentry *ecryptfs_dir_dentry, + const char *name, size_t name_size) +{ + char *decoded_name; + size_t decoded_name_size; + size_t packet_size; + int rc = 0; + + if ((name_size > ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE) + && (strncmp(name, ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX, + ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE) == 0)) { + struct ecryptfs_mount_crypt_stat *mount_crypt_stat = + &ecryptfs_superblock_to_private( + ecryptfs_dir_dentry->d_sb)->mount_crypt_stat; + const char *orig_name = name; + size_t orig_name_size = name_size; + + name += ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE; + name_size -= ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE; + ecryptfs_decode_from_filename(NULL, &decoded_name_size, + name, name_size); + decoded_name = kmalloc(decoded_name_size, GFP_KERNEL); + if (!decoded_name) { + printk(KERN_ERR "%s: Out of memory whilst attempting " + "to kmalloc [%zd] bytes\n", __func__, + decoded_name_size); + rc = -ENOMEM; + goto out; + } + ecryptfs_decode_from_filename(decoded_name, &decoded_name_size, + name, name_size); + rc = ecryptfs_parse_tag_70_packet(plaintext_name, + plaintext_name_size, + &packet_size, + mount_crypt_stat, + decoded_name, + decoded_name_size); + if (rc) { + printk(KERN_INFO "%s: Could not parse tag 70 packet " + "from filename; copying through filename " + "as-is\n", __func__); + rc = ecryptfs_copy_filename(plaintext_name, + plaintext_name_size, + orig_name, orig_name_size); + goto out_free; + } + } else { + rc = ecryptfs_copy_filename(plaintext_name, + plaintext_name_size, + name, name_size); + goto out; + } +out_free: + kfree(decoded_name); +out: + return rc; +} diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index a75026d35d16..c11fc95714ab 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -51,12 +51,16 @@ #define ECRYPTFS_VERSIONING_XATTR 0x00000010 #define ECRYPTFS_VERSIONING_MULTKEY 0x00000020 #define ECRYPTFS_VERSIONING_DEVMISC 0x00000040 +#define ECRYPTFS_VERSIONING_HMAC 0x00000080 +#define ECRYPTFS_VERSIONING_FILENAME_ENCRYPTION 0x00000100 +#define ECRYPTFS_VERSIONING_GCM 0x00000200 #define ECRYPTFS_VERSIONING_MASK (ECRYPTFS_VERSIONING_PASSPHRASE \ | ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH \ | ECRYPTFS_VERSIONING_PUBKEY \ | ECRYPTFS_VERSIONING_XATTR \ | ECRYPTFS_VERSIONING_MULTKEY \ - | ECRYPTFS_VERSIONING_DEVMISC) + | ECRYPTFS_VERSIONING_DEVMISC \ + | ECRYPTFS_VERSIONING_FILENAME_ENCRYPTION) #define ECRYPTFS_MAX_PASSWORD_LENGTH 64 #define ECRYPTFS_MAX_PASSPHRASE_BYTES ECRYPTFS_MAX_PASSWORD_LENGTH #define ECRYPTFS_SALT_SIZE 8 @@ -199,6 +203,7 @@ ecryptfs_get_key_payload_data(struct key *key) #define ECRYPTFS_DEFAULT_CIPHER "aes" #define ECRYPTFS_DEFAULT_KEY_BYTES 16 #define ECRYPTFS_DEFAULT_HASH "md5" +#define ECRYPTFS_TAG_70_DIGEST ECRYPTFS_DEFAULT_HASH #define ECRYPTFS_TAG_1_PACKET_TYPE 0x01 #define ECRYPTFS_TAG_3_PACKET_TYPE 0x8C #define ECRYPTFS_TAG_11_PACKET_TYPE 0xED @@ -206,30 +211,64 @@ ecryptfs_get_key_payload_data(struct key *key) #define ECRYPTFS_TAG_65_PACKET_TYPE 0x41 #define ECRYPTFS_TAG_66_PACKET_TYPE 0x42 #define ECRYPTFS_TAG_67_PACKET_TYPE 0x43 +#define ECRYPTFS_TAG_70_PACKET_TYPE 0x46 /* FNEK-encrypted filename + * as dentry name */ +#define ECRYPTFS_TAG_71_PACKET_TYPE 0x47 /* FNEK-encrypted filename in + * metadata */ +#define ECRYPTFS_TAG_72_PACKET_TYPE 0x48 /* FEK-encrypted filename as + * dentry name */ +#define ECRYPTFS_TAG_73_PACKET_TYPE 0x49 /* FEK-encrypted filename as + * metadata */ +/* Constraint: ECRYPTFS_FILENAME_MIN_RANDOM_PREPEND_BYTES >= + * ECRYPTFS_MAX_IV_BYTES */ +#define ECRYPTFS_FILENAME_MIN_RANDOM_PREPEND_BYTES 16 +#define ECRYPTFS_NON_NULL 0x42 /* A reasonable substitute for NULL */ #define MD5_DIGEST_SIZE 16 +#define ECRYPTFS_TAG_70_DIGEST_SIZE MD5_DIGEST_SIZE +#define ECRYPTFS_FEK_ENCRYPTED_FILENAME_PREFIX "ECRYPTFS_FEK_ENCRYPTED." +#define ECRYPTFS_FEK_ENCRYPTED_FILENAME_PREFIX_SIZE 23 +#define ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX "ECRYPTFS_FNEK_ENCRYPTED." +#define ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE 24 +#define ECRYPTFS_ENCRYPTED_DENTRY_NAME_LEN (18 + 1 + 4 + 1 + 32) struct ecryptfs_key_sig { struct list_head crypt_stat_list; char keysig[ECRYPTFS_SIG_SIZE_HEX]; }; +struct ecryptfs_filename { + struct list_head crypt_stat_list; +#define ECRYPTFS_FILENAME_CONTAINS_DECRYPTED 0x00000001 + u32 flags; + u32 seq_no; + char *filename; + char *encrypted_filename; + size_t filename_size; + size_t encrypted_filename_size; + char fnek_sig[ECRYPTFS_SIG_SIZE_HEX]; + char dentry_name[ECRYPTFS_ENCRYPTED_DENTRY_NAME_LEN + 1]; +}; + /** * This is the primary struct associated with each encrypted file. * * TODO: cache align/pack? */ struct ecryptfs_crypt_stat { -#define ECRYPTFS_STRUCT_INITIALIZED 0x00000001 -#define ECRYPTFS_POLICY_APPLIED 0x00000002 -#define ECRYPTFS_NEW_FILE 0x00000004 -#define ECRYPTFS_ENCRYPTED 0x00000008 -#define ECRYPTFS_SECURITY_WARNING 0x00000010 -#define ECRYPTFS_ENABLE_HMAC 0x00000020 -#define ECRYPTFS_ENCRYPT_IV_PAGES 0x00000040 -#define ECRYPTFS_KEY_VALID 0x00000080 -#define ECRYPTFS_METADATA_IN_XATTR 0x00000100 -#define ECRYPTFS_VIEW_AS_ENCRYPTED 0x00000200 -#define ECRYPTFS_KEY_SET 0x00000400 +#define ECRYPTFS_STRUCT_INITIALIZED 0x00000001 +#define ECRYPTFS_POLICY_APPLIED 0x00000002 +#define ECRYPTFS_NEW_FILE 0x00000004 +#define ECRYPTFS_ENCRYPTED 0x00000008 +#define ECRYPTFS_SECURITY_WARNING 0x00000010 +#define ECRYPTFS_ENABLE_HMAC 0x00000020 +#define ECRYPTFS_ENCRYPT_IV_PAGES 0x00000040 +#define ECRYPTFS_KEY_VALID 0x00000080 +#define ECRYPTFS_METADATA_IN_XATTR 0x00000100 +#define ECRYPTFS_VIEW_AS_ENCRYPTED 0x00000200 +#define ECRYPTFS_KEY_SET 0x00000400 +#define ECRYPTFS_ENCRYPT_FILENAMES 0x00000800 +#define ECRYPTFS_ENCFN_USE_MOUNT_FNEK 0x00001000 +#define ECRYPTFS_ENCFN_USE_FEK 0x00002000 u32 flags; unsigned int file_version; size_t iv_bytes; @@ -332,13 +371,20 @@ struct ecryptfs_mount_crypt_stat { #define ECRYPTFS_XATTR_METADATA_ENABLED 0x00000002 #define ECRYPTFS_ENCRYPTED_VIEW_ENABLED 0x00000004 #define ECRYPTFS_MOUNT_CRYPT_STAT_INITIALIZED 0x00000008 +#define ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES 0x00000010 +#define ECRYPTFS_GLOBAL_ENCFN_USE_MOUNT_FNEK 0x00000020 +#define ECRYPTFS_GLOBAL_ENCFN_USE_FEK 0x00000040 u32 flags; struct list_head global_auth_tok_list; struct mutex global_auth_tok_list_mutex; size_t num_global_auth_toks; size_t global_default_cipher_key_size; + size_t global_default_fn_cipher_key_bytes; unsigned char global_default_cipher_name[ECRYPTFS_MAX_CIPHER_NAME_SIZE + 1]; + unsigned char global_default_fn_cipher_name[ + ECRYPTFS_MAX_CIPHER_NAME_SIZE + 1]; + char global_default_fnek_sig[ECRYPTFS_SIG_SIZE_HEX + 1]; }; /* superblock private data. */ @@ -571,13 +617,22 @@ struct ecryptfs_open_req { int ecryptfs_interpose(struct dentry *hidden_dentry, struct dentry *this_dentry, struct super_block *sb, u32 flags); +int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry, + struct dentry *lower_dentry, + struct ecryptfs_crypt_stat *crypt_stat, + struct inode *ecryptfs_dir_inode, + struct nameidata *ecryptfs_nd); +int ecryptfs_decode_and_decrypt_filename(char **decrypted_name, + size_t *decrypted_name_size, + struct dentry *ecryptfs_dentry, + const char *name, size_t name_size); int ecryptfs_fill_zeros(struct file *file, loff_t new_length); -int ecryptfs_decode_filename(struct ecryptfs_crypt_stat *crypt_stat, - const char *name, int length, - char **decrypted_name); -int ecryptfs_encode_filename(struct ecryptfs_crypt_stat *crypt_stat, - const char *name, int length, - char **encoded_name); +int ecryptfs_encrypt_and_encode_filename( + char **encoded_name, + size_t *encoded_name_size, + struct ecryptfs_crypt_stat *crypt_stat, + struct ecryptfs_mount_crypt_stat *mount_crypt_stat, + const char *name, size_t name_size); struct dentry *ecryptfs_lower_dentry(struct dentry *this_dentry); void ecryptfs_dump_hex(char *data, int bytes); int virt_to_scatterlist(const void *addr, int size, struct scatterlist *sg, @@ -599,7 +654,7 @@ int ecryptfs_read_and_validate_header_region(char *data, struct inode *ecryptfs_inode); int ecryptfs_read_and_validate_xattr_region(char *page_virt, struct dentry *ecryptfs_dentry); -u8 ecryptfs_code_for_cipher_string(struct ecryptfs_crypt_stat *crypt_stat); +u8 ecryptfs_code_for_cipher_string(char *cipher_name, size_t key_bytes); int ecryptfs_cipher_code_to_string(char *str, u8 cipher_code); void ecryptfs_set_default_sizes(struct ecryptfs_crypt_stat *crypt_stat); int ecryptfs_generate_key_packet_set(char *dest_base, @@ -694,5 +749,17 @@ int ecryptfs_privileged_open(struct file **lower_file, struct vfsmount *lower_mnt, const struct cred *cred); int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry); +int +ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes, + size_t *packet_size, + struct ecryptfs_mount_crypt_stat *mount_crypt_stat, + char *filename, size_t filename_size); +int +ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size, + size_t *packet_size, + struct ecryptfs_mount_crypt_stat *mount_crypt_stat, + char *data, size_t max_packet_size); +int ecryptfs_derive_iv(char *iv, struct ecryptfs_crypt_stat *crypt_stat, + loff_t offset); #endif /* #ifndef ECRYPTFS_KERNEL_H */ diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index 713834371229..9e944057001b 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -77,27 +77,27 @@ struct ecryptfs_getdents_callback { /* Inspired by generic filldir in fs/readdir.c */ static int -ecryptfs_filldir(void *dirent, const char *name, int namelen, loff_t offset, - u64 ino, unsigned int d_type) +ecryptfs_filldir(void *dirent, const char *lower_name, int lower_namelen, + loff_t offset, u64 ino, unsigned int d_type) { - struct ecryptfs_crypt_stat *crypt_stat; struct ecryptfs_getdents_callback *buf = (struct ecryptfs_getdents_callback *)dirent; + size_t name_size; + char *name; int rc; - int decoded_length; - char *decoded_name; - crypt_stat = ecryptfs_dentry_to_private(buf->dentry)->crypt_stat; buf->filldir_called++; - decoded_length = ecryptfs_decode_filename(crypt_stat, name, namelen, - &decoded_name); - if (decoded_length < 0) { - rc = decoded_length; + rc = ecryptfs_decode_and_decrypt_filename(&name, &name_size, + buf->dentry, lower_name, + lower_namelen); + if (rc) { + printk(KERN_ERR "%s: Error attempting to decode and decrypt " + "filename [%s]; rc = [%d]\n", __func__, lower_name, + rc); goto out; } - rc = buf->filldir(buf->dirent, decoded_name, decoded_length, offset, - ino, d_type); - kfree(decoded_name); + rc = buf->filldir(buf->dirent, name, name_size, offset, ino, d_type); + kfree(name); if (rc >= 0) buf->entries_written++; out: @@ -106,8 +106,8 @@ out: /** * ecryptfs_readdir - * @file: The ecryptfs file struct - * @dirent: Directory entry + * @file: The eCryptfs directory file + * @dirent: Directory entry handle * @filldir: The filldir callback function */ static int ecryptfs_readdir(struct file *file, void *dirent, filldir_t filldir) diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 0111906a8877..5697899a168d 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -52,8 +52,7 @@ static void unlock_dir(struct dentry *dir) /** * ecryptfs_create_underlying_file * @lower_dir_inode: inode of the parent in the lower fs of the new file - * @lower_dentry: New file's dentry in the lower fs - * @ecryptfs_dentry: New file's dentry in ecryptfs + * @dentry: New file's dentry * @mode: The mode of the new file * @nd: nameidata of ecryptfs' parent's dentry & vfsmount * @@ -228,8 +227,7 @@ ecryptfs_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry, { int rc; - /* ecryptfs_do_create() calls ecryptfs_interpose(), which opens - * the crypt_stat->lower_file (persistent file) */ + /* ecryptfs_do_create() calls ecryptfs_interpose() */ rc = ecryptfs_do_create(directory_inode, ecryptfs_dentry, mode, nd); if (unlikely(rc)) { ecryptfs_printk(KERN_WARNING, "Failed to create file in" @@ -244,141 +242,91 @@ out: } /** - * ecryptfs_lookup - * @dir: inode - * @dentry: The dentry - * @nd: nameidata, may be NULL - * - * Find a file on disk. If the file does not exist, then we'll add it to the - * dentry cache and continue on to read it from the disk. + * ecryptfs_lookup_and_interpose_lower - Perform a lookup */ -static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) +int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry, + struct dentry *lower_dentry, + struct ecryptfs_crypt_stat *crypt_stat, + struct inode *ecryptfs_dir_inode, + struct nameidata *ecryptfs_nd) { - int rc = 0; struct dentry *lower_dir_dentry; - struct dentry *lower_dentry; struct vfsmount *lower_mnt; - char *encoded_name; - int encoded_namelen; - struct ecryptfs_crypt_stat *crypt_stat = NULL; + struct inode *lower_inode; struct ecryptfs_mount_crypt_stat *mount_crypt_stat; char *page_virt = NULL; - struct inode *lower_inode; u64 file_size; + int rc = 0; - lower_dir_dentry = ecryptfs_dentry_to_lower(dentry->d_parent); - dentry->d_op = &ecryptfs_dops; - if ((dentry->d_name.len == 1 && !strcmp(dentry->d_name.name, ".")) - || (dentry->d_name.len == 2 - && !strcmp(dentry->d_name.name, ".."))) { - d_drop(dentry); - goto out; - } - encoded_namelen = ecryptfs_encode_filename(crypt_stat, - dentry->d_name.name, - dentry->d_name.len, - &encoded_name); - if (encoded_namelen < 0) { - rc = encoded_namelen; - d_drop(dentry); - goto out; - } - ecryptfs_printk(KERN_DEBUG, "encoded_name = [%s]; encoded_namelen " - "= [%d]\n", encoded_name, encoded_namelen); - lower_dentry = lookup_one_len(encoded_name, lower_dir_dentry, - encoded_namelen - 1); - kfree(encoded_name); - if (IS_ERR(lower_dentry)) { - ecryptfs_printk(KERN_ERR, "ERR from lower_dentry\n"); - rc = PTR_ERR(lower_dentry); - d_drop(dentry); - goto out; - } - lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt(dentry->d_parent)); - ecryptfs_printk(KERN_DEBUG, "lower_dentry = [%p]; lower_dentry->" - "d_name.name = [%s]\n", lower_dentry, - lower_dentry->d_name.name); + lower_dir_dentry = lower_dentry->d_parent; + lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt( + ecryptfs_dentry->d_parent)); lower_inode = lower_dentry->d_inode; - fsstack_copy_attr_atime(dir, lower_dir_dentry->d_inode); + fsstack_copy_attr_atime(ecryptfs_dir_inode, lower_dir_dentry->d_inode); BUG_ON(!atomic_read(&lower_dentry->d_count)); - ecryptfs_set_dentry_private(dentry, + ecryptfs_set_dentry_private(ecryptfs_dentry, kmem_cache_alloc(ecryptfs_dentry_info_cache, GFP_KERNEL)); - if (!ecryptfs_dentry_to_private(dentry)) { + if (!ecryptfs_dentry_to_private(ecryptfs_dentry)) { rc = -ENOMEM; - ecryptfs_printk(KERN_ERR, "Out of memory whilst attempting " - "to allocate ecryptfs_dentry_info struct\n"); + printk(KERN_ERR "%s: Out of memory whilst attempting " + "to allocate ecryptfs_dentry_info struct\n", + __func__); goto out_dput; } - ecryptfs_set_dentry_lower(dentry, lower_dentry); - ecryptfs_set_dentry_lower_mnt(dentry, lower_mnt); + ecryptfs_set_dentry_lower(ecryptfs_dentry, lower_dentry); + ecryptfs_set_dentry_lower_mnt(ecryptfs_dentry, lower_mnt); if (!lower_dentry->d_inode) { /* We want to add because we couldn't find in lower */ - d_add(dentry, NULL); + d_add(ecryptfs_dentry, NULL); goto out; } - rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb, - ECRYPTFS_INTERPOSE_FLAG_D_ADD); + rc = ecryptfs_interpose(lower_dentry, ecryptfs_dentry, + ecryptfs_dir_inode->i_sb, 1); if (rc) { - ecryptfs_printk(KERN_ERR, "Error interposing\n"); + printk(KERN_ERR "%s: Error interposing; rc = [%d]\n", + __func__, rc); goto out; } - if (S_ISDIR(lower_inode->i_mode)) { - ecryptfs_printk(KERN_DEBUG, "Is a directory; returning\n"); + if (S_ISDIR(lower_inode->i_mode)) goto out; - } - if (S_ISLNK(lower_inode->i_mode)) { - ecryptfs_printk(KERN_DEBUG, "Is a symlink; returning\n"); + if (S_ISLNK(lower_inode->i_mode)) goto out; - } - if (special_file(lower_inode->i_mode)) { - ecryptfs_printk(KERN_DEBUG, "Is a special file; returning\n"); + if (special_file(lower_inode->i_mode)) goto out; - } - if (!nd) { - ecryptfs_printk(KERN_DEBUG, "We have a NULL nd, just leave" - "as we *think* we are about to unlink\n"); + if (!ecryptfs_nd) goto out; - } /* Released in this function */ - page_virt = kmem_cache_zalloc(ecryptfs_header_cache_2, - GFP_USER); + page_virt = kmem_cache_zalloc(ecryptfs_header_cache_2, GFP_USER); if (!page_virt) { + printk(KERN_ERR "%s: Cannot kmem_cache_zalloc() a page\n", + __func__); rc = -ENOMEM; - ecryptfs_printk(KERN_ERR, - "Cannot ecryptfs_kmalloc a page\n"); goto out; } - crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat; - if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED)) - ecryptfs_set_default_sizes(crypt_stat); - if (!ecryptfs_inode_to_private(dentry->d_inode)->lower_file) { - rc = ecryptfs_init_persistent_file(dentry); + if (!ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->lower_file) { + rc = ecryptfs_init_persistent_file(ecryptfs_dentry); if (rc) { printk(KERN_ERR "%s: Error attempting to initialize " "the persistent file for the dentry with name " "[%s]; rc = [%d]\n", __func__, - dentry->d_name.name, rc); - goto out; + ecryptfs_dentry->d_name.name, rc); + goto out_free_kmem; } } rc = ecryptfs_read_and_validate_header_region(page_virt, - dentry->d_inode); + ecryptfs_dentry->d_inode); if (rc) { - rc = ecryptfs_read_and_validate_xattr_region(page_virt, dentry); + rc = ecryptfs_read_and_validate_xattr_region(page_virt, + ecryptfs_dentry); if (rc) { - printk(KERN_DEBUG "Valid metadata not found in header " - "region or xattr region; treating file as " - "unencrypted\n"); rc = 0; - kmem_cache_free(ecryptfs_header_cache_2, page_virt); - goto out; + goto out_free_kmem; } crypt_stat->flags |= ECRYPTFS_METADATA_IN_XATTR; } mount_crypt_stat = &ecryptfs_superblock_to_private( - dentry->d_sb)->mount_crypt_stat; + ecryptfs_dentry->d_sb)->mount_crypt_stat; if (mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED) { if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR) file_size = (crypt_stat->num_header_bytes_at_front @@ -388,14 +336,103 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry, } else { file_size = get_unaligned_be64(page_virt); } - i_size_write(dentry->d_inode, (loff_t)file_size); + i_size_write(ecryptfs_dentry->d_inode, (loff_t)file_size); +out_free_kmem: kmem_cache_free(ecryptfs_header_cache_2, page_virt); goto out; - out_dput: dput(lower_dentry); - d_drop(dentry); + d_drop(ecryptfs_dentry); +out: + return rc; +} + +/** + * ecryptfs_lookup + * @ecryptfs_dir_inode: The eCryptfs directory inode + * @ecryptfs_dentry: The eCryptfs dentry that we are looking up + * @ecryptfs_nd: nameidata; may be NULL + * + * Find a file on disk. If the file does not exist, then we'll add it to the + * dentry cache and continue on to read it from the disk. + */ +static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode, + struct dentry *ecryptfs_dentry, + struct nameidata *ecryptfs_nd) +{ + char *encrypted_and_encoded_name = NULL; + size_t encrypted_and_encoded_name_size; + struct ecryptfs_crypt_stat *crypt_stat = NULL; + struct ecryptfs_mount_crypt_stat *mount_crypt_stat = NULL; + struct ecryptfs_inode_info *inode_info; + struct dentry *lower_dir_dentry, *lower_dentry; + int rc = 0; + + ecryptfs_dentry->d_op = &ecryptfs_dops; + if ((ecryptfs_dentry->d_name.len == 1 + && !strcmp(ecryptfs_dentry->d_name.name, ".")) + || (ecryptfs_dentry->d_name.len == 2 + && !strcmp(ecryptfs_dentry->d_name.name, ".."))) { + goto out_d_drop; + } + lower_dir_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry->d_parent); + lower_dentry = lookup_one_len(ecryptfs_dentry->d_name.name, + lower_dir_dentry, + ecryptfs_dentry->d_name.len); + if (IS_ERR(lower_dentry)) { + rc = PTR_ERR(lower_dentry); + printk(KERN_ERR "%s: lookup_one_len() returned [%d] on " + "lower_dentry = [%s]\n", __func__, rc, + ecryptfs_dentry->d_name.name); + goto out_d_drop; + } + if (lower_dentry->d_inode) + goto lookup_and_interpose; + inode_info = ecryptfs_inode_to_private(ecryptfs_dentry->d_inode); + if (inode_info) { + crypt_stat = &inode_info->crypt_stat; + /* TODO: lock for crypt_stat comparison */ + if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED)) + ecryptfs_set_default_sizes(crypt_stat); + } + if (crypt_stat) + mount_crypt_stat = crypt_stat->mount_crypt_stat; + else + mount_crypt_stat = &ecryptfs_superblock_to_private( + ecryptfs_dentry->d_sb)->mount_crypt_stat; + if (!(crypt_stat && (crypt_stat->flags & ECRYPTFS_ENCRYPT_FILENAMES)) + && !(mount_crypt_stat && (mount_crypt_stat->flags + & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES))) + goto lookup_and_interpose; + dput(lower_dentry); + rc = ecryptfs_encrypt_and_encode_filename( + &encrypted_and_encoded_name, &encrypted_and_encoded_name_size, + crypt_stat, mount_crypt_stat, ecryptfs_dentry->d_name.name, + ecryptfs_dentry->d_name.len); + if (rc) { + printk(KERN_ERR "%s: Error attempting to encrypt and encode " + "filename; rc = [%d]\n", __func__, rc); + goto out_d_drop; + } + lower_dentry = lookup_one_len(encrypted_and_encoded_name, + lower_dir_dentry, + encrypted_and_encoded_name_size - 1); + if (IS_ERR(lower_dentry)) { + rc = PTR_ERR(lower_dentry); + printk(KERN_ERR "%s: lookup_one_len() returned [%d] on " + "lower_dentry = [%s]\n", __func__, rc, + encrypted_and_encoded_name); + goto out_d_drop; + } +lookup_and_interpose: + rc = ecryptfs_lookup_and_interpose_lower(ecryptfs_dentry, lower_dentry, + crypt_stat, ecryptfs_dir_inode, + ecryptfs_nd); + goto out; +out_d_drop: + d_drop(ecryptfs_dentry); out: + kfree(encrypted_and_encoded_name); return ERR_PTR(rc); } @@ -466,19 +503,21 @@ static int ecryptfs_symlink(struct inode *dir, struct dentry *dentry, struct dentry *lower_dentry; struct dentry *lower_dir_dentry; char *encoded_symname; - int encoded_symlen; - struct ecryptfs_crypt_stat *crypt_stat = NULL; + size_t encoded_symlen; + struct ecryptfs_mount_crypt_stat *mount_crypt_stat = NULL; lower_dentry = ecryptfs_dentry_to_lower(dentry); dget(lower_dentry); lower_dir_dentry = lock_parent(lower_dentry); - encoded_symlen = ecryptfs_encode_filename(crypt_stat, symname, - strlen(symname), - &encoded_symname); - if (encoded_symlen < 0) { - rc = encoded_symlen; + mount_crypt_stat = &ecryptfs_superblock_to_private( + dir->i_sb)->mount_crypt_stat; + rc = ecryptfs_encrypt_and_encode_filename(&encoded_symname, + &encoded_symlen, + NULL, + mount_crypt_stat, symname, + strlen(symname)); + if (rc) goto out_lock; - } rc = vfs_symlink(lower_dir_dentry->d_inode, lower_dentry, encoded_symname); kfree(encoded_symname); @@ -602,52 +641,54 @@ out_lock: } static int -ecryptfs_readlink(struct dentry *dentry, char __user * buf, int bufsiz) +ecryptfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz) { - int rc; - struct dentry *lower_dentry; - char *decoded_name; char *lower_buf; - mm_segment_t old_fs; + struct dentry *lower_dentry; struct ecryptfs_crypt_stat *crypt_stat; + char *plaintext_name; + size_t plaintext_name_size; + mm_segment_t old_fs; + int rc; lower_dentry = ecryptfs_dentry_to_lower(dentry); if (!lower_dentry->d_inode->i_op->readlink) { rc = -EINVAL; goto out; } + crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat; /* Released in this function */ lower_buf = kmalloc(bufsiz, GFP_KERNEL); if (lower_buf == NULL) { - ecryptfs_printk(KERN_ERR, "Out of memory\n"); + printk(KERN_ERR "%s: Out of memory whilst attempting to " + "kmalloc [%d] bytes\n", __func__, bufsiz); rc = -ENOMEM; goto out; } old_fs = get_fs(); set_fs(get_ds()); - ecryptfs_printk(KERN_DEBUG, "Calling readlink w/ " - "lower_dentry->d_name.name = [%s]\n", - lower_dentry->d_name.name); rc = lower_dentry->d_inode->i_op->readlink(lower_dentry, (char __user *)lower_buf, bufsiz); set_fs(old_fs); if (rc >= 0) { - crypt_stat = NULL; - rc = ecryptfs_decode_filename(crypt_stat, lower_buf, rc, - &decoded_name); - if (rc == -ENOMEM) + rc = ecryptfs_decode_and_decrypt_filename(&plaintext_name, + &plaintext_name_size, + dentry, lower_buf, + rc); + if (rc) { + printk(KERN_ERR "%s: Error attempting to decode and " + "decrypt filename; rc = [%d]\n", __func__, + rc); goto out_free_lower_buf; - if (rc > 0) { - ecryptfs_printk(KERN_DEBUG, "Copying [%d] bytes " - "to userspace: [%*s]\n", rc, - decoded_name); - if (copy_to_user(buf, decoded_name, rc)) - rc = -EFAULT; } - kfree(decoded_name); - fsstack_copy_attr_atime(dentry->d_inode, - lower_dentry->d_inode); + rc = copy_to_user(buf, plaintext_name, plaintext_name_size); + if (rc) + rc = -EFAULT; + else + rc = plaintext_name_size; + kfree(plaintext_name); + fsstack_copy_attr_atime(dentry->d_inode, lower_dentry->d_inode); } out_free_lower_buf: kfree(lower_buf); @@ -669,8 +710,6 @@ static void *ecryptfs_follow_link(struct dentry *dentry, struct nameidata *nd) } old_fs = get_fs(); set_fs(get_ds()); - ecryptfs_printk(KERN_DEBUG, "Calling readlink w/ " - "dentry->d_name.name = [%s]\n", dentry->d_name.name); rc = dentry->d_inode->i_op->readlink(dentry, (char __user *)buf, len); set_fs(old_fs); if (rc < 0) diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index 0d713b691941..ff539420cc6f 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c @@ -358,7 +358,7 @@ parse_tag_67_packet(struct ecryptfs_key_record *key_rec, /* verify that everything through the encrypted FEK size is present */ if (message_len < 4) { rc = -EIO; - printk(KERN_ERR "%s: message_len is [%Zd]; minimum acceptable " + printk(KERN_ERR "%s: message_len is [%zd]; minimum acceptable " "message length is [%d]\n", __func__, message_len, 4); goto out; } @@ -385,13 +385,13 @@ parse_tag_67_packet(struct ecryptfs_key_record *key_rec, i += data_len; if (message_len < (i + key_rec->enc_key_size)) { rc = -EIO; - printk(KERN_ERR "%s: message_len [%Zd]; max len is [%Zd]\n", + printk(KERN_ERR "%s: message_len [%zd]; max len is [%zd]\n", __func__, message_len, (i + key_rec->enc_key_size)); goto out; } if (key_rec->enc_key_size > ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES) { rc = -EIO; - printk(KERN_ERR "%s: Encrypted key_size [%Zd] larger than " + printk(KERN_ERR "%s: Encrypted key_size [%zd] larger than " "the maximum key size [%d]\n", __func__, key_rec->enc_key_size, ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES); @@ -403,6 +403,580 @@ out: } static int +ecryptfs_find_global_auth_tok_for_sig( + struct ecryptfs_global_auth_tok **global_auth_tok, + struct ecryptfs_mount_crypt_stat *mount_crypt_stat, char *sig) +{ + struct ecryptfs_global_auth_tok *walker; + int rc = 0; + + (*global_auth_tok) = NULL; + mutex_lock(&mount_crypt_stat->global_auth_tok_list_mutex); + list_for_each_entry(walker, + &mount_crypt_stat->global_auth_tok_list, + mount_crypt_stat_list) { + if (memcmp(walker->sig, sig, ECRYPTFS_SIG_SIZE_HEX) == 0) { + (*global_auth_tok) = walker; + goto out; + } + } + rc = -EINVAL; +out: + mutex_unlock(&mount_crypt_stat->global_auth_tok_list_mutex); + return rc; +} + +/** + * ecryptfs_find_auth_tok_for_sig + * @auth_tok: Set to the matching auth_tok; NULL if not found + * @crypt_stat: inode crypt_stat crypto context + * @sig: Sig of auth_tok to find + * + * For now, this function simply looks at the registered auth_tok's + * linked off the mount_crypt_stat, so all the auth_toks that can be + * used must be registered at mount time. This function could + * potentially try a lot harder to find auth_tok's (e.g., by calling + * out to ecryptfsd to dynamically retrieve an auth_tok object) so + * that static registration of auth_tok's will no longer be necessary. + * + * Returns zero on no error; non-zero on error + */ +static int +ecryptfs_find_auth_tok_for_sig( + struct ecryptfs_auth_tok **auth_tok, + struct ecryptfs_mount_crypt_stat *mount_crypt_stat, + char *sig) +{ + struct ecryptfs_global_auth_tok *global_auth_tok; + int rc = 0; + + (*auth_tok) = NULL; + if (ecryptfs_find_global_auth_tok_for_sig(&global_auth_tok, + mount_crypt_stat, sig)) { + struct key *auth_tok_key; + + rc = ecryptfs_keyring_auth_tok_for_sig(&auth_tok_key, auth_tok, + sig); + } else + (*auth_tok) = global_auth_tok->global_auth_tok; + return rc; +} + +/** + * write_tag_70_packet can gobble a lot of stack space. We stuff most + * of the function's parameters in a kmalloc'd struct to help reduce + * eCryptfs' overall stack usage. + */ +struct ecryptfs_write_tag_70_packet_silly_stack { + u8 cipher_code; + size_t max_packet_size; + size_t packet_size_len; + size_t block_aligned_filename_size; + size_t block_size; + size_t i; + size_t j; + size_t num_rand_bytes; + struct mutex *tfm_mutex; + char *block_aligned_filename; + struct ecryptfs_auth_tok *auth_tok; + struct scatterlist src_sg; + struct scatterlist dst_sg; + struct blkcipher_desc desc; + char iv[ECRYPTFS_MAX_IV_BYTES]; + char hash[ECRYPTFS_TAG_70_DIGEST_SIZE]; + char tmp_hash[ECRYPTFS_TAG_70_DIGEST_SIZE]; + struct hash_desc hash_desc; + struct scatterlist hash_sg; +}; + +/** + * write_tag_70_packet - Write encrypted filename (EFN) packet against FNEK + * @filename: NULL-terminated filename string + * + * This is the simplest mechanism for achieving filename encryption in + * eCryptfs. It encrypts the given filename with the mount-wide + * filename encryption key (FNEK) and stores it in a packet to @dest, + * which the callee will encode and write directly into the dentry + * name. + */ +int +ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes, + size_t *packet_size, + struct ecryptfs_mount_crypt_stat *mount_crypt_stat, + char *filename, size_t filename_size) +{ + struct ecryptfs_write_tag_70_packet_silly_stack *s; + int rc = 0; + + s = kmalloc(sizeof(*s), GFP_KERNEL); + if (!s) { + printk(KERN_ERR "%s: Out of memory whilst trying to kmalloc " + "[%zd] bytes of kernel memory\n", __func__, sizeof(*s)); + goto out; + } + s->desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP; + (*packet_size) = 0; + rc = ecryptfs_get_tfm_and_mutex_for_cipher_name( + &s->desc.tfm, + &s->tfm_mutex, mount_crypt_stat->global_default_fn_cipher_name); + if (unlikely(rc)) { + printk(KERN_ERR "Internal error whilst attempting to get " + "tfm and mutex for cipher name [%s]; rc = [%d]\n", + mount_crypt_stat->global_default_fn_cipher_name, rc); + goto out; + } + mutex_lock(s->tfm_mutex); + s->block_size = crypto_blkcipher_blocksize(s->desc.tfm); + /* Plus one for the \0 separator between the random prefix + * and the plaintext filename */ + s->num_rand_bytes = (ECRYPTFS_FILENAME_MIN_RANDOM_PREPEND_BYTES + 1); + s->block_aligned_filename_size = (s->num_rand_bytes + filename_size); + if ((s->block_aligned_filename_size % s->block_size) != 0) { + s->num_rand_bytes += (s->block_size + - (s->block_aligned_filename_size + % s->block_size)); + s->block_aligned_filename_size = (s->num_rand_bytes + + filename_size); + } + /* Octet 0: Tag 70 identifier + * Octets 1-N1: Tag 70 packet size (includes cipher identifier + * and block-aligned encrypted filename size) + * Octets N1-N2: FNEK sig (ECRYPTFS_SIG_SIZE) + * Octet N2-N3: Cipher identifier (1 octet) + * Octets N3-N4: Block-aligned encrypted filename + * - Consists of a minimum number of random characters, a \0 + * separator, and then the filename */ + s->max_packet_size = (1 /* Tag 70 identifier */ + + 3 /* Max Tag 70 packet size */ + + ECRYPTFS_SIG_SIZE /* FNEK sig */ + + 1 /* Cipher identifier */ + + s->block_aligned_filename_size); + if (dest == NULL) { + (*packet_size) = s->max_packet_size; + goto out_unlock; + } + if (s->max_packet_size > (*remaining_bytes)) { + printk(KERN_WARNING "%s: Require [%zd] bytes to write; only " + "[%zd] available\n", __func__, s->max_packet_size, + (*remaining_bytes)); + rc = -EINVAL; + goto out_unlock; + } + s->block_aligned_filename = kzalloc(s->block_aligned_filename_size, + GFP_KERNEL); + if (!s->block_aligned_filename) { + printk(KERN_ERR "%s: Out of kernel memory whilst attempting to " + "kzalloc [%zd] bytes\n", __func__, + s->block_aligned_filename_size); + rc = -ENOMEM; + goto out_unlock; + } + s->i = 0; + dest[s->i++] = ECRYPTFS_TAG_70_PACKET_TYPE; + rc = ecryptfs_write_packet_length(&dest[s->i], + (ECRYPTFS_SIG_SIZE + + 1 /* Cipher code */ + + s->block_aligned_filename_size), + &s->packet_size_len); + if (rc) { + printk(KERN_ERR "%s: Error generating tag 70 packet " + "header; cannot generate packet length; rc = [%d]\n", + __func__, rc); + goto out_free_unlock; + } + s->i += s->packet_size_len; + ecryptfs_from_hex(&dest[s->i], + mount_crypt_stat->global_default_fnek_sig, + ECRYPTFS_SIG_SIZE); + s->i += ECRYPTFS_SIG_SIZE; + s->cipher_code = ecryptfs_code_for_cipher_string( + mount_crypt_stat->global_default_fn_cipher_name, + mount_crypt_stat->global_default_fn_cipher_key_bytes); + if (s->cipher_code == 0) { + printk(KERN_WARNING "%s: Unable to generate code for " + "cipher [%s] with key bytes [%zd]\n", __func__, + mount_crypt_stat->global_default_fn_cipher_name, + mount_crypt_stat->global_default_fn_cipher_key_bytes); + rc = -EINVAL; + goto out_free_unlock; + } + dest[s->i++] = s->cipher_code; + rc = ecryptfs_find_auth_tok_for_sig( + &s->auth_tok, mount_crypt_stat, + mount_crypt_stat->global_default_fnek_sig); + if (rc) { + printk(KERN_ERR "%s: Error attempting to find auth tok for " + "fnek sig [%s]; rc = [%d]\n", __func__, + mount_crypt_stat->global_default_fnek_sig, rc); + goto out_free_unlock; + } + /* TODO: Support other key modules than passphrase for + * filename encryption */ + BUG_ON(s->auth_tok->token_type != ECRYPTFS_PASSWORD); + sg_init_one( + &s->hash_sg, + (u8 *)s->auth_tok->token.password.session_key_encryption_key, + s->auth_tok->token.password.session_key_encryption_key_bytes); + s->hash_desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP; + s->hash_desc.tfm = crypto_alloc_hash(ECRYPTFS_TAG_70_DIGEST, 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(s->hash_desc.tfm)) { + rc = PTR_ERR(s->hash_desc.tfm); + printk(KERN_ERR "%s: Error attempting to " + "allocate hash crypto context; rc = [%d]\n", + __func__, rc); + goto out_free_unlock; + } + rc = crypto_hash_init(&s->hash_desc); + if (rc) { + printk(KERN_ERR + "%s: Error initializing crypto hash; rc = [%d]\n", + __func__, rc); + goto out_release_free_unlock; + } + rc = crypto_hash_update( + &s->hash_desc, &s->hash_sg, + s->auth_tok->token.password.session_key_encryption_key_bytes); + if (rc) { + printk(KERN_ERR + "%s: Error updating crypto hash; rc = [%d]\n", + __func__, rc); + goto out_release_free_unlock; + } + rc = crypto_hash_final(&s->hash_desc, s->hash); + if (rc) { + printk(KERN_ERR + "%s: Error finalizing crypto hash; rc = [%d]\n", + __func__, rc); + goto out_release_free_unlock; + } + for (s->j = 0; s->j < (s->num_rand_bytes - 1); s->j++) { + s->block_aligned_filename[s->j] = + s->hash[(s->j % ECRYPTFS_TAG_70_DIGEST_SIZE)]; + if ((s->j % ECRYPTFS_TAG_70_DIGEST_SIZE) + == (ECRYPTFS_TAG_70_DIGEST_SIZE - 1)) { + sg_init_one(&s->hash_sg, (u8 *)s->hash, + ECRYPTFS_TAG_70_DIGEST_SIZE); + rc = crypto_hash_init(&s->hash_desc); + if (rc) { + printk(KERN_ERR + "%s: Error initializing crypto hash; " + "rc = [%d]\n", __func__, rc); + goto out_release_free_unlock; + } + rc = crypto_hash_update(&s->hash_desc, &s->hash_sg, + ECRYPTFS_TAG_70_DIGEST_SIZE); + if (rc) { + printk(KERN_ERR + "%s: Error updating crypto hash; " + "rc = [%d]\n", __func__, rc); + goto out_release_free_unlock; + } + rc = crypto_hash_final(&s->hash_desc, s->tmp_hash); + if (rc) { + printk(KERN_ERR + "%s: Error finalizing crypto hash; " + "rc = [%d]\n", __func__, rc); + goto out_release_free_unlock; + } + memcpy(s->hash, s->tmp_hash, + ECRYPTFS_TAG_70_DIGEST_SIZE); + } + if (s->block_aligned_filename[s->j] == '\0') + s->block_aligned_filename[s->j] = ECRYPTFS_NON_NULL; + } + memcpy(&s->block_aligned_filename[s->num_rand_bytes], filename, + filename_size); + rc = virt_to_scatterlist(s->block_aligned_filename, + s->block_aligned_filename_size, &s->src_sg, 1); + if (rc != 1) { + printk(KERN_ERR "%s: Internal error whilst attempting to " + "convert filename memory to scatterlist; " + "expected rc = 1; got rc = [%d]. " + "block_aligned_filename_size = [%zd]\n", __func__, rc, + s->block_aligned_filename_size); + goto out_release_free_unlock; + } + rc = virt_to_scatterlist(&dest[s->i], s->block_aligned_filename_size, + &s->dst_sg, 1); + if (rc != 1) { + printk(KERN_ERR "%s: Internal error whilst attempting to " + "convert encrypted filename memory to scatterlist; " + "expected rc = 1; got rc = [%d]. " + "block_aligned_filename_size = [%zd]\n", __func__, rc, + s->block_aligned_filename_size); + goto out_release_free_unlock; + } + /* The characters in the first block effectively do the job + * of the IV here, so we just use 0's for the IV. Note the + * constraint that ECRYPTFS_FILENAME_MIN_RANDOM_PREPEND_BYTES + * >= ECRYPTFS_MAX_IV_BYTES. */ + memset(s->iv, 0, ECRYPTFS_MAX_IV_BYTES); + s->desc.info = s->iv; + rc = crypto_blkcipher_setkey( + s->desc.tfm, + s->auth_tok->token.password.session_key_encryption_key, + mount_crypt_stat->global_default_fn_cipher_key_bytes); + if (rc < 0) { + printk(KERN_ERR "%s: Error setting key for crypto context; " + "rc = [%d]. s->auth_tok->token.password.session_key_" + "encryption_key = [0x%p]; mount_crypt_stat->" + "global_default_fn_cipher_key_bytes = [%zd]\n", __func__, + rc, + s->auth_tok->token.password.session_key_encryption_key, + mount_crypt_stat->global_default_fn_cipher_key_bytes); + goto out_release_free_unlock; + } + rc = crypto_blkcipher_encrypt_iv(&s->desc, &s->dst_sg, &s->src_sg, + s->block_aligned_filename_size); + if (rc) { + printk(KERN_ERR "%s: Error attempting to encrypt filename; " + "rc = [%d]\n", __func__, rc); + goto out_release_free_unlock; + } + s->i += s->block_aligned_filename_size; + (*packet_size) = s->i; + (*remaining_bytes) -= (*packet_size); +out_release_free_unlock: + crypto_free_hash(s->hash_desc.tfm); +out_free_unlock: + memset(s->block_aligned_filename, 0, s->block_aligned_filename_size); + kfree(s->block_aligned_filename); +out_unlock: + mutex_unlock(s->tfm_mutex); +out: + kfree(s); + return rc; +} + +struct ecryptfs_parse_tag_70_packet_silly_stack { + u8 cipher_code; + size_t max_packet_size; + size_t packet_size_len; + size_t parsed_tag_70_packet_size; + size_t block_aligned_filename_size; + size_t block_size; + size_t i; + struct mutex *tfm_mutex; + char *decrypted_filename; + struct ecryptfs_auth_tok *auth_tok; + struct scatterlist src_sg; + struct scatterlist dst_sg; + struct blkcipher_desc desc; + char fnek_sig_hex[ECRYPTFS_SIG_SIZE_HEX + 1]; + char iv[ECRYPTFS_MAX_IV_BYTES]; + char cipher_string[ECRYPTFS_MAX_CIPHER_NAME_SIZE]; +}; + +/** + * parse_tag_70_packet - Parse and process FNEK-encrypted passphrase packet + * @filename: This function kmalloc's the memory for the filename + * @filename_size: This function sets this to the amount of memory + * kmalloc'd for the filename + * @packet_size: This function sets this to the the number of octets + * in the packet parsed + * @mount_crypt_stat: The mount-wide cryptographic context + * @data: The memory location containing the start of the tag 70 + * packet + * @max_packet_size: The maximum legal size of the packet to be parsed + * from @data + * + * Returns zero on success; non-zero otherwise + */ +int +ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size, + size_t *packet_size, + struct ecryptfs_mount_crypt_stat *mount_crypt_stat, + char *data, size_t max_packet_size) +{ + struct ecryptfs_parse_tag_70_packet_silly_stack *s; + int rc = 0; + + (*packet_size) = 0; + (*filename_size) = 0; + (*filename) = NULL; + s = kmalloc(sizeof(*s), GFP_KERNEL); + if (!s) { + printk(KERN_ERR "%s: Out of memory whilst trying to kmalloc " + "[%zd] bytes of kernel memory\n", __func__, sizeof(*s)); + goto out; + } + s->desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP; + if (max_packet_size < (1 + 1 + ECRYPTFS_SIG_SIZE + 1 + 1)) { + printk(KERN_WARNING "%s: max_packet_size is [%zd]; it must be " + "at least [%d]\n", __func__, max_packet_size, + (1 + 1 + ECRYPTFS_SIG_SIZE + 1 + 1)); + rc = -EINVAL; + goto out; + } + /* Octet 0: Tag 70 identifier + * Octets 1-N1: Tag 70 packet size (includes cipher identifier + * and block-aligned encrypted filename size) + * Octets N1-N2: FNEK sig (ECRYPTFS_SIG_SIZE) + * Octet N2-N3: Cipher identifier (1 octet) + * Octets N3-N4: Block-aligned encrypted filename + * - Consists of a minimum number of random numbers, a \0 + * separator, and then the filename */ + if (data[(*packet_size)++] != ECRYPTFS_TAG_70_PACKET_TYPE) { + printk(KERN_WARNING "%s: Invalid packet tag [0x%.2x]; must be " + "tag [0x%.2x]\n", __func__, + data[((*packet_size) - 1)], ECRYPTFS_TAG_70_PACKET_TYPE); + rc = -EINVAL; + goto out; + } + rc = ecryptfs_parse_packet_length(&data[(*packet_size)], + &s->parsed_tag_70_packet_size, + &s->packet_size_len); + if (rc) { + printk(KERN_WARNING "%s: Error parsing packet length; " + "rc = [%d]\n", __func__, rc); + goto out; + } + s->block_aligned_filename_size = (s->parsed_tag_70_packet_size + - ECRYPTFS_SIG_SIZE - 1); + if ((1 + s->packet_size_len + s->parsed_tag_70_packet_size) + > max_packet_size) { + printk(KERN_WARNING "%s: max_packet_size is [%zd]; real packet " + "size is [%zd]\n", __func__, max_packet_size, + (1 + s->packet_size_len + 1 + + s->block_aligned_filename_size)); + rc = -EINVAL; + goto out; + } + (*packet_size) += s->packet_size_len; + ecryptfs_to_hex(s->fnek_sig_hex, &data[(*packet_size)], + ECRYPTFS_SIG_SIZE); + s->fnek_sig_hex[ECRYPTFS_SIG_SIZE_HEX] = '\0'; + (*packet_size) += ECRYPTFS_SIG_SIZE; + s->cipher_code = data[(*packet_size)++]; + rc = ecryptfs_cipher_code_to_string(s->cipher_string, s->cipher_code); + if (rc) { + printk(KERN_WARNING "%s: Cipher code [%d] is invalid\n", + __func__, s->cipher_code); + goto out; + } + rc = ecryptfs_get_tfm_and_mutex_for_cipher_name(&s->desc.tfm, + &s->tfm_mutex, + s->cipher_string); + if (unlikely(rc)) { + printk(KERN_ERR "Internal error whilst attempting to get " + "tfm and mutex for cipher name [%s]; rc = [%d]\n", + s->cipher_string, rc); + goto out; + } + mutex_lock(s->tfm_mutex); + rc = virt_to_scatterlist(&data[(*packet_size)], + s->block_aligned_filename_size, &s->src_sg, 1); + if (rc != 1) { + printk(KERN_ERR "%s: Internal error whilst attempting to " + "convert encrypted filename memory to scatterlist; " + "expected rc = 1; got rc = [%d]. " + "block_aligned_filename_size = [%zd]\n", __func__, rc, + s->block_aligned_filename_size); + goto out_unlock; + } + (*packet_size) += s->block_aligned_filename_size; + s->decrypted_filename = kmalloc(s->block_aligned_filename_size, + GFP_KERNEL); + if (!s->decrypted_filename) { + printk(KERN_ERR "%s: Out of memory whilst attempting to " + "kmalloc [%zd] bytes\n", __func__, + s->block_aligned_filename_size); + rc = -ENOMEM; + goto out_unlock; + } + rc = virt_to_scatterlist(s->decrypted_filename, + s->block_aligned_filename_size, &s->dst_sg, 1); + if (rc != 1) { + printk(KERN_ERR "%s: Internal error whilst attempting to " + "convert decrypted filename memory to scatterlist; " + "expected rc = 1; got rc = [%d]. " + "block_aligned_filename_size = [%zd]\n", __func__, rc, + s->block_aligned_filename_size); + goto out_free_unlock; + } + /* The characters in the first block effectively do the job of + * the IV here, so we just use 0's for the IV. Note the + * constraint that ECRYPTFS_FILENAME_MIN_RANDOM_PREPEND_BYTES + * >= ECRYPTFS_MAX_IV_BYTES. */ + memset(s->iv, 0, ECRYPTFS_MAX_IV_BYTES); + s->desc.info = s->iv; + rc = ecryptfs_find_auth_tok_for_sig(&s->auth_tok, mount_crypt_stat, + s->fnek_sig_hex); + if (rc) { + printk(KERN_ERR "%s: Error attempting to find auth tok for " + "fnek sig [%s]; rc = [%d]\n", __func__, s->fnek_sig_hex, + rc); + goto out_free_unlock; + } + /* TODO: Support other key modules than passphrase for + * filename encryption */ + BUG_ON(s->auth_tok->token_type != ECRYPTFS_PASSWORD); + rc = crypto_blkcipher_setkey( + s->desc.tfm, + s->auth_tok->token.password.session_key_encryption_key, + mount_crypt_stat->global_default_fn_cipher_key_bytes); + if (rc < 0) { + printk(KERN_ERR "%s: Error setting key for crypto context; " + "rc = [%d]. s->auth_tok->token.password.session_key_" + "encryption_key = [0x%p]; mount_crypt_stat->" + "global_default_fn_cipher_key_bytes = [%zd]\n", __func__, + rc, + s->auth_tok->token.password.session_key_encryption_key, + mount_crypt_stat->global_default_fn_cipher_key_bytes); + goto out_free_unlock; + } + rc = crypto_blkcipher_decrypt_iv(&s->desc, &s->dst_sg, &s->src_sg, + s->block_aligned_filename_size); + if (rc) { + printk(KERN_ERR "%s: Error attempting to decrypt filename; " + "rc = [%d]\n", __func__, rc); + goto out_free_unlock; + } + s->i = 0; + while (s->decrypted_filename[s->i] != '\0' + && s->i < s->block_aligned_filename_size) + s->i++; + if (s->i == s->block_aligned_filename_size) { + printk(KERN_WARNING "%s: Invalid tag 70 packet; could not " + "find valid separator between random characters and " + "the filename\n", __func__); + rc = -EINVAL; + goto out_free_unlock; + } + s->i++; + (*filename_size) = (s->block_aligned_filename_size - s->i); + if (!((*filename_size) > 0 && (*filename_size < PATH_MAX))) { + printk(KERN_WARNING "%s: Filename size is [%zd], which is " + "invalid\n", __func__, (*filename_size)); + rc = -EINVAL; + goto out_free_unlock; + } + (*filename) = kmalloc(((*filename_size) + 1), GFP_KERNEL); + if (!(*filename)) { + printk(KERN_ERR "%s: Out of memory whilst attempting to " + "kmalloc [%zd] bytes\n", __func__, + ((*filename_size) + 1)); + rc = -ENOMEM; + goto out_free_unlock; + } + memcpy((*filename), &s->decrypted_filename[s->i], (*filename_size)); + (*filename)[(*filename_size)] = '\0'; +out_free_unlock: + kfree(s->decrypted_filename); +out_unlock: + mutex_unlock(s->tfm_mutex); +out: + if (rc) { + (*packet_size) = 0; + (*filename_size) = 0; + (*filename) = NULL; + } + kfree(s); + return rc; +} + +static int ecryptfs_get_auth_tok_sig(char **sig, struct ecryptfs_auth_tok *auth_tok) { int rc = 0; @@ -897,30 +1471,6 @@ out: return rc; } -static int -ecryptfs_find_global_auth_tok_for_sig( - struct ecryptfs_global_auth_tok **global_auth_tok, - struct ecryptfs_mount_crypt_stat *mount_crypt_stat, char *sig) -{ - struct ecryptfs_global_auth_tok *walker; - int rc = 0; - - (*global_auth_tok) = NULL; - mutex_lock(&mount_crypt_stat->global_auth_tok_list_mutex); - list_for_each_entry(walker, - &mount_crypt_stat->global_auth_tok_list, - mount_crypt_stat_list) { - if (memcmp(walker->sig, sig, ECRYPTFS_SIG_SIZE_HEX) == 0) { - (*global_auth_tok) = walker; - goto out; - } - } - rc = -EINVAL; -out: - mutex_unlock(&mount_crypt_stat->global_auth_tok_list_mutex); - return rc; -} - /** * ecryptfs_verify_version * @version: The version number to confirm @@ -990,43 +1540,6 @@ out: } /** - * ecryptfs_find_auth_tok_for_sig - * @auth_tok: Set to the matching auth_tok; NULL if not found - * @crypt_stat: inode crypt_stat crypto context - * @sig: Sig of auth_tok to find - * - * For now, this function simply looks at the registered auth_tok's - * linked off the mount_crypt_stat, so all the auth_toks that can be - * used must be registered at mount time. This function could - * potentially try a lot harder to find auth_tok's (e.g., by calling - * out to ecryptfsd to dynamically retrieve an auth_tok object) so - * that static registration of auth_tok's will no longer be necessary. - * - * Returns zero on no error; non-zero on error - */ -static int -ecryptfs_find_auth_tok_for_sig( - struct ecryptfs_auth_tok **auth_tok, - struct ecryptfs_crypt_stat *crypt_stat, char *sig) -{ - struct ecryptfs_mount_crypt_stat *mount_crypt_stat = - crypt_stat->mount_crypt_stat; - struct ecryptfs_global_auth_tok *global_auth_tok; - int rc = 0; - - (*auth_tok) = NULL; - if (ecryptfs_find_global_auth_tok_for_sig(&global_auth_tok, - mount_crypt_stat, sig)) { - struct key *auth_tok_key; - - rc = ecryptfs_keyring_auth_tok_for_sig(&auth_tok_key, auth_tok, - sig); - } else - (*auth_tok) = global_auth_tok->global_auth_tok; - return rc; -} - -/** * decrypt_passphrase_encrypted_session_key - Decrypt the session key with the given auth_tok. * @auth_tok: The passphrase authentication token to use to encrypt the FEK * @crypt_stat: The cryptographic context @@ -1256,7 +1769,8 @@ find_next_matching_auth_tok: rc = -EINVAL; goto out_wipe_list; } - ecryptfs_find_auth_tok_for_sig(&matching_auth_tok, crypt_stat, + ecryptfs_find_auth_tok_for_sig(&matching_auth_tok, + crypt_stat->mount_crypt_stat, candidate_auth_tok_sig); if (matching_auth_tok) { found_auth_tok = 1; @@ -1336,7 +1850,9 @@ pki_encrypt_session_key(struct ecryptfs_auth_tok *auth_tok, int rc; rc = write_tag_66_packet(auth_tok->token.private_key.signature, - ecryptfs_code_for_cipher_string(crypt_stat), + ecryptfs_code_for_cipher_string( + crypt_stat->cipher, + crypt_stat->key_size), crypt_stat, &payload, &payload_len); if (rc) { ecryptfs_printk(KERN_ERR, "Error generating tag 66 packet\n"); @@ -1696,7 +2212,8 @@ encrypted_session_key_set: dest[(*packet_size)++] = 0x04; /* version 4 */ /* TODO: Break from RFC2440 so that arbitrary ciphers can be * specified with strings */ - cipher_code = ecryptfs_code_for_cipher_string(crypt_stat); + cipher_code = ecryptfs_code_for_cipher_string(crypt_stat->cipher, + crypt_stat->key_size); if (cipher_code == 0) { ecryptfs_printk(KERN_WARNING, "Unable to generate code for " "cipher [%s]\n", crypt_stat->cipher); diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index fd630713c5c7..789cf2e1be1e 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -206,7 +206,9 @@ enum { ecryptfs_opt_sig, ecryptfs_opt_ecryptfs_sig, ecryptfs_opt_cipher, ecryptfs_opt_ecryptfs_cipher, ecryptfs_opt_ecryptfs_key_bytes, ecryptfs_opt_passthrough, ecryptfs_opt_xattr_metadata, - ecryptfs_opt_encrypted_view, ecryptfs_opt_err }; + ecryptfs_opt_encrypted_view, ecryptfs_opt_fnek_sig, + ecryptfs_opt_fn_cipher, ecryptfs_opt_fn_cipher_key_bytes, + ecryptfs_opt_err }; static const match_table_t tokens = { {ecryptfs_opt_sig, "sig=%s"}, @@ -217,6 +219,9 @@ static const match_table_t tokens = { {ecryptfs_opt_passthrough, "ecryptfs_passthrough"}, {ecryptfs_opt_xattr_metadata, "ecryptfs_xattr_metadata"}, {ecryptfs_opt_encrypted_view, "ecryptfs_encrypted_view"}, + {ecryptfs_opt_fnek_sig, "ecryptfs_fnek_sig=%s"}, + {ecryptfs_opt_fn_cipher, "ecryptfs_fn_cipher=%s"}, + {ecryptfs_opt_fn_cipher_key_bytes, "ecryptfs_fn_key_bytes=%u"}, {ecryptfs_opt_err, NULL} }; @@ -281,8 +286,11 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options) int rc = 0; int sig_set = 0; int cipher_name_set = 0; + int fn_cipher_name_set = 0; int cipher_key_bytes; int cipher_key_bytes_set = 0; + int fn_cipher_key_bytes; + int fn_cipher_key_bytes_set = 0; struct ecryptfs_mount_crypt_stat *mount_crypt_stat = &ecryptfs_superblock_to_private(sb)->mount_crypt_stat; substring_t args[MAX_OPT_ARGS]; @@ -290,7 +298,12 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options) char *sig_src; char *cipher_name_dst; char *cipher_name_src; + char *fn_cipher_name_dst; + char *fn_cipher_name_src; + char *fnek_dst; + char *fnek_src; char *cipher_key_bytes_src; + char *fn_cipher_key_bytes_src; if (!options) { rc = -EINVAL; @@ -322,10 +335,7 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options) global_default_cipher_name; strncpy(cipher_name_dst, cipher_name_src, ECRYPTFS_MAX_CIPHER_NAME_SIZE); - ecryptfs_printk(KERN_DEBUG, - "The mount_crypt_stat " - "global_default_cipher_name set to: " - "[%s]\n", cipher_name_dst); + cipher_name_dst[ECRYPTFS_MAX_CIPHER_NAME_SIZE] = '\0'; cipher_name_set = 1; break; case ecryptfs_opt_ecryptfs_key_bytes: @@ -335,11 +345,6 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options) &cipher_key_bytes_src, 0); mount_crypt_stat->global_default_cipher_key_size = cipher_key_bytes; - ecryptfs_printk(KERN_DEBUG, - "The mount_crypt_stat " - "global_default_cipher_key_size " - "set to: [%d]\n", mount_crypt_stat-> - global_default_cipher_key_size); cipher_key_bytes_set = 1; break; case ecryptfs_opt_passthrough: @@ -356,11 +361,51 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options) mount_crypt_stat->flags |= ECRYPTFS_ENCRYPTED_VIEW_ENABLED; break; + case ecryptfs_opt_fnek_sig: + fnek_src = args[0].from; + fnek_dst = + mount_crypt_stat->global_default_fnek_sig; + strncpy(fnek_dst, fnek_src, ECRYPTFS_SIG_SIZE_HEX); + mount_crypt_stat->global_default_fnek_sig[ + ECRYPTFS_SIG_SIZE_HEX] = '\0'; + rc = ecryptfs_add_global_auth_tok( + mount_crypt_stat, + mount_crypt_stat->global_default_fnek_sig); + if (rc) { + printk(KERN_ERR "Error attempting to register " + "global fnek sig [%s]; rc = [%d]\n", + mount_crypt_stat->global_default_fnek_sig, + rc); + goto out; + } + mount_crypt_stat->flags |= + (ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES + | ECRYPTFS_GLOBAL_ENCFN_USE_MOUNT_FNEK); + break; + case ecryptfs_opt_fn_cipher: + fn_cipher_name_src = args[0].from; + fn_cipher_name_dst = + mount_crypt_stat->global_default_fn_cipher_name; + strncpy(fn_cipher_name_dst, fn_cipher_name_src, + ECRYPTFS_MAX_CIPHER_NAME_SIZE); + mount_crypt_stat->global_default_fn_cipher_name[ + ECRYPTFS_MAX_CIPHER_NAME_SIZE] = '\0'; + fn_cipher_name_set = 1; + break; + case ecryptfs_opt_fn_cipher_key_bytes: + fn_cipher_key_bytes_src = args[0].from; + fn_cipher_key_bytes = + (int)simple_strtol(fn_cipher_key_bytes_src, + &fn_cipher_key_bytes_src, 0); + mount_crypt_stat->global_default_fn_cipher_key_bytes = + fn_cipher_key_bytes; + fn_cipher_key_bytes_set = 1; + break; case ecryptfs_opt_err: default: - ecryptfs_printk(KERN_WARNING, - "eCryptfs: unrecognized option '%s'\n", - p); + printk(KERN_WARNING + "%s: eCryptfs: unrecognized option [%s]\n", + __func__, p); } } if (!sig_set) { @@ -374,33 +419,60 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options) int cipher_name_len = strlen(ECRYPTFS_DEFAULT_CIPHER); BUG_ON(cipher_name_len >= ECRYPTFS_MAX_CIPHER_NAME_SIZE); - strcpy(mount_crypt_stat->global_default_cipher_name, ECRYPTFS_DEFAULT_CIPHER); } - if (!cipher_key_bytes_set) { + if ((mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES) + && !fn_cipher_name_set) + strcpy(mount_crypt_stat->global_default_fn_cipher_name, + mount_crypt_stat->global_default_cipher_name); + if (!cipher_key_bytes_set) mount_crypt_stat->global_default_cipher_key_size = 0; - } + if ((mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES) + && !fn_cipher_key_bytes_set) + mount_crypt_stat->global_default_fn_cipher_key_bytes = + mount_crypt_stat->global_default_cipher_key_size; mutex_lock(&key_tfm_list_mutex); if (!ecryptfs_tfm_exists(mount_crypt_stat->global_default_cipher_name, - NULL)) + NULL)) { rc = ecryptfs_add_new_key_tfm( NULL, mount_crypt_stat->global_default_cipher_name, mount_crypt_stat->global_default_cipher_key_size); - mutex_unlock(&key_tfm_list_mutex); - if (rc) { - printk(KERN_ERR "Error attempting to initialize cipher with " - "name = [%s] and key size = [%td]; rc = [%d]\n", - mount_crypt_stat->global_default_cipher_name, - mount_crypt_stat->global_default_cipher_key_size, rc); - rc = -EINVAL; - goto out; + if (rc) { + printk(KERN_ERR "Error attempting to initialize " + "cipher with name = [%s] and key size = [%td]; " + "rc = [%d]\n", + mount_crypt_stat->global_default_cipher_name, + mount_crypt_stat->global_default_cipher_key_size, + rc); + rc = -EINVAL; + mutex_unlock(&key_tfm_list_mutex); + goto out; + } } + if ((mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES) + && !ecryptfs_tfm_exists( + mount_crypt_stat->global_default_fn_cipher_name, NULL)) { + rc = ecryptfs_add_new_key_tfm( + NULL, mount_crypt_stat->global_default_fn_cipher_name, + mount_crypt_stat->global_default_fn_cipher_key_bytes); + if (rc) { + printk(KERN_ERR "Error attempting to initialize " + "cipher with name = [%s] and key size = [%td]; " + "rc = [%d]\n", + mount_crypt_stat->global_default_fn_cipher_name, + mount_crypt_stat->global_default_fn_cipher_key_bytes, + rc); + rc = -EINVAL; + mutex_unlock(&key_tfm_list_mutex); + goto out; + } + } + mutex_unlock(&key_tfm_list_mutex); rc = ecryptfs_init_global_auth_toks(mount_crypt_stat); - if (rc) { + if (rc) printk(KERN_WARNING "One or more global auth toks could not " "properly register; rc = [%d]\n", rc); - } out: return rc; } diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c index 6913f727624d..96ef51489e01 100644 --- a/fs/ecryptfs/messaging.c +++ b/fs/ecryptfs/messaging.c @@ -193,7 +193,7 @@ ecryptfs_spawn_daemon(struct ecryptfs_daemon **daemon, uid_t euid, (*daemon) = kzalloc(sizeof(**daemon), GFP_KERNEL); if (!(*daemon)) { rc = -ENOMEM; - printk(KERN_ERR "%s: Failed to allocate [%Zd] bytes of " + printk(KERN_ERR "%s: Failed to allocate [%zd] bytes of " "GFP_KERNEL memory\n", __func__, sizeof(**daemon)); goto out; } @@ -435,7 +435,7 @@ int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t euid, msg_ctx->msg = kmalloc(msg_size, GFP_KERNEL); if (!msg_ctx->msg) { rc = -ENOMEM; - printk(KERN_ERR "%s: Failed to allocate [%Zd] bytes of " + printk(KERN_ERR "%s: Failed to allocate [%zd] bytes of " "GFP_KERNEL memory\n", __func__, msg_size); goto unlock; } diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c index efd95a0ed1ea..a67fea655f49 100644 --- a/fs/ecryptfs/miscdev.c +++ b/fs/ecryptfs/miscdev.c @@ -199,7 +199,7 @@ int ecryptfs_send_miscdev(char *data, size_t data_size, if (!msg_ctx->msg) { rc = -ENOMEM; printk(KERN_ERR "%s: Out of memory whilst attempting " - "to kmalloc(%Zd, GFP_KERNEL)\n", __func__, + "to kmalloc(%zd, GFP_KERNEL)\n", __func__, (sizeof(*msg_ctx->msg) + data_size)); goto out_unlock; } @@ -322,7 +322,7 @@ check_list: if (count < total_length) { rc = 0; printk(KERN_WARNING "%s: Only given user buffer of " - "size [%Zd], but we need [%Zd] to read the " + "size [%zd], but we need [%zd] to read the " "pending message\n", __func__, count, total_length); goto out_unlock_msg_ctx; } @@ -376,7 +376,7 @@ static int ecryptfs_miscdev_response(char *data, size_t data_size, if ((sizeof(*msg) + msg->data_len) != data_size) { printk(KERN_WARNING "%s: (sizeof(*msg) + msg->data_len) = " - "[%Zd]; data_size = [%Zd]. Invalid packet.\n", __func__, + "[%zd]; data_size = [%zd]. Invalid packet.\n", __func__, (sizeof(*msg) + msg->data_len), data_size); rc = -EINVAL; goto out; @@ -421,7 +421,7 @@ ecryptfs_miscdev_write(struct file *file, const char __user *buf, data = kmalloc(count, GFP_KERNEL); if (!data) { printk(KERN_ERR "%s: Out of memory whilst attempting to " - "kmalloc([%Zd], GFP_KERNEL)\n", __func__, count); + "kmalloc([%zd], GFP_KERNEL)\n", __func__, count); goto out; } rc = copy_from_user(data, buf, count); @@ -436,8 +436,8 @@ ecryptfs_miscdev_write(struct file *file, const char __user *buf, case ECRYPTFS_MSG_RESPONSE: if (count < (1 + 4 + 1 + sizeof(struct ecryptfs_message))) { printk(KERN_WARNING "%s: Minimum acceptable packet " - "size is [%Zd], but amount of data written is " - "only [%Zd]. Discarding response packet.\n", + "size is [%zd], but amount of data written is " + "only [%zd]. Discarding response packet.\n", __func__, (1 + 4 + 1 + sizeof(struct ecryptfs_message)), count); @@ -455,9 +455,9 @@ ecryptfs_miscdev_write(struct file *file, const char __user *buf, } i += packet_size_length; if ((1 + 4 + packet_size_length + packet_size) != count) { - printk(KERN_WARNING "%s: (1 + packet_size_length([%Zd])" - " + packet_size([%Zd]))([%Zd]) != " - "count([%Zd]). Invalid packet format.\n", + printk(KERN_WARNING "%s: (1 + packet_size_length([%zd])" + " + packet_size([%zd]))([%zd]) != " + "count([%zd]). Invalid packet format.\n", __func__, packet_size_length, packet_size, (1 + packet_size_length + packet_size), count); goto out_free; diff --git a/fs/exec.c b/fs/exec.c index 9c33f542dc77..71a6efe5d8bd 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -232,13 +232,13 @@ static void flush_arg_page(struct linux_binprm *bprm, unsigned long pos, static int __bprm_mm_init(struct linux_binprm *bprm) { - int err = -ENOMEM; + int err; struct vm_area_struct *vma = NULL; struct mm_struct *mm = bprm->mm; bprm->vma = vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); if (!vma) - goto err; + return -ENOMEM; down_write(&mm->mmap_sem); vma->vm_mm = mm; @@ -251,28 +251,20 @@ static int __bprm_mm_init(struct linux_binprm *bprm) */ vma->vm_end = STACK_TOP_MAX; vma->vm_start = vma->vm_end - PAGE_SIZE; - vma->vm_flags = VM_STACK_FLAGS; vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); err = insert_vm_struct(mm, vma); - if (err) { - up_write(&mm->mmap_sem); + if (err) goto err; - } mm->stack_vm = mm->total_vm = 1; up_write(&mm->mmap_sem); - bprm->p = vma->vm_end - sizeof(void *); - return 0; - err: - if (vma) { - bprm->vma = NULL; - kmem_cache_free(vm_area_cachep, vma); - } - + up_write(&mm->mmap_sem); + bprm->vma = NULL; + kmem_cache_free(vm_area_cachep, vma); return err; } @@ -1694,7 +1686,7 @@ int get_dumpable(struct mm_struct *mm) return (ret >= 2) ? 2 : ret; } -int do_coredump(long signr, int exit_code, struct pt_regs * regs) +void do_coredump(long signr, int exit_code, struct pt_regs *regs) { struct core_state core_state; char corename[CORENAME_MAX_SIZE + 1]; @@ -1778,6 +1770,11 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs) if (ispipe) { helper_argv = argv_split(GFP_KERNEL, corename+1, &helper_argc); + if (!helper_argv) { + printk(KERN_WARNING "%s failed to allocate memory\n", + __func__); + goto fail_unlock; + } /* Terminate the string before the first option */ delimit = strchr(corename, ' '); if (delimit) @@ -1845,5 +1842,5 @@ fail_unlock: put_cred(cred); coredump_finish(mm); fail: - return retval; + return; } diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index b0537c827024..6c46c648430d 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1225,11 +1225,11 @@ do { \ } while (0) #ifdef CONFIG_SMP -/* Each CPU can accumulate FBC_BATCH blocks in their local +/* Each CPU can accumulate percpu_counter_batch blocks in their local * counters. So we need to make sure we have free blocks more - * than FBC_BATCH * nr_cpu_ids. Also add a window of 4 times. + * than percpu_counter_batch * nr_cpu_ids. Also add a window of 4 times. */ -#define EXT4_FREEBLOCKS_WATERMARK (4 * (FBC_BATCH * nr_cpu_ids)) +#define EXT4_FREEBLOCKS_WATERMARK (4 * (percpu_counter_batch * nr_cpu_ids)) #else #define EXT4_FREEBLOCKS_WATERMARK 0 #endif diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 6702a49992a6..98d3fe7057ef 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2498,7 +2498,7 @@ static int ext4_nonda_switch(struct super_block *sb) /* * switch to non delalloc mode if we are running low * on free block. The free block accounting via percpu - * counters can get slightly wrong with FBC_BATCH getting + * counters can get slightly wrong with percpu_counter_batch getting * accumulated on each CPU without updating global counters * Delalloc need an accurate free block accounting. So switch * to non delalloc when we are near to error range. diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index d0ff0b8cf309..e5eaa62fd17f 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -421,9 +421,6 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) * If we're a pdlfush thread, then implement pdflush collision avoidance * against the entire list. * - * WB_SYNC_HOLD is a hack for sys_sync(): reattach the inode to sb->s_dirty so - * that it can be located for waiting on in __writeback_single_inode(). - * * If `bdi' is non-zero then we're being asked to writeback a specific queue. * This function assumes that the blockdev superblock's inodes are backed by * a variety of queues, so all inodes are searched. For other superblocks, @@ -443,6 +440,7 @@ void generic_sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc) { const unsigned long start = jiffies; /* livelock avoidance */ + int sync = wbc->sync_mode == WB_SYNC_ALL; spin_lock(&inode_lock); if (!wbc->for_kupdate || list_empty(&sb->s_io)) @@ -499,10 +497,6 @@ void generic_sync_sb_inodes(struct super_block *sb, __iget(inode); pages_skipped = wbc->pages_skipped; __writeback_single_inode(inode, wbc); - if (wbc->sync_mode == WB_SYNC_HOLD) { - inode->dirtied_when = jiffies; - list_move(&inode->i_list, &sb->s_dirty); - } if (current_is_pdflush()) writeback_release(bdi); if (wbc->pages_skipped != pages_skipped) { @@ -523,7 +517,49 @@ void generic_sync_sb_inodes(struct super_block *sb, if (!list_empty(&sb->s_more_io)) wbc->more_io = 1; } - spin_unlock(&inode_lock); + + if (sync) { + struct inode *inode, *old_inode = NULL; + + /* + * Data integrity sync. Must wait for all pages under writeback, + * because there may have been pages dirtied before our sync + * call, but which had writeout started before we write it out. + * In which case, the inode may not be on the dirty list, but + * we still have to wait for that writeout. + */ + list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { + struct address_space *mapping; + + if (inode->i_state & (I_FREEING|I_WILL_FREE)) + continue; + mapping = inode->i_mapping; + if (mapping->nrpages == 0) + continue; + __iget(inode); + spin_unlock(&inode_lock); + /* + * We hold a reference to 'inode' so it couldn't have + * been removed from s_inodes list while we dropped the + * inode_lock. We cannot iput the inode now as we can + * be holding the last reference and we cannot iput it + * under inode_lock. So we keep the reference and iput + * it later. + */ + iput(old_inode); + old_inode = inode; + + filemap_fdatawait(mapping); + + cond_resched(); + + spin_lock(&inode_lock); + } + spin_unlock(&inode_lock); + iput(old_inode); + } else + spin_unlock(&inode_lock); + return; /* Leave any unwritten inodes on s_io */ } EXPORT_SYMBOL_GPL(generic_sync_sb_inodes); @@ -588,8 +624,7 @@ restart: /* * writeback and wait upon the filesystem's dirty inodes. The caller will - * do this in two passes - one to write, and one to wait. WB_SYNC_HOLD is - * used to park the written inodes on sb->s_dirty for the wait pass. + * do this in two passes - one to write, and one to wait. * * A finite limit is set on the number of pages which will be written. * To prevent infinite livelock of sys_sync(). @@ -600,30 +635,21 @@ restart: void sync_inodes_sb(struct super_block *sb, int wait) { struct writeback_control wbc = { - .sync_mode = wait ? WB_SYNC_ALL : WB_SYNC_HOLD, + .sync_mode = wait ? WB_SYNC_ALL : WB_SYNC_NONE, .range_start = 0, .range_end = LLONG_MAX, }; - unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY); - unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS); - wbc.nr_to_write = nr_dirty + nr_unstable + - (inodes_stat.nr_inodes - inodes_stat.nr_unused) + - nr_dirty + nr_unstable; - wbc.nr_to_write += wbc.nr_to_write / 2; /* Bit more for luck */ - sync_sb_inodes(sb, &wbc); -} + if (!wait) { + unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY); + unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS); -/* - * Rather lame livelock avoidance. - */ -static void set_sb_syncing(int val) -{ - struct super_block *sb; - spin_lock(&sb_lock); - list_for_each_entry_reverse(sb, &super_blocks, s_list) - sb->s_syncing = val; - spin_unlock(&sb_lock); + wbc.nr_to_write = nr_dirty + nr_unstable + + (inodes_stat.nr_inodes - inodes_stat.nr_unused); + } else + wbc.nr_to_write = LONG_MAX; /* doesn't actually matter */ + + sync_sb_inodes(sb, &wbc); } /** @@ -652,9 +678,6 @@ static void __sync_inodes(int wait) spin_lock(&sb_lock); restart: list_for_each_entry(sb, &super_blocks, s_list) { - if (sb->s_syncing) - continue; - sb->s_syncing = 1; sb->s_count++; spin_unlock(&sb_lock); down_read(&sb->s_umount); @@ -672,13 +695,10 @@ restart: void sync_inodes(int wait) { - set_sb_syncing(0); __sync_inodes(0); - if (wait) { - set_sb_syncing(0); + if (wait) __sync_inodes(1); - } } /** diff --git a/fs/fuse/control.c b/fs/fuse/control.c index 4f3cab321415..99c99dfb0373 100644 --- a/fs/fuse/control.c +++ b/fs/fuse/control.c @@ -1,6 +1,6 @@ /* FUSE: Filesystem in Userspace - Copyright (C) 2001-2006 Miklos Szeredi <miklos@szeredi.hu> + Copyright (C) 2001-2008 Miklos Szeredi <miklos@szeredi.hu> This program can be distributed under the terms of the GNU GPL. See the file COPYING. @@ -48,11 +48,13 @@ static ssize_t fuse_conn_waiting_read(struct file *file, char __user *buf, size_t size; if (!*ppos) { + long value; struct fuse_conn *fc = fuse_ctl_file_conn_get(file); if (!fc) return 0; - file->private_data=(void *)(long)atomic_read(&fc->num_waiting); + value = atomic_read(&fc->num_waiting); + file->private_data = (void *)value; fuse_conn_put(fc); } size = sprintf(tmp, "%ld\n", (long)file->private_data); diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index fba571648a8e..e0c7ada08a1f 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1,6 +1,6 @@ /* FUSE: Filesystem in Userspace - Copyright (C) 2001-2006 Miklos Szeredi <miklos@szeredi.hu> + Copyright (C) 2001-2008 Miklos Szeredi <miklos@szeredi.hu> This program can be distributed under the terms of the GNU GPL. See the file COPYING. @@ -269,7 +269,7 @@ static void flush_bg_queue(struct fuse_conn *fc) * Called with fc->lock, unlocks it */ static void request_end(struct fuse_conn *fc, struct fuse_req *req) - __releases(fc->lock) +__releases(&fc->lock) { void (*end) (struct fuse_conn *, struct fuse_req *) = req->end; req->end = NULL; @@ -293,13 +293,13 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req) wake_up(&req->waitq); if (end) end(fc, req); - else - fuse_put_request(fc, req); + fuse_put_request(fc, req); } static void wait_answer_interruptible(struct fuse_conn *fc, struct fuse_req *req) - __releases(fc->lock) __acquires(fc->lock) +__releases(&fc->lock) +__acquires(&fc->lock) { if (signal_pending(current)) return; @@ -317,7 +317,8 @@ static void queue_interrupt(struct fuse_conn *fc, struct fuse_req *req) } static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req) - __releases(fc->lock) __acquires(fc->lock) +__releases(&fc->lock) +__acquires(&fc->lock) { if (!fc->no_interrupt) { /* Any signal may interrupt this */ @@ -380,7 +381,7 @@ static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req) } } -void request_send(struct fuse_conn *fc, struct fuse_req *req) +void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req) { req->isreply = 1; spin_lock(&fc->lock); @@ -399,8 +400,8 @@ void request_send(struct fuse_conn *fc, struct fuse_req *req) spin_unlock(&fc->lock); } -static void request_send_nowait_locked(struct fuse_conn *fc, - struct fuse_req *req) +static void fuse_request_send_nowait_locked(struct fuse_conn *fc, + struct fuse_req *req) { req->background = 1; fc->num_background++; @@ -414,11 +415,11 @@ static void request_send_nowait_locked(struct fuse_conn *fc, flush_bg_queue(fc); } -static void request_send_nowait(struct fuse_conn *fc, struct fuse_req *req) +static void fuse_request_send_nowait(struct fuse_conn *fc, struct fuse_req *req) { spin_lock(&fc->lock); if (fc->connected) { - request_send_nowait_locked(fc, req); + fuse_request_send_nowait_locked(fc, req); spin_unlock(&fc->lock); } else { req->out.h.error = -ENOTCONN; @@ -426,16 +427,16 @@ static void request_send_nowait(struct fuse_conn *fc, struct fuse_req *req) } } -void request_send_noreply(struct fuse_conn *fc, struct fuse_req *req) +void fuse_request_send_noreply(struct fuse_conn *fc, struct fuse_req *req) { req->isreply = 0; - request_send_nowait(fc, req); + fuse_request_send_nowait(fc, req); } -void request_send_background(struct fuse_conn *fc, struct fuse_req *req) +void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req) { req->isreply = 1; - request_send_nowait(fc, req); + fuse_request_send_nowait(fc, req); } /* @@ -443,10 +444,11 @@ void request_send_background(struct fuse_conn *fc, struct fuse_req *req) * * fc->connected must have been checked previously */ -void request_send_background_locked(struct fuse_conn *fc, struct fuse_req *req) +void fuse_request_send_background_locked(struct fuse_conn *fc, + struct fuse_req *req) { req->isreply = 1; - request_send_nowait_locked(fc, req); + fuse_request_send_nowait_locked(fc, req); } /* @@ -539,8 +541,8 @@ static int fuse_copy_fill(struct fuse_copy_state *cs) BUG_ON(!cs->nr_segs); cs->seglen = cs->iov[0].iov_len; cs->addr = (unsigned long) cs->iov[0].iov_base; - cs->iov ++; - cs->nr_segs --; + cs->iov++; + cs->nr_segs--; } down_read(¤t->mm->mmap_sem); err = get_user_pages(current, current->mm, cs->addr, 1, cs->write, 0, @@ -589,9 +591,11 @@ static int fuse_copy_page(struct fuse_copy_state *cs, struct page *page, kunmap_atomic(mapaddr, KM_USER1); } while (count) { - int err; - if (!cs->len && (err = fuse_copy_fill(cs))) - return err; + if (!cs->len) { + int err = fuse_copy_fill(cs); + if (err) + return err; + } if (page) { void *mapaddr = kmap_atomic(page, KM_USER1); void *buf = mapaddr + offset; @@ -631,9 +635,11 @@ static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes, static int fuse_copy_one(struct fuse_copy_state *cs, void *val, unsigned size) { while (size) { - int err; - if (!cs->len && (err = fuse_copy_fill(cs))) - return err; + if (!cs->len) { + int err = fuse_copy_fill(cs); + if (err) + return err; + } fuse_copy_do(cs, &val, &size); } return 0; @@ -664,6 +670,8 @@ static int request_pending(struct fuse_conn *fc) /* Wait until a request is available on the pending list */ static void request_wait(struct fuse_conn *fc) +__releases(&fc->lock) +__acquires(&fc->lock) { DECLARE_WAITQUEUE(wait, current); @@ -691,7 +699,7 @@ static void request_wait(struct fuse_conn *fc) */ static int fuse_read_interrupt(struct fuse_conn *fc, struct fuse_req *req, const struct iovec *iov, unsigned long nr_segs) - __releases(fc->lock) +__releases(&fc->lock) { struct fuse_copy_state cs; struct fuse_in_header ih; @@ -813,6 +821,34 @@ static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov, return err; } +static int fuse_notify_poll(struct fuse_conn *fc, unsigned int size, + struct fuse_copy_state *cs) +{ + struct fuse_notify_poll_wakeup_out outarg; + int err; + + if (size != sizeof(outarg)) + return -EINVAL; + + err = fuse_copy_one(cs, &outarg, sizeof(outarg)); + if (err) + return err; + + return fuse_notify_poll_wakeup(fc, &outarg); +} + +static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code, + unsigned int size, struct fuse_copy_state *cs) +{ + switch (code) { + case FUSE_NOTIFY_POLL: + return fuse_notify_poll(fc, size, cs); + + default: + return -EINVAL; + } +} + /* Look up request on processing list by unique ID */ static struct fuse_req *request_find(struct fuse_conn *fc, u64 unique) { @@ -876,9 +912,23 @@ static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov, err = fuse_copy_one(&cs, &oh, sizeof(oh)); if (err) goto err_finish; + + err = -EINVAL; + if (oh.len != nbytes) + goto err_finish; + + /* + * Zero oh.unique indicates unsolicited notification message + * and error contains notification code. + */ + if (!oh.unique) { + err = fuse_notify(fc, oh.error, nbytes - sizeof(oh), &cs); + fuse_copy_finish(&cs); + return err ? err : nbytes; + } + err = -EINVAL; - if (!oh.unique || oh.error <= -1000 || oh.error > 0 || - oh.len != nbytes) + if (oh.error <= -1000 || oh.error > 0) goto err_finish; spin_lock(&fc->lock); @@ -966,6 +1016,8 @@ static unsigned fuse_dev_poll(struct file *file, poll_table *wait) * This function releases and reacquires fc->lock */ static void end_requests(struct fuse_conn *fc, struct list_head *head) +__releases(&fc->lock) +__acquires(&fc->lock) { while (!list_empty(head)) { struct fuse_req *req; @@ -988,7 +1040,8 @@ static void end_requests(struct fuse_conn *fc, struct list_head *head) * locked). */ static void end_io_requests(struct fuse_conn *fc) - __releases(fc->lock) __acquires(fc->lock) +__releases(&fc->lock) +__acquires(&fc->lock) { while (!list_empty(&fc->io)) { struct fuse_req *req = @@ -1002,11 +1055,11 @@ static void end_io_requests(struct fuse_conn *fc) wake_up(&req->waitq); if (end) { req->end = NULL; - /* The end function will consume this reference */ __fuse_get_request(req); spin_unlock(&fc->lock); wait_event(req->waitq, !req->locked); end(fc, req); + fuse_put_request(fc, req); spin_lock(&fc->lock); } } diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 95bc22bdd060..fdff346e96fd 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1,6 +1,6 @@ /* FUSE: Filesystem in Userspace - Copyright (C) 2001-2006 Miklos Szeredi <miklos@szeredi.hu> + Copyright (C) 2001-2008 Miklos Szeredi <miklos@szeredi.hu> This program can be distributed under the terms of the GNU GPL. See the file COPYING. @@ -189,7 +189,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd) parent = dget_parent(entry); fuse_lookup_init(fc, req, get_node_id(parent->d_inode), &entry->d_name, &outarg); - request_send(fc, req); + fuse_request_send(fc, req); dput(parent); err = req->out.h.error; fuse_put_request(fc, req); @@ -204,7 +204,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd) return 0; } spin_lock(&fc->lock); - fi->nlookup ++; + fi->nlookup++; spin_unlock(&fc->lock); } fuse_put_request(fc, forget_req); @@ -283,7 +283,7 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name, attr_version = fuse_get_attr_version(fc); fuse_lookup_init(fc, req, nodeid, name, outarg); - request_send(fc, req); + fuse_request_send(fc, req); err = req->out.h.error; fuse_put_request(fc, req); /* Zero nodeid is same as -ENOENT, but with valid timeout */ @@ -369,7 +369,7 @@ static void fuse_sync_release(struct fuse_conn *fc, struct fuse_file *ff, { fuse_release_fill(ff, nodeid, flags, FUSE_RELEASE); ff->reserved_req->force = 1; - request_send(fc, ff->reserved_req); + fuse_request_send(fc, ff->reserved_req); fuse_put_request(fc, ff->reserved_req); kfree(ff); } @@ -408,7 +408,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode, goto out_put_forget_req; err = -ENOMEM; - ff = fuse_file_alloc(); + ff = fuse_file_alloc(fc); if (!ff) goto out_put_request; @@ -432,7 +432,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode, req->out.args[0].value = &outentry; req->out.args[1].size = sizeof(outopen); req->out.args[1].value = &outopen; - request_send(fc, req); + fuse_request_send(fc, req); err = req->out.h.error; if (err) { if (err == -ENOSYS) @@ -502,7 +502,7 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req, else req->out.args[0].size = sizeof(outarg); req->out.args[0].value = &outarg; - request_send(fc, req); + fuse_request_send(fc, req); err = req->out.h.error; fuse_put_request(fc, req); if (err) @@ -631,15 +631,17 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry) req->in.numargs = 1; req->in.args[0].size = entry->d_name.len + 1; req->in.args[0].value = entry->d_name.name; - request_send(fc, req); + fuse_request_send(fc, req); err = req->out.h.error; fuse_put_request(fc, req); if (!err) { struct inode *inode = entry->d_inode; - /* Set nlink to zero so the inode can be cleared, if - the inode does have more links this will be - discovered at the next lookup/getattr */ + /* + * Set nlink to zero so the inode can be cleared, if the inode + * does have more links this will be discovered at the next + * lookup/getattr. + */ clear_nlink(inode); fuse_invalidate_attr(inode); fuse_invalidate_attr(dir); @@ -662,7 +664,7 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry) req->in.numargs = 1; req->in.args[0].size = entry->d_name.len + 1; req->in.args[0].value = entry->d_name.name; - request_send(fc, req); + fuse_request_send(fc, req); err = req->out.h.error; fuse_put_request(fc, req); if (!err) { @@ -695,7 +697,7 @@ static int fuse_rename(struct inode *olddir, struct dentry *oldent, req->in.args[1].value = oldent->d_name.name; req->in.args[2].size = newent->d_name.len + 1; req->in.args[2].value = newent->d_name.name; - request_send(fc, req); + fuse_request_send(fc, req); err = req->out.h.error; fuse_put_request(fc, req); if (!err) { @@ -811,7 +813,7 @@ static int fuse_do_getattr(struct inode *inode, struct kstat *stat, else req->out.args[0].size = sizeof(outarg); req->out.args[0].value = &outarg; - request_send(fc, req); + fuse_request_send(fc, req); err = req->out.h.error; fuse_put_request(fc, req); if (!err) { @@ -911,7 +913,7 @@ static int fuse_access(struct inode *inode, int mask) req->in.numargs = 1; req->in.args[0].size = sizeof(inarg); req->in.args[0].value = &inarg; - request_send(fc, req); + fuse_request_send(fc, req); err = req->out.h.error; fuse_put_request(fc, req); if (err == -ENOSYS) { @@ -1033,7 +1035,7 @@ static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir) req->num_pages = 1; req->pages[0] = page; fuse_read_fill(req, file, inode, file->f_pos, PAGE_SIZE, FUSE_READDIR); - request_send(fc, req); + fuse_request_send(fc, req); nbytes = req->out.args[0].size; err = req->out.h.error; fuse_put_request(fc, req); @@ -1067,7 +1069,7 @@ static char *read_link(struct dentry *dentry) req->out.numargs = 1; req->out.args[0].size = PAGE_SIZE - 1; req->out.args[0].value = link; - request_send(fc, req); + fuse_request_send(fc, req); if (req->out.h.error) { free_page((unsigned long) link); link = ERR_PTR(req->out.h.error); @@ -1273,7 +1275,7 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr, else req->out.args[0].size = sizeof(outarg); req->out.args[0].value = &outarg; - request_send(fc, req); + fuse_request_send(fc, req); err = req->out.h.error; fuse_put_request(fc, req); if (err) { @@ -1367,7 +1369,7 @@ static int fuse_setxattr(struct dentry *entry, const char *name, req->in.args[1].value = name; req->in.args[2].size = size; req->in.args[2].value = value; - request_send(fc, req); + fuse_request_send(fc, req); err = req->out.h.error; fuse_put_request(fc, req); if (err == -ENOSYS) { @@ -1413,7 +1415,7 @@ static ssize_t fuse_getxattr(struct dentry *entry, const char *name, req->out.args[0].size = sizeof(outarg); req->out.args[0].value = &outarg; } - request_send(fc, req); + fuse_request_send(fc, req); ret = req->out.h.error; if (!ret) ret = size ? req->out.args[0].size : outarg.size; @@ -1463,7 +1465,7 @@ static ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size) req->out.args[0].size = sizeof(outarg); req->out.args[0].value = &outarg; } - request_send(fc, req); + fuse_request_send(fc, req); ret = req->out.h.error; if (!ret) ret = size ? req->out.args[0].size : outarg.size; @@ -1496,7 +1498,7 @@ static int fuse_removexattr(struct dentry *entry, const char *name) req->in.numargs = 1; req->in.args[0].size = strlen(name) + 1; req->in.args[0].value = name; - request_send(fc, req); + fuse_request_send(fc, req); err = req->out.h.error; fuse_put_request(fc, req); if (err == -ENOSYS) { diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 4c9ee7011265..e8162646a9b5 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1,6 +1,6 @@ /* FUSE: Filesystem in Userspace - Copyright (C) 2001-2006 Miklos Szeredi <miklos@szeredi.hu> + Copyright (C) 2001-2008 Miklos Szeredi <miklos@szeredi.hu> This program can be distributed under the terms of the GNU GPL. See the file COPYING. @@ -39,14 +39,14 @@ static int fuse_send_open(struct inode *inode, struct file *file, int isdir, req->out.numargs = 1; req->out.args[0].size = sizeof(*outargp); req->out.args[0].value = outargp; - request_send(fc, req); + fuse_request_send(fc, req); err = req->out.h.error; fuse_put_request(fc, req); return err; } -struct fuse_file *fuse_file_alloc(void) +struct fuse_file *fuse_file_alloc(struct fuse_conn *fc) { struct fuse_file *ff; ff = kmalloc(sizeof(struct fuse_file), GFP_KERNEL); @@ -58,7 +58,12 @@ struct fuse_file *fuse_file_alloc(void) } else { INIT_LIST_HEAD(&ff->write_entry); atomic_set(&ff->count, 0); + spin_lock(&fc->lock); + ff->kh = ++fc->khctr; + spin_unlock(&fc->lock); } + RB_CLEAR_NODE(&ff->polled_node); + init_waitqueue_head(&ff->poll_wait); } return ff; } @@ -79,7 +84,6 @@ static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req) { dput(req->misc.release.dentry); mntput(req->misc.release.vfsmount); - fuse_put_request(fc, req); } static void fuse_file_put(struct fuse_file *ff) @@ -89,7 +93,7 @@ static void fuse_file_put(struct fuse_file *ff) struct inode *inode = req->misc.release.dentry->d_inode; struct fuse_conn *fc = get_fuse_conn(inode); req->end = fuse_release_end; - request_send_background(fc, req); + fuse_request_send_background(fc, req); kfree(ff); } } @@ -109,6 +113,7 @@ void fuse_finish_open(struct inode *inode, struct file *file, int fuse_open_common(struct inode *inode, struct file *file, int isdir) { + struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_open_out outarg; struct fuse_file *ff; int err; @@ -121,7 +126,7 @@ int fuse_open_common(struct inode *inode, struct file *file, int isdir) if (err) return err; - ff = fuse_file_alloc(); + ff = fuse_file_alloc(fc); if (!ff) return -ENOMEM; @@ -167,7 +172,11 @@ int fuse_release_common(struct inode *inode, struct file *file, int isdir) spin_lock(&fc->lock); list_del(&ff->write_entry); + if (!RB_EMPTY_NODE(&ff->polled_node)) + rb_erase(&ff->polled_node, &fc->polled_files); spin_unlock(&fc->lock); + + wake_up_interruptible_sync(&ff->poll_wait); /* * Normally this will send the RELEASE request, * however if some asynchronous READ or WRITE requests @@ -280,7 +289,7 @@ static int fuse_flush(struct file *file, fl_owner_t id) req->in.args[0].size = sizeof(inarg); req->in.args[0].value = &inarg; req->force = 1; - request_send(fc, req); + fuse_request_send(fc, req); err = req->out.h.error; fuse_put_request(fc, req); if (err == -ENOSYS) { @@ -344,7 +353,7 @@ int fuse_fsync_common(struct file *file, struct dentry *de, int datasync, req->in.numargs = 1; req->in.args[0].size = sizeof(inarg); req->in.args[0].value = &inarg; - request_send(fc, req); + fuse_request_send(fc, req); err = req->out.h.error; fuse_put_request(fc, req); if (err == -ENOSYS) { @@ -396,7 +405,7 @@ static size_t fuse_send_read(struct fuse_req *req, struct file *file, inarg->read_flags |= FUSE_READ_LOCKOWNER; inarg->lock_owner = fuse_lock_owner_id(fc, owner); } - request_send(fc, req); + fuse_request_send(fc, req); return req->out.args[0].size; } @@ -493,7 +502,6 @@ static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req) } if (req->ff) fuse_file_put(req->ff); - fuse_put_request(fc, req); } static void fuse_send_readpages(struct fuse_req *req, struct file *file, @@ -509,10 +517,11 @@ static void fuse_send_readpages(struct fuse_req *req, struct file *file, struct fuse_file *ff = file->private_data; req->ff = fuse_file_get(ff); req->end = fuse_readpages_end; - request_send_background(fc, req); + fuse_request_send_background(fc, req); } else { - request_send(fc, req); + fuse_request_send(fc, req); fuse_readpages_end(fc, req); + fuse_put_request(fc, req); } } @@ -543,7 +552,7 @@ static int fuse_readpages_fill(void *_data, struct page *page) } } req->pages[req->num_pages] = page; - req->num_pages ++; + req->num_pages++; return 0; } @@ -636,7 +645,7 @@ static size_t fuse_send_write(struct fuse_req *req, struct file *file, inarg->write_flags |= FUSE_WRITE_LOCKOWNER; inarg->lock_owner = fuse_lock_owner_id(fc, owner); } - request_send(fc, req); + fuse_request_send(fc, req); return req->misc.write.out.size; } @@ -1042,7 +1051,6 @@ static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req) { __free_page(req->pages[0]); fuse_file_put(req->ff); - fuse_put_request(fc, req); } static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req) @@ -1060,6 +1068,8 @@ static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req) /* Called under fc->lock, may release and reacquire it */ static void fuse_send_writepage(struct fuse_conn *fc, struct fuse_req *req) +__releases(&fc->lock) +__acquires(&fc->lock) { struct fuse_inode *fi = get_fuse_inode(req->inode); loff_t size = i_size_read(req->inode); @@ -1079,13 +1089,14 @@ static void fuse_send_writepage(struct fuse_conn *fc, struct fuse_req *req) req->in.args[1].size = inarg->size; fi->writectr++; - request_send_background_locked(fc, req); + fuse_request_send_background_locked(fc, req); return; out_free: fuse_writepage_finish(fc, req); spin_unlock(&fc->lock); fuse_writepage_free(fc, req); + fuse_put_request(fc, req); spin_lock(&fc->lock); } @@ -1096,6 +1107,8 @@ static void fuse_send_writepage(struct fuse_conn *fc, struct fuse_req *req) * Called with fc->lock */ void fuse_flush_writepages(struct inode *inode) +__releases(&fc->lock) +__acquires(&fc->lock) { struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); @@ -1325,7 +1338,7 @@ static int fuse_getlk(struct file *file, struct file_lock *fl) req->out.numargs = 1; req->out.args[0].size = sizeof(outarg); req->out.args[0].value = &outarg; - request_send(fc, req); + fuse_request_send(fc, req); err = req->out.h.error; fuse_put_request(fc, req); if (!err) @@ -1357,7 +1370,7 @@ static int fuse_setlk(struct file *file, struct file_lock *fl, int flock) return PTR_ERR(req); fuse_lk_fill(req, file, fl, opcode, pid, flock); - request_send(fc, req); + fuse_request_send(fc, req); err = req->out.h.error; /* locking is restartable */ if (err == -EINTR) @@ -1433,7 +1446,7 @@ static sector_t fuse_bmap(struct address_space *mapping, sector_t block) req->out.numargs = 1; req->out.args[0].size = sizeof(outarg); req->out.args[0].value = &outarg; - request_send(fc, req); + fuse_request_send(fc, req); err = req->out.h.error; fuse_put_request(fc, req); if (err == -ENOSYS) @@ -1470,6 +1483,406 @@ static loff_t fuse_file_llseek(struct file *file, loff_t offset, int origin) return retval; } +static int fuse_ioctl_copy_user(struct page **pages, struct iovec *iov, + unsigned int nr_segs, size_t bytes, bool to_user) +{ + struct iov_iter ii; + int page_idx = 0; + + if (!bytes) + return 0; + + iov_iter_init(&ii, iov, nr_segs, bytes, 0); + + while (iov_iter_count(&ii)) { + struct page *page = pages[page_idx++]; + size_t todo = min_t(size_t, PAGE_SIZE, iov_iter_count(&ii)); + void *kaddr, *map; + + kaddr = map = kmap(page); + + while (todo) { + char __user *uaddr = ii.iov->iov_base + ii.iov_offset; + size_t iov_len = ii.iov->iov_len - ii.iov_offset; + size_t copy = min(todo, iov_len); + size_t left; + + if (!to_user) + left = copy_from_user(kaddr, uaddr, copy); + else + left = copy_to_user(uaddr, kaddr, copy); + + if (unlikely(left)) + return -EFAULT; + + iov_iter_advance(&ii, copy); + todo -= copy; + kaddr += copy; + } + + kunmap(map); + } + + return 0; +} + +/* + * For ioctls, there is no generic way to determine how much memory + * needs to be read and/or written. Furthermore, ioctls are allowed + * to dereference the passed pointer, so the parameter requires deep + * copying but FUSE has no idea whatsoever about what to copy in or + * out. + * + * This is solved by allowing FUSE server to retry ioctl with + * necessary in/out iovecs. Let's assume the ioctl implementation + * needs to read in the following structure. + * + * struct a { + * char *buf; + * size_t buflen; + * } + * + * On the first callout to FUSE server, inarg->in_size and + * inarg->out_size will be NULL; then, the server completes the ioctl + * with FUSE_IOCTL_RETRY set in out->flags, out->in_iovs set to 1 and + * the actual iov array to + * + * { { .iov_base = inarg.arg, .iov_len = sizeof(struct a) } } + * + * which tells FUSE to copy in the requested area and retry the ioctl. + * On the second round, the server has access to the structure and + * from that it can tell what to look for next, so on the invocation, + * it sets FUSE_IOCTL_RETRY, out->in_iovs to 2 and iov array to + * + * { { .iov_base = inarg.arg, .iov_len = sizeof(struct a) }, + * { .iov_base = a.buf, .iov_len = a.buflen } } + * + * FUSE will copy both struct a and the pointed buffer from the + * process doing the ioctl and retry ioctl with both struct a and the + * buffer. + * + * This time, FUSE server has everything it needs and completes ioctl + * without FUSE_IOCTL_RETRY which finishes the ioctl call. + * + * Copying data out works the same way. + * + * Note that if FUSE_IOCTL_UNRESTRICTED is clear, the kernel + * automatically initializes in and out iovs by decoding @cmd with + * _IOC_* macros and the server is not allowed to request RETRY. This + * limits ioctl data transfers to well-formed ioctls and is the forced + * behavior for all FUSE servers. + */ +static long fuse_file_do_ioctl(struct file *file, unsigned int cmd, + unsigned long arg, unsigned int flags) +{ + struct inode *inode = file->f_dentry->d_inode; + struct fuse_file *ff = file->private_data; + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_ioctl_in inarg = { + .fh = ff->fh, + .cmd = cmd, + .arg = arg, + .flags = flags + }; + struct fuse_ioctl_out outarg; + struct fuse_req *req = NULL; + struct page **pages = NULL; + struct page *iov_page = NULL; + struct iovec *in_iov = NULL, *out_iov = NULL; + unsigned int in_iovs = 0, out_iovs = 0, num_pages = 0, max_pages; + size_t in_size, out_size, transferred; + int err; + + /* assume all the iovs returned by client always fits in a page */ + BUILD_BUG_ON(sizeof(struct iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE); + + if (!fuse_allow_task(fc, current)) + return -EACCES; + + err = -EIO; + if (is_bad_inode(inode)) + goto out; + + err = -ENOMEM; + pages = kzalloc(sizeof(pages[0]) * FUSE_MAX_PAGES_PER_REQ, GFP_KERNEL); + iov_page = alloc_page(GFP_KERNEL); + if (!pages || !iov_page) + goto out; + + /* + * If restricted, initialize IO parameters as encoded in @cmd. + * RETRY from server is not allowed. + */ + if (!(flags & FUSE_IOCTL_UNRESTRICTED)) { + struct iovec *iov = page_address(iov_page); + + iov->iov_base = (void __user *)arg; + iov->iov_len = _IOC_SIZE(cmd); + + if (_IOC_DIR(cmd) & _IOC_WRITE) { + in_iov = iov; + in_iovs = 1; + } + + if (_IOC_DIR(cmd) & _IOC_READ) { + out_iov = iov; + out_iovs = 1; + } + } + + retry: + inarg.in_size = in_size = iov_length(in_iov, in_iovs); + inarg.out_size = out_size = iov_length(out_iov, out_iovs); + + /* + * Out data can be used either for actual out data or iovs, + * make sure there always is at least one page. + */ + out_size = max_t(size_t, out_size, PAGE_SIZE); + max_pages = DIV_ROUND_UP(max(in_size, out_size), PAGE_SIZE); + + /* make sure there are enough buffer pages and init request with them */ + err = -ENOMEM; + if (max_pages > FUSE_MAX_PAGES_PER_REQ) + goto out; + while (num_pages < max_pages) { + pages[num_pages] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM); + if (!pages[num_pages]) + goto out; + num_pages++; + } + + req = fuse_get_req(fc); + if (IS_ERR(req)) { + err = PTR_ERR(req); + req = NULL; + goto out; + } + memcpy(req->pages, pages, sizeof(req->pages[0]) * num_pages); + req->num_pages = num_pages; + + /* okay, let's send it to the client */ + req->in.h.opcode = FUSE_IOCTL; + req->in.h.nodeid = get_node_id(inode); + req->in.numargs = 1; + req->in.args[0].size = sizeof(inarg); + req->in.args[0].value = &inarg; + if (in_size) { + req->in.numargs++; + req->in.args[1].size = in_size; + req->in.argpages = 1; + + err = fuse_ioctl_copy_user(pages, in_iov, in_iovs, in_size, + false); + if (err) + goto out; + } + + req->out.numargs = 2; + req->out.args[0].size = sizeof(outarg); + req->out.args[0].value = &outarg; + req->out.args[1].size = out_size; + req->out.argpages = 1; + req->out.argvar = 1; + + fuse_request_send(fc, req); + err = req->out.h.error; + transferred = req->out.args[1].size; + fuse_put_request(fc, req); + req = NULL; + if (err) + goto out; + + /* did it ask for retry? */ + if (outarg.flags & FUSE_IOCTL_RETRY) { + char *vaddr; + + /* no retry if in restricted mode */ + err = -EIO; + if (!(flags & FUSE_IOCTL_UNRESTRICTED)) + goto out; + + in_iovs = outarg.in_iovs; + out_iovs = outarg.out_iovs; + + /* + * Make sure things are in boundary, separate checks + * are to protect against overflow. + */ + err = -ENOMEM; + if (in_iovs > FUSE_IOCTL_MAX_IOV || + out_iovs > FUSE_IOCTL_MAX_IOV || + in_iovs + out_iovs > FUSE_IOCTL_MAX_IOV) + goto out; + + err = -EIO; + if ((in_iovs + out_iovs) * sizeof(struct iovec) != transferred) + goto out; + + /* okay, copy in iovs and retry */ + vaddr = kmap_atomic(pages[0], KM_USER0); + memcpy(page_address(iov_page), vaddr, transferred); + kunmap_atomic(vaddr, KM_USER0); + + in_iov = page_address(iov_page); + out_iov = in_iov + in_iovs; + + goto retry; + } + + err = -EIO; + if (transferred > inarg.out_size) + goto out; + + err = fuse_ioctl_copy_user(pages, out_iov, out_iovs, transferred, true); + out: + if (req) + fuse_put_request(fc, req); + if (iov_page) + __free_page(iov_page); + while (num_pages) + __free_page(pages[--num_pages]); + kfree(pages); + + return err ? err : outarg.result; +} + +static long fuse_file_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + return fuse_file_do_ioctl(file, cmd, arg, 0); +} + +static long fuse_file_compat_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + return fuse_file_do_ioctl(file, cmd, arg, FUSE_IOCTL_COMPAT); +} + +/* + * All files which have been polled are linked to RB tree + * fuse_conn->polled_files which is indexed by kh. Walk the tree and + * find the matching one. + */ +static struct rb_node **fuse_find_polled_node(struct fuse_conn *fc, u64 kh, + struct rb_node **parent_out) +{ + struct rb_node **link = &fc->polled_files.rb_node; + struct rb_node *last = NULL; + + while (*link) { + struct fuse_file *ff; + + last = *link; + ff = rb_entry(last, struct fuse_file, polled_node); + + if (kh < ff->kh) + link = &last->rb_left; + else if (kh > ff->kh) + link = &last->rb_right; + else + return link; + } + + if (parent_out) + *parent_out = last; + return link; +} + +/* + * The file is about to be polled. Make sure it's on the polled_files + * RB tree. Note that files once added to the polled_files tree are + * not removed before the file is released. This is because a file + * polled once is likely to be polled again. + */ +static void fuse_register_polled_file(struct fuse_conn *fc, + struct fuse_file *ff) +{ + spin_lock(&fc->lock); + if (RB_EMPTY_NODE(&ff->polled_node)) { + struct rb_node **link, *parent; + + link = fuse_find_polled_node(fc, ff->kh, &parent); + BUG_ON(*link); + rb_link_node(&ff->polled_node, parent, link); + rb_insert_color(&ff->polled_node, &fc->polled_files); + } + spin_unlock(&fc->lock); +} + +static unsigned fuse_file_poll(struct file *file, poll_table *wait) +{ + struct inode *inode = file->f_dentry->d_inode; + struct fuse_file *ff = file->private_data; + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_poll_in inarg = { .fh = ff->fh, .kh = ff->kh }; + struct fuse_poll_out outarg; + struct fuse_req *req; + int err; + + if (fc->no_poll) + return DEFAULT_POLLMASK; + + poll_wait(file, &ff->poll_wait, wait); + + /* + * Ask for notification iff there's someone waiting for it. + * The client may ignore the flag and always notify. + */ + if (waitqueue_active(&ff->poll_wait)) { + inarg.flags |= FUSE_POLL_SCHEDULE_NOTIFY; + fuse_register_polled_file(fc, ff); + } + + req = fuse_get_req(fc); + if (IS_ERR(req)) + return PTR_ERR(req); + + req->in.h.opcode = FUSE_POLL; + req->in.h.nodeid = get_node_id(inode); + req->in.numargs = 1; + req->in.args[0].size = sizeof(inarg); + req->in.args[0].value = &inarg; + req->out.numargs = 1; + req->out.args[0].size = sizeof(outarg); + req->out.args[0].value = &outarg; + fuse_request_send(fc, req); + err = req->out.h.error; + fuse_put_request(fc, req); + + if (!err) + return outarg.revents; + if (err == -ENOSYS) { + fc->no_poll = 1; + return DEFAULT_POLLMASK; + } + return POLLERR; +} + +/* + * This is called from fuse_handle_notify() on FUSE_NOTIFY_POLL and + * wakes up the poll waiters. + */ +int fuse_notify_poll_wakeup(struct fuse_conn *fc, + struct fuse_notify_poll_wakeup_out *outarg) +{ + u64 kh = outarg->kh; + struct rb_node **link; + + spin_lock(&fc->lock); + + link = fuse_find_polled_node(fc, kh, NULL); + if (*link) { + struct fuse_file *ff; + + ff = rb_entry(*link, struct fuse_file, polled_node); + wake_up_interruptible_sync(&ff->poll_wait); + } + + spin_unlock(&fc->lock); + return 0; +} + static const struct file_operations fuse_file_operations = { .llseek = fuse_file_llseek, .read = do_sync_read, @@ -1484,6 +1897,9 @@ static const struct file_operations fuse_file_operations = { .lock = fuse_file_lock, .flock = fuse_file_flock, .splice_read = generic_file_splice_read, + .unlocked_ioctl = fuse_file_ioctl, + .compat_ioctl = fuse_file_compat_ioctl, + .poll = fuse_file_poll, }; static const struct file_operations fuse_direct_io_file_operations = { @@ -1496,6 +1912,9 @@ static const struct file_operations fuse_direct_io_file_operations = { .fsync = fuse_fsync, .lock = fuse_file_lock, .flock = fuse_file_flock, + .unlocked_ioctl = fuse_file_ioctl, + .compat_ioctl = fuse_file_compat_ioctl, + .poll = fuse_file_poll, /* no mmap and splice_read */ }; diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 35accfdd747f..5e64b815a5a1 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -1,6 +1,6 @@ /* FUSE: Filesystem in Userspace - Copyright (C) 2001-2006 Miklos Szeredi <miklos@szeredi.hu> + Copyright (C) 2001-2008 Miklos Szeredi <miklos@szeredi.hu> This program can be distributed under the terms of the GNU GPL. See the file COPYING. @@ -19,6 +19,8 @@ #include <linux/backing-dev.h> #include <linux/mutex.h> #include <linux/rwsem.h> +#include <linux/rbtree.h> +#include <linux/poll.h> /** Max number of pages that can be used in a single read request */ #define FUSE_MAX_PAGES_PER_REQ 32 @@ -100,6 +102,9 @@ struct fuse_file { /** Request reserved for flush and release */ struct fuse_req *reserved_req; + /** Kernel file handle guaranteed to be unique */ + u64 kh; + /** File handle used by userspace */ u64 fh; @@ -108,6 +113,12 @@ struct fuse_file { /** Entry on inode's write_files list */ struct list_head write_entry; + + /** RB node to be linked on fuse_conn->polled_files */ + struct rb_node polled_node; + + /** Wait queue head for poll */ + wait_queue_head_t poll_wait; }; /** One input argument of a request */ @@ -322,6 +333,12 @@ struct fuse_conn { /** The list of requests under I/O */ struct list_head io; + /** The next unique kernel file handle */ + u64 khctr; + + /** rbtree of fuse_files waiting for poll events indexed by ph */ + struct rb_root polled_files; + /** Number of requests currently in the background */ unsigned num_background; @@ -355,19 +372,19 @@ struct fuse_conn { /** Connection failed (version mismatch). Cannot race with setting other bitfields since it is only set once in INIT reply, before any other request, and never cleared */ - unsigned conn_error : 1; + unsigned conn_error:1; /** Connection successful. Only set in INIT */ - unsigned conn_init : 1; + unsigned conn_init:1; /** Do readpages asynchronously? Only set in INIT */ - unsigned async_read : 1; + unsigned async_read:1; /** Do not send separate SETATTR request before open(O_TRUNC) */ - unsigned atomic_o_trunc : 1; + unsigned atomic_o_trunc:1; /** Filesystem supports NFS exporting. Only set in INIT */ - unsigned export_support : 1; + unsigned export_support:1; /* * The following bitfields are only for optimization purposes @@ -375,43 +392,46 @@ struct fuse_conn { */ /** Is fsync not implemented by fs? */ - unsigned no_fsync : 1; + unsigned no_fsync:1; /** Is fsyncdir not implemented by fs? */ - unsigned no_fsyncdir : 1; + unsigned no_fsyncdir:1; /** Is flush not implemented by fs? */ - unsigned no_flush : 1; + unsigned no_flush:1; /** Is setxattr not implemented by fs? */ - unsigned no_setxattr : 1; + unsigned no_setxattr:1; /** Is getxattr not implemented by fs? */ - unsigned no_getxattr : 1; + unsigned no_getxattr:1; /** Is listxattr not implemented by fs? */ - unsigned no_listxattr : 1; + unsigned no_listxattr:1; /** Is removexattr not implemented by fs? */ - unsigned no_removexattr : 1; + unsigned no_removexattr:1; /** Are file locking primitives not implemented by fs? */ - unsigned no_lock : 1; + unsigned no_lock:1; /** Is access not implemented by fs? */ - unsigned no_access : 1; + unsigned no_access:1; /** Is create not implemented by fs? */ - unsigned no_create : 1; + unsigned no_create:1; /** Is interrupt not implemented by fs? */ - unsigned no_interrupt : 1; + unsigned no_interrupt:1; /** Is bmap not implemented by fs? */ - unsigned no_bmap : 1; + unsigned no_bmap:1; + + /** Is poll not implemented by fs? */ + unsigned no_poll:1; /** Do multi-page cached writes */ - unsigned big_writes : 1; + unsigned big_writes:1; /** The number of requests waiting for completion */ atomic_t num_waiting; @@ -445,6 +465,9 @@ struct fuse_conn { /** Version counter for attribute changes */ u64 attr_version; + + /** Called on final put */ + void (*release)(struct fuse_conn *); }; static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb) @@ -499,7 +522,7 @@ void fuse_read_fill(struct fuse_req *req, struct file *file, */ int fuse_open_common(struct inode *inode, struct file *file, int isdir); -struct fuse_file *fuse_file_alloc(void); +struct fuse_file *fuse_file_alloc(struct fuse_conn *fc); void fuse_file_free(struct fuse_file *ff); void fuse_finish_open(struct inode *inode, struct file *file, struct fuse_file *ff, struct fuse_open_out *outarg); @@ -519,6 +542,12 @@ int fuse_fsync_common(struct file *file, struct dentry *de, int datasync, int isdir); /** + * Notify poll wakeup + */ +int fuse_notify_poll_wakeup(struct fuse_conn *fc, + struct fuse_notify_poll_wakeup_out *outarg); + +/** * Initialize file operations on a regular file */ void fuse_init_file_inode(struct inode *inode); @@ -593,19 +622,20 @@ void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req); /** * Send a request (synchronous) */ -void request_send(struct fuse_conn *fc, struct fuse_req *req); +void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req); /** * Send a request with no reply */ -void request_send_noreply(struct fuse_conn *fc, struct fuse_req *req); +void fuse_request_send_noreply(struct fuse_conn *fc, struct fuse_req *req); /** * Send a request in the background */ -void request_send_background(struct fuse_conn *fc, struct fuse_req *req); +void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req); -void request_send_background_locked(struct fuse_conn *fc, struct fuse_req *req); +void fuse_request_send_background_locked(struct fuse_conn *fc, + struct fuse_req *req); /* Abort all requests */ void fuse_abort_conn(struct fuse_conn *fc); @@ -623,6 +653,11 @@ void fuse_invalidate_entry_cache(struct dentry *entry); struct fuse_conn *fuse_conn_get(struct fuse_conn *fc); /** + * Initialize fuse_conn + */ +int fuse_conn_init(struct fuse_conn *fc, struct super_block *sb); + +/** * Release reference to fuse_conn */ void fuse_conn_put(struct fuse_conn *fc); diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 2e99f34b4435..47c96fdca1ac 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -1,6 +1,6 @@ /* FUSE: Filesystem in Userspace - Copyright (C) 2001-2006 Miklos Szeredi <miklos@szeredi.hu> + Copyright (C) 2001-2008 Miklos Szeredi <miklos@szeredi.hu> This program can be distributed under the terms of the GNU GPL. See the file COPYING. @@ -37,10 +37,10 @@ struct fuse_mount_data { unsigned rootmode; unsigned user_id; unsigned group_id; - unsigned fd_present : 1; - unsigned rootmode_present : 1; - unsigned user_id_present : 1; - unsigned group_id_present : 1; + unsigned fd_present:1; + unsigned rootmode_present:1; + unsigned user_id_present:1; + unsigned group_id_present:1; unsigned flags; unsigned max_read; unsigned blksize; @@ -94,7 +94,7 @@ void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req, req->in.numargs = 1; req->in.args[0].size = sizeof(struct fuse_forget_in); req->in.args[0].value = inarg; - request_send_noreply(fc, req); + fuse_request_send_noreply(fc, req); } static void fuse_clear_inode(struct inode *inode) @@ -250,7 +250,7 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid, fi = get_fuse_inode(inode); spin_lock(&fc->lock); - fi->nlookup ++; + fi->nlookup++; spin_unlock(&fc->lock); fuse_change_attributes(inode, attr, attr_valid, attr_version); @@ -269,7 +269,7 @@ static void fuse_send_destroy(struct fuse_conn *fc) fc->destroy_req = NULL; req->in.h.opcode = FUSE_DESTROY; req->force = 1; - request_send(fc, req); + fuse_request_send(fc, req); fuse_put_request(fc, req); } } @@ -334,7 +334,7 @@ static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf) req->out.args[0].size = fc->minor < 4 ? FUSE_COMPAT_STATFS_SIZE : sizeof(outarg); req->out.args[0].value = &outarg; - request_send(fc, req); + fuse_request_send(fc, req); err = req->out.h.error; if (!err) convert_fuse_statfs(buf, &outarg.st); @@ -462,68 +462,69 @@ static int fuse_show_options(struct seq_file *m, struct vfsmount *mnt) return 0; } -static struct fuse_conn *new_conn(struct super_block *sb) +int fuse_conn_init(struct fuse_conn *fc, struct super_block *sb) { - struct fuse_conn *fc; int err; - fc = kzalloc(sizeof(*fc), GFP_KERNEL); - if (fc) { - spin_lock_init(&fc->lock); - mutex_init(&fc->inst_mutex); - atomic_set(&fc->count, 1); - init_waitqueue_head(&fc->waitq); - init_waitqueue_head(&fc->blocked_waitq); - init_waitqueue_head(&fc->reserved_req_waitq); - INIT_LIST_HEAD(&fc->pending); - INIT_LIST_HEAD(&fc->processing); - INIT_LIST_HEAD(&fc->io); - INIT_LIST_HEAD(&fc->interrupts); - INIT_LIST_HEAD(&fc->bg_queue); - atomic_set(&fc->num_waiting, 0); - fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; - fc->bdi.unplug_io_fn = default_unplug_io_fn; - /* fuse does it's own writeback accounting */ - fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB; - fc->dev = sb->s_dev; - err = bdi_init(&fc->bdi); - if (err) - goto error_kfree; - if (sb->s_bdev) { - err = bdi_register(&fc->bdi, NULL, "%u:%u-fuseblk", - MAJOR(fc->dev), MINOR(fc->dev)); - } else { - err = bdi_register_dev(&fc->bdi, fc->dev); - } - if (err) - goto error_bdi_destroy; - /* - * For a single fuse filesystem use max 1% of dirty + - * writeback threshold. - * - * This gives about 1M of write buffer for memory maps on a - * machine with 1G and 10% dirty_ratio, which should be more - * than enough. - * - * Privileged users can raise it by writing to - * - * /sys/class/bdi/<bdi>/max_ratio - */ - bdi_set_max_ratio(&fc->bdi, 1); - fc->reqctr = 0; - fc->blocked = 1; - fc->attr_version = 1; - get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key)); + memset(fc, 0, sizeof(*fc)); + spin_lock_init(&fc->lock); + mutex_init(&fc->inst_mutex); + atomic_set(&fc->count, 1); + init_waitqueue_head(&fc->waitq); + init_waitqueue_head(&fc->blocked_waitq); + init_waitqueue_head(&fc->reserved_req_waitq); + INIT_LIST_HEAD(&fc->pending); + INIT_LIST_HEAD(&fc->processing); + INIT_LIST_HEAD(&fc->io); + INIT_LIST_HEAD(&fc->interrupts); + INIT_LIST_HEAD(&fc->bg_queue); + INIT_LIST_HEAD(&fc->entry); + atomic_set(&fc->num_waiting, 0); + fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; + fc->bdi.unplug_io_fn = default_unplug_io_fn; + /* fuse does it's own writeback accounting */ + fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB; + fc->khctr = 0; + fc->polled_files = RB_ROOT; + fc->dev = sb->s_dev; + err = bdi_init(&fc->bdi); + if (err) + goto error_mutex_destroy; + if (sb->s_bdev) { + err = bdi_register(&fc->bdi, NULL, "%u:%u-fuseblk", + MAJOR(fc->dev), MINOR(fc->dev)); + } else { + err = bdi_register_dev(&fc->bdi, fc->dev); } - return fc; + if (err) + goto error_bdi_destroy; + /* + * For a single fuse filesystem use max 1% of dirty + + * writeback threshold. + * + * This gives about 1M of write buffer for memory maps on a + * machine with 1G and 10% dirty_ratio, which should be more + * than enough. + * + * Privileged users can raise it by writing to + * + * /sys/class/bdi/<bdi>/max_ratio + */ + bdi_set_max_ratio(&fc->bdi, 1); + fc->reqctr = 0; + fc->blocked = 1; + fc->attr_version = 1; + get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key)); -error_bdi_destroy: + return 0; + + error_bdi_destroy: bdi_destroy(&fc->bdi); -error_kfree: + error_mutex_destroy: mutex_destroy(&fc->inst_mutex); - kfree(fc); - return NULL; + return err; } +EXPORT_SYMBOL_GPL(fuse_conn_init); void fuse_conn_put(struct fuse_conn *fc) { @@ -532,7 +533,7 @@ void fuse_conn_put(struct fuse_conn *fc) fuse_request_free(fc->destroy_req); mutex_destroy(&fc->inst_mutex); bdi_destroy(&fc->bdi); - kfree(fc); + fc->release(fc); } } @@ -542,7 +543,7 @@ struct fuse_conn *fuse_conn_get(struct fuse_conn *fc) return fc; } -static struct inode *get_root_inode(struct super_block *sb, unsigned mode) +static struct inode *fuse_get_root_inode(struct super_block *sb, unsigned mode) { struct fuse_attr attr; memset(&attr, 0, sizeof(attr)); @@ -553,8 +554,7 @@ static struct inode *get_root_inode(struct super_block *sb, unsigned mode) return fuse_iget(sb, 1, 0, &attr, 0, 0); } -struct fuse_inode_handle -{ +struct fuse_inode_handle { u64 nodeid; u32 generation; }; @@ -761,7 +761,6 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) fc->max_write = max_t(unsigned, 4096, fc->max_write); fc->conn_init = 1; } - fuse_put_request(fc, req); fc->blocked = 0; wake_up_all(&fc->blocked_waitq); } @@ -787,7 +786,12 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) req->out.args[0].size = sizeof(struct fuse_init_out); req->out.args[0].value = &req->misc.init_out; req->end = process_init_reply; - request_send_background(fc, req); + fuse_request_send_background(fc, req); +} + +static void fuse_free_conn(struct fuse_conn *fc) +{ + kfree(fc); } static int fuse_fill_super(struct super_block *sb, void *data, int silent) @@ -828,10 +832,17 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) if (file->f_op != &fuse_dev_operations) return -EINVAL; - fc = new_conn(sb); + fc = kmalloc(sizeof(*fc), GFP_KERNEL); if (!fc) return -ENOMEM; + err = fuse_conn_init(fc, sb); + if (err) { + kfree(fc); + return err; + } + + fc->release = fuse_free_conn; fc->flags = d.flags; fc->user_id = d.user_id; fc->group_id = d.group_id; @@ -841,7 +852,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) sb->s_fs_info = fc; err = -ENOMEM; - root = get_root_inode(sb, d.rootmode); + root = fuse_get_root_inode(sb, d.rootmode); if (!root) goto err; @@ -952,7 +963,7 @@ static inline void unregister_fuseblk(void) static void fuse_inode_init_once(void *foo) { - struct inode * inode = foo; + struct inode *inode = foo; inode_init_once(inode); } @@ -1031,7 +1042,7 @@ static int __init fuse_init(void) { int res; - printk("fuse init (API version %i.%i)\n", + printk(KERN_INFO "fuse init (API version %i.%i)\n", FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION); INIT_LIST_HEAD(&fuse_conn_list); diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 0ab0c6f5f438..6903d37af037 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -252,6 +252,7 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf, for (;;) { struct page *page; unsigned long nr, ret; + int ra; /* nr is the maximum number of bytes to copy from this page */ nr = huge_page_size(h); @@ -274,16 +275,19 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf, */ ret = len < nr ? len : nr; if (clear_user(buf, ret)) - ret = -EFAULT; + ra = -EFAULT; + else + ra = 0; } else { /* * We have the page, copy it to user space buffer. */ - ret = hugetlbfs_read_actor(page, offset, buf, len, nr); + ra = hugetlbfs_read_actor(page, offset, buf, len, nr); + ret = ra; } - if (ret < 0) { + if (ra < 0) { if (retval == 0) - retval = ret; + retval = ra; if (page) page_cache_release(page); goto out; diff --git a/fs/inode.c b/fs/inode.c index bd48e5e6d3e8..7a6e8c2ff7b1 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -110,8 +110,8 @@ static void wake_up_inode(struct inode *inode) /** * inode_init_always - perform inode structure intialisation - * @sb - superblock inode belongs to. - * @inode - inode to initialise + * @sb: superblock inode belongs to + * @inode: inode to initialise * * These are initializations that need to be done on every inode * allocation as the fields are not initialised by slab allocation. @@ -166,7 +166,7 @@ struct inode *inode_init_always(struct super_block *sb, struct inode *inode) mapping->a_ops = &empty_aops; mapping->host = inode; mapping->flags = 0; - mapping_set_gfp_mask(mapping, GFP_HIGHUSER_PAGECACHE); + mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE); mapping->assoc_mapping = NULL; mapping->backing_dev_info = &default_backing_dev_info; mapping->writeback_index = 0; @@ -576,8 +576,8 @@ __inode_add_to_lists(struct super_block *sb, struct hlist_head *head, /** * inode_add_to_lists - add a new inode to relevant lists - * @sb - superblock inode belongs to. - * @inode - inode to mark in use + * @sb: superblock inode belongs to + * @inode: inode to mark in use * * When an inode is allocated it needs to be accounted for, added to the in use * list, the owning superblock and the inode hash. This needs to be done under @@ -601,7 +601,7 @@ EXPORT_SYMBOL_GPL(inode_add_to_lists); * @sb: superblock * * Allocates a new inode for given superblock. The default gfp_mask - * for allocations related to inode->i_mapping is GFP_HIGHUSER_PAGECACHE. + * for allocations related to inode->i_mapping is GFP_HIGHUSER_MOVABLE. * If HIGHMEM pages are unsuitable or it is known that pages allocated * for the page cache are not reclaimable or migratable, * mapping_set_gfp_mask() must be called with suitable flags on the diff --git a/fs/minix/dir.c b/fs/minix/dir.c index f70433816a38..d4946c4c90e2 100644 --- a/fs/minix/dir.c +++ b/fs/minix/dir.c @@ -280,7 +280,7 @@ int minix_add_link(struct dentry *dentry, struct inode *inode) return -EINVAL; got_it: - pos = (page->index >> PAGE_CACHE_SHIFT) + p - (char*)page_address(page); + pos = page_offset(page) + p - (char *)page_address(page); err = __minix_write_begin(NULL, page->mapping, pos, sbi->s_dirsize, AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); if (err) diff --git a/fs/mpage.c b/fs/mpage.c index 552b80b3facc..16c3ef37eae3 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -241,7 +241,6 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages, first_hole = page_block; page_block++; block_in_file++; - clear_buffer_mapped(map_bh); continue; } @@ -308,7 +307,10 @@ alloc_new: goto alloc_new; } - if (buffer_boundary(map_bh) || (first_hole != blocks_per_page)) + relative_block = block_in_file - *first_logical_block; + nblocks = map_bh->b_size >> blkbits; + if ((buffer_boundary(map_bh) && relative_block == nblocks) || + (first_hole != blocks_per_page)) bio = mpage_bio_submit(READ, bio); else *last_block_in_bio = blocks[blocks_per_page - 1]; diff --git a/fs/ncpfs/getopt.c b/fs/ncpfs/getopt.c index 335b003dddf9..0af3349de851 100644 --- a/fs/ncpfs/getopt.c +++ b/fs/ncpfs/getopt.c @@ -16,7 +16,6 @@ * @opts: an array of &struct option entries controlling parser operations * @optopt: output; will contain the current option * @optarg: output; will contain the value (if one exists) - * @flag: output; may be NULL; should point to a long for or'ing flags * @value: output; may be NULL; will be overwritten with the integer value * of the current argument. * diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 3a8bdd7f5756..94063840832a 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -396,7 +396,9 @@ static int show_smap(struct seq_file *m, void *v) "Private_Clean: %8lu kB\n" "Private_Dirty: %8lu kB\n" "Referenced: %8lu kB\n" - "Swap: %8lu kB\n", + "Swap: %8lu kB\n" + "KernelPageSize: %8lu kB\n" + "MMUPageSize: %8lu kB\n", (vma->vm_end - vma->vm_start) >> 10, mss.resident >> 10, (unsigned long)(mss.pss >> (10 + PSS_SHIFT)), @@ -405,7 +407,9 @@ static int show_smap(struct seq_file *m, void *v) mss.private_clean >> 10, mss.private_dirty >> 10, mss.referenced >> 10, - mss.swap >> 10); + mss.swap >> 10, + vma_kernel_pagesize(vma) >> 10, + vma_mmu_pagesize(vma) >> 10); if (m->count < m->size) /* vma is copied successfully */ m->version = (vma != get_gate_vma(task)) ? vma->vm_start : 0; diff --git a/fs/select.c b/fs/select.c index 87df51eadcf2..08b91beed806 100644 --- a/fs/select.c +++ b/fs/select.c @@ -109,11 +109,11 @@ static void __pollwait(struct file *filp, wait_queue_head_t *wait_address, void poll_initwait(struct poll_wqueues *pwq) { init_poll_funcptr(&pwq->pt, __pollwait); + pwq->polling_task = current; pwq->error = 0; pwq->table = NULL; pwq->inline_index = 0; } - EXPORT_SYMBOL(poll_initwait); static void free_poll_entry(struct poll_table_entry *entry) @@ -142,12 +142,10 @@ void poll_freewait(struct poll_wqueues *pwq) free_page((unsigned long) old); } } - EXPORT_SYMBOL(poll_freewait); -static struct poll_table_entry *poll_get_entry(poll_table *_p) +static struct poll_table_entry *poll_get_entry(struct poll_wqueues *p) { - struct poll_wqueues *p = container_of(_p, struct poll_wqueues, pt); struct poll_table_page *table = p->table; if (p->inline_index < N_INLINE_POLL_ENTRIES) @@ -159,7 +157,6 @@ static struct poll_table_entry *poll_get_entry(poll_table *_p) new_table = (struct poll_table_page *) __get_free_page(GFP_KERNEL); if (!new_table) { p->error = -ENOMEM; - __set_current_state(TASK_RUNNING); return NULL; } new_table->entry = new_table->entries; @@ -171,20 +168,75 @@ static struct poll_table_entry *poll_get_entry(poll_table *_p) return table->entry++; } +static int pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key) +{ + struct poll_wqueues *pwq = wait->private; + DECLARE_WAITQUEUE(dummy_wait, pwq->polling_task); + + /* + * Although this function is called under waitqueue lock, LOCK + * doesn't imply write barrier and the users expect write + * barrier semantics on wakeup functions. The following + * smp_wmb() is equivalent to smp_wmb() in try_to_wake_up() + * and is paired with set_mb() in poll_schedule_timeout. + */ + smp_wmb(); + pwq->triggered = 1; + + /* + * Perform the default wake up operation using a dummy + * waitqueue. + * + * TODO: This is hacky but there currently is no interface to + * pass in @sync. @sync is scheduled to be removed and once + * that happens, wake_up_process() can be used directly. + */ + return default_wake_function(&dummy_wait, mode, sync, key); +} + /* Add a new entry */ static void __pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p) { - struct poll_table_entry *entry = poll_get_entry(p); + struct poll_wqueues *pwq = container_of(p, struct poll_wqueues, pt); + struct poll_table_entry *entry = poll_get_entry(pwq); if (!entry) return; get_file(filp); entry->filp = filp; entry->wait_address = wait_address; - init_waitqueue_entry(&entry->wait, current); + init_waitqueue_func_entry(&entry->wait, pollwake); + entry->wait.private = pwq; add_wait_queue(wait_address, &entry->wait); } +int poll_schedule_timeout(struct poll_wqueues *pwq, int state, + ktime_t *expires, unsigned long slack) +{ + int rc = -EINTR; + + set_current_state(state); + if (!pwq->triggered) + rc = schedule_hrtimeout_range(expires, slack, HRTIMER_MODE_ABS); + __set_current_state(TASK_RUNNING); + + /* + * Prepare for the next iteration. + * + * The following set_mb() serves two purposes. First, it's + * the counterpart rmb of the wmb in pollwake() such that data + * written before wake up is always visible after wake up. + * Second, the full barrier guarantees that triggered clearing + * doesn't pass event check of the next iteration. Note that + * this problem doesn't exist for the first iteration as + * add_wait_queue() has full barrier semantics. + */ + set_mb(pwq->triggered, 0); + + return rc; +} +EXPORT_SYMBOL(poll_schedule_timeout); + /** * poll_select_set_timeout - helper function to setup the timeout value * @to: pointer to timespec variable for the final timeout @@ -340,8 +392,6 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) for (;;) { unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp; - set_current_state(TASK_INTERRUPTIBLE); - inp = fds->in; outp = fds->out; exp = fds->ex; rinp = fds->res_in; routp = fds->res_out; rexp = fds->res_ex; @@ -411,10 +461,10 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) to = &expire; } - if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS)) + if (!poll_schedule_timeout(&table, TASK_INTERRUPTIBLE, + to, slack)) timed_out = 1; } - __set_current_state(TASK_RUNNING); poll_freewait(&table); @@ -666,7 +716,6 @@ static int do_poll(unsigned int nfds, struct poll_list *list, for (;;) { struct poll_list *walk; - set_current_state(TASK_INTERRUPTIBLE); for (walk = list; walk != NULL; walk = walk->next) { struct pollfd * pfd, * pfd_end; @@ -709,10 +758,9 @@ static int do_poll(unsigned int nfds, struct poll_list *list, to = &expire; } - if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS)) + if (!poll_schedule_timeout(wait, TASK_INTERRUPTIBLE, to, slack)) timed_out = 1; } - __set_current_state(TASK_RUNNING); return count; } diff --git a/fs/sync.c b/fs/sync.c index 0921d6d4b5e6..ac02b56548bc 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -295,7 +295,7 @@ int do_sync_mapping_range(struct address_space *mapping, loff_t offset, if (flags & SYNC_FILE_RANGE_WRITE) { ret = __filemap_fdatawrite_range(mapping, offset, endbyte, - WB_SYNC_NONE); + WB_SYNC_ALL); if (ret < 0) goto out; } diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 0d7564b95f8e..89556ee72518 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -432,12 +432,19 @@ static int ubifs_sync_fs(struct super_block *sb, int wait) int i, err; struct ubifs_info *c = sb->s_fs_info; struct writeback_control wbc = { - .sync_mode = wait ? WB_SYNC_ALL : WB_SYNC_HOLD, + .sync_mode = wait ? WB_SYNC_ALL : WB_SYNC_NONE, .range_start = 0, .range_end = LLONG_MAX, .nr_to_write = LONG_MAX, }; + /* + * Note by akpm about WB_SYNC_NONE used above: zero @wait is just an + * advisory thing to help the file system shove lots of data into the + * queues. If some gets missed then it'll be picked up on the second + * '->sync_fs()' call, with non-zero @wait. + */ + if (sb->s_flags & MS_RDONLY) return 0; diff --git a/include/asm-frv/atomic.h b/include/asm-frv/atomic.h index 46d696b331e7..296c35cfb207 100644 --- a/include/asm-frv/atomic.h +++ b/include/asm-frv/atomic.h @@ -35,10 +35,6 @@ #define smp_mb__before_atomic_inc() barrier() #define smp_mb__after_atomic_inc() barrier() -typedef struct { - int counter; -} atomic_t; - #define ATOMIC_INIT(i) { (i) } #define atomic_read(v) ((v)->counter) #define atomic_set(v, i) (((v)->counter) = (i)) diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h index 8af276361bf2..37b82cb96c89 100644 --- a/include/asm-generic/bug.h +++ b/include/asm-generic/bug.h @@ -28,6 +28,17 @@ struct bug_entry { #define BUGFLAG_WARNING (1<<0) #endif /* CONFIG_GENERIC_BUG */ +/* + * Don't use BUG() or BUG_ON() unless there's really no way out; one + * example might be detecting data structure corruption in the middle + * of an operation that can't be backed out of. If the (sub)system + * can somehow continue operating, perhaps with reduced functionality, + * it's probably not BUG-worthy. + * + * If you're tempted to BUG(), think again: is completely giving up + * really the *only* solution? There are usually better options, where + * users don't need to reboot ASAP and can mostly shut down cleanly. + */ #ifndef HAVE_ARCH_BUG #define BUG() do { \ printk("BUG: failure at %s:%d/%s()!\n", __FILE__, __LINE__, __func__); \ @@ -39,6 +50,12 @@ struct bug_entry { #define BUG_ON(condition) do { if (unlikely(condition)) BUG(); } while(0) #endif +/* + * WARN(), WARN_ON(), WARN_ON_ONCE, and so on can be used to report + * significant issues that need prompt attention if they should ever + * appear at runtime. Use the versions with printk format strings + * to provide better diagnostics. + */ #ifndef __WARN #ifndef __ASSEMBLY__ extern void warn_slowpath(const char *file, const int line, diff --git a/include/asm-generic/local.h b/include/asm-generic/local.h index 33d7d04e4119..dbd6150763e9 100644 --- a/include/asm-generic/local.h +++ b/include/asm-generic/local.h @@ -2,7 +2,6 @@ #define _ASM_GENERIC_LOCAL_H #include <linux/percpu.h> -#include <linux/hardirq.h> #include <asm/atomic.h> #include <asm/types.h> diff --git a/include/asm-generic/memory_model.h b/include/asm-generic/memory_model.h index 36fa286adad5..4c8d0afae711 100644 --- a/include/asm-generic/memory_model.h +++ b/include/asm-generic/memory_model.h @@ -69,15 +69,8 @@ }) #endif /* CONFIG_FLATMEM/DISCONTIGMEM/SPARSEMEM */ -#ifdef CONFIG_OUT_OF_LINE_PFN_TO_PAGE -struct page; -/* this is useful when inlined pfn_to_page is too big */ -extern struct page *pfn_to_page(unsigned long pfn); -extern unsigned long page_to_pfn(struct page *page); -#else #define page_to_pfn __page_to_pfn #define pfn_to_page __pfn_to_page -#endif /* CONFIG_OUT_OF_LINE_PFN_TO_PAGE */ #endif /* __ASSEMBLY__ */ diff --git a/include/asm-m32r/atomic.h b/include/asm-m32r/atomic.h index 3a38ffe4a4f4..2eed30f84080 100644 --- a/include/asm-m32r/atomic.h +++ b/include/asm-m32r/atomic.h @@ -9,6 +9,7 @@ * Copyright (C) 2004 Hirokazu Takata <takata at linux-m32r.org> */ +#include <linux/types.h> #include <asm/assembler.h> #include <asm/system.h> @@ -17,13 +18,6 @@ * resource counting etc.. */ -/* - * Make sure gcc doesn't try to be clever and move things around - * on us. We need to use _exactly_ the address the user gave us, - * not some alias that contains the same information. - */ -typedef struct { volatile int counter; } atomic_t; - #define ATOMIC_INIT(i) { (i) } /** diff --git a/include/asm-m68k/atomic.h b/include/asm-m68k/atomic.h index 4915294fea63..eb0ab9d4ee77 100644 --- a/include/asm-m68k/atomic.h +++ b/include/asm-m68k/atomic.h @@ -1,7 +1,7 @@ #ifndef __ARCH_M68K_ATOMIC__ #define __ARCH_M68K_ATOMIC__ - +#include <linux/types.h> #include <asm/system.h> /* @@ -13,7 +13,6 @@ * We do not have SMP m68k systems, so we don't have to deal with that. */ -typedef struct { int counter; } atomic_t; #define ATOMIC_INIT(i) { (i) } #define atomic_read(v) ((v)->counter) diff --git a/include/asm-mn10300/atomic.h b/include/asm-mn10300/atomic.h index 27c9690b9574..bc064825f9b1 100644 --- a/include/asm-mn10300/atomic.h +++ b/include/asm-mn10300/atomic.h @@ -20,15 +20,6 @@ * resource counting etc.. */ -/* - * Make sure gcc doesn't try to be clever and move things around - * on us. We need to use _exactly_ the address the user gave us, - * not some alias that contains the same information. - */ -typedef struct { - int counter; -} atomic_t; - #define ATOMIC_INIT(i) { (i) } #ifdef __KERNEL__ diff --git a/include/asm-xtensa/atomic.h b/include/asm-xtensa/atomic.h index b3b23540f14d..67ad67bed8c1 100644 --- a/include/asm-xtensa/atomic.h +++ b/include/asm-xtensa/atomic.h @@ -14,8 +14,7 @@ #define _XTENSA_ATOMIC_H #include <linux/stringify.h> - -typedef struct { volatile int counter; } atomic_t; +#include <linux/types.h> #ifdef __KERNEL__ #include <asm/processor.h> diff --git a/include/linux/auto_dev-ioctl.h b/include/linux/auto_dev-ioctl.h index f4d05ccd731f..91a773993a5c 100644 --- a/include/linux/auto_dev-ioctl.h +++ b/include/linux/auto_dev-ioctl.h @@ -10,6 +10,7 @@ #ifndef _LINUX_AUTO_DEV_IOCTL_H #define _LINUX_AUTO_DEV_IOCTL_H +#include <linux/string.h> #include <linux/types.h> #define AUTOFS_DEVICE_NAME "autofs" @@ -25,6 +26,60 @@ * An ioctl interface for autofs mount point control. */ +struct args_protover { + __u32 version; +}; + +struct args_protosubver { + __u32 sub_version; +}; + +struct args_openmount { + __u32 devid; +}; + +struct args_ready { + __u32 token; +}; + +struct args_fail { + __u32 token; + __s32 status; +}; + +struct args_setpipefd { + __s32 pipefd; +}; + +struct args_timeout { + __u64 timeout; +}; + +struct args_requester { + __u32 uid; + __u32 gid; +}; + +struct args_expire { + __u32 how; +}; + +struct args_askumount { + __u32 may_umount; +}; + +struct args_ismountpoint { + union { + struct args_in { + __u32 type; + } in; + struct args_out { + __u32 devid; + __u32 magic; + } out; + }; +}; + /* * All the ioctls use this structure. * When sending a path size must account for the total length @@ -39,20 +94,32 @@ struct autofs_dev_ioctl { * including this struct */ __s32 ioctlfd; /* automount command fd */ - __u32 arg1; /* Command parameters */ - __u32 arg2; + /* Command parameters */ + + union { + struct args_protover protover; + struct args_protosubver protosubver; + struct args_openmount openmount; + struct args_ready ready; + struct args_fail fail; + struct args_setpipefd setpipefd; + struct args_timeout timeout; + struct args_requester requester; + struct args_expire expire; + struct args_askumount askumount; + struct args_ismountpoint ismountpoint; + }; char path[0]; }; static inline void init_autofs_dev_ioctl(struct autofs_dev_ioctl *in) { + memset(in, 0, sizeof(struct autofs_dev_ioctl)); in->ver_major = AUTOFS_DEV_IOCTL_VERSION_MAJOR; in->ver_minor = AUTOFS_DEV_IOCTL_VERSION_MINOR; in->size = sizeof(struct autofs_dev_ioctl); in->ioctlfd = -1; - in->arg1 = 0; - in->arg2 = 0; return; } diff --git a/include/linux/auto_fs4.h b/include/linux/auto_fs4.h index 2253716d4b92..55fa478bd639 100644 --- a/include/linux/auto_fs4.h +++ b/include/linux/auto_fs4.h @@ -29,10 +29,64 @@ #define AUTOFS_EXP_IMMEDIATE 1 #define AUTOFS_EXP_LEAVES 2 -#define AUTOFS_TYPE_ANY 0x0000 -#define AUTOFS_TYPE_INDIRECT 0x0001 -#define AUTOFS_TYPE_DIRECT 0x0002 -#define AUTOFS_TYPE_OFFSET 0x0004 +#define AUTOFS_TYPE_ANY 0U +#define AUTOFS_TYPE_INDIRECT 1U +#define AUTOFS_TYPE_DIRECT 2U +#define AUTOFS_TYPE_OFFSET 4U + +static inline void set_autofs_type_indirect(unsigned int *type) +{ + *type = AUTOFS_TYPE_INDIRECT; + return; +} + +static inline unsigned int autofs_type_indirect(unsigned int type) +{ + return (type == AUTOFS_TYPE_INDIRECT); +} + +static inline void set_autofs_type_direct(unsigned int *type) +{ + *type = AUTOFS_TYPE_DIRECT; + return; +} + +static inline unsigned int autofs_type_direct(unsigned int type) +{ + return (type == AUTOFS_TYPE_DIRECT); +} + +static inline void set_autofs_type_offset(unsigned int *type) +{ + *type = AUTOFS_TYPE_OFFSET; + return; +} + +static inline unsigned int autofs_type_offset(unsigned int type) +{ + return (type == AUTOFS_TYPE_OFFSET); +} + +static inline unsigned int autofs_type_trigger(unsigned int type) +{ + return (type == AUTOFS_TYPE_DIRECT || type == AUTOFS_TYPE_OFFSET); +} + +/* + * This isn't really a type as we use it to say "no type set" to + * indicate we want to search for "any" mount in the + * autofs_dev_ioctl_ismountpoint() device ioctl function. + */ +static inline void set_autofs_type_any(unsigned int *type) +{ + *type = AUTOFS_TYPE_ANY; + return; +} + +static inline unsigned int autofs_type_any(unsigned int type) +{ + return (type == AUTOFS_TYPE_ANY); +} /* Daemon notification packet types */ enum autofs_notify { diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 6cbfbe297180..77b4a9e46004 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -18,6 +18,7 @@ struct pt_regs; #define BINPRM_BUF_SIZE 128 #ifdef __KERNEL__ +#include <linux/list.h> #define CORENAME_MAX_SIZE 128 @@ -106,7 +107,7 @@ extern int setup_arg_pages(struct linux_binprm * bprm, extern int bprm_mm_init(struct linux_binprm *bprm); extern int copy_strings_kernel(int argc,char ** argv,struct linux_binprm *bprm); extern void install_exec_creds(struct linux_binprm *bprm); -extern int do_coredump(long signr, int exit_code, struct pt_regs * regs); +extern void do_coredump(long signr, int exit_code, struct pt_regs *regs); extern int set_binfmt(struct linux_binfmt *new); extern void free_bprm(struct linux_binprm *); diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 1164963c3a85..08b78c09b09a 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -329,13 +329,7 @@ struct cgroup_subsys { struct cgroup *cgrp); void (*post_clone)(struct cgroup_subsys *ss, struct cgroup *cgrp); void (*bind)(struct cgroup_subsys *ss, struct cgroup *root); - /* - * This routine is called with the task_lock of mm->owner held - */ - void (*mm_owner_changed)(struct cgroup_subsys *ss, - struct cgroup *old, - struct cgroup *new, - struct task_struct *p); + int subsys_id; int active; int disabled; @@ -400,9 +394,6 @@ void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it); int cgroup_scan_tasks(struct cgroup_scanner *scan); int cgroup_attach_task(struct cgroup *, struct task_struct *); -void cgroup_mm_owner_callbacks(struct task_struct *old, - struct task_struct *new); - #else /* !CONFIG_CGROUPS */ static inline int cgroup_init_early(void) { return 0; } @@ -420,9 +411,6 @@ static inline int cgroupstats_build(struct cgroupstats *stats, return -EINVAL; } -static inline void cgroup_mm_owner_callbacks(struct task_struct *old, - struct task_struct *new) {} - #endif /* !CONFIG_CGROUPS */ #endif /* _LINUX_CGROUP_H */ diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 8e540d32c9fe..51ea2bdea0f9 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -78,6 +78,8 @@ extern int current_cpuset_is_being_rebound(void); extern void rebuild_sched_domains(void); +extern void cpuset_print_task_mems_allowed(struct task_struct *p); + #else /* !CONFIG_CPUSETS */ static inline int cpuset_init_early(void) { return 0; } @@ -159,6 +161,10 @@ static inline void rebuild_sched_domains(void) partition_sched_domains(1, NULL, NULL); } +static inline void cpuset_print_task_mems_allowed(struct task_struct *p) +{ +} + #endif /* !CONFIG_CPUSETS */ #endif /* _LINUX_CPUSET_H */ diff --git a/include/linux/fs.h b/include/linux/fs.h index fb59673c60b1..d7eba77f666e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1133,7 +1133,6 @@ struct super_block { struct rw_semaphore s_umount; struct mutex s_lock; int s_count; - int s_syncing; int s_need_sync_fs; atomic_t s_active; #ifdef CONFIG_SECURITY diff --git a/include/linux/fuse.h b/include/linux/fuse.h index 350fe9767bbc..162e5defe683 100644 --- a/include/linux/fuse.h +++ b/include/linux/fuse.h @@ -1,6 +1,6 @@ /* FUSE: Filesystem in Userspace - Copyright (C) 2001-2006 Miklos Szeredi <miklos@szeredi.hu> + Copyright (C) 2001-2008 Miklos Szeredi <miklos@szeredi.hu> This program can be distributed under the terms of the GNU GPL. See the file COPYING. @@ -20,29 +20,27 @@ * * 7.10 * - add nonseekable open flag + * + * 7.11 + * - add IOCTL message + * - add unsolicited notification support + * - add POLL message and NOTIFY_POLL notification */ #ifndef _LINUX_FUSE_H #define _LINUX_FUSE_H -#include <asm/types.h> -#include <linux/major.h> +#include <linux/types.h> /** Version number of this interface */ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 10 +#define FUSE_KERNEL_MINOR_VERSION 11 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 -/** The major number of the fuse character device */ -#define FUSE_MAJOR MISC_MAJOR - -/** The minor number of the fuse character device */ -#define FUSE_MINOR 229 - /* Make sure all structures are padded to 64bit boundary, so 32bit userspace works under 64bit kernels */ @@ -151,6 +149,28 @@ struct fuse_file_lock { */ #define FUSE_READ_LOCKOWNER (1 << 1) +/** + * Ioctl flags + * + * FUSE_IOCTL_COMPAT: 32bit compat ioctl on 64bit machine + * FUSE_IOCTL_UNRESTRICTED: not restricted to well-formed ioctls, retry allowed + * FUSE_IOCTL_RETRY: retry with new iovecs + * + * FUSE_IOCTL_MAX_IOV: maximum of in_iovecs + out_iovecs + */ +#define FUSE_IOCTL_COMPAT (1 << 0) +#define FUSE_IOCTL_UNRESTRICTED (1 << 1) +#define FUSE_IOCTL_RETRY (1 << 2) + +#define FUSE_IOCTL_MAX_IOV 256 + +/** + * Poll flags + * + * FUSE_POLL_SCHEDULE_NOTIFY: request poll notify + */ +#define FUSE_POLL_SCHEDULE_NOTIFY (1 << 0) + enum fuse_opcode { FUSE_LOOKUP = 1, FUSE_FORGET = 2, /* no reply */ @@ -188,6 +208,13 @@ enum fuse_opcode { FUSE_INTERRUPT = 36, FUSE_BMAP = 37, FUSE_DESTROY = 38, + FUSE_IOCTL = 39, + FUSE_POLL = 40, +}; + +enum fuse_notify_code { + FUSE_NOTIFY_POLL = 1, + FUSE_NOTIFY_CODE_MAX, }; /* The read buffer is required to be at least 8k, but may be much larger */ @@ -388,6 +415,38 @@ struct fuse_bmap_out { __u64 block; }; +struct fuse_ioctl_in { + __u64 fh; + __u32 flags; + __u32 cmd; + __u64 arg; + __u32 in_size; + __u32 out_size; +}; + +struct fuse_ioctl_out { + __s32 result; + __u32 flags; + __u32 in_iovs; + __u32 out_iovs; +}; + +struct fuse_poll_in { + __u64 fh; + __u64 kh; + __u32 flags; + __u32 padding; +}; + +struct fuse_poll_out { + __u32 revents; + __u32 padding; +}; + +struct fuse_notify_poll_wakeup_out { + __u64 kh; +}; + struct fuse_in_header { __u32 len; __u32 opcode; diff --git a/include/linux/gfp.h b/include/linux/gfp.h index e8003afeffba..dd20cd78faa8 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -69,12 +69,6 @@ struct vm_area_struct; #define GFP_HIGHUSER_MOVABLE (__GFP_WAIT | __GFP_IO | __GFP_FS | \ __GFP_HARDWALL | __GFP_HIGHMEM | \ __GFP_MOVABLE) -#define GFP_NOFS_PAGECACHE (__GFP_WAIT | __GFP_IO | __GFP_MOVABLE) -#define GFP_USER_PAGECACHE (__GFP_WAIT | __GFP_IO | __GFP_FS | \ - __GFP_HARDWALL | __GFP_MOVABLE) -#define GFP_HIGHUSER_PAGECACHE (__GFP_WAIT | __GFP_IO | __GFP_FS | \ - __GFP_HARDWALL | __GFP_HIGHMEM | \ - __GFP_MOVABLE) #ifdef CONFIG_NUMA #define GFP_THISNODE (__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index e1c8afc002c0..f1d2fba19ea0 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -233,6 +233,10 @@ static inline unsigned long huge_page_size(struct hstate *h) return (unsigned long)PAGE_SIZE << h->order; } +extern unsigned long vma_kernel_pagesize(struct vm_area_struct *vma); + +extern unsigned long vma_mmu_pagesize(struct vm_area_struct *vma); + static inline unsigned long huge_page_mask(struct hstate *h) { return h->mask; @@ -273,6 +277,8 @@ struct hstate {}; #define hstate_inode(i) NULL #define huge_page_size(h) PAGE_SIZE #define huge_page_mask(h) PAGE_MASK +#define vma_kernel_pagesize(v) PAGE_SIZE +#define vma_mmu_pagesize(v) PAGE_SIZE #define huge_page_order(h) 0 #define huge_page_shift(h) PAGE_SHIFT static inline unsigned int pages_per_huge_page(struct hstate *h) diff --git a/include/linux/i2c/twl4030.h b/include/linux/i2c/twl4030.h index a8f84c01f82e..8137f660a5cc 100644 --- a/include/linux/i2c/twl4030.h +++ b/include/linux/i2c/twl4030.h @@ -234,6 +234,9 @@ struct twl4030_gpio_platform_data { /* gpio-n should control VMMC(n+1) if BIT(n) in mmc_cd is set */ u8 mmc_cd; + /* if BIT(N) is set, or VMMC(n+1) is linked, debounce GPIO-N */ + u32 debounce; + /* For gpio-N, bit (1 << N) in "pullups" is set if that pullup * should be enabled. Else, if that bit is set in "pulldowns", * that pulldown is enabled. Don't waste power by letting any @@ -307,12 +310,6 @@ int twl4030_sih_setup(int module); #define TWL4030_VAUX3_DEV_GRP 0x1F #define TWL4030_VAUX3_DEDICATED 0x22 -/* - * Exported TWL4030 GPIO APIs - * - * WARNING -- use standard GPIO and IRQ calls instead; these will vanish. - */ -int twl4030_set_gpio_debounce(int gpio, int enable); #if defined(CONFIG_TWL4030_BCI_BATTERY) || \ defined(CONFIG_TWL4030_BCI_BATTERY_MODULE) diff --git a/include/linux/ide.h b/include/linux/ide.h index db5ef8ae1ab9..3644f6323384 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -32,18 +32,14 @@ # define SUPPORT_VLB_SYNC 1 #endif -typedef unsigned char byte; /* used everywhere */ - /* * Probably not wise to fiddle with these */ +#define IDE_DEFAULT_MAX_FAILURES 1 #define ERROR_MAX 8 /* Max read/write errors per sector */ #define ERROR_RESET 3 /* Reset controller every 4th retry */ #define ERROR_RECAL 1 /* Recalibrate every 2nd retry */ -#define HWIF(drive) ((ide_hwif_t *)((drive)->hwif)) -#define HWGROUP(drive) ((ide_hwgroup_t *)(HWIF(drive)->hwgroup)) - /* * Definitions for accessing IDE controller registers */ @@ -185,9 +181,6 @@ typedef struct hw_regs_s { unsigned long config; } hw_regs_t; -void ide_init_port_data(struct hwif_s *, unsigned int); -void ide_init_port_hw(struct hwif_s *, hw_regs_t *); - static inline void ide_std_init_ports(hw_regs_t *hw, unsigned long io_addr, unsigned long ctl_addr) @@ -433,18 +426,14 @@ struct ide_atapi_pc { struct idetape_bh *bh; char *b_data; - /* idescsi only for now */ struct scatterlist *sg; unsigned int sg_cnt; - struct scsi_cmnd *scsi_cmd; - void (*done) (struct scsi_cmnd *); - unsigned long timeout; }; struct ide_devset; -struct ide_driver_s; +struct ide_driver; #ifdef CONFIG_BLK_DEV_IDEACPI struct ide_acpi_drive_link; @@ -588,7 +577,6 @@ struct ide_drive_s { struct request_queue *queue; /* request queue */ struct request *rq; /* current request */ - struct ide_drive_s *next; /* circular list of hwgroup drives */ void *driver_data; /* extra driver data */ u16 *id; /* identification info */ #ifdef CONFIG_IDE_PROC_FS @@ -662,6 +650,8 @@ struct ide_drive_s { int (*pc_io_buffers)(struct ide_drive_s *, struct ide_atapi_pc *, unsigned int, int); + ide_startstop_t (*irq_handler)(struct ide_drive_s *); + unsigned long atapi_flags; struct ide_atapi_pc request_sense_pc; @@ -684,7 +674,6 @@ struct ide_tp_ops { void (*exec_command)(struct hwif_s *, u8); u8 (*read_status)(struct hwif_s *); u8 (*read_altstatus)(struct hwif_s *); - u8 (*read_sff_dma_status)(struct hwif_s *); void (*set_irq)(struct hwif_s *, int); @@ -745,14 +734,17 @@ struct ide_dma_ops { int (*dma_test_irq)(struct ide_drive_s *); void (*dma_lost_irq)(struct ide_drive_s *); void (*dma_timeout)(struct ide_drive_s *); + /* + * The following method is optional and only required to be + * implemented for the SFF-8038i compatible controllers. + */ + u8 (*dma_sff_read_status)(struct hwif_s *); }; struct ide_host; typedef struct hwif_s { - struct hwif_s *next; /* for linked-list in ide_hwgroup_t */ struct hwif_s *mate; /* other hwif from same PCI chip */ - struct hwgroup_s *hwgroup; /* actually (ide_hwgroup_t *) */ struct proc_dir_entry *proc; /* /proc/ide/ directory entry */ struct ide_host *host; @@ -763,7 +755,7 @@ typedef struct hwif_s { unsigned long sata_scr[SATA_NR_PORTS]; - ide_drive_t drives[MAX_DRIVES]; /* drive info */ + ide_drive_t *devices[MAX_DRIVES + 1]; u8 major; /* our major number */ u8 index; /* 0 for ide0; 1 for ide1; ... */ @@ -829,7 +821,7 @@ typedef struct hwif_s { unsigned extra_ports; /* number of extra dma ports */ unsigned present : 1; /* this interface exists */ - unsigned sg_mapped : 1; /* sg_table and sg_nents are ready */ + unsigned busy : 1; /* serializes devices on a port */ struct device gendev; struct device *portdev; @@ -841,19 +833,49 @@ typedef struct hwif_s { #ifdef CONFIG_BLK_DEV_IDEACPI struct ide_acpi_hwif_link *acpidata; #endif + + /* IRQ handler, if active */ + ide_startstop_t (*handler)(ide_drive_t *); + + /* BOOL: polling active & poll_timeout field valid */ + unsigned int polling : 1; + + /* current drive */ + ide_drive_t *cur_dev; + + /* current request */ + struct request *rq; + + /* failsafe timer */ + struct timer_list timer; + /* timeout value during long polls */ + unsigned long poll_timeout; + /* queried upon timeouts */ + int (*expiry)(ide_drive_t *); + + int req_gen; + int req_gen_timer; + + spinlock_t lock; } ____cacheline_internodealigned_in_smp ide_hwif_t; #define MAX_HOST_PORTS 4 struct ide_host { - ide_hwif_t *ports[MAX_HOST_PORTS]; + ide_hwif_t *ports[MAX_HOST_PORTS + 1]; unsigned int n_ports; struct device *dev[2]; unsigned int (*init_chipset)(struct pci_dev *); unsigned long host_flags; void *host_priv; + ide_hwif_t *cur_port; /* for hosts requiring serialization */ + + /* used for hosts requiring serialization */ + volatile long host_busy; }; +#define IDE_HOST_BUSY 0 + /* * internal ide interrupt handler type */ @@ -863,38 +885,6 @@ typedef int (ide_expiry_t)(ide_drive_t *); /* used by ide-cd, ide-floppy, etc. */ typedef void (xfer_func_t)(ide_drive_t *, struct request *rq, void *, unsigned); -typedef struct hwgroup_s { - /* irq handler, if active */ - ide_startstop_t (*handler)(ide_drive_t *); - - /* BOOL: protects all fields below */ - volatile int busy; - /* BOOL: polling active & poll_timeout field valid */ - unsigned int polling : 1; - - /* current drive */ - ide_drive_t *drive; - /* ptr to current hwif in linked-list */ - ide_hwif_t *hwif; - - /* current request */ - struct request *rq; - - /* failsafe timer */ - struct timer_list timer; - /* timeout value during long polls */ - unsigned long poll_timeout; - /* queried upon timeouts */ - int (*expiry)(ide_drive_t *); - - int req_gen; - int req_gen_timer; - - spinlock_t lock; -} ide_hwgroup_t; - -typedef struct ide_driver_s ide_driver_t; - extern struct mutex ide_setting_mtx; /* @@ -1020,8 +1010,8 @@ void ide_proc_register_port(ide_hwif_t *); void ide_proc_port_register_devices(ide_hwif_t *); void ide_proc_unregister_device(ide_drive_t *); void ide_proc_unregister_port(ide_hwif_t *); -void ide_proc_register_driver(ide_drive_t *, ide_driver_t *); -void ide_proc_unregister_driver(ide_drive_t *, ide_driver_t *); +void ide_proc_register_driver(ide_drive_t *, struct ide_driver *); +void ide_proc_unregister_driver(ide_drive_t *, struct ide_driver *); read_proc_t proc_ide_read_capacity; read_proc_t proc_ide_read_geometry; @@ -1048,8 +1038,10 @@ static inline void ide_proc_register_port(ide_hwif_t *hwif) { ; } static inline void ide_proc_port_register_devices(ide_hwif_t *hwif) { ; } static inline void ide_proc_unregister_device(ide_drive_t *drive) { ; } static inline void ide_proc_unregister_port(ide_hwif_t *hwif) { ; } -static inline void ide_proc_register_driver(ide_drive_t *drive, ide_driver_t *driver) { ; } -static inline void ide_proc_unregister_driver(ide_drive_t *drive, ide_driver_t *driver) { ; } +static inline void ide_proc_register_driver(ide_drive_t *drive, + struct ide_driver *driver) { ; } +static inline void ide_proc_unregister_driver(ide_drive_t *drive, + struct ide_driver *driver) { ; } #define PROC_IDE_READ_RETURN(page,start,off,count,eof,len) return 0; #endif @@ -1118,11 +1110,10 @@ void ide_check_pm_state(ide_drive_t *, struct request *); * The gendriver.owner field should be set to the module owner of this driver. * The gendriver.name field should be set to the name of this driver */ -struct ide_driver_s { +struct ide_driver { const char *version; ide_startstop_t (*do_request)(ide_drive_t *, struct request *, sector_t); int (*end_request)(ide_drive_t *, int, int); - ide_startstop_t (*error)(ide_drive_t *, struct request *rq, u8, u8); struct device_driver gen_driver; int (*probe)(ide_drive_t *); void (*remove)(ide_drive_t *); @@ -1134,7 +1125,7 @@ struct ide_driver_s { #endif }; -#define to_ide_driver(drv) container_of(drv, ide_driver_t, gen_driver) +#define to_ide_driver(drv) container_of(drv, struct ide_driver, gen_driver) int ide_device_get(ide_drive_t *); void ide_device_put(ide_drive_t *); @@ -1166,9 +1157,7 @@ void ide_execute_pkt_cmd(ide_drive_t *); void ide_pad_transfer(ide_drive_t *, int, int); -ide_startstop_t __ide_error(ide_drive_t *, struct request *, u8, u8); - -ide_startstop_t ide_error (ide_drive_t *drive, const char *msg, byte stat); +ide_startstop_t ide_error(ide_drive_t *, const char *, u8); void ide_fix_driveid(u16 *); @@ -1192,7 +1181,6 @@ void ide_tf_dump(const char *, struct ide_taskfile *); void ide_exec_command(ide_hwif_t *, u8); u8 ide_read_status(ide_hwif_t *); u8 ide_read_altstatus(ide_hwif_t *); -u8 ide_read_sff_dma_status(ide_hwif_t *); void ide_set_irq(ide_hwif_t *, int); @@ -1272,26 +1260,6 @@ extern void ide_stall_queue(ide_drive_t *drive, unsigned long timeout); extern void ide_timer_expiry(unsigned long); extern irqreturn_t ide_intr(int irq, void *dev_id); - -static inline int ide_lock_hwgroup(ide_hwgroup_t *hwgroup) -{ - if (hwgroup->busy) - return 1; - - hwgroup->busy = 1; - /* for atari only */ - ide_get_lock(ide_intr, hwgroup); - - return 0; -} - -static inline void ide_unlock_hwgroup(ide_hwgroup_t *hwgroup) -{ - /* for atari only */ - ide_release_lock(); - hwgroup->busy = 0; -} - extern void do_ide_request(struct request_queue *); void ide_init_disk(struct gendisk *, ide_drive_t *); @@ -1327,11 +1295,11 @@ static inline int ide_hwif_setup_dma(ide_hwif_t *hwif, } #endif -typedef struct ide_pci_enablebit_s { +struct ide_pci_enablebit { u8 reg; /* byte pci reg holding the enable-bit */ u8 mask; /* mask to isolate the enable-bit */ u8 val; /* value of masked reg when "enabled" */ -} ide_pci_enablebit_t; +}; enum { /* Uses ISA control ports not PCI ones. */ @@ -1420,7 +1388,8 @@ struct ide_port_info { const struct ide_port_ops *port_ops; const struct ide_dma_ops *dma_ops; - ide_pci_enablebit_t enablebits[2]; + struct ide_pci_enablebit enablebits[2]; + hwif_chipset_t chipset; u16 max_sectors; /* if < than the default one */ @@ -1492,6 +1461,7 @@ void ide_dma_exec_cmd(ide_drive_t *, u8); extern void ide_dma_start(ide_drive_t *); int ide_dma_end(ide_drive_t *); int ide_dma_test_irq(ide_drive_t *); +u8 ide_dma_sff_read_status(ide_hwif_t *); extern const struct ide_dma_ops sff_dma_ops; #else static inline int config_drive_for_dma(ide_drive_t *drive) { return 0; } @@ -1529,9 +1499,6 @@ static inline void ide_acpi_port_init_devices(ide_hwif_t *hwif) { ; } static inline void ide_acpi_set_state(ide_hwif_t *hwif, int on) {} #endif -void ide_remove_port_from_hwgroup(ide_hwif_t *); -void ide_unregister(ide_hwif_t *); - void ide_register_region(struct gendisk *); void ide_unregister_region(struct gendisk *); @@ -1616,23 +1583,6 @@ static inline void ide_set_max_pio(ide_drive_t *drive) ide_set_pio(drive, 255); } -extern spinlock_t ide_lock; -extern struct mutex ide_cfg_mtx; -/* - * Structure locking: - * - * ide_cfg_mtx and hwgroup->lock together protect changes to - * ide_hwif_t->next - * ide_drive_t->next - * - * ide_hwgroup_t->busy: hwgroup->lock - * ide_hwgroup_t->hwif: hwgroup->lock - * ide_hwif_t->{hwgroup,mate}: constant, no locking - * ide_drive_t->hwif: constant, no locking - */ - -#define local_irq_set(flags) do { local_save_flags((flags)); local_irq_enable_in_hardirq(); } while (0) - char *ide_media_string(ide_drive_t *); extern struct device_attribute ide_dev_attrs[]; @@ -1651,8 +1601,15 @@ static inline int hwif_to_node(ide_hwif_t *hwif) static inline ide_drive_t *ide_get_pair_dev(ide_drive_t *drive) { - ide_drive_t *peer = &drive->hwif->drives[(drive->dn ^ 1) & 1]; + ide_drive_t *peer = drive->hwif->devices[(drive->dn ^ 1) & 1]; return (peer->dev_flags & IDE_DFLAG_PRESENT) ? peer : NULL; } + +#define ide_port_for_each_dev(i, dev, port) \ + for ((i) = 0; ((dev) = (port)->devices[i]) || (i) < MAX_DRIVES; (i)++) + +#define ide_host_for_each_port(i, port, host) \ + for ((i) = 0; ((port) = (host)->ports[i]) || (i) < MAX_HOST_PORTS; (i)++) + #endif /* _IDE_H */ diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 0702c4d7bdf0..af886b26c9d1 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -14,7 +14,6 @@ #include <linux/irqflags.h> #include <linux/smp.h> #include <linux/percpu.h> -#include <linux/irqnr.h> #include <asm/atomic.h> #include <asm/ptrace.h> diff --git a/include/linux/kernel.h b/include/linux/kernel.h index d242fe1381fd..6b8e2027165e 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -48,6 +48,12 @@ extern const char linux_proc_banner[]; #define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f)) #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) #define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y)) +#define DIV_ROUND_CLOSEST(x, divisor)( \ +{ \ + typeof(divisor) __divisor = divisor; \ + (((x) + ((__divisor) / 2)) / (__divisor)); \ +} \ +) #define _RET_IP_ (unsigned long)__builtin_return_address(0) #define _THIS_IP_ ({ __label__ __here; __here: (unsigned long)&&__here; }) diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 497b1d1f7a05..d6ea19e314bb 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -69,9 +69,6 @@ struct kprobe { /* list of kprobes for multi-handler support */ struct list_head list; - /* Indicates that the corresponding module has been ref counted */ - unsigned int mod_refcounted; - /*count the number of times this probe was temporarily disarmed */ unsigned long nmissed; @@ -103,8 +100,19 @@ struct kprobe { /* copy of the original instruction */ struct arch_specific_insn ainsn; + + /* Indicates various status flags. Protected by kprobe_mutex. */ + u32 flags; }; +/* Kprobe status flags */ +#define KPROBE_FLAG_GONE 1 /* breakpoint has already gone */ + +static inline int kprobe_gone(struct kprobe *p) +{ + return p->flags & KPROBE_FLAG_GONE; +} + /* * Special probe type that uses setjmp-longjmp type tricks to resume * execution at a specified entry with a matching prototype corresponding @@ -201,7 +209,6 @@ static inline int init_test_probes(void) } #endif /* CONFIG_KPROBES_SANITY_TEST */ -extern struct mutex kprobe_mutex; extern int arch_prepare_kprobe(struct kprobe *p); extern void arch_arm_kprobe(struct kprobe *p); extern void arch_disarm_kprobe(struct kprobe *p); diff --git a/include/linux/memory.h b/include/linux/memory.h index 36c82c9e6ea7..3fdc10806d31 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -79,14 +79,14 @@ static inline int memory_notify(unsigned long val, void *v) #else extern int register_memory_notifier(struct notifier_block *nb); extern void unregister_memory_notifier(struct notifier_block *nb); -extern int register_new_memory(struct mem_section *); +extern int register_new_memory(int, struct mem_section *); extern int unregister_memory_section(struct mem_section *); extern int memory_dev_init(void); extern int remove_memory_block(unsigned long, struct mem_section *, int); extern int memory_notify(unsigned long val, void *v); +extern struct memory_block *find_memory_block(struct mem_section *); #define CONFIG_MEM_BLOCK_SIZE (PAGES_PER_SECTION<<PAGE_SHIFT) - - +enum mem_add_context { BOOT, HOTPLUG }; #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */ #ifdef CONFIG_MEMORY_HOTPLUG diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 763ba81fc0f0..d95f72e79b82 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -72,7 +72,7 @@ extern void __offline_isolated_pages(unsigned long, unsigned long); extern int offline_pages(unsigned long, unsigned long, unsigned long); /* reasonably generic interface to expand the physical pages in a zone */ -extern int __add_pages(struct zone *zone, unsigned long start_pfn, +extern int __add_pages(int nid, struct zone *zone, unsigned long start_pfn, unsigned long nr_pages); extern int __remove_pages(struct zone *zone, unsigned long start_pfn, unsigned long nr_pages); diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 3f34005068d4..527602cdea1c 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -7,6 +7,8 @@ typedef struct page *new_page_t(struct page *, unsigned long private, int **); #ifdef CONFIG_MIGRATION +#define PAGE_MIGRATION 1 + extern int putback_lru_pages(struct list_head *l); extern int migrate_page(struct address_space *, struct page *, struct page *); @@ -20,6 +22,8 @@ extern int migrate_vmas(struct mm_struct *mm, const nodemask_t *from, const nodemask_t *to, unsigned long flags); #else +#define PAGE_MIGRATION 0 + static inline int putback_lru_pages(struct list_head *l) { return 0; } static inline int migrate_pages(struct list_head *l, new_page_t x, unsigned long private) { return -ENOSYS; } diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h index 26433ec520b3..a820f816a49e 100644 --- a/include/linux/miscdevice.h +++ b/include/linux/miscdevice.h @@ -3,33 +3,33 @@ #include <linux/module.h> #include <linux/major.h> -#define PSMOUSE_MINOR 1 -#define MS_BUSMOUSE_MINOR 2 -#define ATIXL_BUSMOUSE_MINOR 3 -/*#define AMIGAMOUSE_MINOR 4 FIXME OBSOLETE */ -#define ATARIMOUSE_MINOR 5 -#define SUN_MOUSE_MINOR 6 -#define APOLLO_MOUSE_MINOR 7 -#define PC110PAD_MINOR 9 -/*#define ADB_MOUSE_MINOR 10 FIXME OBSOLETE */ +#define PSMOUSE_MINOR 1 +#define MS_BUSMOUSE_MINOR 2 +#define ATIXL_BUSMOUSE_MINOR 3 +/*#define AMIGAMOUSE_MINOR 4 FIXME OBSOLETE */ +#define ATARIMOUSE_MINOR 5 +#define SUN_MOUSE_MINOR 6 +#define APOLLO_MOUSE_MINOR 7 +#define PC110PAD_MINOR 9 +/*#define ADB_MOUSE_MINOR 10 FIXME OBSOLETE */ #define WATCHDOG_MINOR 130 /* Watchdog timer */ #define TEMP_MINOR 131 /* Temperature Sensor */ -#define RTC_MINOR 135 +#define RTC_MINOR 135 #define EFI_RTC_MINOR 136 /* EFI Time services */ -#define SUN_OPENPROM_MINOR 139 +#define SUN_OPENPROM_MINOR 139 #define DMAPI_MINOR 140 /* DMAPI */ -#define NVRAM_MINOR 144 -#define SGI_MMTIMER 153 +#define NVRAM_MINOR 144 +#define SGI_MMTIMER 153 #define STORE_QUEUE_MINOR 155 -#define I2O_MINOR 166 +#define I2O_MINOR 166 #define MICROCODE_MINOR 184 -#define MWAVE_MINOR 219 /* ACP/Mwave Modem */ -#define MPT_MINOR 220 -#define MISC_DYNAMIC_MINOR 255 - -#define TUN_MINOR 200 -#define HPET_MINOR 228 -#define KVM_MINOR 232 +#define TUN_MINOR 200 +#define MWAVE_MINOR 219 /* ACP/Mwave Modem */ +#define MPT_MINOR 220 +#define HPET_MINOR 228 +#define FUSE_MINOR 229 +#define KVM_MINOR 232 +#define MISC_DYNAMIC_MINOR 255 struct device; diff --git a/include/linux/mm.h b/include/linux/mm.h index aaa8b843be28..4a3d28c86443 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -717,6 +717,11 @@ static inline int page_mapped(struct page *page) #define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS) +/* + * Can be called by the pagefault handler when it gets a VM_FAULT_OOM. + */ +extern void pagefault_out_of_memory(void); + #define offset_in_page(p) ((unsigned long)(p) & ~PAGE_MASK) extern void show_free_areas(void); diff --git a/include/linux/module.h b/include/linux/module.h index 3bfed013350b..4f7ea12463d3 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -294,9 +294,6 @@ struct module /* The size of the executable code in each section. */ unsigned int init_text_size, core_text_size; - /* The handle returned from unwind_add_table. */ - void *unwind_info; - /* Arch-specific module values */ struct mod_arch_specific arch; @@ -368,6 +365,18 @@ struct module *module_text_address(unsigned long addr); struct module *__module_text_address(unsigned long addr); int is_module_address(unsigned long addr); +static inline int within_module_core(unsigned long addr, struct module *mod) +{ + return (unsigned long)mod->module_core <= addr && + addr < (unsigned long)mod->module_core + mod->core_size; +} + +static inline int within_module_init(unsigned long addr, struct module *mod) +{ + return (unsigned long)mod->module_init <= addr && + addr < (unsigned long)mod->module_init + mod->init_size; +} + /* Returns 0 and fills in value, defined and namebuf, or -ERANGE if symnum out of range. */ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, diff --git a/include/linux/node.h b/include/linux/node.h index bc001bc225c3..681a697b9a86 100644 --- a/include/linux/node.h +++ b/include/linux/node.h @@ -26,6 +26,7 @@ struct node { struct sys_device sysdev; }; +struct memory_block; extern struct node node_devices[]; extern int register_node(struct node *, int, struct node *); @@ -35,6 +36,9 @@ extern int register_one_node(int nid); extern void unregister_one_node(int nid); extern int register_cpu_under_node(unsigned int cpu, unsigned int nid); extern int unregister_cpu_under_node(unsigned int cpu, unsigned int nid); +extern int register_mem_sect_under_node(struct memory_block *mem_blk, + int nid); +extern int unregister_mem_sect_under_nodes(struct memory_block *mem_blk); #else static inline int register_one_node(int nid) { @@ -52,6 +56,15 @@ static inline int unregister_cpu_under_node(unsigned int cpu, unsigned int nid) { return 0; } +static inline int register_mem_sect_under_node(struct memory_block *mem_blk, + int nid) +{ + return 0; +} +static inline int unregister_mem_sect_under_nodes(struct memory_block *mem_blk) +{ + return 0; +} #endif #define to_node(sys_device) container_of(sys_device, struct node, sysdev) diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index b12f93a3c345..219a523ecdb0 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -228,6 +228,7 @@ PAGEFLAG_FALSE(HighMem) PAGEFLAG(SwapCache, swapcache) #else PAGEFLAG_FALSE(SwapCache) + SETPAGEFLAG_NOOP(SwapCache) CLEARPAGEFLAG_NOOP(SwapCache) #endif #ifdef CONFIG_UNEVICTABLE_LRU @@ -372,31 +373,22 @@ static inline void __ClearPageTail(struct page *page) #define __PG_MLOCKED 0 #endif -#define PAGE_FLAGS (1 << PG_lru | 1 << PG_private | 1 << PG_locked | \ - 1 << PG_buddy | 1 << PG_writeback | \ - 1 << PG_slab | 1 << PG_swapcache | 1 << PG_active | \ - __PG_UNEVICTABLE | __PG_MLOCKED) - -/* - * Flags checked in bad_page(). Pages on the free list should not have - * these flags set. It they are, there is a problem. - */ -#define PAGE_FLAGS_CLEAR_WHEN_BAD (PAGE_FLAGS | \ - 1 << PG_reclaim | 1 << PG_dirty | 1 << PG_swapbacked) - /* * Flags checked when a page is freed. Pages being freed should not have * these flags set. It they are, there is a problem. */ -#define PAGE_FLAGS_CHECK_AT_FREE (PAGE_FLAGS | 1 << PG_reserved) +#define PAGE_FLAGS_CHECK_AT_FREE \ + (1 << PG_lru | 1 << PG_private | 1 << PG_locked | \ + 1 << PG_buddy | 1 << PG_writeback | 1 << PG_reserved | \ + 1 << PG_slab | 1 << PG_swapcache | 1 << PG_active | \ + __PG_UNEVICTABLE | __PG_MLOCKED) /* * Flags checked when a page is prepped for return by the page allocator. - * Pages being prepped should not have these flags set. It they are, there - * is a problem. + * Pages being prepped should not have any flags set. It they are set, + * there has been a kernel bug or struct page corruption. */ -#define PAGE_FLAGS_CHECK_AT_PREP (PAGE_FLAGS | \ - 1 << PG_reserved | 1 << PG_dirty | 1 << PG_swapbacked) +#define PAGE_FLAGS_CHECK_AT_PREP ((1 << NR_PAGEFLAGS) - 1) #endif /* !__GENERATING_BOUNDS_H */ #endif /* PAGE_FLAGS_H */ diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h index e90a2cb02915..7b2886fa7fdc 100644 --- a/include/linux/pagevec.h +++ b/include/linux/pagevec.h @@ -21,7 +21,6 @@ struct pagevec { }; void __pagevec_release(struct pagevec *pvec); -void __pagevec_release_nonlru(struct pagevec *pvec); void __pagevec_free(struct pagevec *pvec); void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru); void pagevec_strip(struct pagevec *pvec); @@ -69,12 +68,6 @@ static inline void pagevec_release(struct pagevec *pvec) __pagevec_release(pvec); } -static inline void pagevec_release_nonlru(struct pagevec *pvec) -{ - if (pagevec_count(pvec)) - __pagevec_release_nonlru(pvec); -} - static inline void pagevec_free(struct pagevec *pvec) { if (pagevec_count(pvec)) diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 218c73b1e6d4..d543365518ab 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1658,6 +1658,7 @@ #define PCI_VENDOR_ID_ROCKWELL 0x127A #define PCI_VENDOR_ID_ITE 0x1283 +#define PCI_DEVICE_ID_ITE_8172 0x8172 #define PCI_DEVICE_ID_ITE_8211 0x8211 #define PCI_DEVICE_ID_ITE_8212 0x8212 #define PCI_DEVICE_ID_ITE_8213 0x8213 diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h index 9007ccdfc112..99de7a31bab8 100644 --- a/include/linux/percpu_counter.h +++ b/include/linux/percpu_counter.h @@ -24,11 +24,7 @@ struct percpu_counter { s32 *counters; }; -#if NR_CPUS >= 16 -#define FBC_BATCH (NR_CPUS*2) -#else -#define FBC_BATCH (NR_CPUS*4) -#endif +extern int percpu_counter_batch; int percpu_counter_init(struct percpu_counter *fbc, s64 amount); int percpu_counter_init_irq(struct percpu_counter *fbc, s64 amount); @@ -39,7 +35,7 @@ s64 __percpu_counter_sum(struct percpu_counter *fbc); static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount) { - __percpu_counter_add(fbc, amount, FBC_BATCH); + __percpu_counter_add(fbc, amount, percpu_counter_batch); } static inline s64 percpu_counter_sum_positive(struct percpu_counter *fbc) diff --git a/include/linux/poll.h b/include/linux/poll.h index badd98ab06f6..8c24ef8d9976 100644 --- a/include/linux/poll.h +++ b/include/linux/poll.h @@ -46,9 +46,9 @@ static inline void init_poll_funcptr(poll_table *pt, poll_queue_proc qproc) } struct poll_table_entry { - struct file * filp; + struct file *filp; wait_queue_t wait; - wait_queue_head_t * wait_address; + wait_queue_head_t *wait_address; }; /* @@ -56,7 +56,9 @@ struct poll_table_entry { */ struct poll_wqueues { poll_table pt; - struct poll_table_page * table; + struct poll_table_page *table; + struct task_struct *polling_task; + int triggered; int error; int inline_index; struct poll_table_entry inline_entries[N_INLINE_POLL_ENTRIES]; @@ -64,6 +66,13 @@ struct poll_wqueues { extern void poll_initwait(struct poll_wqueues *pwq); extern void poll_freewait(struct poll_wqueues *pwq); +extern int poll_schedule_timeout(struct poll_wqueues *pwq, int state, + ktime_t *expires, unsigned long slack); + +static inline int poll_schedule(struct poll_wqueues *pwq, int state) +{ + return poll_schedule_timeout(pwq, state, NULL, 0); +} /* * Scaleable version of the fd_set. diff --git a/include/linux/rio_drv.h b/include/linux/rio_drv.h index 32c0547ffafc..c93a58a40033 100644 --- a/include/linux/rio_drv.h +++ b/include/linux/rio_drv.h @@ -391,7 +391,6 @@ static inline int rio_add_inb_buffer(struct rio_mport *mport, int mbox, * rio_get_inb_message - Get A RIO message from an inbound mailbox queue * @mport: Master port containing the inbound mailbox * @mbox: The inbound mailbox number - * @buffer: Pointer to the message buffer * * Get a RIO message from an inbound mailbox queue. Returns 0 on success. */ diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 89f0564b10c8..b35bc0e19cd9 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -63,16 +63,13 @@ void anon_vma_unlink(struct vm_area_struct *); void anon_vma_link(struct vm_area_struct *); void __anon_vma_link(struct vm_area_struct *); -extern struct anon_vma *page_lock_anon_vma(struct page *page); -extern void page_unlock_anon_vma(struct anon_vma *anon_vma); - /* * rmap interfaces called when adding or removing pte of page */ void page_add_anon_rmap(struct page *, struct vm_area_struct *, unsigned long); void page_add_new_anon_rmap(struct page *, struct vm_area_struct *, unsigned long); void page_add_file_rmap(struct page *); -void page_remove_rmap(struct page *, struct vm_area_struct *); +void page_remove_rmap(struct page *); #ifdef CONFIG_DEBUG_VM void page_dup_rmap(struct page *page, struct vm_area_struct *vma, unsigned long address); diff --git a/include/linux/sched.h b/include/linux/sched.h index 38a3f4b15394..ea415136ac9e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -386,6 +386,9 @@ extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long); (mm)->hiwater_vm = (mm)->total_vm; \ } while (0) +#define get_mm_hiwater_rss(mm) max((mm)->hiwater_rss, get_mm_rss(mm)) +#define get_mm_hiwater_vm(mm) max((mm)->hiwater_vm, (mm)->total_vm) + extern void set_dumpable(struct mm_struct *mm, int value); extern int get_dumpable(struct mm_struct *mm); diff --git a/include/linux/spi/spi_gpio.h b/include/linux/spi/spi_gpio.h new file mode 100644 index 000000000000..0f01a0f1f40c --- /dev/null +++ b/include/linux/spi/spi_gpio.h @@ -0,0 +1,60 @@ +#ifndef __LINUX_SPI_GPIO_H +#define __LINUX_SPI_GPIO_H + +/* + * For each bitbanged SPI bus, set up a platform_device node with: + * - name "spi_gpio" + * - id the same as the SPI bus number it implements + * - dev.platform data pointing to a struct spi_gpio_platform_data + * + * Or, see the driver code for information about speedups that are + * possible on platforms that support inlined access for GPIOs (no + * spi_gpio_platform_data is used). + * + * Use spi_board_info with these busses in the usual way, being sure + * that the controller_data being the GPIO used for each device's + * chipselect: + * + * static struct spi_board_info ... [] = { + * ... + * // this slave uses GPIO 42 for its chipselect + * .controller_data = (void *) 42, + * ... + * // this one uses GPIO 86 for its chipselect + * .controller_data = (void *) 86, + * ... + * }; + * + * If the bitbanged bus is later switched to a "native" controller, + * that platform_device and controller_data should be removed. + */ + +/** + * struct spi_gpio_platform_data - parameter for bitbanged SPI master + * @sck: number of the GPIO used for clock output + * @mosi: number of the GPIO used for Master Output, Slave In (MOSI) data + * @miso: number of the GPIO used for Master Input, Slave Output (MISO) data + * @num_chipselect: how many slaves to allow + * + * All GPIO signals used with the SPI bus managed through this driver + * (chipselects, MOSI, MISO, SCK) must be configured as GPIOs, instead + * of some alternate function. + * + * It can be convenient to use this driver with pins that have alternate + * functions associated with a "native" SPI controller if a driver for that + * controller is not available, or is missing important functionality. + * + * On platforms which can do so, configure MISO with a weak pullup unless + * there's an external pullup on that signal. That saves power by avoiding + * floating signals. (A weak pulldown would save power too, but many + * drivers expect to see all-ones data as the no slave "response".) + */ +struct spi_gpio_platform_data { + unsigned sck; + unsigned mosi; + unsigned miso; + + u16 num_chipselect; +}; + +#endif /* __LINUX_SPI_GPIO_H */ diff --git a/include/linux/swap.h b/include/linux/swap.h index a3af95b2cb6d..91dee50fe260 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -120,7 +120,9 @@ struct swap_extent { enum { SWP_USED = (1 << 0), /* is slot in swap_info[] used? */ SWP_WRITEOK = (1 << 1), /* ok to write to this swap? */ - SWP_ACTIVE = (SWP_USED | SWP_WRITEOK), + SWP_DISCARDABLE = (1 << 2), /* blkdev supports discard */ + SWP_DISCARDING = (1 << 3), /* now discarding a free cluster */ + SWP_SOLIDSTATE = (1 << 4), /* blkdev seeks are cheap */ /* add others here before... */ SWP_SCANNING = (1 << 8), /* refcount in scan_swap_map */ }; @@ -134,22 +136,24 @@ enum { * The in-memory structure used to track swap areas. */ struct swap_info_struct { - unsigned int flags; + unsigned long flags; int prio; /* swap priority */ + int next; /* next entry on swap list */ struct file *swap_file; struct block_device *bdev; struct list_head extent_list; struct swap_extent *curr_swap_extent; - unsigned old_block_size; - unsigned short * swap_map; + unsigned short *swap_map; unsigned int lowest_bit; unsigned int highest_bit; + unsigned int lowest_alloc; /* while preparing discard cluster */ + unsigned int highest_alloc; /* while preparing discard cluster */ unsigned int cluster_next; unsigned int cluster_nr; unsigned int pages; unsigned int max; unsigned int inuse_pages; - int next; /* next entry on swap list */ + unsigned int old_block_size; }; struct swap_list_t { @@ -163,7 +167,6 @@ struct swap_list_t { /* linux/mm/page_alloc.c */ extern unsigned long totalram_pages; extern unsigned long totalreserve_pages; -extern long nr_swap_pages; extern unsigned int nr_free_buffer_pages(void); extern unsigned int nr_free_pagecache_pages(void); @@ -174,8 +177,6 @@ extern unsigned int nr_free_pagecache_pages(void); /* linux/mm/swap.c */ extern void __lru_cache_add(struct page *, enum lru_list lru); extern void lru_cache_add_lru(struct page *, enum lru_list lru); -extern void lru_cache_add_active_or_unevictable(struct page *, - struct vm_area_struct *); extern void activate_page(struct page *); extern void mark_page_accessed(struct page *); extern void lru_add_drain(void); @@ -280,7 +281,7 @@ extern void end_swap_bio_read(struct bio *bio, int err); extern struct address_space swapper_space; #define total_swapcache_pages swapper_space.nrpages extern void show_swap_cache_info(void); -extern int add_to_swap(struct page *, gfp_t); +extern int add_to_swap(struct page *); extern int add_to_swap_cache(struct page *, swp_entry_t, gfp_t); extern void __delete_from_swap_cache(struct page *); extern void delete_from_swap_cache(struct page *); @@ -293,6 +294,7 @@ extern struct page *swapin_readahead(swp_entry_t, gfp_t, struct vm_area_struct *vma, unsigned long addr); /* linux/mm/swapfile.c */ +extern long nr_swap_pages; extern long total_swap_pages; extern void si_swapinfo(struct sysinfo *); extern swp_entry_t get_swap_page(void); @@ -300,15 +302,14 @@ extern swp_entry_t get_swap_page_of_type(int); extern int swap_duplicate(swp_entry_t); extern int valid_swaphandles(swp_entry_t, unsigned long *); extern void swap_free(swp_entry_t); -extern void free_swap_and_cache(swp_entry_t); +extern int free_swap_and_cache(swp_entry_t); extern int swap_type_of(dev_t, sector_t, struct block_device **); extern unsigned int count_swap_pages(int, int); extern sector_t map_swap_page(struct swap_info_struct *, pgoff_t); extern sector_t swapdev_block(int, pgoff_t); extern struct swap_info_struct *get_swap_info_struct(unsigned); -extern int can_share_swap_page(struct page *); -extern int remove_exclusive_swap_page(struct page *); -extern int remove_exclusive_swap_page_ref(struct page *); +extern int reuse_swap_page(struct page *); +extern int try_to_free_swap(struct page *); struct backing_dev_info; /* linux/mm/thrash.c */ @@ -334,7 +335,8 @@ static inline void disable_swap_token(void) #else /* CONFIG_SWAP */ -#define total_swap_pages 0 +#define nr_swap_pages 0L +#define total_swap_pages 0L #define total_swapcache_pages 0UL #define si_swapinfo(val) \ @@ -350,14 +352,8 @@ static inline void show_swap_cache_info(void) { } -static inline void free_swap_and_cache(swp_entry_t swp) -{ -} - -static inline int swap_duplicate(swp_entry_t swp) -{ - return 0; -} +#define free_swap_and_cache(swp) is_migration_entry(swp) +#define swap_duplicate(swp) is_migration_entry(swp) static inline void swap_free(swp_entry_t swp) { @@ -374,7 +370,10 @@ static inline struct page *lookup_swap_cache(swp_entry_t swp) return NULL; } -#define can_share_swap_page(p) (page_mapcount(p) == 1) +static inline int add_to_swap(struct page *page) +{ + return 0; +} static inline int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask) @@ -390,14 +389,9 @@ static inline void delete_from_swap_cache(struct page *page) { } -#define swap_token_default_timeout 0 - -static inline int remove_exclusive_swap_page(struct page *p) -{ - return 0; -} +#define reuse_swap_page(page) (page_mapcount(page) == 1) -static inline int remove_exclusive_swap_page_ref(struct page *page) +static inline int try_to_free_swap(struct page *page) { return 0; } diff --git a/include/linux/types.h b/include/linux/types.h index 121f349cb7ec..3b864f2d9560 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -195,6 +195,16 @@ typedef u32 phys_addr_t; typedef phys_addr_t resource_size_t; +typedef struct { + volatile int counter; +} atomic_t; + +#ifdef CONFIG_64BIT +typedef struct { + volatile long counter; +} atomic64_t; +#endif + struct ustat { __kernel_daddr_t f_tfree; __kernel_ino_t f_tinode; diff --git a/include/linux/unwind.h b/include/linux/unwind.h deleted file mode 100644 index 7760860fa170..000000000000 --- a/include/linux/unwind.h +++ /dev/null @@ -1,68 +0,0 @@ -#ifndef _LINUX_UNWIND_H -#define _LINUX_UNWIND_H - -/* - * Copyright (C) 2002-2006 Novell, Inc. - * Jan Beulich <jbeulich@novell.com> - * This code is released under version 2 of the GNU GPL. - * - * A simple API for unwinding kernel stacks. This is used for - * debugging and error reporting purposes. The kernel doesn't need - * full-blown stack unwinding with all the bells and whistles, so there - * is not much point in implementing the full Dwarf2 unwind API. - */ - -struct module; - -struct unwind_frame_info {}; - -static inline void unwind_init(void) {} -static inline void unwind_setup(void) {} - -#ifdef CONFIG_MODULES - -static inline void *unwind_add_table(struct module *mod, - const void *table_start, - unsigned long table_size) -{ - return NULL; -} - -static inline void unwind_remove_table(void *handle, int init_only) -{ -} - -#endif - -static inline int unwind_init_frame_info(struct unwind_frame_info *info, - struct task_struct *tsk, - const struct pt_regs *regs) -{ - return -ENOSYS; -} - -static inline int unwind_init_blocked(struct unwind_frame_info *info, - struct task_struct *tsk) -{ - return -ENOSYS; -} - -static inline int unwind_init_running(struct unwind_frame_info *info, - asmlinkage int (*cb)(struct unwind_frame_info *, - void *arg), - void *arg) -{ - return -ENOSYS; -} - -static inline int unwind(struct unwind_frame_info *info) -{ - return -ENOSYS; -} - -static inline int unwind_to_user(struct unwind_frame_info *info) -{ - return -ENOSYS; -} - -#endif /* _LINUX_UNWIND_H */ diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 307b88577eaa..506e7620a986 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -97,6 +97,10 @@ extern void unmap_kernel_range(unsigned long addr, unsigned long size); extern struct vm_struct *alloc_vm_area(size_t size); extern void free_vm_area(struct vm_struct *area); +/* for /dev/kmem */ +extern long vread(char *buf, char *addr, unsigned long count); +extern long vwrite(char *buf, char *addr, unsigned long count); + /* * Internals. Dont't use.. */ diff --git a/include/linux/writeback.h b/include/linux/writeback.h index e585657e9831..7300ecdc480c 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -30,7 +30,6 @@ static inline int task_is_pdflush(struct task_struct *task) enum writeback_sync_modes { WB_SYNC_NONE, /* Don't wait on anything */ WB_SYNC_ALL, /* Wait on every mapping */ - WB_SYNC_HOLD, /* Hold the inode on sb_dirty for sys_sync() */ }; /* @@ -107,7 +106,9 @@ void throttle_vm_writeout(gfp_t gfp_mask); /* These are exported to sysctl. */ extern int dirty_background_ratio; +extern unsigned long dirty_background_bytes; extern int vm_dirty_ratio; +extern unsigned long vm_dirty_bytes; extern int dirty_writeback_interval; extern int dirty_expire_interval; extern int vm_highmem_is_dirtyable; @@ -116,17 +117,26 @@ extern int laptop_mode; extern unsigned long determine_dirtyable_memory(void); +extern int dirty_background_ratio_handler(struct ctl_table *table, int write, + struct file *filp, void __user *buffer, size_t *lenp, + loff_t *ppos); +extern int dirty_background_bytes_handler(struct ctl_table *table, int write, + struct file *filp, void __user *buffer, size_t *lenp, + loff_t *ppos); extern int dirty_ratio_handler(struct ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos); +extern int dirty_bytes_handler(struct ctl_table *table, int write, + struct file *filp, void __user *buffer, size_t *lenp, + loff_t *ppos); struct ctl_table; struct file; int dirty_writeback_centisecs_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); -void get_dirty_limits(long *pbackground, long *pdirty, long *pbdi_dirty, - struct backing_dev_info *bdi); +void get_dirty_limits(unsigned long *pbackground, unsigned long *pdirty, + unsigned long *pbdi_dirty, struct backing_dev_info *bdi); void page_writeback_init(void); void balance_dirty_pages_ratelimited_nr(struct address_space *mapping, diff --git a/init/Kconfig b/init/Kconfig index d9d3dbabdb18..e7893b1d3e42 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -848,10 +848,6 @@ config RT_MUTEXES boolean select PLIST -config TINY_SHMEM - default !SHMEM - bool - config BASE_SMALL int default 0 if BASE_FULL diff --git a/init/do_mounts.c b/init/do_mounts.c index d055b1914c3d..5efca73b39f9 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -220,10 +220,10 @@ static int __init do_mount_root(char *name, char *fs, int flags, void *data) sys_chdir("/root"); ROOT_DEV = current->fs->pwd.mnt->mnt_sb->s_dev; - printk("VFS: Mounted root (%s filesystem)%s.\n", + printk("VFS: Mounted root (%s filesystem)%s on device %u:%u.\n", current->fs->pwd.mnt->mnt_sb->s_type->name, current->fs->pwd.mnt->mnt_sb->s_flags & MS_RDONLY ? - " readonly" : ""); + " readonly" : "", MAJOR(ROOT_DEV), MINOR(ROOT_DEV)); return 0; } diff --git a/init/do_mounts_md.c b/init/do_mounts_md.c index d6da5cdd3c38..ff95e3192884 100644 --- a/init/do_mounts_md.c +++ b/init/do_mounts_md.c @@ -271,7 +271,7 @@ static int __init raid_setup(char *str) __setup("raid=", raid_setup); __setup("md=", md_setup); -static void autodetect_raid(void) +static void __init autodetect_raid(void) { int fd; diff --git a/init/main.c b/init/main.c index cd168ebc5924..b5a892c68375 100644 --- a/init/main.c +++ b/init/main.c @@ -50,7 +50,6 @@ #include <linux/rmap.h> #include <linux/mempolicy.h> #include <linux/key.h> -#include <linux/unwind.h> #include <linux/buffer_head.h> #include <linux/page_cgroup.h> #include <linux/debug_locks.h> @@ -108,7 +107,7 @@ EXPORT_SYMBOL(system_state); extern void time_init(void); /* Default late time init is NULL. archs can override this later. */ -void (*late_time_init)(void); +void (*__initdata late_time_init)(void); extern void softirq_init(void); /* Untouched command line saved by arch-specific code. */ @@ -447,7 +446,7 @@ static void __init setup_command_line(char *command_line) * gcc-3.4 accidentally inlines this function, so use noinline. */ -static void noinline __init_refok rest_init(void) +static noinline void __init_refok rest_init(void) __releases(kernel_lock) { int pid; @@ -537,7 +536,6 @@ asmlinkage void __init start_kernel(void) * Need to run as early as possible, to initialize the * lockdep hash: */ - unwind_init(); lockdep_init(); debug_objects_early_init(); cgroup_init_early(); @@ -559,7 +557,6 @@ asmlinkage void __init start_kernel(void) setup_arch(&command_line); mm_init_owner(&init_mm, &init_task); setup_command_line(command_line); - unwind_setup(); setup_per_cpu_areas(); setup_nr_cpu_ids(); smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */ @@ -786,7 +783,7 @@ static void run_init_process(char *init_filename) /* This is a non __init function. Force it to be noinline otherwise gcc * makes it inline to init() and it becomes part of init.text section */ -static int noinline init_post(void) +static noinline int init_post(void) { free_initmem(); unlock_kernel(); diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c index 0dfebc509426..4a7a12c95abe 100644 --- a/ipc/ipc_sysctl.c +++ b/ipc/ipc_sysctl.c @@ -26,29 +26,6 @@ static void *get_ipc(ctl_table *table) return which; } -/* - * Routine that is called when the file "auto_msgmni" has successfully been - * written. - * Two values are allowed: - * 0: unregister msgmni's callback routine from the ipc namespace notifier - * chain. This means that msgmni won't be recomputed anymore upon memory - * add/remove or ipc namespace creation/removal. - * 1: register back the callback routine. - */ -static void ipc_auto_callback(int val) -{ - if (!val) - unregister_ipcns_notifier(current->nsproxy->ipc_ns); - else { - /* - * Re-enable automatic recomputing only if not already - * enabled. - */ - recompute_msgmni(current->nsproxy->ipc_ns); - cond_register_ipcns_notifier(current->nsproxy->ipc_ns); - } -} - #ifdef CONFIG_PROC_FS static int proc_ipc_dointvec(ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) @@ -94,6 +71,29 @@ static int proc_ipc_doulongvec_minmax(ctl_table *table, int write, lenp, ppos); } +/* + * Routine that is called when the file "auto_msgmni" has successfully been + * written. + * Two values are allowed: + * 0: unregister msgmni's callback routine from the ipc namespace notifier + * chain. This means that msgmni won't be recomputed anymore upon memory + * add/remove or ipc namespace creation/removal. + * 1: register back the callback routine. + */ +static void ipc_auto_callback(int val) +{ + if (!val) + unregister_ipcns_notifier(current->nsproxy->ipc_ns); + else { + /* + * Re-enable automatic recomputing only if not already + * enabled. + */ + recompute_msgmni(current->nsproxy->ipc_ns); + cond_register_ipcns_notifier(current->nsproxy->ipc_ns); + } +} + static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { diff --git a/ipc/sem.c b/ipc/sem.c index fea0ad3aed7b..c68cd3f8f0c9 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -1216,7 +1216,6 @@ asmlinkage long sys_semtimedop(int semid, struct sembuf __user *tsops, if (timeout && jiffies_left == 0) error = -EAGAIN; list_del(&queue.list); - goto out_unlock_free; out_unlock_free: sem_unlock(sma); diff --git a/ipc/shm.c b/ipc/shm.c index 57dd50046cef..b125b560240e 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -75,7 +75,7 @@ void shm_init_ns(struct ipc_namespace *ns) ns->shm_ctlall = SHMALL; ns->shm_ctlmni = SHMMNI; ns->shm_tot = 0; - ipc_init_ids(&ns->ids[IPC_SHM_IDS]); + ipc_init_ids(&shm_ids(ns)); } /* @@ -644,7 +644,7 @@ asmlinkage long sys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf) if (err) return err; - memset(&shminfo,0,sizeof(shminfo)); + memset(&shminfo, 0, sizeof(shminfo)); shminfo.shmmni = shminfo.shmseg = ns->shm_ctlmni; shminfo.shmmax = ns->shm_ctlmax; shminfo.shmall = ns->shm_ctlall; @@ -669,7 +669,7 @@ asmlinkage long sys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf) if (err) return err; - memset(&shm_info,0,sizeof(shm_info)); + memset(&shm_info, 0, sizeof(shm_info)); down_read(&shm_ids(ns).rw_mutex); shm_info.used_ids = shm_ids(ns).in_use; shm_get_stat (ns, &shm_info.shm_rss, &shm_info.shm_swp); @@ -678,7 +678,7 @@ asmlinkage long sys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf) shm_info.swap_successes = 0; err = ipc_get_maxid(&shm_ids(ns)); up_read(&shm_ids(ns).rw_mutex); - if(copy_to_user (buf, &shm_info, sizeof(shm_info))) { + if (copy_to_user(buf, &shm_info, sizeof(shm_info))) { err = -EFAULT; goto out; } @@ -692,11 +692,6 @@ asmlinkage long sys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf) struct shmid64_ds tbuf; int result; - if (!buf) { - err = -EFAULT; - goto out; - } - if (cmd == SHM_STAT) { shp = shm_lock(ns, shmid); if (IS_ERR(shp)) { @@ -712,7 +707,7 @@ asmlinkage long sys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf) } result = 0; } - err=-EACCES; + err = -EACCES; if (ipcperms (&shp->shm_perm, S_IRUGO)) goto out_unlock; err = security_shm_shmctl(shp, cmd); diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 87bb0258fd27..f221446aa02d 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -116,7 +116,6 @@ static int root_count; * be called. */ static int need_forkexit_callback __read_mostly; -static int need_mm_owner_callback __read_mostly; /* convenient tests for these bits */ inline int cgroup_is_removed(const struct cgroup *cgrp) @@ -2539,7 +2538,6 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss) init_css_set.subsys[ss->subsys_id] = dummytop->subsys[ss->subsys_id]; need_forkexit_callback |= ss->fork || ss->exit; - need_mm_owner_callback |= !!ss->mm_owner_changed; /* At system boot, before all subsystems have been * registered, no tasks have been forked, so we don't @@ -2789,37 +2787,6 @@ void cgroup_fork_callbacks(struct task_struct *child) } } -#ifdef CONFIG_MM_OWNER -/** - * cgroup_mm_owner_callbacks - run callbacks when the mm->owner changes - * @p: the new owner - * - * Called on every change to mm->owner. mm_init_owner() does not - * invoke this routine, since it assigns the mm->owner the first time - * and does not change it. - * - * The callbacks are invoked with mmap_sem held in read mode. - */ -void cgroup_mm_owner_callbacks(struct task_struct *old, struct task_struct *new) -{ - struct cgroup *oldcgrp, *newcgrp = NULL; - - if (need_mm_owner_callback) { - int i; - for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { - struct cgroup_subsys *ss = subsys[i]; - oldcgrp = task_cgroup(old, ss->subsys_id); - if (new) - newcgrp = task_cgroup(new, ss->subsys_id); - if (oldcgrp == newcgrp) - continue; - if (ss->mm_owner_changed) - ss->mm_owner_changed(ss, oldcgrp, newcgrp, new); - } - } -} -#endif /* CONFIG_MM_OWNER */ - /** * cgroup_post_fork - called on a new task after adding it to the task list * @child: the task in question diff --git a/kernel/compat.c b/kernel/compat.c index d52e2ec1deb5..42d56544460f 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -24,6 +24,7 @@ #include <linux/migrate.h> #include <linux/posix-timers.h> #include <linux/times.h> +#include <linux/ptrace.h> #include <asm/uaccess.h> @@ -229,6 +230,7 @@ asmlinkage long compat_sys_times(struct compat_tms __user *tbuf) if (copy_to_user(tbuf, &tmp, sizeof(tmp))) return -EFAULT; } + force_successful_syscall_return(); return compat_jiffies_to_clock_t(jiffies); } @@ -894,8 +896,9 @@ asmlinkage long compat_sys_time(compat_time_t __user * tloc) if (tloc) { if (put_user(i,tloc)) - i = -EFAULT; + return -EFAULT; } + force_successful_syscall_return(); return i; } diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 39c1a4c1c5a9..345ace5117de 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -240,6 +240,17 @@ static struct cpuset top_cpuset = { static DEFINE_MUTEX(callback_mutex); /* + * cpuset_buffer_lock protects both the cpuset_name and cpuset_nodelist + * buffers. They are statically allocated to prevent using excess stack + * when calling cpuset_print_task_mems_allowed(). + */ +#define CPUSET_NAME_LEN (128) +#define CPUSET_NODELIST_LEN (256) +static char cpuset_name[CPUSET_NAME_LEN]; +static char cpuset_nodelist[CPUSET_NODELIST_LEN]; +static DEFINE_SPINLOCK(cpuset_buffer_lock); + +/* * This is ugly, but preserves the userspace API for existing cpuset * users. If someone tries to mount the "cpuset" filesystem, we * silently switch it to mount "cgroup" instead @@ -2356,6 +2367,29 @@ int cpuset_mems_allowed_intersects(const struct task_struct *tsk1, return nodes_intersects(tsk1->mems_allowed, tsk2->mems_allowed); } +/** + * cpuset_print_task_mems_allowed - prints task's cpuset and mems_allowed + * @task: pointer to task_struct of some task. + * + * Description: Prints @task's name, cpuset name, and cached copy of its + * mems_allowed to the kernel log. Must hold task_lock(task) to allow + * dereferencing task_cs(task). + */ +void cpuset_print_task_mems_allowed(struct task_struct *tsk) +{ + struct dentry *dentry; + + dentry = task_cs(tsk)->css.cgroup->dentry; + spin_lock(&cpuset_buffer_lock); + snprintf(cpuset_name, CPUSET_NAME_LEN, + dentry ? (const char *)dentry->d_name.name : "/"); + nodelist_scnprintf(cpuset_nodelist, CPUSET_NODELIST_LEN, + tsk->mems_allowed); + printk(KERN_INFO "%s cpuset=%s mems_allowed=%s\n", + tsk->comm, cpuset_name, cpuset_nodelist); + spin_unlock(&cpuset_buffer_lock); +} + /* * Collection of memory_pressure is suppressed unless * this flag is enabled by writing "1" to the special diff --git a/kernel/dma-coherent.c b/kernel/dma-coherent.c index f013a0c2e111..038707404b76 100644 --- a/kernel/dma-coherent.c +++ b/kernel/dma-coherent.c @@ -109,20 +109,40 @@ EXPORT_SYMBOL(dma_mark_declared_memory_occupied); int dma_alloc_from_coherent(struct device *dev, ssize_t size, dma_addr_t *dma_handle, void **ret) { - struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; + struct dma_coherent_mem *mem; int order = get_order(size); + int pageno; - if (mem) { - int page = bitmap_find_free_region(mem->bitmap, mem->size, - order); - if (page >= 0) { - *dma_handle = mem->device_base + (page << PAGE_SHIFT); - *ret = mem->virt_base + (page << PAGE_SHIFT); - memset(*ret, 0, size); - } else if (mem->flags & DMA_MEMORY_EXCLUSIVE) - *ret = NULL; + if (!dev) + return 0; + mem = dev->dma_mem; + if (!mem) + return 0; + if (unlikely(size > mem->size)) + return 0; + + pageno = bitmap_find_free_region(mem->bitmap, mem->size, order); + if (pageno >= 0) { + /* + * Memory was found in the per-device arena. + */ + *dma_handle = mem->device_base + (pageno << PAGE_SHIFT); + *ret = mem->virt_base + (pageno << PAGE_SHIFT); + memset(*ret, 0, size); + } else if (mem->flags & DMA_MEMORY_EXCLUSIVE) { + /* + * The per-device arena is exhausted and we are not + * permitted to fall back to generic memory. + */ + *ret = NULL; + } else { + /* + * The per-device arena is exhausted and we are + * permitted to fall back to generic memory. + */ + return 0; } - return (mem != NULL); + return 1; } EXPORT_SYMBOL(dma_alloc_from_coherent); diff --git a/kernel/exit.c b/kernel/exit.c index c9e5a1c14e08..c7740fa3252c 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -642,35 +642,31 @@ retry: /* * We found no owner yet mm_users > 1: this implies that we are * most likely racing with swapoff (try_to_unuse()) or /proc or - * ptrace or page migration (get_task_mm()). Mark owner as NULL, - * so that subsystems can understand the callback and take action. + * ptrace or page migration (get_task_mm()). Mark owner as NULL. */ - down_write(&mm->mmap_sem); - cgroup_mm_owner_callbacks(mm->owner, NULL); mm->owner = NULL; - up_write(&mm->mmap_sem); return; assign_new_owner: BUG_ON(c == p); get_task_struct(c); - read_unlock(&tasklist_lock); - down_write(&mm->mmap_sem); /* * The task_lock protects c->mm from changing. * We always want mm->owner->mm == mm */ task_lock(c); + /* + * Delay read_unlock() till we have the task_lock() + * to ensure that c does not slip away underneath us + */ + read_unlock(&tasklist_lock); if (c->mm != mm) { task_unlock(c); - up_write(&mm->mmap_sem); put_task_struct(c); goto retry; } - cgroup_mm_owner_callbacks(mm->owner, c); mm->owner = c; task_unlock(c); - up_write(&mm->mmap_sem); put_task_struct(c); } #endif /* CONFIG_MM_OWNER */ @@ -1055,10 +1051,7 @@ NORET_TYPE void do_exit(long code) preempt_count()); acct_update_integrals(tsk); - if (tsk->mm) { - update_hiwater_rss(tsk->mm); - update_hiwater_vm(tsk->mm); - } + group_dead = atomic_dec_and_test(&tsk->signal->live); if (group_dead) { hrtimer_cancel(&tsk->signal->real_timer); diff --git a/kernel/fork.c b/kernel/fork.c index 43cbf30669e6..7b8f2a78be3d 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -400,6 +400,18 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock); #define allocate_mm() (kmem_cache_alloc(mm_cachep, GFP_KERNEL)) #define free_mm(mm) (kmem_cache_free(mm_cachep, (mm))) +static unsigned long default_dump_filter = MMF_DUMP_FILTER_DEFAULT; + +static int __init coredump_filter_setup(char *s) +{ + default_dump_filter = + (simple_strtoul(s, NULL, 0) << MMF_DUMP_FILTER_SHIFT) & + MMF_DUMP_FILTER_MASK; + return 1; +} + +__setup("coredump_filter=", coredump_filter_setup); + #include <linux/init_task.h> static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p) @@ -408,8 +420,7 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p) atomic_set(&mm->mm_count, 1); init_rwsem(&mm->mmap_sem); INIT_LIST_HEAD(&mm->mmlist); - mm->flags = (current->mm) ? current->mm->flags - : MMF_DUMP_FILTER_DEFAULT; + mm->flags = (current->mm) ? current->mm->flags : default_dump_filter; mm->core_state = NULL; mm->nr_ptes = 0; set_mm_counter(mm, file_rss, 0); @@ -758,7 +769,7 @@ static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk) { struct sighand_struct *sig; - if (clone_flags & (CLONE_SIGHAND | CLONE_THREAD)) { + if (clone_flags & CLONE_SIGHAND) { atomic_inc(¤t->sighand->count); return 0; } diff --git a/kernel/kmod.c b/kernel/kmod.c index b46dbb908669..a27a5f64443d 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -51,8 +51,8 @@ char modprobe_path[KMOD_PATH_LEN] = "/sbin/modprobe"; /** * request_module - try to load a kernel module - * @fmt: printf style format string for the name of the module - * @varargs: arguements as specified in the format string + * @fmt: printf style format string for the name of the module + * @...: arguments as specified in the format string * * Load a module using the user mode module loader. The function returns * zero on success or a negative errno code on failure. Note that a diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 9f8a3f25259a..1b9cbdc0127a 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -69,7 +69,7 @@ static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE]; /* NOTE: change this value only with kprobe_mutex held */ static bool kprobe_enabled; -DEFINE_MUTEX(kprobe_mutex); /* Protects kprobe_table */ +static DEFINE_MUTEX(kprobe_mutex); /* Protects kprobe_table */ static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL; static struct { spinlock_t lock ____cacheline_aligned_in_smp; @@ -115,6 +115,7 @@ enum kprobe_slot_state { SLOT_USED = 2, }; +static DEFINE_MUTEX(kprobe_insn_mutex); /* Protects kprobe_insn_pages */ static struct hlist_head kprobe_insn_pages; static int kprobe_garbage_slots; static int collect_garbage_slots(void); @@ -144,10 +145,10 @@ loop_end: } /** - * get_insn_slot() - Find a slot on an executable page for an instruction. + * __get_insn_slot() - Find a slot on an executable page for an instruction. * We allocate an executable page if there's no room on existing ones. */ -kprobe_opcode_t __kprobes *get_insn_slot(void) +static kprobe_opcode_t __kprobes *__get_insn_slot(void) { struct kprobe_insn_page *kip; struct hlist_node *pos; @@ -196,6 +197,15 @@ kprobe_opcode_t __kprobes *get_insn_slot(void) return kip->insns; } +kprobe_opcode_t __kprobes *get_insn_slot(void) +{ + kprobe_opcode_t *ret; + mutex_lock(&kprobe_insn_mutex); + ret = __get_insn_slot(); + mutex_unlock(&kprobe_insn_mutex); + return ret; +} + /* Return 1 if all garbages are collected, otherwise 0. */ static int __kprobes collect_one_slot(struct kprobe_insn_page *kip, int idx) { @@ -226,9 +236,13 @@ static int __kprobes collect_garbage_slots(void) { struct kprobe_insn_page *kip; struct hlist_node *pos, *next; + int safety; /* Ensure no-one is preepmted on the garbages */ - if (check_safety() != 0) + mutex_unlock(&kprobe_insn_mutex); + safety = check_safety(); + mutex_lock(&kprobe_insn_mutex); + if (safety != 0) return -EAGAIN; hlist_for_each_entry_safe(kip, pos, next, &kprobe_insn_pages, hlist) { @@ -251,6 +265,7 @@ void __kprobes free_insn_slot(kprobe_opcode_t * slot, int dirty) struct kprobe_insn_page *kip; struct hlist_node *pos; + mutex_lock(&kprobe_insn_mutex); hlist_for_each_entry(kip, pos, &kprobe_insn_pages, hlist) { if (kip->insns <= slot && slot < kip->insns + (INSNS_PER_PAGE * MAX_INSN_SIZE)) { @@ -267,6 +282,8 @@ void __kprobes free_insn_slot(kprobe_opcode_t * slot, int dirty) if (dirty && ++kprobe_garbage_slots > INSNS_PER_PAGE) collect_garbage_slots(); + + mutex_unlock(&kprobe_insn_mutex); } #endif @@ -310,7 +327,7 @@ static int __kprobes aggr_pre_handler(struct kprobe *p, struct pt_regs *regs) struct kprobe *kp; list_for_each_entry_rcu(kp, &p->list, list) { - if (kp->pre_handler) { + if (kp->pre_handler && !kprobe_gone(kp)) { set_kprobe_instance(kp); if (kp->pre_handler(kp, regs)) return 1; @@ -326,7 +343,7 @@ static void __kprobes aggr_post_handler(struct kprobe *p, struct pt_regs *regs, struct kprobe *kp; list_for_each_entry_rcu(kp, &p->list, list) { - if (kp->post_handler) { + if (kp->post_handler && !kprobe_gone(kp)) { set_kprobe_instance(kp); kp->post_handler(kp, regs, flags); reset_kprobe_instance(); @@ -393,7 +410,7 @@ void __kprobes recycle_rp_inst(struct kretprobe_instance *ri, hlist_add_head(&ri->hlist, head); } -void kretprobe_hash_lock(struct task_struct *tsk, +void __kprobes kretprobe_hash_lock(struct task_struct *tsk, struct hlist_head **head, unsigned long *flags) { unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS); @@ -404,13 +421,15 @@ void kretprobe_hash_lock(struct task_struct *tsk, spin_lock_irqsave(hlist_lock, *flags); } -static void kretprobe_table_lock(unsigned long hash, unsigned long *flags) +static void __kprobes kretprobe_table_lock(unsigned long hash, + unsigned long *flags) { spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash); spin_lock_irqsave(hlist_lock, *flags); } -void kretprobe_hash_unlock(struct task_struct *tsk, unsigned long *flags) +void __kprobes kretprobe_hash_unlock(struct task_struct *tsk, + unsigned long *flags) { unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS); spinlock_t *hlist_lock; @@ -419,7 +438,7 @@ void kretprobe_hash_unlock(struct task_struct *tsk, unsigned long *flags) spin_unlock_irqrestore(hlist_lock, *flags); } -void kretprobe_table_unlock(unsigned long hash, unsigned long *flags) +void __kprobes kretprobe_table_unlock(unsigned long hash, unsigned long *flags) { spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash); spin_unlock_irqrestore(hlist_lock, *flags); @@ -526,9 +545,10 @@ static inline void add_aggr_kprobe(struct kprobe *ap, struct kprobe *p) ap->addr = p->addr; ap->pre_handler = aggr_pre_handler; ap->fault_handler = aggr_fault_handler; - if (p->post_handler) + /* We don't care the kprobe which has gone. */ + if (p->post_handler && !kprobe_gone(p)) ap->post_handler = aggr_post_handler; - if (p->break_handler) + if (p->break_handler && !kprobe_gone(p)) ap->break_handler = aggr_break_handler; INIT_LIST_HEAD(&ap->list); @@ -547,17 +567,41 @@ static int __kprobes register_aggr_kprobe(struct kprobe *old_p, int ret = 0; struct kprobe *ap; + if (kprobe_gone(old_p)) { + /* + * Attempting to insert new probe at the same location that + * had a probe in the module vaddr area which already + * freed. So, the instruction slot has already been + * released. We need a new slot for the new probe. + */ + ret = arch_prepare_kprobe(old_p); + if (ret) + return ret; + } if (old_p->pre_handler == aggr_pre_handler) { copy_kprobe(old_p, p); ret = add_new_kprobe(old_p, p); + ap = old_p; } else { ap = kzalloc(sizeof(struct kprobe), GFP_KERNEL); - if (!ap) + if (!ap) { + if (kprobe_gone(old_p)) + arch_remove_kprobe(old_p); return -ENOMEM; + } add_aggr_kprobe(ap, old_p); copy_kprobe(ap, p); ret = add_new_kprobe(ap, p); } + if (kprobe_gone(old_p)) { + /* + * If the old_p has gone, its breakpoint has been disarmed. + * We have to arm it again after preparing real kprobes. + */ + ap->flags &= ~KPROBE_FLAG_GONE; + if (kprobe_enabled) + arch_arm_kprobe(ap); + } return ret; } @@ -600,8 +644,7 @@ static kprobe_opcode_t __kprobes *kprobe_addr(struct kprobe *p) return (kprobe_opcode_t *)(((char *)addr) + p->offset); } -static int __kprobes __register_kprobe(struct kprobe *p, - unsigned long called_from) +int __kprobes register_kprobe(struct kprobe *p) { int ret = 0; struct kprobe *old_p; @@ -620,28 +663,30 @@ static int __kprobes __register_kprobe(struct kprobe *p, return -EINVAL; } - p->mod_refcounted = 0; - + p->flags = 0; /* * Check if are we probing a module. */ probed_mod = __module_text_address((unsigned long) p->addr); if (probed_mod) { - struct module *calling_mod; - calling_mod = __module_text_address(called_from); /* - * We must allow modules to probe themself and in this case - * avoid incrementing the module refcount, so as to allow - * unloading of self probing modules. + * We must hold a refcount of the probed module while updating + * its code to prohibit unexpected unloading. */ - if (calling_mod && calling_mod != probed_mod) { - if (unlikely(!try_module_get(probed_mod))) { - preempt_enable(); - return -EINVAL; - } - p->mod_refcounted = 1; - } else - probed_mod = NULL; + if (unlikely(!try_module_get(probed_mod))) { + preempt_enable(); + return -EINVAL; + } + /* + * If the module freed .init.text, we couldn't insert + * kprobes in there. + */ + if (within_module_init((unsigned long)p->addr, probed_mod) && + probed_mod->state != MODULE_STATE_COMING) { + module_put(probed_mod); + preempt_enable(); + return -EINVAL; + } } preempt_enable(); @@ -668,8 +713,9 @@ static int __kprobes __register_kprobe(struct kprobe *p, out: mutex_unlock(&kprobe_mutex); - if (ret && probed_mod) + if (probed_mod) module_put(probed_mod); + return ret; } @@ -697,16 +743,16 @@ valid_p: list_is_singular(&old_p->list))) { /* * Only probe on the hash list. Disarm only if kprobes are - * enabled - otherwise, the breakpoint would already have - * been removed. We save on flushing icache. + * enabled and not gone - otherwise, the breakpoint would + * already have been removed. We save on flushing icache. */ - if (kprobe_enabled) + if (kprobe_enabled && !kprobe_gone(old_p)) arch_disarm_kprobe(p); hlist_del_rcu(&old_p->hlist); } else { - if (p->break_handler) + if (p->break_handler && !kprobe_gone(p)) old_p->break_handler = NULL; - if (p->post_handler) { + if (p->post_handler && !kprobe_gone(p)) { list_for_each_entry_rcu(list_p, &old_p->list, list) { if ((list_p != p) && (list_p->post_handler)) goto noclean; @@ -721,39 +767,27 @@ noclean: static void __kprobes __unregister_kprobe_bottom(struct kprobe *p) { - struct module *mod; struct kprobe *old_p; - if (p->mod_refcounted) { - /* - * Since we've already incremented refcount, - * we don't need to disable preemption. - */ - mod = module_text_address((unsigned long)p->addr); - if (mod) - module_put(mod); - } - - if (list_empty(&p->list) || list_is_singular(&p->list)) { - if (!list_empty(&p->list)) { - /* "p" is the last child of an aggr_kprobe */ - old_p = list_entry(p->list.next, struct kprobe, list); - list_del(&p->list); - kfree(old_p); - } + if (list_empty(&p->list)) arch_remove_kprobe(p); + else if (list_is_singular(&p->list)) { + /* "p" is the last child of an aggr_kprobe */ + old_p = list_entry(p->list.next, struct kprobe, list); + list_del(&p->list); + arch_remove_kprobe(old_p); + kfree(old_p); } } -static int __register_kprobes(struct kprobe **kps, int num, - unsigned long called_from) +int __kprobes register_kprobes(struct kprobe **kps, int num) { int i, ret = 0; if (num <= 0) return -EINVAL; for (i = 0; i < num; i++) { - ret = __register_kprobe(kps[i], called_from); + ret = register_kprobe(kps[i]); if (ret < 0) { if (i > 0) unregister_kprobes(kps, i); @@ -763,26 +797,11 @@ static int __register_kprobes(struct kprobe **kps, int num, return ret; } -/* - * Registration and unregistration functions for kprobe. - */ -int __kprobes register_kprobe(struct kprobe *p) -{ - return __register_kprobes(&p, 1, - (unsigned long)__builtin_return_address(0)); -} - void __kprobes unregister_kprobe(struct kprobe *p) { unregister_kprobes(&p, 1); } -int __kprobes register_kprobes(struct kprobe **kps, int num) -{ - return __register_kprobes(kps, num, - (unsigned long)__builtin_return_address(0)); -} - void __kprobes unregister_kprobes(struct kprobe **kps, int num) { int i; @@ -811,8 +830,7 @@ unsigned long __weak arch_deref_entry_point(void *entry) return (unsigned long)entry; } -static int __register_jprobes(struct jprobe **jps, int num, - unsigned long called_from) +int __kprobes register_jprobes(struct jprobe **jps, int num) { struct jprobe *jp; int ret = 0, i; @@ -830,7 +848,7 @@ static int __register_jprobes(struct jprobe **jps, int num, /* Todo: Verify probepoint is a function entry point */ jp->kp.pre_handler = setjmp_pre_handler; jp->kp.break_handler = longjmp_break_handler; - ret = __register_kprobe(&jp->kp, called_from); + ret = register_kprobe(&jp->kp); } if (ret < 0) { if (i > 0) @@ -843,8 +861,7 @@ static int __register_jprobes(struct jprobe **jps, int num, int __kprobes register_jprobe(struct jprobe *jp) { - return __register_jprobes(&jp, 1, - (unsigned long)__builtin_return_address(0)); + return register_jprobes(&jp, 1); } void __kprobes unregister_jprobe(struct jprobe *jp) @@ -852,12 +869,6 @@ void __kprobes unregister_jprobe(struct jprobe *jp) unregister_jprobes(&jp, 1); } -int __kprobes register_jprobes(struct jprobe **jps, int num) -{ - return __register_jprobes(jps, num, - (unsigned long)__builtin_return_address(0)); -} - void __kprobes unregister_jprobes(struct jprobe **jps, int num) { int i; @@ -920,8 +931,7 @@ static int __kprobes pre_handler_kretprobe(struct kprobe *p, return 0; } -static int __kprobes __register_kretprobe(struct kretprobe *rp, - unsigned long called_from) +int __kprobes register_kretprobe(struct kretprobe *rp) { int ret = 0; struct kretprobe_instance *inst; @@ -967,21 +977,20 @@ static int __kprobes __register_kretprobe(struct kretprobe *rp, rp->nmissed = 0; /* Establish function entry probe point */ - ret = __register_kprobe(&rp->kp, called_from); + ret = register_kprobe(&rp->kp); if (ret != 0) free_rp_inst(rp); return ret; } -static int __register_kretprobes(struct kretprobe **rps, int num, - unsigned long called_from) +int __kprobes register_kretprobes(struct kretprobe **rps, int num) { int ret = 0, i; if (num <= 0) return -EINVAL; for (i = 0; i < num; i++) { - ret = __register_kretprobe(rps[i], called_from); + ret = register_kretprobe(rps[i]); if (ret < 0) { if (i > 0) unregister_kretprobes(rps, i); @@ -991,23 +1000,11 @@ static int __register_kretprobes(struct kretprobe **rps, int num, return ret; } -int __kprobes register_kretprobe(struct kretprobe *rp) -{ - return __register_kretprobes(&rp, 1, - (unsigned long)__builtin_return_address(0)); -} - void __kprobes unregister_kretprobe(struct kretprobe *rp) { unregister_kretprobes(&rp, 1); } -int __kprobes register_kretprobes(struct kretprobe **rps, int num) -{ - return __register_kretprobes(rps, num, - (unsigned long)__builtin_return_address(0)); -} - void __kprobes unregister_kretprobes(struct kretprobe **rps, int num) { int i; @@ -1055,6 +1052,72 @@ static int __kprobes pre_handler_kretprobe(struct kprobe *p, #endif /* CONFIG_KRETPROBES */ +/* Set the kprobe gone and remove its instruction buffer. */ +static void __kprobes kill_kprobe(struct kprobe *p) +{ + struct kprobe *kp; + p->flags |= KPROBE_FLAG_GONE; + if (p->pre_handler == aggr_pre_handler) { + /* + * If this is an aggr_kprobe, we have to list all the + * chained probes and mark them GONE. + */ + list_for_each_entry_rcu(kp, &p->list, list) + kp->flags |= KPROBE_FLAG_GONE; + p->post_handler = NULL; + p->break_handler = NULL; + } + /* + * Here, we can remove insn_slot safely, because no thread calls + * the original probed function (which will be freed soon) any more. + */ + arch_remove_kprobe(p); +} + +/* Module notifier call back, checking kprobes on the module */ +static int __kprobes kprobes_module_callback(struct notifier_block *nb, + unsigned long val, void *data) +{ + struct module *mod = data; + struct hlist_head *head; + struct hlist_node *node; + struct kprobe *p; + unsigned int i; + int checkcore = (val == MODULE_STATE_GOING); + + if (val != MODULE_STATE_GOING && val != MODULE_STATE_LIVE) + return NOTIFY_DONE; + + /* + * When MODULE_STATE_GOING was notified, both of module .text and + * .init.text sections would be freed. When MODULE_STATE_LIVE was + * notified, only .init.text section would be freed. We need to + * disable kprobes which have been inserted in the sections. + */ + mutex_lock(&kprobe_mutex); + for (i = 0; i < KPROBE_TABLE_SIZE; i++) { + head = &kprobe_table[i]; + hlist_for_each_entry_rcu(p, node, head, hlist) + if (within_module_init((unsigned long)p->addr, mod) || + (checkcore && + within_module_core((unsigned long)p->addr, mod))) { + /* + * The vaddr this probe is installed will soon + * be vfreed buy not synced to disk. Hence, + * disarming the breakpoint isn't needed. + */ + kill_kprobe(p); + } + } + mutex_unlock(&kprobe_mutex); + return NOTIFY_DONE; +} + +static struct notifier_block kprobe_module_nb = { + .notifier_call = kprobes_module_callback, + .priority = 0 +}; + static int __init init_kprobes(void) { int i, err = 0; @@ -1111,6 +1174,9 @@ static int __init init_kprobes(void) err = arch_init_kprobes(); if (!err) err = register_die_notifier(&kprobe_exceptions_nb); + if (!err) + err = register_module_notifier(&kprobe_module_nb); + kprobes_initialized = (err == 0); if (!err) @@ -1131,10 +1197,12 @@ static void __kprobes report_probe(struct seq_file *pi, struct kprobe *p, else kprobe_type = "k"; if (sym) - seq_printf(pi, "%p %s %s+0x%x %s\n", p->addr, kprobe_type, - sym, offset, (modname ? modname : " ")); + seq_printf(pi, "%p %s %s+0x%x %s %s\n", p->addr, kprobe_type, + sym, offset, (modname ? modname : " "), + (kprobe_gone(p) ? "[GONE]" : "")); else - seq_printf(pi, "%p %s %p\n", p->addr, kprobe_type, p->addr); + seq_printf(pi, "%p %s %p %s\n", p->addr, kprobe_type, p->addr, + (kprobe_gone(p) ? "[GONE]" : "")); } static void __kprobes *kprobe_seq_start(struct seq_file *f, loff_t *pos) @@ -1215,7 +1283,8 @@ static void __kprobes enable_all_kprobes(void) for (i = 0; i < KPROBE_TABLE_SIZE; i++) { head = &kprobe_table[i]; hlist_for_each_entry_rcu(p, node, head, hlist) - arch_arm_kprobe(p); + if (!kprobe_gone(p)) + arch_arm_kprobe(p); } kprobe_enabled = true; @@ -1244,7 +1313,7 @@ static void __kprobes disable_all_kprobes(void) for (i = 0; i < KPROBE_TABLE_SIZE; i++) { head = &kprobe_table[i]; hlist_for_each_entry_rcu(p, node, head, hlist) { - if (!arch_trampoline_kprobe(p)) + if (!arch_trampoline_kprobe(p) && !kprobe_gone(p)) arch_disarm_kprobe(p); } } diff --git a/kernel/module.c b/kernel/module.c index f47cce910f25..496dcb57b608 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -43,7 +43,6 @@ #include <linux/device.h> #include <linux/string.h> #include <linux/mutex.h> -#include <linux/unwind.h> #include <linux/rculist.h> #include <asm/uaccess.h> #include <asm/cacheflush.h> @@ -1449,8 +1448,6 @@ static void free_module(struct module *mod) remove_sect_attrs(mod); mod_kobject_remove(mod); - unwind_remove_table(mod->unwind_info, 0); - /* Arch-specific cleanup. */ module_arch_cleanup(mod); @@ -1867,7 +1864,6 @@ static noinline struct module *load_module(void __user *umod, unsigned int symindex = 0; unsigned int strindex = 0; unsigned int modindex, versindex, infoindex, pcpuindex; - unsigned int unwindex = 0; unsigned int num_kp, num_mcount; struct kernel_param *kp; struct module *mod; @@ -1957,9 +1953,6 @@ static noinline struct module *load_module(void __user *umod, versindex = find_sec(hdr, sechdrs, secstrings, "__versions"); infoindex = find_sec(hdr, sechdrs, secstrings, ".modinfo"); pcpuindex = find_pcpusec(hdr, sechdrs, secstrings); -#ifdef ARCH_UNWIND_SECTION_NAME - unwindex = find_sec(hdr, sechdrs, secstrings, ARCH_UNWIND_SECTION_NAME); -#endif /* Don't keep modinfo and version sections. */ sechdrs[infoindex].sh_flags &= ~(unsigned long)SHF_ALLOC; @@ -1969,8 +1962,6 @@ static noinline struct module *load_module(void __user *umod, sechdrs[symindex].sh_flags |= SHF_ALLOC; sechdrs[strindex].sh_flags |= SHF_ALLOC; #endif - if (unwindex) - sechdrs[unwindex].sh_flags |= SHF_ALLOC; /* Check module struct version now, before we try to use module. */ if (!check_modstruct_version(sechdrs, versindex, mod)) { @@ -2267,11 +2258,6 @@ static noinline struct module *load_module(void __user *umod, add_sect_attrs(mod, hdr->e_shnum, secstrings, sechdrs); add_notes_attrs(mod, hdr->e_shnum, secstrings, sechdrs); - /* Size of section 0 is 0, so this works well if no unwind info. */ - mod->unwind_info = unwind_add_table(mod, - (void *)sechdrs[unwindex].sh_addr, - sechdrs[unwindex].sh_size); - /* Get rid of temporary copy */ vfree(hdr); @@ -2366,11 +2352,12 @@ sys_init_module(void __user *umod, /* Now it's a first class citizen! Wake up anyone waiting for it. */ mod->state = MODULE_STATE_LIVE; wake_up(&module_wq); + blocking_notifier_call_chain(&module_notify_list, + MODULE_STATE_LIVE, mod); mutex_lock(&module_mutex); /* Drop initial reference. */ module_put(mod); - unwind_remove_table(mod->unwind_info, 1); module_free(mod, mod->module_init); mod->module_init = NULL; mod->init_size = 0; @@ -2405,7 +2392,7 @@ static const char *get_ksymbol(struct module *mod, unsigned long nextval; /* At worse, next value is at end of module */ - if (within(addr, mod->module_init, mod->init_size)) + if (within_module_init(addr, mod)) nextval = (unsigned long)mod->module_init+mod->init_text_size; else nextval = (unsigned long)mod->module_core+mod->core_text_size; @@ -2453,8 +2440,8 @@ const char *module_address_lookup(unsigned long addr, preempt_disable(); list_for_each_entry_rcu(mod, &modules, list) { - if (within(addr, mod->module_init, mod->init_size) - || within(addr, mod->module_core, mod->core_size)) { + if (within_module_init(addr, mod) || + within_module_core(addr, mod)) { if (modname) *modname = mod->name; ret = get_ksymbol(mod, addr, size, offset); @@ -2476,8 +2463,8 @@ int lookup_module_symbol_name(unsigned long addr, char *symname) preempt_disable(); list_for_each_entry_rcu(mod, &modules, list) { - if (within(addr, mod->module_init, mod->init_size) || - within(addr, mod->module_core, mod->core_size)) { + if (within_module_init(addr, mod) || + within_module_core(addr, mod)) { const char *sym; sym = get_ksymbol(mod, addr, NULL, NULL); @@ -2500,8 +2487,8 @@ int lookup_module_symbol_attrs(unsigned long addr, unsigned long *size, preempt_disable(); list_for_each_entry_rcu(mod, &modules, list) { - if (within(addr, mod->module_init, mod->init_size) || - within(addr, mod->module_core, mod->core_size)) { + if (within_module_init(addr, mod) || + within_module_core(addr, mod)) { const char *sym; sym = get_ksymbol(mod, addr, size, offset); @@ -2720,7 +2707,7 @@ int is_module_address(unsigned long addr) preempt_disable(); list_for_each_entry_rcu(mod, &modules, list) { - if (within(addr, mod->module_core, mod->core_size)) { + if (within_module_core(addr, mod)) { preempt_enable(); return 1; } diff --git a/kernel/panic.c b/kernel/panic.c index 13f06349a786..2a2ff36ff44d 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -299,6 +299,8 @@ static int init_oops_id(void) { if (!oops_id) get_random_bytes(&oops_id, sizeof(oops_id)); + else + oops_id++; return 0; } diff --git a/kernel/profile.c b/kernel/profile.c index d18e2d2654f2..784933acf5b8 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -445,7 +445,6 @@ void profile_tick(int type) #ifdef CONFIG_PROC_FS #include <linux/proc_fs.h> #include <asm/uaccess.h> -#include <asm/ptrace.h> static int prof_cpu_mask_read_proc(char *page, char **start, off_t off, int count, int *eof, void *data) diff --git a/kernel/signal.c b/kernel/signal.c index 8e95855ff3cf..3152ac3b62e2 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -858,7 +858,8 @@ static int send_signal(int sig, struct siginfo *info, struct task_struct *t, q->info.si_signo = sig; q->info.si_errno = 0; q->info.si_code = SI_USER; - q->info.si_pid = task_pid_vnr(current); + q->info.si_pid = task_tgid_nr_ns(current, + task_active_pid_ns(t)); q->info.si_uid = current_uid(); break; case (unsigned long) SEND_SIG_PRIV: diff --git a/kernel/sys.c b/kernel/sys.c index d356d79e84ac..4a43617cd565 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -33,6 +33,7 @@ #include <linux/task_io_accounting_ops.h> #include <linux/seccomp.h> #include <linux/cpu.h> +#include <linux/ptrace.h> #include <linux/compat.h> #include <linux/syscalls.h> @@ -927,6 +928,7 @@ asmlinkage long sys_times(struct tms __user * tbuf) if (copy_to_user(tbuf, &tmp, sizeof(struct tms))) return -EFAULT; } + force_successful_syscall_return(); return (long) jiffies_64_to_clock_t(get_jiffies_64()); } diff --git a/kernel/sysctl.c b/kernel/sysctl.c index ff6d45c7626f..92f6e5bc3c24 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -87,10 +87,6 @@ extern int rcutorture_runnable; #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */ /* Constants used for minimum and maximum */ -#if defined(CONFIG_HIGHMEM) || defined(CONFIG_DETECT_SOFTLOCKUP) -static int one = 1; -#endif - #ifdef CONFIG_DETECT_SOFTLOCKUP static int sixty = 60; static int neg_one = -1; @@ -101,6 +97,7 @@ static int two = 2; #endif static int zero; +static int one = 1; static int one_hundred = 100; /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ @@ -952,12 +949,22 @@ static struct ctl_table vm_table[] = { .data = &dirty_background_ratio, .maxlen = sizeof(dirty_background_ratio), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, + .proc_handler = &dirty_background_ratio_handler, .strategy = &sysctl_intvec, .extra1 = &zero, .extra2 = &one_hundred, }, { + .ctl_name = CTL_UNNUMBERED, + .procname = "dirty_background_bytes", + .data = &dirty_background_bytes, + .maxlen = sizeof(dirty_background_bytes), + .mode = 0644, + .proc_handler = &dirty_background_bytes_handler, + .strategy = &sysctl_intvec, + .extra1 = &one, + }, + { .ctl_name = VM_DIRTY_RATIO, .procname = "dirty_ratio", .data = &vm_dirty_ratio, @@ -969,6 +976,16 @@ static struct ctl_table vm_table[] = { .extra2 = &one_hundred, }, { + .ctl_name = CTL_UNNUMBERED, + .procname = "dirty_bytes", + .data = &vm_dirty_bytes, + .maxlen = sizeof(vm_dirty_bytes), + .mode = 0644, + .proc_handler = &dirty_bytes_handler, + .strategy = &sysctl_intvec, + .extra1 = &one, + }, + { .procname = "dirty_writeback_centisecs", .data = &dirty_writeback_interval, .maxlen = sizeof(dirty_writeback_interval), diff --git a/kernel/test_kprobes.c b/kernel/test_kprobes.c index 06b6395b45b2..4f104515a19b 100644 --- a/kernel/test_kprobes.c +++ b/kernel/test_kprobes.c @@ -22,21 +22,11 @@ static u32 rand1, preh_val, posth_val, jph_val; static int errors, handler_errors, num_tests; +static u32 (*target)(u32 value); +static u32 (*target2)(u32 value); static noinline u32 kprobe_target(u32 value) { - /* - * gcc ignores noinline on some architectures unless we stuff - * sufficient lard into the function. The get_kprobe() here is - * just for that. - * - * NOTE: We aren't concerned about the correctness of get_kprobe() - * here; hence, this call is neither under !preempt nor with the - * kprobe_mutex held. This is fine(tm) - */ - if (get_kprobe((void *)0xdeadbeef)) - printk(KERN_INFO "Kprobe smoke test: probe on 0xdeadbeef!\n"); - return (value / div_factor); } @@ -74,7 +64,7 @@ static int test_kprobe(void) return ret; } - ret = kprobe_target(rand1); + ret = target(rand1); unregister_kprobe(&kp); if (preh_val == 0) { @@ -92,6 +82,84 @@ static int test_kprobe(void) return 0; } +static noinline u32 kprobe_target2(u32 value) +{ + return (value / div_factor) + 1; +} + +static int kp_pre_handler2(struct kprobe *p, struct pt_regs *regs) +{ + preh_val = (rand1 / div_factor) + 1; + return 0; +} + +static void kp_post_handler2(struct kprobe *p, struct pt_regs *regs, + unsigned long flags) +{ + if (preh_val != (rand1 / div_factor) + 1) { + handler_errors++; + printk(KERN_ERR "Kprobe smoke test failed: " + "incorrect value in post_handler2\n"); + } + posth_val = preh_val + div_factor; +} + +static struct kprobe kp2 = { + .symbol_name = "kprobe_target2", + .pre_handler = kp_pre_handler2, + .post_handler = kp_post_handler2 +}; + +static int test_kprobes(void) +{ + int ret; + struct kprobe *kps[2] = {&kp, &kp2}; + + kp.addr = 0; /* addr should be cleard for reusing kprobe. */ + ret = register_kprobes(kps, 2); + if (ret < 0) { + printk(KERN_ERR "Kprobe smoke test failed: " + "register_kprobes returned %d\n", ret); + return ret; + } + + preh_val = 0; + posth_val = 0; + ret = target(rand1); + + if (preh_val == 0) { + printk(KERN_ERR "Kprobe smoke test failed: " + "kprobe pre_handler not called\n"); + handler_errors++; + } + + if (posth_val == 0) { + printk(KERN_ERR "Kprobe smoke test failed: " + "kprobe post_handler not called\n"); + handler_errors++; + } + + preh_val = 0; + posth_val = 0; + ret = target2(rand1); + + if (preh_val == 0) { + printk(KERN_ERR "Kprobe smoke test failed: " + "kprobe pre_handler2 not called\n"); + handler_errors++; + } + + if (posth_val == 0) { + printk(KERN_ERR "Kprobe smoke test failed: " + "kprobe post_handler2 not called\n"); + handler_errors++; + } + + unregister_kprobes(kps, 2); + return 0; + +} + static u32 j_kprobe_target(u32 value) { if (value != rand1) { @@ -121,7 +189,7 @@ static int test_jprobe(void) return ret; } - ret = kprobe_target(rand1); + ret = target(rand1); unregister_jprobe(&jp); if (jph_val == 0) { printk(KERN_ERR "Kprobe smoke test failed: " @@ -132,6 +200,43 @@ static int test_jprobe(void) return 0; } +static struct jprobe jp2 = { + .entry = j_kprobe_target, + .kp.symbol_name = "kprobe_target2" +}; + +static int test_jprobes(void) +{ + int ret; + struct jprobe *jps[2] = {&jp, &jp2}; + + jp.kp.addr = 0; /* addr should be cleard for reusing kprobe. */ + ret = register_jprobes(jps, 2); + if (ret < 0) { + printk(KERN_ERR "Kprobe smoke test failed: " + "register_jprobes returned %d\n", ret); + return ret; + } + + jph_val = 0; + ret = target(rand1); + if (jph_val == 0) { + printk(KERN_ERR "Kprobe smoke test failed: " + "jprobe handler not called\n"); + handler_errors++; + } + + jph_val = 0; + ret = target2(rand1); + if (jph_val == 0) { + printk(KERN_ERR "Kprobe smoke test failed: " + "jprobe handler2 not called\n"); + handler_errors++; + } + unregister_jprobes(jps, 2); + + return 0; +} #ifdef CONFIG_KRETPROBES static u32 krph_val; @@ -177,7 +282,7 @@ static int test_kretprobe(void) return ret; } - ret = kprobe_target(rand1); + ret = target(rand1); unregister_kretprobe(&rp); if (krph_val != rand1) { printk(KERN_ERR "Kprobe smoke test failed: " @@ -187,12 +292,72 @@ static int test_kretprobe(void) return 0; } + +static int return_handler2(struct kretprobe_instance *ri, struct pt_regs *regs) +{ + unsigned long ret = regs_return_value(regs); + + if (ret != (rand1 / div_factor) + 1) { + handler_errors++; + printk(KERN_ERR "Kprobe smoke test failed: " + "incorrect value in kretprobe handler2\n"); + } + if (krph_val == 0) { + handler_errors++; + printk(KERN_ERR "Kprobe smoke test failed: " + "call to kretprobe entry handler failed\n"); + } + + krph_val = rand1; + return 0; +} + +static struct kretprobe rp2 = { + .handler = return_handler2, + .entry_handler = entry_handler, + .kp.symbol_name = "kprobe_target2" +}; + +static int test_kretprobes(void) +{ + int ret; + struct kretprobe *rps[2] = {&rp, &rp2}; + + rp.kp.addr = 0; /* addr should be cleard for reusing kprobe. */ + ret = register_kretprobes(rps, 2); + if (ret < 0) { + printk(KERN_ERR "Kprobe smoke test failed: " + "register_kretprobe returned %d\n", ret); + return ret; + } + + krph_val = 0; + ret = target(rand1); + if (krph_val != rand1) { + printk(KERN_ERR "Kprobe smoke test failed: " + "kretprobe handler not called\n"); + handler_errors++; + } + + krph_val = 0; + ret = target2(rand1); + if (krph_val != rand1) { + printk(KERN_ERR "Kprobe smoke test failed: " + "kretprobe handler2 not called\n"); + handler_errors++; + } + unregister_kretprobes(rps, 2); + return 0; +} #endif /* CONFIG_KRETPROBES */ int init_test_probes(void) { int ret; + target = kprobe_target; + target2 = kprobe_target2; + do { rand1 = random32(); } while (rand1 <= div_factor); @@ -204,15 +369,30 @@ int init_test_probes(void) errors++; num_tests++; + ret = test_kprobes(); + if (ret < 0) + errors++; + + num_tests++; ret = test_jprobe(); if (ret < 0) errors++; + num_tests++; + ret = test_jprobes(); + if (ret < 0) + errors++; + #ifdef CONFIG_KRETPROBES num_tests++; ret = test_kretprobe(); if (ret < 0) errors++; + + num_tests++; + ret = test_kretprobes(); + if (ret < 0) + errors++; #endif /* CONFIG_KRETPROBES */ if (errors) diff --git a/kernel/time.c b/kernel/time.c index d63a4336fad6..4886e3ce83a4 100644 --- a/kernel/time.c +++ b/kernel/time.c @@ -37,6 +37,7 @@ #include <linux/fs.h> #include <linux/slab.h> #include <linux/math64.h> +#include <linux/ptrace.h> #include <asm/uaccess.h> #include <asm/unistd.h> @@ -65,8 +66,9 @@ asmlinkage long sys_time(time_t __user * tloc) if (tloc) { if (put_user(i,tloc)) - i = -EFAULT; + return -EFAULT; } + force_successful_syscall_return(); return i; } diff --git a/kernel/tsacct.c b/kernel/tsacct.c index 2dc06ab35716..43f891b05a4b 100644 --- a/kernel/tsacct.c +++ b/kernel/tsacct.c @@ -92,8 +92,8 @@ void xacct_add_tsk(struct taskstats *stats, struct task_struct *p) mm = get_task_mm(p); if (mm) { /* adjust to KB unit */ - stats->hiwater_rss = mm->hiwater_rss * PAGE_SIZE / KB; - stats->hiwater_vm = mm->hiwater_vm * PAGE_SIZE / KB; + stats->hiwater_rss = get_mm_hiwater_rss(mm) * PAGE_SIZE / KB; + stats->hiwater_vm = get_mm_hiwater_vm(mm) * PAGE_SIZE / KB; mmput(mm); } stats->read_char = p->ioac.rchar; diff --git a/lib/bust_spinlocks.c b/lib/bust_spinlocks.c index 486da62b2b07..9681d54b95d1 100644 --- a/lib/bust_spinlocks.c +++ b/lib/bust_spinlocks.c @@ -12,6 +12,7 @@ #include <linux/tty.h> #include <linux/wait.h> #include <linux/vt_kern.h> +#include <linux/console.h> void __attribute__((weak)) bust_spinlocks(int yes) @@ -22,6 +23,7 @@ void __attribute__((weak)) bust_spinlocks(int yes) #ifdef CONFIG_VT unblank_screen(); #endif + console_unblank(); if (--oops_in_progress == 0) wake_up_klogd(); } diff --git a/lib/fault-inject.c b/lib/fault-inject.c index a50a311554cc..f97af55bdd96 100644 --- a/lib/fault-inject.c +++ b/lib/fault-inject.c @@ -6,7 +6,6 @@ #include <linux/fs.h> #include <linux/module.h> #include <linux/interrupt.h> -#include <linux/unwind.h> #include <linux/stacktrace.h> #include <linux/kallsyms.h> #include <linux/fault-inject.h> diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c index b255b939bc1b..a60bd8046095 100644 --- a/lib/percpu_counter.c +++ b/lib/percpu_counter.c @@ -9,10 +9,8 @@ #include <linux/cpu.h> #include <linux/module.h> -#ifdef CONFIG_HOTPLUG_CPU static LIST_HEAD(percpu_counters); static DEFINE_MUTEX(percpu_counters_lock); -#endif void percpu_counter_set(struct percpu_counter *fbc, s64 amount) { @@ -111,13 +109,24 @@ void percpu_counter_destroy(struct percpu_counter *fbc) } EXPORT_SYMBOL(percpu_counter_destroy); -#ifdef CONFIG_HOTPLUG_CPU +int percpu_counter_batch __read_mostly = 32; +EXPORT_SYMBOL(percpu_counter_batch); + +static void compute_batch_value(void) +{ + int nr = num_online_cpus(); + + percpu_counter_batch = max(32, nr*2); +} + static int __cpuinit percpu_counter_hotcpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) { +#ifdef CONFIG_HOTPLUG_CPU unsigned int cpu; struct percpu_counter *fbc; + compute_batch_value(); if (action != CPU_DEAD) return NOTIFY_OK; @@ -134,13 +143,14 @@ static int __cpuinit percpu_counter_hotcpu_callback(struct notifier_block *nb, spin_unlock_irqrestore(&fbc->lock, flags); } mutex_unlock(&percpu_counters_lock); +#endif return NOTIFY_OK; } static int __init percpu_counter_startup(void) { + compute_batch_value(); hotcpu_notifier(percpu_counter_hotcpu_callback, 0); return 0; } module_init(percpu_counter_startup); -#endif diff --git a/lib/prio_heap.c b/lib/prio_heap.c index 471944a54e23..a7af6f85eca8 100644 --- a/lib/prio_heap.c +++ b/lib/prio_heap.c @@ -31,7 +31,7 @@ void *heap_insert(struct ptr_heap *heap, void *p) if (heap->size < heap->max) { /* Heap insertion */ - int pos = heap->size++; + pos = heap->size++; while (pos > 0 && heap->gt(p, ptrs[(pos-1)/2])) { ptrs[pos] = ptrs[(pos-1)/2]; pos = (pos-1)/2; diff --git a/lib/proportions.c b/lib/proportions.c index 4f387a643d72..3fda810faf0d 100644 --- a/lib/proportions.c +++ b/lib/proportions.c @@ -147,6 +147,7 @@ out: * this is used to track the active references. */ static struct prop_global *prop_get_global(struct prop_descriptor *pd) +__acquires(RCU) { int index; @@ -160,6 +161,7 @@ static struct prop_global *prop_get_global(struct prop_descriptor *pd) } static void prop_put_global(struct prop_descriptor *pd, struct prop_global *pg) +__releases(RCU) { rcu_read_unlock(); } diff --git a/lib/radix-tree.c b/lib/radix-tree.c index be86b32bc874..8d3fb0bd1288 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -81,7 +81,7 @@ struct radix_tree_preload { int nr; struct radix_tree_node *nodes[RADIX_TREE_MAX_PATH]; }; -DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, }; +static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, }; static inline gfp_t root_gfp_mask(struct radix_tree_root *root) { diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 98d632277ca8..0fbd0121d91d 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -170,6 +170,8 @@ int strict_strtoul(const char *cp, unsigned int base, unsigned long *res) return -EINVAL; val = simple_strtoul(cp, &tail, base); + if (tail == cp) + return -EINVAL; if ((*tail == '\0') || ((len == (size_t)(tail - cp) + 1) && (*tail == '\n'))) { *res = val; @@ -241,6 +243,8 @@ int strict_strtoull(const char *cp, unsigned int base, unsigned long long *res) return -EINVAL; val = simple_strtoull(cp, &tail, base); + if (tail == cp) + return -EINVAL; if ((*tail == '\0') || ((len == (size_t)(tail - cp) + 1) && (*tail == '\n'))) { *res = val; diff --git a/mm/Kconfig b/mm/Kconfig index 5b5790f8a816..a5b77811fdf2 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -181,12 +181,6 @@ config MIGRATION example on NUMA systems to put pages nearer to the processors accessing the page. -config RESOURCES_64BIT - bool "64 bit Memory and IO resources (EXPERIMENTAL)" if (!64BIT && EXPERIMENTAL) - default 64BIT - help - This option allows memory and IO resources to be 64 bit. - config PHYS_ADDR_T_64BIT def_bool 64BIT || ARCH_PHYS_ADDR_T_64BIT diff --git a/mm/Makefile b/mm/Makefile index 51c27709cc7c..72255be57f89 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -9,7 +9,7 @@ mmu-$(CONFIG_MMU) := fremap.o highmem.o madvise.o memory.o mincore.o \ obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \ maccess.o page_alloc.o page-writeback.o pdflush.o \ - readahead.o swap.o truncate.o vmscan.o \ + readahead.o swap.o truncate.o vmscan.o shmem.o \ prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \ page_isolation.o mm_init.o $(mmu-y) @@ -21,9 +21,7 @@ obj-$(CONFIG_HUGETLBFS) += hugetlb.o obj-$(CONFIG_NUMA) += mempolicy.o obj-$(CONFIG_SPARSEMEM) += sparse.o obj-$(CONFIG_SPARSEMEM_VMEMMAP) += sparse-vmemmap.o -obj-$(CONFIG_SHMEM) += shmem.o obj-$(CONFIG_TMPFS_POSIX_ACL) += shmem_acl.o -obj-$(CONFIG_TINY_SHMEM) += tiny-shmem.o obj-$(CONFIG_SLOB) += slob.o obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o obj-$(CONFIG_SLAB) += slab.o diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 801c08b046e6..6f80beddd8a4 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -24,9 +24,9 @@ static void bdi_debug_init(void) static int bdi_debug_stats_show(struct seq_file *m, void *v) { struct backing_dev_info *bdi = m->private; - long background_thresh; - long dirty_thresh; - long bdi_thresh; + unsigned long background_thresh; + unsigned long dirty_thresh; + unsigned long bdi_thresh; get_dirty_limits(&background_thresh, &dirty_thresh, &bdi_thresh, bdi); diff --git a/mm/bootmem.c b/mm/bootmem.c index ac5a891f142a..51a0ccf61e0e 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -435,6 +435,10 @@ static void * __init alloc_bootmem_core(struct bootmem_data *bdata, unsigned long fallback = 0; unsigned long min, max, start, sidx, midx, step; + bdebug("nid=%td size=%lx [%lu pages] align=%lx goal=%lx limit=%lx\n", + bdata - bootmem_node_data, size, PAGE_ALIGN(size) >> PAGE_SHIFT, + align, goal, limit); + BUG_ON(!size); BUG_ON(align & (align - 1)); BUG_ON(limit && goal + size > limit); @@ -442,10 +446,6 @@ static void * __init alloc_bootmem_core(struct bootmem_data *bdata, if (!bdata->node_bootmem_map) return NULL; - bdebug("nid=%td size=%lx [%lu pages] align=%lx goal=%lx limit=%lx\n", - bdata - bootmem_node_data, size, PAGE_ALIGN(size) >> PAGE_SHIFT, - align, goal, limit); - min = bdata->node_min_pfn; max = bdata->node_low_pfn; diff --git a/mm/filemap.c b/mm/filemap.c index f5769b4dc075..2f55a1e2baf7 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -210,7 +210,7 @@ int __filemap_fdatawrite_range(struct address_space *mapping, loff_t start, int ret; struct writeback_control wbc = { .sync_mode = sync_mode, - .nr_to_write = mapping->nrpages * 2, + .nr_to_write = LONG_MAX, .range_start = start, .range_end = end, }; @@ -741,7 +741,14 @@ repeat: page = __page_cache_alloc(gfp_mask); if (!page) return NULL; - err = add_to_page_cache_lru(page, mapping, index, gfp_mask); + /* + * We want a regular kernel memory (not highmem or DMA etc) + * allocation for the radix tree nodes, but we need to honour + * the context-specific requirements the caller has asked for. + * GFP_RECLAIM_MASK collects those requirements. + */ + err = add_to_page_cache_lru(page, mapping, index, + (gfp_mask & GFP_RECLAIM_MASK)); if (unlikely(err)) { page_cache_release(page); page = NULL; @@ -950,7 +957,7 @@ grab_cache_page_nowait(struct address_space *mapping, pgoff_t index) return NULL; } page = __page_cache_alloc(mapping_gfp_mask(mapping) & ~__GFP_FS); - if (page && add_to_page_cache_lru(page, mapping, index, GFP_KERNEL)) { + if (page && add_to_page_cache_lru(page, mapping, index, GFP_NOFS)) { page_cache_release(page); page = NULL; } @@ -1317,7 +1324,8 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov, goto out; /* skip atime */ size = i_size_read(inode); if (pos < size) { - retval = filemap_write_and_wait(mapping); + retval = filemap_write_and_wait_range(mapping, pos, + pos + iov_length(iov, nr_segs) - 1); if (!retval) { retval = mapping->a_ops->direct_IO(READ, iocb, iov, pos, nr_segs); @@ -1530,7 +1538,6 @@ retry_find: /* * Found the page and have a reference on it. */ - mark_page_accessed(page); ra->prev_pos = (loff_t)page->index << PAGE_CACHE_SHIFT; vmf->page = page; return ret | VM_FAULT_LOCKED; @@ -2060,18 +2067,10 @@ generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov, if (count != ocount) *nr_segs = iov_shorten((struct iovec *)iov, *nr_segs, count); - /* - * Unmap all mmappings of the file up-front. - * - * This will cause any pte dirty bits to be propagated into the - * pageframes for the subsequent filemap_write_and_wait(). - */ write_len = iov_length(iov, *nr_segs); end = (pos + write_len - 1) >> PAGE_CACHE_SHIFT; - if (mapping_mapped(mapping)) - unmap_mapping_range(mapping, pos, write_len, 0); - written = filemap_write_and_wait(mapping); + written = filemap_write_and_wait_range(mapping, pos, pos + write_len - 1); if (written) goto out; @@ -2291,7 +2290,8 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov, * the file data here, to try to honour O_DIRECT expectations. */ if (unlikely(file->f_flags & O_DIRECT) && written) - status = filemap_write_and_wait(mapping); + status = filemap_write_and_wait_range(mapping, + pos, pos + written - 1); return written ? written : status; } diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c index b5167dfb2f2d..0c04615651b7 100644 --- a/mm/filemap_xip.c +++ b/mm/filemap_xip.c @@ -193,7 +193,7 @@ retry: /* Nuke the page table entry. */ flush_cache_page(vma, address, pte_pfn(*pte)); pteval = ptep_clear_flush_notify(vma, address, pte); - page_remove_rmap(page, vma); + page_remove_rmap(page); dec_mm_counter(mm, file_rss); BUG_ON(pte_dirty(pteval)); pte_unmap_unlock(pte, ptl); diff --git a/mm/fremap.c b/mm/fremap.c index 7d12ca70ef7b..62d5bbda921a 100644 --- a/mm/fremap.c +++ b/mm/fremap.c @@ -37,7 +37,7 @@ static void zap_pte(struct mm_struct *mm, struct vm_area_struct *vma, if (page) { if (pte_dirty(pte)) set_page_dirty(page); - page_remove_rmap(page, vma); + page_remove_rmap(page); page_cache_release(page); update_hiwater_rss(mm); dec_mm_counter(mm, file_rss); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 6058b53dcb89..618e98304080 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -220,6 +220,35 @@ static pgoff_t vma_hugecache_offset(struct hstate *h, } /* + * Return the size of the pages allocated when backing a VMA. In the majority + * cases this will be same size as used by the page table entries. + */ +unsigned long vma_kernel_pagesize(struct vm_area_struct *vma) +{ + struct hstate *hstate; + + if (!is_vm_hugetlb_page(vma)) + return PAGE_SIZE; + + hstate = hstate_vma(vma); + + return 1UL << (hstate->order + PAGE_SHIFT); +} + +/* + * Return the page size being used by the MMU to back a VMA. In the majority + * of cases, the page size used by the kernel matches the MMU size. On + * architectures where it differs, an architecture-specific version of this + * function is required. + */ +#ifndef vma_mmu_pagesize +unsigned long vma_mmu_pagesize(struct vm_area_struct *vma) +{ + return vma_kernel_pagesize(vma); +} +#endif + +/* * Flags for MAP_PRIVATE reservations. These are stored in the bottom * bits of the reservation map pointer, which are always clear due to * alignment. @@ -371,8 +400,10 @@ static void clear_huge_page(struct page *page, { int i; - if (unlikely(sz > MAX_ORDER_NR_PAGES)) - return clear_gigantic_page(page, addr, sz); + if (unlikely(sz > MAX_ORDER_NR_PAGES)) { + clear_gigantic_page(page, addr, sz); + return; + } might_sleep(); for (i = 0; i < sz/PAGE_SIZE; i++) { @@ -404,8 +435,10 @@ static void copy_huge_page(struct page *dst, struct page *src, int i; struct hstate *h = hstate_vma(vma); - if (unlikely(pages_per_huge_page(h) > MAX_ORDER_NR_PAGES)) - return copy_gigantic_page(dst, src, addr, vma); + if (unlikely(pages_per_huge_page(h) > MAX_ORDER_NR_PAGES)) { + copy_gigantic_page(dst, src, addr, vma); + return; + } might_sleep(); for (i = 0; i < pages_per_huge_page(h); i++) { @@ -972,7 +1005,7 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma, return page; } -__attribute__((weak)) int alloc_bootmem_huge_page(struct hstate *h) +int __weak alloc_bootmem_huge_page(struct hstate *h) { struct huge_bootmem_page *m; int nr_nodes = nodes_weight(node_online_map); @@ -991,8 +1024,7 @@ __attribute__((weak)) int alloc_bootmem_huge_page(struct hstate *h) * puts them into the mem_map). */ m = addr; - if (m) - goto found; + goto found; } hstate_next_node(h); nr_nodes--; diff --git a/mm/internal.h b/mm/internal.h index 13333bc2eb68..478223b73a2a 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -49,6 +49,7 @@ extern void putback_lru_page(struct page *page); /* * in mm/page_alloc.c */ +extern unsigned long highest_memmap_pfn; extern void __free_pages_bootmem(struct page *page, unsigned int order); /* @@ -275,6 +276,7 @@ static inline void mminit_validate_memmodel_limits(unsigned long *start_pfn, #define GUP_FLAGS_WRITE 0x1 #define GUP_FLAGS_FORCE 0x2 #define GUP_FLAGS_IGNORE_VMA_PERMISSIONS 0x4 +#define GUP_FLAGS_IGNORE_SIGKILL 0x8 int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, int len, int flags, diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 866dcc7eeb0c..51ee96545579 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -779,7 +779,8 @@ int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask) return 0; } -int mem_cgroup_resize_limit(struct mem_cgroup *memcg, unsigned long long val) +static int mem_cgroup_resize_limit(struct mem_cgroup *memcg, + unsigned long long val) { int retry_count = MEM_CGROUP_RECLAIM_RETRIES; diff --git a/mm/memory.c b/mm/memory.c index 7b9db658aca2..3f8fa06b963b 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -52,6 +52,9 @@ #include <linux/writeback.h> #include <linux/memcontrol.h> #include <linux/mmu_notifier.h> +#include <linux/kallsyms.h> +#include <linux/swapops.h> +#include <linux/elf.h> #include <asm/pgalloc.h> #include <asm/uaccess.h> @@ -59,9 +62,6 @@ #include <asm/tlbflush.h> #include <asm/pgtable.h> -#include <linux/swapops.h> -#include <linux/elf.h> - #include "internal.h" #ifndef CONFIG_NEED_MULTIPLE_NODES @@ -375,15 +375,65 @@ static inline void add_mm_rss(struct mm_struct *mm, int file_rss, int anon_rss) * * The calling function must still handle the error. */ -static void print_bad_pte(struct vm_area_struct *vma, pte_t pte, - unsigned long vaddr) -{ - printk(KERN_ERR "Bad pte = %08llx, process = %s, " - "vm_flags = %lx, vaddr = %lx\n", - (long long)pte_val(pte), - (vma->vm_mm == current->mm ? current->comm : "???"), - vma->vm_flags, vaddr); +static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr, + pte_t pte, struct page *page) +{ + pgd_t *pgd = pgd_offset(vma->vm_mm, addr); + pud_t *pud = pud_offset(pgd, addr); + pmd_t *pmd = pmd_offset(pud, addr); + struct address_space *mapping; + pgoff_t index; + static unsigned long resume; + static unsigned long nr_shown; + static unsigned long nr_unshown; + + /* + * Allow a burst of 60 reports, then keep quiet for that minute; + * or allow a steady drip of one report per second. + */ + if (nr_shown == 60) { + if (time_before(jiffies, resume)) { + nr_unshown++; + return; + } + if (nr_unshown) { + printk(KERN_ALERT + "BUG: Bad page map: %lu messages suppressed\n", + nr_unshown); + nr_unshown = 0; + } + nr_shown = 0; + } + if (nr_shown++ == 0) + resume = jiffies + 60 * HZ; + + mapping = vma->vm_file ? vma->vm_file->f_mapping : NULL; + index = linear_page_index(vma, addr); + + printk(KERN_ALERT + "BUG: Bad page map in process %s pte:%08llx pmd:%08llx\n", + current->comm, + (long long)pte_val(pte), (long long)pmd_val(*pmd)); + if (page) { + printk(KERN_ALERT + "page:%p flags:%p count:%d mapcount:%d mapping:%p index:%lx\n", + page, (void *)page->flags, page_count(page), + page_mapcount(page), page->mapping, page->index); + } + printk(KERN_ALERT + "addr:%p vm_flags:%08lx anon_vma:%p mapping:%p index:%lx\n", + (void *)addr, vma->vm_flags, vma->anon_vma, mapping, index); + /* + * Choose text because data symbols depend on CONFIG_KALLSYMS_ALL=y + */ + if (vma->vm_ops) + print_symbol(KERN_ALERT "vma->vm_ops->fault: %s\n", + (unsigned long)vma->vm_ops->fault); + if (vma->vm_file && vma->vm_file->f_op) + print_symbol(KERN_ALERT "vma->vm_file->f_op->mmap: %s\n", + (unsigned long)vma->vm_file->f_op->mmap); dump_stack(); + add_taint(TAINT_BAD_PAGE); } static inline int is_cow_mapping(unsigned int flags) @@ -441,21 +491,18 @@ static inline int is_cow_mapping(unsigned int flags) struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, pte_t pte) { - unsigned long pfn; + unsigned long pfn = pte_pfn(pte); if (HAVE_PTE_SPECIAL) { - if (likely(!pte_special(pte))) { - VM_BUG_ON(!pfn_valid(pte_pfn(pte))); - return pte_page(pte); - } - VM_BUG_ON(!(vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP))); + if (likely(!pte_special(pte))) + goto check_pfn; + if (!(vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP))) + print_bad_pte(vma, addr, pte, NULL); return NULL; } /* !HAVE_PTE_SPECIAL case follows: */ - pfn = pte_pfn(pte); - if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) { if (vma->vm_flags & VM_MIXEDMAP) { if (!pfn_valid(pfn)) @@ -471,11 +518,14 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, } } - VM_BUG_ON(!pfn_valid(pfn)); +check_pfn: + if (unlikely(pfn > highest_memmap_pfn)) { + print_bad_pte(vma, addr, pte, NULL); + return NULL; + } /* * NOTE! We still have PageReserved() pages in the page tables. - * * eg. VDSO mappings can cause them to exist. */ out: @@ -767,11 +817,14 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, else { if (pte_dirty(ptent)) set_page_dirty(page); - if (pte_young(ptent)) - SetPageReferenced(page); + if (pte_young(ptent) && + likely(!VM_SequentialReadHint(vma))) + mark_page_accessed(page); file_rss--; } - page_remove_rmap(page, vma); + page_remove_rmap(page); + if (unlikely(page_mapcount(page) < 0)) + print_bad_pte(vma, addr, ptent, page); tlb_remove_page(tlb, page); continue; } @@ -781,8 +834,12 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, */ if (unlikely(details)) continue; - if (!pte_file(ptent)) - free_swap_and_cache(pte_to_swp_entry(ptent)); + if (pte_file(ptent)) { + if (unlikely(!(vma->vm_flags & VM_NONLINEAR))) + print_bad_pte(vma, addr, ptent, NULL); + } else if + (unlikely(!free_swap_and_cache(pte_to_swp_entry(ptent)))) + print_bad_pte(vma, addr, ptent, NULL); pte_clear_not_present_full(mm, addr, pte, tlb->fullmm); } while (pte++, addr += PAGE_SIZE, (addr != end && *zap_work > 0)); @@ -1153,6 +1210,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, int write = !!(flags & GUP_FLAGS_WRITE); int force = !!(flags & GUP_FLAGS_FORCE); int ignore = !!(flags & GUP_FLAGS_IGNORE_VMA_PERMISSIONS); + int ignore_sigkill = !!(flags & GUP_FLAGS_IGNORE_SIGKILL); if (len <= 0) return 0; @@ -1231,12 +1289,15 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, struct page *page; /* - * If tsk is ooming, cut off its access to large memory - * allocations. It has a pending SIGKILL, but it can't - * be processed until returning to user space. + * If we have a pending SIGKILL, don't keep faulting + * pages and potentially allocating memory, unless + * current is handling munlock--e.g., on exit. In + * that case, we are not allocating memory. Rather, + * we're only unlocking already resident/mapped pages. */ - if (unlikely(test_tsk_thread_flag(tsk, TIF_MEMDIE))) - return i ? i : -ENOMEM; + if (unlikely(!ignore_sigkill && + fatal_signal_pending(current))) + return i ? i : -ERESTARTSYS; if (write) foll_flags |= FOLL_WRITE; @@ -1263,9 +1324,15 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, * do_wp_page has broken COW when necessary, * even if maybe_mkwrite decided not to set * pte_write. We can thus safely do subsequent - * page lookups as if they were reads. + * page lookups as if they were reads. But only + * do so when looping for pte_write is futile: + * in some cases userspace may also be wanting + * to write to the gotten user page, which a + * read fault here might prevent (a readonly + * page might get reCOWed by userspace write). */ - if (ret & VM_FAULT_WRITE) + if ((ret & VM_FAULT_WRITE) && + !(vma->vm_flags & VM_WRITE)) foll_flags &= ~FOLL_WRITE; cond_resched(); @@ -1644,6 +1711,8 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd, BUG_ON(pmd_huge(*pmd)); + arch_enter_lazy_mmu_mode(); + token = pmd_pgtable(*pmd); do { @@ -1652,6 +1721,8 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd, break; } while (pte++, addr += PAGE_SIZE, addr != end); + arch_leave_lazy_mmu_mode(); + if (mm != &init_mm) pte_unmap_unlock(pte-1, ptl); return err; @@ -1837,10 +1908,21 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, * not dirty accountable. */ if (PageAnon(old_page)) { - if (trylock_page(old_page)) { - reuse = can_share_swap_page(old_page); - unlock_page(old_page); + if (!trylock_page(old_page)) { + page_cache_get(old_page); + pte_unmap_unlock(page_table, ptl); + lock_page(old_page); + page_table = pte_offset_map_lock(mm, pmd, address, + &ptl); + if (!pte_same(*page_table, orig_pte)) { + unlock_page(old_page); + page_cache_release(old_page); + goto unlock; + } + page_cache_release(old_page); } + reuse = reuse_swap_page(old_page); + unlock_page(old_page); } else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) == (VM_WRITE|VM_SHARED))) { /* @@ -1943,11 +2025,7 @@ gotten: * thread doing COW. */ ptep_clear_flush_notify(vma, address, page_table); - SetPageSwapBacked(new_page); - lru_cache_add_active_or_unevictable(new_page, vma); page_add_new_anon_rmap(new_page, vma, address); - -//TODO: is this safe? do_anonymous_page() does it this way. set_pte_at(mm, address, page_table, entry); update_mmu_cache(vma, address, entry); if (old_page) { @@ -1973,7 +2051,7 @@ gotten: * mapcount is visible. So transitively, TLBs to * old page will be flushed before it can be reused. */ - page_remove_rmap(old_page, vma); + page_remove_rmap(old_page); } /* Free the old page.. */ @@ -2374,7 +2452,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, inc_mm_counter(mm, anon_rss); pte = mk_pte(page, vma->vm_page_prot); - if (write_access && can_share_swap_page(page)) { + if (write_access && reuse_swap_page(page)) { pte = maybe_mkwrite(pte_mkdirty(pte), vma); write_access = 0; } @@ -2385,7 +2463,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, swap_free(entry); if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page)) - remove_exclusive_swap_page(page); + try_to_free_swap(page); unlock_page(page); if (write_access) { @@ -2442,8 +2520,6 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, if (!pte_none(*page_table)) goto release; inc_mm_counter(mm, anon_rss); - SetPageSwapBacked(page); - lru_cache_add_active_or_unevictable(page, vma); page_add_new_anon_rmap(page, vma, address); set_pte_at(mm, address, page_table, entry); @@ -2591,8 +2667,6 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, entry = maybe_mkwrite(pte_mkdirty(entry), vma); if (anon) { inc_mm_counter(mm, anon_rss); - SetPageSwapBacked(page); - lru_cache_add_active_or_unevictable(page, vma); page_add_new_anon_rmap(page, vma, address); } else { inc_mm_counter(mm, file_rss); @@ -2602,7 +2676,6 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, get_page(dirty_page); } } -//TODO: is this safe? do_anonymous_page() does it this way. set_pte_at(mm, address, page_table, entry); /* no need to invalidate: a not-present page won't be cached */ @@ -2666,12 +2739,11 @@ static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma, if (!pte_unmap_same(mm, pmd, page_table, orig_pte)) return 0; - if (unlikely(!(vma->vm_flags & VM_NONLINEAR) || - !(vma->vm_flags & VM_CAN_NONLINEAR))) { + if (unlikely(!(vma->vm_flags & VM_NONLINEAR))) { /* * Page table corrupted: show pte and kill process. */ - print_bad_pte(vma, orig_pte, address); + print_bad_pte(vma, address, orig_pte, NULL); return VM_FAULT_OOM; } @@ -2953,7 +3025,7 @@ int generic_access_phys(struct vm_area_struct *vma, unsigned long addr, { resource_size_t phys_addr; unsigned long prot = 0; - void *maddr; + void __iomem *maddr; int offset = addr & (PAGE_SIZE-1); if (follow_phys(vma, addr, write, &prot, &phys_addr)) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index b17371185468..c083cf5fd6df 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -216,7 +216,8 @@ static int __meminit __add_zone(struct zone *zone, unsigned long phys_start_pfn) return 0; } -static int __meminit __add_section(struct zone *zone, unsigned long phys_start_pfn) +static int __meminit __add_section(int nid, struct zone *zone, + unsigned long phys_start_pfn) { int nr_pages = PAGES_PER_SECTION; int ret; @@ -234,7 +235,7 @@ static int __meminit __add_section(struct zone *zone, unsigned long phys_start_p if (ret < 0) return ret; - return register_new_memory(__pfn_to_section(phys_start_pfn)); + return register_new_memory(nid, __pfn_to_section(phys_start_pfn)); } #ifdef CONFIG_SPARSEMEM_VMEMMAP @@ -273,8 +274,8 @@ static int __remove_section(struct zone *zone, struct mem_section *ms) * call this function after deciding the zone to which to * add the new pages. */ -int __ref __add_pages(struct zone *zone, unsigned long phys_start_pfn, - unsigned long nr_pages) +int __ref __add_pages(int nid, struct zone *zone, unsigned long phys_start_pfn, + unsigned long nr_pages) { unsigned long i; int err = 0; @@ -284,7 +285,7 @@ int __ref __add_pages(struct zone *zone, unsigned long phys_start_pfn, end_sec = pfn_to_section_nr(phys_start_pfn + nr_pages - 1); for (i = start_sec; i <= end_sec; i++) { - err = __add_section(zone, i << PFN_SECTION_SHIFT); + err = __add_section(nid, zone, i << PFN_SECTION_SHIFT); /* * EEXIST is finally dealt with by ioresource collision @@ -626,15 +627,12 @@ int scan_lru_pages(unsigned long start, unsigned long end) } static struct page * -hotremove_migrate_alloc(struct page *page, - unsigned long private, - int **x) +hotremove_migrate_alloc(struct page *page, unsigned long private, int **x) { - /* This should be improoooooved!! */ - return alloc_page(GFP_HIGHUSER_PAGECACHE); + /* This should be improooooved!! */ + return alloc_page(GFP_HIGHUSER_MOVABLE); } - #define NR_OFFLINE_AT_ONCE_PAGES (256) static int do_migrate_range(unsigned long start_pfn, unsigned long end_pfn) diff --git a/mm/migrate.c b/mm/migrate.c index 21631ab8c08b..55373983c9c6 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -300,12 +300,10 @@ static int migrate_page_move_mapping(struct address_space *mapping, * Now we know that no one else is looking at the page. */ get_page(newpage); /* add cache reference */ -#ifdef CONFIG_SWAP if (PageSwapCache(page)) { SetPageSwapCache(newpage); set_page_private(newpage, page_private(page)); } -#endif radix_tree_replace_slot(pslot, newpage); @@ -373,9 +371,7 @@ static void migrate_page_copy(struct page *newpage, struct page *page) mlock_migrate_page(newpage, page); -#ifdef CONFIG_SWAP ClearPageSwapCache(page); -#endif ClearPagePrivate(page); set_page_private(page, 0); /* page->mapping contains a flag for PageAnon() */ @@ -848,12 +844,6 @@ static int do_move_page_to_node_array(struct mm_struct *mm, struct vm_area_struct *vma; struct page *page; - /* - * A valid page pointer that will not match any of the - * pages that will be moved. - */ - pp->page = ZERO_PAGE(0); - err = -EFAULT; vma = find_vma(mm, pp->addr); if (!vma || !vma_migratable(vma)) @@ -919,41 +909,43 @@ static int do_pages_move(struct mm_struct *mm, struct task_struct *task, const int __user *nodes, int __user *status, int flags) { - struct page_to_node *pm = NULL; + struct page_to_node *pm; nodemask_t task_nodes; - int err = 0; - int i; + unsigned long chunk_nr_pages; + unsigned long chunk_start; + int err; task_nodes = cpuset_mems_allowed(task); - /* Limit nr_pages so that the multiplication may not overflow */ - if (nr_pages >= ULONG_MAX / sizeof(struct page_to_node) - 1) { - err = -E2BIG; - goto out; - } - - pm = vmalloc((nr_pages + 1) * sizeof(struct page_to_node)); - if (!pm) { - err = -ENOMEM; + err = -ENOMEM; + pm = (struct page_to_node *)__get_free_page(GFP_KERNEL); + if (!pm) goto out; - } - /* - * Get parameters from user space and initialize the pm - * array. Return various errors if the user did something wrong. + * Store a chunk of page_to_node array in a page, + * but keep the last one as a marker */ - for (i = 0; i < nr_pages; i++) { - const void __user *p; + chunk_nr_pages = (PAGE_SIZE / sizeof(struct page_to_node)) - 1; - err = -EFAULT; - if (get_user(p, pages + i)) - goto out_pm; + for (chunk_start = 0; + chunk_start < nr_pages; + chunk_start += chunk_nr_pages) { + int j; - pm[i].addr = (unsigned long)p; - if (nodes) { + if (chunk_start + chunk_nr_pages > nr_pages) + chunk_nr_pages = nr_pages - chunk_start; + + /* fill the chunk pm with addrs and nodes from user-space */ + for (j = 0; j < chunk_nr_pages; j++) { + const void __user *p; int node; - if (get_user(node, nodes + i)) + err = -EFAULT; + if (get_user(p, pages + j + chunk_start)) + goto out_pm; + pm[j].addr = (unsigned long) p; + + if (get_user(node, nodes + j + chunk_start)) goto out_pm; err = -ENODEV; @@ -964,22 +956,29 @@ static int do_pages_move(struct mm_struct *mm, struct task_struct *task, if (!node_isset(node, task_nodes)) goto out_pm; - pm[i].node = node; - } else - pm[i].node = 0; /* anything to not match MAX_NUMNODES */ - } - /* End marker */ - pm[nr_pages].node = MAX_NUMNODES; + pm[j].node = node; + } + + /* End marker for this chunk */ + pm[chunk_nr_pages].node = MAX_NUMNODES; + + /* Migrate this chunk */ + err = do_move_page_to_node_array(mm, pm, + flags & MPOL_MF_MOVE_ALL); + if (err < 0) + goto out_pm; - err = do_move_page_to_node_array(mm, pm, flags & MPOL_MF_MOVE_ALL); - if (err >= 0) /* Return status information */ - for (i = 0; i < nr_pages; i++) - if (put_user(pm[i].status, status + i)) + for (j = 0; j < chunk_nr_pages; j++) + if (put_user(pm[j].status, status + j + chunk_start)) { err = -EFAULT; + goto out_pm; + } + } + err = 0; out_pm: - vfree(pm); + free_page((unsigned long)pm); out: return err; } diff --git a/mm/mlock.c b/mm/mlock.c index 3035a56e7616..e125156c664e 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -173,12 +173,13 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma, (atomic_read(&mm->mm_users) != 0)); /* - * mlock: don't page populate if page has PROT_NONE permission. - * munlock: the pages always do munlock althrough - * its has PROT_NONE permission. + * mlock: don't page populate if vma has PROT_NONE permission. + * munlock: always do munlock although the vma has PROT_NONE + * permission, or SIGKILL is pending. */ if (!mlock) - gup_flags |= GUP_FLAGS_IGNORE_VMA_PERMISSIONS; + gup_flags |= GUP_FLAGS_IGNORE_VMA_PERMISSIONS | + GUP_FLAGS_IGNORE_SIGKILL; if (vma->vm_flags & VM_WRITE) gup_flags |= GUP_FLAGS_WRITE; diff --git a/mm/mmap.c b/mm/mmap.c index 2c778fcfd9bd..a910c045cfd4 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -413,7 +413,7 @@ void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma, static void __vma_link_file(struct vm_area_struct *vma) { - struct file * file; + struct file *file; file = vma->vm_file; if (file) { @@ -474,11 +474,10 @@ static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma, * insert vm structure into list and rbtree and anon_vma, * but it has already been inserted into prio_tree earlier. */ -static void -__insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma) +static void __insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma) { - struct vm_area_struct * __vma, * prev; - struct rb_node ** rb_link, * rb_parent; + struct vm_area_struct *__vma, *prev; + struct rb_node **rb_link, *rb_parent; __vma = find_vma_prepare(mm, vma->vm_start,&prev, &rb_link, &rb_parent); BUG_ON(__vma && __vma->vm_start < vma->vm_end); @@ -908,7 +907,7 @@ void vm_stat_account(struct mm_struct *mm, unsigned long flags, * The caller must hold down_write(current->mm->mmap_sem). */ -unsigned long do_mmap_pgoff(struct file * file, unsigned long addr, +unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff) { @@ -1464,7 +1463,7 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, EXPORT_SYMBOL(get_unmapped_area); /* Look up the first VMA which satisfies addr < vm_end, NULL if none. */ -struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr) +struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr) { struct vm_area_struct *vma = NULL; @@ -1507,7 +1506,7 @@ find_vma_prev(struct mm_struct *mm, unsigned long addr, struct vm_area_struct **pprev) { struct vm_area_struct *vma = NULL, *prev = NULL; - struct rb_node * rb_node; + struct rb_node *rb_node; if (!mm) goto out; @@ -1541,7 +1540,7 @@ out: * update accounting. This is shared with both the * grow-up and grow-down cases. */ -static int acct_stack_growth(struct vm_area_struct * vma, unsigned long size, unsigned long grow) +static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, unsigned long grow) { struct mm_struct *mm = vma->vm_mm; struct rlimit *rlim = current->signal->rlim; @@ -2091,6 +2090,9 @@ void exit_mmap(struct mm_struct *mm) arch_exit_mmap(mm); mmu_notifier_release(mm); + if (!mm->mmap) /* Can happen if dup_mmap() received an OOM */ + return; + if (mm->locked_vm) { vma = mm->mmap; while (vma) { @@ -2103,7 +2105,7 @@ void exit_mmap(struct mm_struct *mm) lru_add_drain(); flush_cache_mm(mm); tlb = tlb_gather_mmu(mm, 1); - /* Don't update_hiwater_rss(mm) here, do_exit already did */ + /* update_hiwater_rss(mm) here? but nobody should be looking */ /* Use -1 here to ensure all VMAs in the mm are unmapped */ end = unmap_vmas(&tlb, vma, 0, -1, &nr_accounted, NULL); vm_unacct_memory(nr_accounted); diff --git a/mm/mprotect.c b/mm/mprotect.c index cfb4c4852062..d0f6e7ce09f1 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -22,6 +22,7 @@ #include <linux/swap.h> #include <linux/swapops.h> #include <linux/mmu_notifier.h> +#include <linux/migrate.h> #include <asm/uaccess.h> #include <asm/pgtable.h> #include <asm/cacheflush.h> @@ -59,8 +60,7 @@ static void change_pte_range(struct mm_struct *mm, pmd_t *pmd, ptent = pte_mkwrite(ptent); ptep_modify_prot_commit(mm, addr, pte, ptent); -#ifdef CONFIG_MIGRATION - } else if (!pte_file(oldpte)) { + } else if (PAGE_MIGRATION && !pte_file(oldpte)) { swp_entry_t entry = pte_to_swp_entry(oldpte); if (is_write_migration_entry(entry)) { @@ -72,9 +72,7 @@ static void change_pte_range(struct mm_struct *mm, pmd_t *pmd, set_pte_at(mm, addr, pte, swp_entry_to_pte(entry)); } -#endif } - } while (pte++, addr += PAGE_SIZE, addr != end); arch_leave_lazy_mmu_mode(); pte_unmap_unlock(pte - 1, ptl); diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 558f9afe6e4e..6b9e758c98a5 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -31,7 +31,7 @@ int sysctl_panic_on_oom; int sysctl_oom_kill_allocating_task; int sysctl_oom_dump_tasks; -static DEFINE_SPINLOCK(zone_scan_mutex); +static DEFINE_SPINLOCK(zone_scan_lock); /* #define DEBUG */ /** @@ -392,6 +392,9 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, printk(KERN_WARNING "%s invoked oom-killer: " "gfp_mask=0x%x, order=%d, oomkilladj=%d\n", current->comm, gfp_mask, order, current->oomkilladj); + task_lock(current); + cpuset_print_task_mems_allowed(current); + task_unlock(current); dump_stack(); show_mem(); if (sysctl_oom_dump_tasks) @@ -470,7 +473,7 @@ int try_set_zone_oom(struct zonelist *zonelist, gfp_t gfp_mask) struct zone *zone; int ret = 1; - spin_lock(&zone_scan_mutex); + spin_lock(&zone_scan_lock); for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) { if (zone_is_oom_locked(zone)) { ret = 0; @@ -480,7 +483,7 @@ int try_set_zone_oom(struct zonelist *zonelist, gfp_t gfp_mask) for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) { /* - * Lock each zone in the zonelist under zone_scan_mutex so a + * Lock each zone in the zonelist under zone_scan_lock so a * parallel invocation of try_set_zone_oom() doesn't succeed * when it shouldn't. */ @@ -488,7 +491,7 @@ int try_set_zone_oom(struct zonelist *zonelist, gfp_t gfp_mask) } out: - spin_unlock(&zone_scan_mutex); + spin_unlock(&zone_scan_lock); return ret; } @@ -502,11 +505,74 @@ void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_mask) struct zoneref *z; struct zone *zone; - spin_lock(&zone_scan_mutex); + spin_lock(&zone_scan_lock); for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) { zone_clear_flag(zone, ZONE_OOM_LOCKED); } - spin_unlock(&zone_scan_mutex); + spin_unlock(&zone_scan_lock); +} + +/* + * Must be called with tasklist_lock held for read. + */ +static void __out_of_memory(gfp_t gfp_mask, int order) +{ + if (sysctl_oom_kill_allocating_task) { + oom_kill_process(current, gfp_mask, order, 0, NULL, + "Out of memory (oom_kill_allocating_task)"); + + } else { + unsigned long points; + struct task_struct *p; + +retry: + /* + * Rambo mode: Shoot down a process and hope it solves whatever + * issues we may have. + */ + p = select_bad_process(&points, NULL); + + if (PTR_ERR(p) == -1UL) + return; + + /* Found nothing?!?! Either we hang forever, or we panic. */ + if (!p) { + read_unlock(&tasklist_lock); + panic("Out of memory and no killable processes...\n"); + } + + if (oom_kill_process(p, gfp_mask, order, points, NULL, + "Out of memory")) + goto retry; + } +} + +/* + * pagefault handler calls into here because it is out of memory but + * doesn't know exactly how or why. + */ +void pagefault_out_of_memory(void) +{ + unsigned long freed = 0; + + blocking_notifier_call_chain(&oom_notify_list, 0, &freed); + if (freed > 0) + /* Got some memory back in the last second. */ + return; + + if (sysctl_panic_on_oom) + panic("out of memory from page fault. panic_on_oom is selected.\n"); + + read_lock(&tasklist_lock); + __out_of_memory(0, 0); /* unknown gfp_mask and order */ + read_unlock(&tasklist_lock); + + /* + * Give "p" a good chance of killing itself before we + * retry to allocate memory. + */ + if (!test_thread_flag(TIF_MEMDIE)) + schedule_timeout_uninterruptible(1); } /** @@ -522,8 +588,6 @@ void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_mask) */ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order) { - struct task_struct *p; - unsigned long points = 0; unsigned long freed = 0; enum oom_constraint constraint; @@ -544,7 +608,7 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order) switch (constraint) { case CONSTRAINT_MEMORY_POLICY: - oom_kill_process(current, gfp_mask, order, points, NULL, + oom_kill_process(current, gfp_mask, order, 0, NULL, "No available memory (MPOL_BIND)"); break; @@ -553,35 +617,10 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order) panic("out of memory. panic_on_oom is selected\n"); /* Fall-through */ case CONSTRAINT_CPUSET: - if (sysctl_oom_kill_allocating_task) { - oom_kill_process(current, gfp_mask, order, points, NULL, - "Out of memory (oom_kill_allocating_task)"); - break; - } -retry: - /* - * Rambo mode: Shoot down a process and hope it solves whatever - * issues we may have. - */ - p = select_bad_process(&points, NULL); - - if (PTR_ERR(p) == -1UL) - goto out; - - /* Found nothing?!?! Either we hang forever, or we panic. */ - if (!p) { - read_unlock(&tasklist_lock); - panic("Out of memory and no killable processes...\n"); - } - - if (oom_kill_process(p, gfp_mask, order, points, NULL, - "Out of memory")) - goto retry; - + __out_of_memory(gfp_mask, order); break; } -out: read_unlock(&tasklist_lock); /* diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 2970e35fd03f..b493db7841dc 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -69,6 +69,12 @@ static inline long sync_writeback_pages(void) int dirty_background_ratio = 5; /* + * dirty_background_bytes starts at 0 (disabled) so that it is a function of + * dirty_background_ratio * the amount of dirtyable memory + */ +unsigned long dirty_background_bytes; + +/* * free highmem will not be subtracted from the total free memory * for calculating free ratios if vm_highmem_is_dirtyable is true */ @@ -80,6 +86,12 @@ int vm_highmem_is_dirtyable; int vm_dirty_ratio = 10; /* + * vm_dirty_bytes starts at 0 (disabled) so that it is a function of + * vm_dirty_ratio * the amount of dirtyable memory + */ +unsigned long vm_dirty_bytes; + +/* * The interval between `kupdate'-style writebacks, in jiffies */ int dirty_writeback_interval = 5 * HZ; @@ -135,23 +147,75 @@ static int calc_period_shift(void) { unsigned long dirty_total; - dirty_total = (vm_dirty_ratio * determine_dirtyable_memory()) / 100; + if (vm_dirty_bytes) + dirty_total = vm_dirty_bytes / PAGE_SIZE; + else + dirty_total = (vm_dirty_ratio * determine_dirtyable_memory()) / + 100; return 2 + ilog2(dirty_total - 1); } /* - * update the period when the dirty ratio changes. + * update the period when the dirty threshold changes. */ +static void update_completion_period(void) +{ + int shift = calc_period_shift(); + prop_change_shift(&vm_completions, shift); + prop_change_shift(&vm_dirties, shift); +} + +int dirty_background_ratio_handler(struct ctl_table *table, int write, + struct file *filp, void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + int ret; + + ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos); + if (ret == 0 && write) + dirty_background_bytes = 0; + return ret; +} + +int dirty_background_bytes_handler(struct ctl_table *table, int write, + struct file *filp, void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + int ret; + + ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos); + if (ret == 0 && write) + dirty_background_ratio = 0; + return ret; +} + int dirty_ratio_handler(struct ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { int old_ratio = vm_dirty_ratio; - int ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos); + int ret; + + ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos); if (ret == 0 && write && vm_dirty_ratio != old_ratio) { - int shift = calc_period_shift(); - prop_change_shift(&vm_completions, shift); - prop_change_shift(&vm_dirties, shift); + update_completion_period(); + vm_dirty_bytes = 0; + } + return ret; +} + + +int dirty_bytes_handler(struct ctl_table *table, int write, + struct file *filp, void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + int old_bytes = vm_dirty_bytes; + int ret; + + ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos); + if (ret == 0 && write && vm_dirty_bytes != old_bytes) { + update_completion_period(); + vm_dirty_ratio = 0; } return ret; } @@ -362,26 +426,32 @@ unsigned long determine_dirtyable_memory(void) } void -get_dirty_limits(long *pbackground, long *pdirty, long *pbdi_dirty, - struct backing_dev_info *bdi) +get_dirty_limits(unsigned long *pbackground, unsigned long *pdirty, + unsigned long *pbdi_dirty, struct backing_dev_info *bdi) { - int background_ratio; /* Percentages */ - int dirty_ratio; - long background; - long dirty; + unsigned long background; + unsigned long dirty; unsigned long available_memory = determine_dirtyable_memory(); struct task_struct *tsk; - dirty_ratio = vm_dirty_ratio; - if (dirty_ratio < 5) - dirty_ratio = 5; + if (vm_dirty_bytes) + dirty = DIV_ROUND_UP(vm_dirty_bytes, PAGE_SIZE); + else { + int dirty_ratio; - background_ratio = dirty_background_ratio; - if (background_ratio >= dirty_ratio) - background_ratio = dirty_ratio / 2; + dirty_ratio = vm_dirty_ratio; + if (dirty_ratio < 5) + dirty_ratio = 5; + dirty = (dirty_ratio * available_memory) / 100; + } + + if (dirty_background_bytes) + background = DIV_ROUND_UP(dirty_background_bytes, PAGE_SIZE); + else + background = (dirty_background_ratio * available_memory) / 100; - background = (background_ratio * available_memory) / 100; - dirty = (dirty_ratio * available_memory) / 100; + if (background >= dirty) + background = dirty / 2; tsk = current; if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk)) { background += background / 4; @@ -423,9 +493,9 @@ static void balance_dirty_pages(struct address_space *mapping) { long nr_reclaimable, bdi_nr_reclaimable; long nr_writeback, bdi_nr_writeback; - long background_thresh; - long dirty_thresh; - long bdi_thresh; + unsigned long background_thresh; + unsigned long dirty_thresh; + unsigned long bdi_thresh; unsigned long pages_written = 0; unsigned long write_chunk = sync_writeback_pages(); @@ -580,8 +650,8 @@ EXPORT_SYMBOL(balance_dirty_pages_ratelimited_nr); void throttle_vm_writeout(gfp_t gfp_mask) { - long background_thresh; - long dirty_thresh; + unsigned long background_thresh; + unsigned long dirty_thresh; for ( ; ; ) { get_dirty_limits(&background_thresh, &dirty_thresh, NULL, NULL); @@ -624,8 +694,8 @@ static void background_writeout(unsigned long _min_pages) }; for ( ; ; ) { - long background_thresh; - long dirty_thresh; + unsigned long background_thresh; + unsigned long dirty_thresh; get_dirty_limits(&background_thresh, &dirty_thresh, NULL, NULL); if (global_page_state(NR_FILE_DIRTY) + @@ -868,9 +938,11 @@ int write_cache_pages(struct address_space *mapping, int done = 0; struct pagevec pvec; int nr_pages; + pgoff_t uninitialized_var(writeback_index); pgoff_t index; pgoff_t end; /* Inclusive */ - int scanned = 0; + pgoff_t done_index; + int cycled; int range_whole = 0; long nr_to_write = wbc->nr_to_write; @@ -881,83 +953,134 @@ int write_cache_pages(struct address_space *mapping, pagevec_init(&pvec, 0); if (wbc->range_cyclic) { - index = mapping->writeback_index; /* Start from prev offset */ + writeback_index = mapping->writeback_index; /* prev offset */ + index = writeback_index; + if (index == 0) + cycled = 1; + else + cycled = 0; end = -1; } else { index = wbc->range_start >> PAGE_CACHE_SHIFT; end = wbc->range_end >> PAGE_CACHE_SHIFT; if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) range_whole = 1; - scanned = 1; + cycled = 1; /* ignore range_cyclic tests */ } retry: - while (!done && (index <= end) && - (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, - PAGECACHE_TAG_DIRTY, - min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) { - unsigned i; + done_index = index; + while (!done && (index <= end)) { + int i; + + nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, + PAGECACHE_TAG_DIRTY, + min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); + if (nr_pages == 0) + break; - scanned = 1; for (i = 0; i < nr_pages; i++) { struct page *page = pvec.pages[i]; /* - * At this point we hold neither mapping->tree_lock nor - * lock on the page itself: the page may be truncated or - * invalidated (changing page->mapping to NULL), or even - * swizzled back from swapper_space to tmpfs file - * mapping + * At this point, the page may be truncated or + * invalidated (changing page->mapping to NULL), or + * even swizzled back from swapper_space to tmpfs file + * mapping. However, page->index will not change + * because we have a reference on the page. */ + if (page->index > end) { + /* + * can't be range_cyclic (1st pass) because + * end == -1 in that case. + */ + done = 1; + break; + } + + done_index = page->index + 1; + lock_page(page); + /* + * Page truncated or invalidated. We can freely skip it + * then, even for data integrity operations: the page + * has disappeared concurrently, so there could be no + * real expectation of this data interity operation + * even if there is now a new, dirty page at the same + * pagecache address. + */ if (unlikely(page->mapping != mapping)) { +continue_unlock: unlock_page(page); continue; } - if (!wbc->range_cyclic && page->index > end) { - done = 1; - unlock_page(page); - continue; + if (!PageDirty(page)) { + /* someone wrote it for us */ + goto continue_unlock; } - if (wbc->sync_mode != WB_SYNC_NONE) - wait_on_page_writeback(page); - - if (PageWriteback(page) || - !clear_page_dirty_for_io(page)) { - unlock_page(page); - continue; + if (PageWriteback(page)) { + if (wbc->sync_mode != WB_SYNC_NONE) + wait_on_page_writeback(page); + else + goto continue_unlock; } - ret = (*writepage)(page, wbc, data); + BUG_ON(PageWriteback(page)); + if (!clear_page_dirty_for_io(page)) + goto continue_unlock; - if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) { - unlock_page(page); - ret = 0; + ret = (*writepage)(page, wbc, data); + if (unlikely(ret)) { + if (ret == AOP_WRITEPAGE_ACTIVATE) { + unlock_page(page); + ret = 0; + } else { + /* + * done_index is set past this page, + * so media errors will not choke + * background writeout for the entire + * file. This has consequences for + * range_cyclic semantics (ie. it may + * not be suitable for data integrity + * writeout). + */ + done = 1; + break; + } + } + + if (wbc->sync_mode == WB_SYNC_NONE) { + wbc->nr_to_write--; + if (wbc->nr_to_write <= 0) { + done = 1; + break; + } } - if (ret || (--nr_to_write <= 0)) - done = 1; if (wbc->nonblocking && bdi_write_congested(bdi)) { wbc->encountered_congestion = 1; done = 1; + break; } } pagevec_release(&pvec); cond_resched(); } - if (!scanned && !done) { + if (!cycled) { /* + * range_cyclic: * We hit the last page and there is more work to be done: wrap * back to the start of the file */ - scanned = 1; + cycled = 1; index = 0; + end = writeback_index - 1; goto retry; } if (!wbc->no_nrwrite_index_update) { if (wbc->range_cyclic || (range_whole && nr_to_write > 0)) - mapping->writeback_index = index; + mapping->writeback_index = done_index; wbc->nr_to_write = nr_to_write; } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index d8ac01474563..7bf22e045318 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -69,7 +69,7 @@ EXPORT_SYMBOL(node_states); unsigned long totalram_pages __read_mostly; unsigned long totalreserve_pages __read_mostly; -long nr_swap_pages; +unsigned long highest_memmap_pfn __read_mostly; int percpu_pagelist_fraction; #ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE @@ -223,19 +223,41 @@ static inline int bad_range(struct zone *zone, struct page *page) static void bad_page(struct page *page) { - printk(KERN_EMERG "Bad page state in process '%s'\n" KERN_EMERG - "page:%p flags:0x%0*lx mapping:%p mapcount:%d count:%d\n", - current->comm, page, (int)(2*sizeof(unsigned long)), - (unsigned long)page->flags, page->mapping, - page_mapcount(page), page_count(page)); + static unsigned long resume; + static unsigned long nr_shown; + static unsigned long nr_unshown; + + /* + * Allow a burst of 60 reports, then keep quiet for that minute; + * or allow a steady drip of one report per second. + */ + if (nr_shown == 60) { + if (time_before(jiffies, resume)) { + nr_unshown++; + goto out; + } + if (nr_unshown) { + printk(KERN_ALERT + "BUG: Bad page state: %lu messages suppressed\n", + nr_unshown); + nr_unshown = 0; + } + nr_shown = 0; + } + if (nr_shown++ == 0) + resume = jiffies + 60 * HZ; + + printk(KERN_ALERT "BUG: Bad page state in process %s pfn:%05lx\n", + current->comm, page_to_pfn(page)); + printk(KERN_ALERT + "page:%p flags:%p count:%d mapcount:%d mapping:%p index:%lx\n", + page, (void *)page->flags, page_count(page), + page_mapcount(page), page->mapping, page->index); - printk(KERN_EMERG "Trying to fix it up, but a reboot is needed\n" - KERN_EMERG "Backtrace:\n"); dump_stack(); - page->flags &= ~PAGE_FLAGS_CLEAR_WHEN_BAD; - set_page_count(page, 0); - reset_page_mapcount(page); - page->mapping = NULL; +out: + /* Leave bad fields for debug, except PageBuddy could make trouble */ + __ClearPageBuddy(page); add_taint(TAINT_BAD_PAGE); } @@ -292,25 +314,31 @@ void prep_compound_gigantic_page(struct page *page, unsigned long order) } #endif -static void destroy_compound_page(struct page *page, unsigned long order) +static int destroy_compound_page(struct page *page, unsigned long order) { int i; int nr_pages = 1 << order; + int bad = 0; - if (unlikely(compound_order(page) != order)) + if (unlikely(compound_order(page) != order) || + unlikely(!PageHead(page))) { bad_page(page); + bad++; + } - if (unlikely(!PageHead(page))) - bad_page(page); __ClearPageHead(page); + for (i = 1; i < nr_pages; i++) { struct page *p = page + i; - if (unlikely(!PageTail(p) | - (p->first_page != page))) + if (unlikely(!PageTail(p) | (p->first_page != page))) { bad_page(page); + bad++; + } __ClearPageTail(p); } + + return bad; } static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags) @@ -430,7 +458,8 @@ static inline void __free_one_page(struct page *page, int migratetype = get_pageblock_migratetype(page); if (unlikely(PageCompound(page))) - destroy_compound_page(page, order); + if (unlikely(destroy_compound_page(page, order))) + return; page_idx = page_to_pfn(page) & ((1 << MAX_ORDER) - 1); @@ -467,18 +496,13 @@ static inline int free_pages_check(struct page *page) if (unlikely(page_mapcount(page) | (page->mapping != NULL) | (page_count(page) != 0) | - (page->flags & PAGE_FLAGS_CHECK_AT_FREE))) + (page->flags & PAGE_FLAGS_CHECK_AT_FREE))) { bad_page(page); - if (PageDirty(page)) - __ClearPageDirty(page); - if (PageSwapBacked(page)) - __ClearPageSwapBacked(page); - /* - * For now, we report if PG_reserved was found set, but do not - * clear it, and do not free the page. But we shall soon need - * to do more, for when the ZERO_PAGE count wraps negative. - */ - return PageReserved(page); + return 1; + } + if (page->flags & PAGE_FLAGS_CHECK_AT_PREP) + page->flags &= ~PAGE_FLAGS_CHECK_AT_PREP; + return 0; } /* @@ -523,11 +547,11 @@ static void __free_pages_ok(struct page *page, unsigned int order) { unsigned long flags; int i; - int reserved = 0; + int bad = 0; for (i = 0 ; i < (1 << order) ; ++i) - reserved += free_pages_check(page + i); - if (reserved) + bad += free_pages_check(page + i); + if (bad) return; if (!PageHighMem(page)) { @@ -612,23 +636,11 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags) if (unlikely(page_mapcount(page) | (page->mapping != NULL) | (page_count(page) != 0) | - (page->flags & PAGE_FLAGS_CHECK_AT_PREP))) + (page->flags & PAGE_FLAGS_CHECK_AT_PREP))) { bad_page(page); - - /* - * For now, we report if PG_reserved was found set, but do not - * clear it, and do not allocate the page: as a safety net. - */ - if (PageReserved(page)) return 1; + } - page->flags &= ~(1 << PG_uptodate | 1 << PG_error | 1 << PG_reclaim | - 1 << PG_referenced | 1 << PG_arch_1 | - 1 << PG_owner_priv_1 | 1 << PG_mappedtodisk -#ifdef CONFIG_UNEVICTABLE_LRU - | 1 << PG_mlocked -#endif - ); set_page_private(page, 0); set_page_refcounted(page); @@ -2609,6 +2621,9 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, unsigned long pfn; struct zone *z; + if (highest_memmap_pfn < end_pfn - 1) + highest_memmap_pfn = end_pfn - 1; + z = &NODE_DATA(nid)->node_zones[zone]; for (pfn = start_pfn; pfn < end_pfn; pfn++) { /* @@ -3381,10 +3396,8 @@ static void __init setup_usemap(struct pglist_data *pgdat, { unsigned long usemapsize = usemap_size(zonesize); zone->pageblock_flags = NULL; - if (usemapsize) { + if (usemapsize) zone->pageblock_flags = alloc_bootmem_node(pgdat, usemapsize); - memset(zone->pageblock_flags, 0, usemapsize); - } } #else static void inline setup_usemap(struct pglist_data *pgdat, @@ -3469,9 +3482,10 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat, PAGE_ALIGN(size * sizeof(struct page)) >> PAGE_SHIFT; if (realsize >= memmap_pages) { realsize -= memmap_pages; - printk(KERN_DEBUG - " %s zone: %lu pages used for memmap\n", - zone_names[j], memmap_pages); + if (memmap_pages) + printk(KERN_DEBUG + " %s zone: %lu pages used for memmap\n", + zone_names[j], memmap_pages); } else printk(KERN_WARNING " %s zone: %lu pages exceeds realsize %lu\n", @@ -4316,7 +4330,7 @@ void setup_per_zone_pages_min(void) * 1TB 101 10GB * 10TB 320 32GB */ -void setup_per_zone_inactive_ratio(void) +static void setup_per_zone_inactive_ratio(void) { struct zone *zone; @@ -4573,19 +4587,6 @@ void *__init alloc_large_system_hash(const char *tablename, return table; } -#ifdef CONFIG_OUT_OF_LINE_PFN_TO_PAGE -struct page *pfn_to_page(unsigned long pfn) -{ - return __pfn_to_page(pfn); -} -unsigned long page_to_pfn(struct page *page) -{ - return __page_to_pfn(page); -} -EXPORT_SYMBOL(pfn_to_page); -EXPORT_SYMBOL(page_to_pfn); -#endif /* CONFIG_OUT_OF_LINE_PFN_TO_PAGE */ - /* Return a pointer to the bitmap storing bits affecting a block of pages */ static inline unsigned long *get_pageblock_bitmap(struct zone *zone, unsigned long pfn) diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c index ab27ff750519..d6507a660ed6 100644 --- a/mm/page_cgroup.c +++ b/mm/page_cgroup.c @@ -101,7 +101,7 @@ struct page_cgroup *lookup_page_cgroup(struct page *page) } /* __alloc_bootmem...() is protected by !slab_available() */ -int __init_refok init_section_page_cgroup(unsigned long pfn) +static int __init_refok init_section_page_cgroup(unsigned long pfn) { struct mem_section *section; struct page_cgroup *base, *pc; diff --git a/mm/page_io.c b/mm/page_io.c index 065c4480eaf0..dc6ce0afbded 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -98,7 +98,7 @@ int swap_writepage(struct page *page, struct writeback_control *wbc) struct bio *bio; int ret = 0, rw = WRITE; - if (remove_exclusive_swap_page(page)) { + if (try_to_free_swap(page)) { unlock_page(page); goto out; } @@ -125,8 +125,8 @@ int swap_readpage(struct file *file, struct page *page) struct bio *bio; int ret = 0; - BUG_ON(!PageLocked(page)); - BUG_ON(PageUptodate(page)); + VM_BUG_ON(!PageLocked(page)); + VM_BUG_ON(PageUptodate(page)); bio = get_swap_bio(GFP_KERNEL, page_private(page), page, end_swap_bio_read); if (bio == NULL) { diff --git a/mm/rmap.c b/mm/rmap.c index 10993942d6c9..ac4af8cffbf9 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -47,9 +47,9 @@ #include <linux/rmap.h> #include <linux/rcupdate.h> #include <linux/module.h> -#include <linux/kallsyms.h> #include <linux/memcontrol.h> #include <linux/mmu_notifier.h> +#include <linux/migrate.h> #include <asm/tlbflush.h> @@ -191,7 +191,7 @@ void __init anon_vma_init(void) * Getting a lock on a stable anon_vma from a page off the LRU is * tricky: page_lock_anon_vma rely on RCU to guard against the races. */ -struct anon_vma *page_lock_anon_vma(struct page *page) +static struct anon_vma *page_lock_anon_vma(struct page *page) { struct anon_vma *anon_vma; unsigned long anon_mapping; @@ -211,7 +211,7 @@ out: return NULL; } -void page_unlock_anon_vma(struct anon_vma *anon_vma) +static void page_unlock_anon_vma(struct anon_vma *anon_vma) { spin_unlock(&anon_vma->lock); rcu_read_unlock(); @@ -359,8 +359,17 @@ static int page_referenced_one(struct page *page, goto out_unmap; } - if (ptep_clear_flush_young_notify(vma, address, pte)) - referenced++; + if (ptep_clear_flush_young_notify(vma, address, pte)) { + /* + * Don't treat a reference through a sequentially read + * mapping as such. If the page has been used in + * another mapping, we will catch it; if this other + * mapping is already gone, the unmap path will have + * set PG_referenced or activated the page. + */ + if (likely(!VM_SequentialReadHint(vma))) + referenced++; + } /* Pretend the page is referenced if the task has the swap token and is in the middle of a page fault. */ @@ -661,9 +670,14 @@ void page_add_anon_rmap(struct page *page, void page_add_new_anon_rmap(struct page *page, struct vm_area_struct *vma, unsigned long address) { - BUG_ON(address < vma->vm_start || address >= vma->vm_end); - atomic_set(&page->_mapcount, 0); /* elevate count by 1 (starts at -1) */ + VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end); + SetPageSwapBacked(page); + atomic_set(&page->_mapcount, 0); /* increment count (starts at -1) */ __page_set_anon_rmap(page, vma, address); + if (page_evictable(page, vma)) + lru_cache_add_lru(page, LRU_ACTIVE_ANON); + else + add_page_to_unevictable_list(page); } /** @@ -693,7 +707,6 @@ void page_add_file_rmap(struct page *page) */ void page_dup_rmap(struct page *page, struct vm_area_struct *vma, unsigned long address) { - BUG_ON(page_mapcount(page) == 0); if (PageAnon(page)) __page_check_anon_rmap(page, vma, address); atomic_inc(&page->_mapcount); @@ -703,28 +716,12 @@ void page_dup_rmap(struct page *page, struct vm_area_struct *vma, unsigned long /** * page_remove_rmap - take down pte mapping from a page * @page: page to remove mapping from - * @vma: the vm area in which the mapping is removed * * The caller needs to hold the pte lock. */ -void page_remove_rmap(struct page *page, struct vm_area_struct *vma) +void page_remove_rmap(struct page *page) { if (atomic_add_negative(-1, &page->_mapcount)) { - if (unlikely(page_mapcount(page) < 0)) { - printk (KERN_EMERG "Eeek! page_mapcount(page) went negative! (%d)\n", page_mapcount(page)); - printk (KERN_EMERG " page pfn = %lx\n", page_to_pfn(page)); - printk (KERN_EMERG " page->flags = %lx\n", page->flags); - printk (KERN_EMERG " page->count = %x\n", page_count(page)); - printk (KERN_EMERG " page->mapping = %p\n", page->mapping); - print_symbol (KERN_EMERG " vma->vm_ops = %s\n", (unsigned long)vma->vm_ops); - if (vma->vm_ops) { - print_symbol (KERN_EMERG " vma->vm_ops->fault = %s\n", (unsigned long)vma->vm_ops->fault); - } - if (vma->vm_file && vma->vm_file->f_op) - print_symbol (KERN_EMERG " vma->vm_file->f_op->mmap = %s\n", (unsigned long)vma->vm_file->f_op->mmap); - BUG(); - } - /* * Now that the last pte has gone, s390 must transfer dirty * flag from storage key to struct page. We can usually skip @@ -818,8 +815,7 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, spin_unlock(&mmlist_lock); } dec_mm_counter(mm, anon_rss); -#ifdef CONFIG_MIGRATION - } else { + } else if (PAGE_MIGRATION) { /* * Store the pfn of the page in a special migration * pte. do_swap_page() will wait until the migration @@ -827,23 +823,19 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, */ BUG_ON(!migration); entry = make_migration_entry(page, pte_write(pteval)); -#endif } set_pte_at(mm, address, pte, swp_entry_to_pte(entry)); BUG_ON(pte_file(*pte)); - } else -#ifdef CONFIG_MIGRATION - if (migration) { + } else if (PAGE_MIGRATION && migration) { /* Establish migration entry for a file page */ swp_entry_t entry; entry = make_migration_entry(page, pte_write(pteval)); set_pte_at(mm, address, pte, swp_entry_to_pte(entry)); } else -#endif dec_mm_counter(mm, file_rss); - page_remove_rmap(page, vma); + page_remove_rmap(page); page_cache_release(page); out_unmap: @@ -958,7 +950,7 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount, if (pte_dirty(pteval)) set_page_dirty(page); - page_remove_rmap(page, vma); + page_remove_rmap(page); page_cache_release(page); dec_mm_counter(mm, file_rss); (*mapcount)--; diff --git a/mm/shmem.c b/mm/shmem.c index f1b0d4871f3a..5941f9801363 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -14,31 +14,39 @@ * Copyright (c) 2004, Luke Kenneth Casson Leighton <lkcl@lkcl.net> * Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com> * + * tiny-shmem: + * Copyright (c) 2004, 2008 Matt Mackall <mpm@selenic.com> + * * This file is released under the GPL. */ +#include <linux/fs.h> +#include <linux/init.h> +#include <linux/vfs.h> +#include <linux/mount.h> +#include <linux/file.h> +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/swap.h> + +static struct vfsmount *shm_mnt; + +#ifdef CONFIG_SHMEM /* * This virtual memory filesystem is heavily based on the ramfs. It * extends ramfs by the ability to use swap and honor resource limits * which makes it a completely usable filesystem. */ -#include <linux/module.h> -#include <linux/init.h> -#include <linux/fs.h> #include <linux/xattr.h> #include <linux/exportfs.h> #include <linux/generic_acl.h> -#include <linux/mm.h> #include <linux/mman.h> -#include <linux/file.h> -#include <linux/swap.h> #include <linux/pagemap.h> #include <linux/string.h> #include <linux/slab.h> #include <linux/backing-dev.h> #include <linux/shmem_fs.h> -#include <linux/mount.h> #include <linux/writeback.h> #include <linux/vfs.h> #include <linux/blkdev.h> @@ -1444,7 +1452,6 @@ static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) if (error) return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS); - mark_page_accessed(vmf->page); return ret | VM_FAULT_LOCKED; } @@ -2486,7 +2493,6 @@ static struct file_system_type tmpfs_fs_type = { .get_sb = shmem_get_sb, .kill_sb = kill_litter_super, }; -static struct vfsmount *shm_mnt; static int __init init_tmpfs(void) { @@ -2525,7 +2531,51 @@ out4: shm_mnt = ERR_PTR(error); return error; } -module_init(init_tmpfs) + +#else /* !CONFIG_SHMEM */ + +/* + * tiny-shmem: simple shmemfs and tmpfs using ramfs code + * + * This is intended for small system where the benefits of the full + * shmem code (swap-backed and resource-limited) are outweighed by + * their complexity. On systems without swap this code should be + * effectively equivalent, but much lighter weight. + */ + +#include <linux/ramfs.h> + +static struct file_system_type tmpfs_fs_type = { + .name = "tmpfs", + .get_sb = ramfs_get_sb, + .kill_sb = kill_litter_super, +}; + +static int __init init_tmpfs(void) +{ + BUG_ON(register_filesystem(&tmpfs_fs_type) != 0); + + shm_mnt = kern_mount(&tmpfs_fs_type); + BUG_ON(IS_ERR(shm_mnt)); + + return 0; +} + +int shmem_unuse(swp_entry_t entry, struct page *page) +{ + return 0; +} + +#define shmem_file_operations ramfs_file_operations +#define shmem_vm_ops generic_file_vm_ops +#define shmem_get_inode ramfs_get_inode +#define shmem_acct_size(a, b) 0 +#define shmem_unacct_size(a, b) do {} while (0) +#define SHMEM_MAX_BYTES LLONG_MAX + +#endif /* CONFIG_SHMEM */ + +/* common code */ /** * shmem_file_setup - get an unlinked file living in tmpfs @@ -2569,12 +2619,20 @@ struct file *shmem_file_setup(char *name, loff_t size, unsigned long flags) if (!inode) goto close_file; +#ifdef CONFIG_SHMEM SHMEM_I(inode)->flags = flags & VM_ACCOUNT; +#endif d_instantiate(dentry, inode); inode->i_size = size; inode->i_nlink = 0; /* It is unlinked */ init_file(file, shm_mnt, dentry, FMODE_WRITE | FMODE_READ, - &shmem_file_operations); + &shmem_file_operations); + +#ifndef CONFIG_MMU + error = ramfs_nommu_expand_for_mapping(inode, size); + if (error) + goto close_file; +#endif return file; close_file: @@ -2606,3 +2664,5 @@ int shmem_zero_setup(struct vm_area_struct *vma) vma->vm_ops = &shmem_vm_ops; return 0; } + +module_init(init_tmpfs) diff --git a/mm/swap.c b/mm/swap.c index b135ec90cdeb..ba2c0e8b8b54 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -246,25 +246,6 @@ void add_page_to_unevictable_list(struct page *page) spin_unlock_irq(&zone->lru_lock); } -/** - * lru_cache_add_active_or_unevictable - * @page: the page to be added to LRU - * @vma: vma in which page is mapped for determining reclaimability - * - * place @page on active or unevictable LRU list, depending on - * page_evictable(). Note that if the page is not evictable, - * it goes directly back onto it's zone's unevictable list. It does - * NOT use a per cpu pagevec. - */ -void lru_cache_add_active_or_unevictable(struct page *page, - struct vm_area_struct *vma) -{ - if (page_evictable(page, vma)) - lru_cache_add_lru(page, LRU_ACTIVE + page_is_file_cache(page)); - else - add_page_to_unevictable_list(page); -} - /* * Drain pages out of the cpu's pagevecs. * Either "cpu" is the current CPU, and preemption has already been @@ -398,28 +379,6 @@ void __pagevec_release(struct pagevec *pvec) EXPORT_SYMBOL(__pagevec_release); /* - * pagevec_release() for pages which are known to not be on the LRU - * - * This function reinitialises the caller's pagevec. - */ -void __pagevec_release_nonlru(struct pagevec *pvec) -{ - int i; - struct pagevec pages_to_free; - - pagevec_init(&pages_to_free, pvec->cold); - for (i = 0; i < pagevec_count(pvec); i++) { - struct page *page = pvec->pages[i]; - - VM_BUG_ON(PageLRU(page)); - if (put_page_testzero(page)) - pagevec_add(&pages_to_free, page); - } - pagevec_free(&pages_to_free); - pagevec_reinit(pvec); -} - -/* * Add the passed pages to the LRU, then drop the caller's refcount * on them. Reinitialises the caller's pagevec. */ @@ -495,8 +454,7 @@ void pagevec_swap_free(struct pagevec *pvec) struct page *page = pvec->pages[i]; if (PageSwapCache(page) && trylock_page(page)) { - if (PageSwapCache(page)) - remove_exclusive_swap_page_ref(page); + try_to_free_swap(page); unlock_page(page); } } diff --git a/mm/swap_state.c b/mm/swap_state.c index 3353c9029cef..81c825f67a7f 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -72,10 +72,10 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask) { int error; - BUG_ON(!PageLocked(page)); - BUG_ON(PageSwapCache(page)); - BUG_ON(PagePrivate(page)); - BUG_ON(!PageSwapBacked(page)); + VM_BUG_ON(!PageLocked(page)); + VM_BUG_ON(PageSwapCache(page)); + VM_BUG_ON(!PageSwapBacked(page)); + error = radix_tree_preload(gfp_mask); if (!error) { page_cache_get(page); @@ -108,10 +108,9 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask) */ void __delete_from_swap_cache(struct page *page) { - BUG_ON(!PageLocked(page)); - BUG_ON(!PageSwapCache(page)); - BUG_ON(PageWriteback(page)); - BUG_ON(PagePrivate(page)); + VM_BUG_ON(!PageLocked(page)); + VM_BUG_ON(!PageSwapCache(page)); + VM_BUG_ON(PageWriteback(page)); radix_tree_delete(&swapper_space.page_tree, page_private(page)); set_page_private(page, 0); @@ -129,13 +128,13 @@ void __delete_from_swap_cache(struct page *page) * Allocate swap space for the page and add the page to the * swap cache. Caller needs to hold the page lock. */ -int add_to_swap(struct page * page, gfp_t gfp_mask) +int add_to_swap(struct page *page) { swp_entry_t entry; int err; - BUG_ON(!PageLocked(page)); - BUG_ON(!PageUptodate(page)); + VM_BUG_ON(!PageLocked(page)); + VM_BUG_ON(!PageUptodate(page)); for (;;) { entry = get_swap_page(); @@ -154,7 +153,7 @@ int add_to_swap(struct page * page, gfp_t gfp_mask) * Add it to the swap cache and mark it dirty */ err = add_to_swap_cache(page, entry, - gfp_mask|__GFP_NOMEMALLOC|__GFP_NOWARN); + __GFP_HIGH|__GFP_NOMEMALLOC|__GFP_NOWARN); switch (err) { case 0: /* Success */ @@ -196,14 +195,14 @@ void delete_from_swap_cache(struct page *page) * If we are the only user, then try to free up the swap cache. * * Its ok to check for PageSwapCache without the page lock - * here because we are going to recheck again inside - * exclusive_swap_page() _with_ the lock. + * here because we are going to recheck again inside + * try_to_free_swap() _with_ the lock. * - Marcelo */ static inline void free_swap_cache(struct page *page) { - if (PageSwapCache(page) && trylock_page(page)) { - remove_exclusive_swap_page(page); + if (PageSwapCache(page) && !page_mapped(page) && trylock_page(page)) { + try_to_free_swap(page); unlock_page(page); } } diff --git a/mm/swapfile.c b/mm/swapfile.c index 54a9f87e5162..eec5ca758a23 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -16,6 +16,7 @@ #include <linux/namei.h> #include <linux/shm.h> #include <linux/blkdev.h> +#include <linux/random.h> #include <linux/writeback.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> @@ -35,6 +36,7 @@ static DEFINE_SPINLOCK(swap_lock); static unsigned int nr_swapfiles; +long nr_swap_pages; long total_swap_pages; static int swap_overflow; static int least_priority; @@ -83,15 +85,96 @@ void swap_unplug_io_fn(struct backing_dev_info *unused_bdi, struct page *page) up_read(&swap_unplug_sem); } +/* + * swapon tell device that all the old swap contents can be discarded, + * to allow the swap device to optimize its wear-levelling. + */ +static int discard_swap(struct swap_info_struct *si) +{ + struct swap_extent *se; + int err = 0; + + list_for_each_entry(se, &si->extent_list, list) { + sector_t start_block = se->start_block << (PAGE_SHIFT - 9); + sector_t nr_blocks = (sector_t)se->nr_pages << (PAGE_SHIFT - 9); + + if (se->start_page == 0) { + /* Do not discard the swap header page! */ + start_block += 1 << (PAGE_SHIFT - 9); + nr_blocks -= 1 << (PAGE_SHIFT - 9); + if (!nr_blocks) + continue; + } + + err = blkdev_issue_discard(si->bdev, start_block, + nr_blocks, GFP_KERNEL); + if (err) + break; + + cond_resched(); + } + return err; /* That will often be -EOPNOTSUPP */ +} + +/* + * swap allocation tell device that a cluster of swap can now be discarded, + * to allow the swap device to optimize its wear-levelling. + */ +static void discard_swap_cluster(struct swap_info_struct *si, + pgoff_t start_page, pgoff_t nr_pages) +{ + struct swap_extent *se = si->curr_swap_extent; + int found_extent = 0; + + while (nr_pages) { + struct list_head *lh; + + if (se->start_page <= start_page && + start_page < se->start_page + se->nr_pages) { + pgoff_t offset = start_page - se->start_page; + sector_t start_block = se->start_block + offset; + sector_t nr_blocks = se->nr_pages - offset; + + if (nr_blocks > nr_pages) + nr_blocks = nr_pages; + start_page += nr_blocks; + nr_pages -= nr_blocks; + + if (!found_extent++) + si->curr_swap_extent = se; + + start_block <<= PAGE_SHIFT - 9; + nr_blocks <<= PAGE_SHIFT - 9; + if (blkdev_issue_discard(si->bdev, start_block, + nr_blocks, GFP_NOIO)) + break; + } + + lh = se->list.next; + if (lh == &si->extent_list) + lh = lh->next; + se = list_entry(lh, struct swap_extent, list); + } +} + +static int wait_for_discard(void *word) +{ + schedule(); + return 0; +} + #define SWAPFILE_CLUSTER 256 #define LATENCY_LIMIT 256 static inline unsigned long scan_swap_map(struct swap_info_struct *si) { - unsigned long offset, last_in_cluster; + unsigned long offset; + unsigned long scan_base; + unsigned long last_in_cluster = 0; int latency_ration = LATENCY_LIMIT; + int found_free_cluster = 0; - /* + /* * We try to cluster swap pages by allocating them sequentially * in swap. Once we've allocated SWAPFILE_CLUSTER pages this * way, however, we resort to first-free allocation, starting @@ -99,16 +182,42 @@ static inline unsigned long scan_swap_map(struct swap_info_struct *si) * all over the entire swap partition, so that we reduce * overall disk seek times between swap pages. -- sct * But we do now try to find an empty cluster. -Andrea + * And we let swap pages go all over an SSD partition. Hugh */ si->flags += SWP_SCANNING; - if (unlikely(!si->cluster_nr)) { - si->cluster_nr = SWAPFILE_CLUSTER - 1; - if (si->pages - si->inuse_pages < SWAPFILE_CLUSTER) - goto lowest; + scan_base = offset = si->cluster_next; + + if (unlikely(!si->cluster_nr--)) { + if (si->pages - si->inuse_pages < SWAPFILE_CLUSTER) { + si->cluster_nr = SWAPFILE_CLUSTER - 1; + goto checks; + } + if (si->flags & SWP_DISCARDABLE) { + /* + * Start range check on racing allocations, in case + * they overlap the cluster we eventually decide on + * (we scan without swap_lock to allow preemption). + * It's hardly conceivable that cluster_nr could be + * wrapped during our scan, but don't depend on it. + */ + if (si->lowest_alloc) + goto checks; + si->lowest_alloc = si->max; + si->highest_alloc = 0; + } spin_unlock(&swap_lock); - offset = si->lowest_bit; + /* + * If seek is expensive, start searching for new cluster from + * start of partition, to minimize the span of allocated swap. + * But if seek is cheap, search from our current position, so + * that swap is allocated from all over the partition: if the + * Flash Translation Layer only remaps within limited zones, + * we don't want to wear out the first zone too quickly. + */ + if (!(si->flags & SWP_SOLIDSTATE)) + scan_base = offset = si->lowest_bit; last_in_cluster = offset + SWAPFILE_CLUSTER - 1; /* Locate the first empty (unaligned) cluster */ @@ -117,43 +226,124 @@ static inline unsigned long scan_swap_map(struct swap_info_struct *si) last_in_cluster = offset + SWAPFILE_CLUSTER; else if (offset == last_in_cluster) { spin_lock(&swap_lock); - si->cluster_next = offset-SWAPFILE_CLUSTER+1; - goto cluster; + offset -= SWAPFILE_CLUSTER - 1; + si->cluster_next = offset; + si->cluster_nr = SWAPFILE_CLUSTER - 1; + found_free_cluster = 1; + goto checks; } if (unlikely(--latency_ration < 0)) { cond_resched(); latency_ration = LATENCY_LIMIT; } } + + offset = si->lowest_bit; + last_in_cluster = offset + SWAPFILE_CLUSTER - 1; + + /* Locate the first empty (unaligned) cluster */ + for (; last_in_cluster < scan_base; offset++) { + if (si->swap_map[offset]) + last_in_cluster = offset + SWAPFILE_CLUSTER; + else if (offset == last_in_cluster) { + spin_lock(&swap_lock); + offset -= SWAPFILE_CLUSTER - 1; + si->cluster_next = offset; + si->cluster_nr = SWAPFILE_CLUSTER - 1; + found_free_cluster = 1; + goto checks; + } + if (unlikely(--latency_ration < 0)) { + cond_resched(); + latency_ration = LATENCY_LIMIT; + } + } + + offset = scan_base; spin_lock(&swap_lock); - goto lowest; + si->cluster_nr = SWAPFILE_CLUSTER - 1; + si->lowest_alloc = 0; } - si->cluster_nr--; -cluster: - offset = si->cluster_next; - if (offset > si->highest_bit) -lowest: offset = si->lowest_bit; -checks: if (!(si->flags & SWP_WRITEOK)) +checks: + if (!(si->flags & SWP_WRITEOK)) goto no_page; if (!si->highest_bit) goto no_page; - if (!si->swap_map[offset]) { - if (offset == si->lowest_bit) - si->lowest_bit++; - if (offset == si->highest_bit) - si->highest_bit--; - si->inuse_pages++; - if (si->inuse_pages == si->pages) { - si->lowest_bit = si->max; - si->highest_bit = 0; + if (offset > si->highest_bit) + scan_base = offset = si->lowest_bit; + if (si->swap_map[offset]) + goto scan; + + if (offset == si->lowest_bit) + si->lowest_bit++; + if (offset == si->highest_bit) + si->highest_bit--; + si->inuse_pages++; + if (si->inuse_pages == si->pages) { + si->lowest_bit = si->max; + si->highest_bit = 0; + } + si->swap_map[offset] = 1; + si->cluster_next = offset + 1; + si->flags -= SWP_SCANNING; + + if (si->lowest_alloc) { + /* + * Only set when SWP_DISCARDABLE, and there's a scan + * for a free cluster in progress or just completed. + */ + if (found_free_cluster) { + /* + * To optimize wear-levelling, discard the + * old data of the cluster, taking care not to + * discard any of its pages that have already + * been allocated by racing tasks (offset has + * already stepped over any at the beginning). + */ + if (offset < si->highest_alloc && + si->lowest_alloc <= last_in_cluster) + last_in_cluster = si->lowest_alloc - 1; + si->flags |= SWP_DISCARDING; + spin_unlock(&swap_lock); + + if (offset < last_in_cluster) + discard_swap_cluster(si, offset, + last_in_cluster - offset + 1); + + spin_lock(&swap_lock); + si->lowest_alloc = 0; + si->flags &= ~SWP_DISCARDING; + + smp_mb(); /* wake_up_bit advises this */ + wake_up_bit(&si->flags, ilog2(SWP_DISCARDING)); + + } else if (si->flags & SWP_DISCARDING) { + /* + * Delay using pages allocated by racing tasks + * until the whole discard has been issued. We + * could defer that delay until swap_writepage, + * but it's easier to keep this self-contained. + */ + spin_unlock(&swap_lock); + wait_on_bit(&si->flags, ilog2(SWP_DISCARDING), + wait_for_discard, TASK_UNINTERRUPTIBLE); + spin_lock(&swap_lock); + } else { + /* + * Note pages allocated by racing tasks while + * scan for a free cluster is in progress, so + * that its final discard can exclude them. + */ + if (offset < si->lowest_alloc) + si->lowest_alloc = offset; + if (offset > si->highest_alloc) + si->highest_alloc = offset; } - si->swap_map[offset] = 1; - si->cluster_next = offset + 1; - si->flags -= SWP_SCANNING; - return offset; } + return offset; +scan: spin_unlock(&swap_lock); while (++offset <= si->highest_bit) { if (!si->swap_map[offset]) { @@ -165,8 +355,18 @@ checks: if (!(si->flags & SWP_WRITEOK)) latency_ration = LATENCY_LIMIT; } } + offset = si->lowest_bit; + while (++offset < scan_base) { + if (!si->swap_map[offset]) { + spin_lock(&swap_lock); + goto checks; + } + if (unlikely(--latency_ration < 0)) { + cond_resched(); + latency_ration = LATENCY_LIMIT; + } + } spin_lock(&swap_lock); - goto lowest; no_page: si->flags -= SWP_SCANNING; @@ -268,7 +468,7 @@ bad_nofile: printk(KERN_ERR "swap_free: %s%08lx\n", Bad_file, entry.val); out: return NULL; -} +} static int swap_entry_free(struct swap_info_struct *p, unsigned long offset) { @@ -326,97 +526,58 @@ static inline int page_swapcount(struct page *page) } /* - * We can use this swap cache entry directly - * if there are no other references to it. + * We can write to an anon page without COW if there are no other references + * to it. And as a side-effect, free up its swap: because the old content + * on disk will never be read, and seeking back there to write new content + * later would only waste time away from clustering. */ -int can_share_swap_page(struct page *page) +int reuse_swap_page(struct page *page) { int count; - BUG_ON(!PageLocked(page)); + VM_BUG_ON(!PageLocked(page)); count = page_mapcount(page); - if (count <= 1 && PageSwapCache(page)) + if (count <= 1 && PageSwapCache(page)) { count += page_swapcount(page); + if (count == 1 && !PageWriteback(page)) { + delete_from_swap_cache(page); + SetPageDirty(page); + } + } return count == 1; } /* - * Work out if there are any other processes sharing this - * swap cache page. Free it if you can. Return success. + * If swap is getting full, or if there are no more mappings of this page, + * then try_to_free_swap is called to free its swap space. */ -static int remove_exclusive_swap_page_count(struct page *page, int count) +int try_to_free_swap(struct page *page) { - int retval; - struct swap_info_struct * p; - swp_entry_t entry; - - BUG_ON(PagePrivate(page)); - BUG_ON(!PageLocked(page)); + VM_BUG_ON(!PageLocked(page)); if (!PageSwapCache(page)) return 0; if (PageWriteback(page)) return 0; - if (page_count(page) != count) /* us + cache + ptes */ - return 0; - - entry.val = page_private(page); - p = swap_info_get(entry); - if (!p) + if (page_swapcount(page)) return 0; - /* Is the only swap cache user the cache itself? */ - retval = 0; - if (p->swap_map[swp_offset(entry)] == 1) { - /* Recheck the page count with the swapcache lock held.. */ - spin_lock_irq(&swapper_space.tree_lock); - if ((page_count(page) == count) && !PageWriteback(page)) { - __delete_from_swap_cache(page); - SetPageDirty(page); - retval = 1; - } - spin_unlock_irq(&swapper_space.tree_lock); - } - spin_unlock(&swap_lock); - - if (retval) { - swap_free(entry); - page_cache_release(page); - } - - return retval; -} - -/* - * Most of the time the page should have two references: one for the - * process and one for the swap cache. - */ -int remove_exclusive_swap_page(struct page *page) -{ - return remove_exclusive_swap_page_count(page, 2); -} - -/* - * The pageout code holds an extra reference to the page. That raises - * the reference count to test for to 2 for a page that is only in the - * swap cache plus 1 for each process that maps the page. - */ -int remove_exclusive_swap_page_ref(struct page *page) -{ - return remove_exclusive_swap_page_count(page, 2 + page_mapcount(page)); + delete_from_swap_cache(page); + SetPageDirty(page); + return 1; } /* * Free the swap entry like above, but also try to * free the page cache entry if it is the last user. */ -void free_swap_and_cache(swp_entry_t entry) +int free_swap_and_cache(swp_entry_t entry) { - struct swap_info_struct * p; + struct swap_info_struct *p; struct page *page = NULL; if (is_migration_entry(entry)) - return; + return 1; p = swap_info_get(entry); if (p) { @@ -430,20 +591,19 @@ void free_swap_and_cache(swp_entry_t entry) spin_unlock(&swap_lock); } if (page) { - int one_user; - - BUG_ON(PagePrivate(page)); - one_user = (page_count(page) == 2); - /* Only cache user (+us), or swap space full? Free it! */ - /* Also recheck PageSwapCache after page is locked (above) */ + /* + * Not mapped elsewhere, or swap space full? Free it! + * Also recheck PageSwapCache now page is locked (above). + */ if (PageSwapCache(page) && !PageWriteback(page) && - (one_user || vm_swap_full())) { + (!page_mapped(page) || vm_swap_full())) { delete_from_swap_cache(page); SetPageDirty(page); } unlock_page(page); page_cache_release(page); } + return p != NULL; } #ifdef CONFIG_HIBERNATION @@ -776,10 +936,10 @@ static int try_to_unuse(unsigned int type) break; } - /* + /* * Get a page for the entry, using the existing swap * cache page if there is one. Otherwise, get a clean - * page and read the swap into it. + * page and read the swap into it. */ swap_map = &si->swap_map[i]; entry = swp_entry(type, i); @@ -930,7 +1090,16 @@ static int try_to_unuse(unsigned int type) lock_page(page); wait_on_page_writeback(page); } - if (PageSwapCache(page)) + + /* + * It is conceivable that a racing task removed this page from + * swap cache just before we acquired the page lock at the top, + * or while we dropped it in unuse_mm(). The page might even + * be back in swap cache on another swap area: that we must not + * delete, since it may not have been written out to swap yet. + */ + if (PageSwapCache(page) && + likely(page_private(page) == entry.val)) delete_from_swap_cache(page); /* @@ -1203,26 +1372,6 @@ out: return ret; } -#if 0 /* We don't need this yet */ -#include <linux/backing-dev.h> -int page_queue_congested(struct page *page) -{ - struct backing_dev_info *bdi; - - BUG_ON(!PageLocked(page)); /* It pins the swap_info_struct */ - - if (PageSwapCache(page)) { - swp_entry_t entry = { .val = page_private(page) }; - struct swap_info_struct *sis; - - sis = get_swap_info_struct(swp_type(entry)); - bdi = sis->bdev->bd_inode->i_mapping->backing_dev_info; - } else - bdi = page->mapping->backing_dev_info; - return bdi_write_congested(bdi); -} -#endif - asmlinkage long sys_swapoff(const char __user * specialfile) { struct swap_info_struct * p = NULL; @@ -1233,7 +1382,7 @@ asmlinkage long sys_swapoff(const char __user * specialfile) char * pathname; int i, type, prev; int err; - + if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -1253,7 +1402,7 @@ asmlinkage long sys_swapoff(const char __user * specialfile) spin_lock(&swap_lock); for (type = swap_list.head; type >= 0; type = swap_info[type].next) { p = swap_info + type; - if ((p->flags & SWP_ACTIVE) == SWP_ACTIVE) { + if (p->flags & SWP_WRITEOK) { if (p->swap_file->f_mapping == mapping) break; } @@ -1426,12 +1575,12 @@ static int swap_show(struct seq_file *swap, void *v) file = ptr->swap_file; len = seq_path(swap, &file->f_path, " \t\n\\"); seq_printf(swap, "%*s%s\t%u\t%u\t%d\n", - len < 40 ? 40 - len : 1, " ", - S_ISBLK(file->f_path.dentry->d_inode->i_mode) ? + len < 40 ? 40 - len : 1, " ", + S_ISBLK(file->f_path.dentry->d_inode->i_mode) ? "partition" : "file\t", - ptr->pages << (PAGE_SHIFT - 10), - ptr->inuse_pages << (PAGE_SHIFT - 10), - ptr->prio); + ptr->pages << (PAGE_SHIFT - 10), + ptr->inuse_pages << (PAGE_SHIFT - 10), + ptr->prio); return 0; } @@ -1487,12 +1636,11 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags) int i, prev; int error; union swap_header *swap_header = NULL; - int swap_header_version; unsigned int nr_good_pages = 0; int nr_extents = 0; sector_t span; unsigned long maxpages = 1; - int swapfilesize; + unsigned long swapfilepages; unsigned short *swap_map = NULL; struct page *page = NULL; struct inode *inode = NULL; @@ -1570,7 +1718,7 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags) goto bad_swap; } - swapfilesize = i_size_read(inode) >> PAGE_SHIFT; + swapfilepages = i_size_read(inode) >> PAGE_SHIFT; /* * Read the swap header. @@ -1584,101 +1732,86 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags) error = PTR_ERR(page); goto bad_swap; } - kmap(page); - swap_header = page_address(page); + swap_header = kmap(page); - if (!memcmp("SWAP-SPACE",swap_header->magic.magic,10)) - swap_header_version = 1; - else if (!memcmp("SWAPSPACE2",swap_header->magic.magic,10)) - swap_header_version = 2; - else { + if (memcmp("SWAPSPACE2", swap_header->magic.magic, 10)) { printk(KERN_ERR "Unable to find swap-space signature\n"); error = -EINVAL; goto bad_swap; } - - switch (swap_header_version) { - case 1: - printk(KERN_ERR "version 0 swap is no longer supported. " - "Use mkswap -v1 %s\n", name); + + /* swap partition endianess hack... */ + if (swab32(swap_header->info.version) == 1) { + swab32s(&swap_header->info.version); + swab32s(&swap_header->info.last_page); + swab32s(&swap_header->info.nr_badpages); + for (i = 0; i < swap_header->info.nr_badpages; i++) + swab32s(&swap_header->info.badpages[i]); + } + /* Check the swap header's sub-version */ + if (swap_header->info.version != 1) { + printk(KERN_WARNING + "Unable to handle swap header version %d\n", + swap_header->info.version); error = -EINVAL; goto bad_swap; - case 2: - /* swap partition endianess hack... */ - if (swab32(swap_header->info.version) == 1) { - swab32s(&swap_header->info.version); - swab32s(&swap_header->info.last_page); - swab32s(&swap_header->info.nr_badpages); - for (i = 0; i < swap_header->info.nr_badpages; i++) - swab32s(&swap_header->info.badpages[i]); - } - /* Check the swap header's sub-version and the size of - the swap file and bad block lists */ - if (swap_header->info.version != 1) { - printk(KERN_WARNING - "Unable to handle swap header version %d\n", - swap_header->info.version); - error = -EINVAL; - goto bad_swap; - } + } - p->lowest_bit = 1; - p->cluster_next = 1; + p->lowest_bit = 1; + p->cluster_next = 1; - /* - * Find out how many pages are allowed for a single swap - * device. There are two limiting factors: 1) the number of - * bits for the swap offset in the swp_entry_t type and - * 2) the number of bits in the a swap pte as defined by - * the different architectures. In order to find the - * largest possible bit mask a swap entry with swap type 0 - * and swap offset ~0UL is created, encoded to a swap pte, - * decoded to a swp_entry_t again and finally the swap - * offset is extracted. This will mask all the bits from - * the initial ~0UL mask that can't be encoded in either - * the swp_entry_t or the architecture definition of a - * swap pte. - */ - maxpages = swp_offset(pte_to_swp_entry(swp_entry_to_pte(swp_entry(0,~0UL)))) - 1; - if (maxpages > swap_header->info.last_page) - maxpages = swap_header->info.last_page; - p->highest_bit = maxpages - 1; + /* + * Find out how many pages are allowed for a single swap + * device. There are two limiting factors: 1) the number of + * bits for the swap offset in the swp_entry_t type and + * 2) the number of bits in the a swap pte as defined by + * the different architectures. In order to find the + * largest possible bit mask a swap entry with swap type 0 + * and swap offset ~0UL is created, encoded to a swap pte, + * decoded to a swp_entry_t again and finally the swap + * offset is extracted. This will mask all the bits from + * the initial ~0UL mask that can't be encoded in either + * the swp_entry_t or the architecture definition of a + * swap pte. + */ + maxpages = swp_offset(pte_to_swp_entry( + swp_entry_to_pte(swp_entry(0, ~0UL)))) - 1; + if (maxpages > swap_header->info.last_page) + maxpages = swap_header->info.last_page; + p->highest_bit = maxpages - 1; - error = -EINVAL; - if (!maxpages) - goto bad_swap; - if (swapfilesize && maxpages > swapfilesize) { - printk(KERN_WARNING - "Swap area shorter than signature indicates\n"); - goto bad_swap; - } - if (swap_header->info.nr_badpages && S_ISREG(inode->i_mode)) - goto bad_swap; - if (swap_header->info.nr_badpages > MAX_SWAP_BADPAGES) - goto bad_swap; + error = -EINVAL; + if (!maxpages) + goto bad_swap; + if (swapfilepages && maxpages > swapfilepages) { + printk(KERN_WARNING + "Swap area shorter than signature indicates\n"); + goto bad_swap; + } + if (swap_header->info.nr_badpages && S_ISREG(inode->i_mode)) + goto bad_swap; + if (swap_header->info.nr_badpages > MAX_SWAP_BADPAGES) + goto bad_swap; - /* OK, set up the swap map and apply the bad block list */ - swap_map = vmalloc(maxpages * sizeof(short)); - if (!swap_map) { - error = -ENOMEM; - goto bad_swap; - } + /* OK, set up the swap map and apply the bad block list */ + swap_map = vmalloc(maxpages * sizeof(short)); + if (!swap_map) { + error = -ENOMEM; + goto bad_swap; + } - error = 0; - memset(swap_map, 0, maxpages * sizeof(short)); - for (i = 0; i < swap_header->info.nr_badpages; i++) { - int page_nr = swap_header->info.badpages[i]; - if (page_nr <= 0 || page_nr >= swap_header->info.last_page) - error = -EINVAL; - else - swap_map[page_nr] = SWAP_MAP_BAD; - } - nr_good_pages = swap_header->info.last_page - - swap_header->info.nr_badpages - - 1 /* header page */; - if (error) + memset(swap_map, 0, maxpages * sizeof(short)); + for (i = 0; i < swap_header->info.nr_badpages; i++) { + int page_nr = swap_header->info.badpages[i]; + if (page_nr <= 0 || page_nr >= swap_header->info.last_page) { + error = -EINVAL; goto bad_swap; + } + swap_map[page_nr] = SWAP_MAP_BAD; } + nr_good_pages = swap_header->info.last_page - + swap_header->info.nr_badpages - + 1 /* header page */; if (nr_good_pages) { swap_map[0] = SWAP_MAP_BAD; @@ -1697,6 +1830,13 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags) goto bad_swap; } + if (blk_queue_nonrot(bdev_get_queue(p->bdev))) { + p->flags |= SWP_SOLIDSTATE; + p->cluster_next = 1 + (random32() % p->highest_bit); + } + if (discard_swap(p) == 0) + p->flags |= SWP_DISCARDABLE; + mutex_lock(&swapon_mutex); spin_lock(&swap_lock); if (swap_flags & SWAP_FLAG_PREFER) @@ -1705,14 +1845,16 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags) else p->prio = --least_priority; p->swap_map = swap_map; - p->flags = SWP_ACTIVE; + p->flags |= SWP_WRITEOK; nr_swap_pages += nr_good_pages; total_swap_pages += nr_good_pages; printk(KERN_INFO "Adding %uk swap on %s. " - "Priority:%d extents:%d across:%lluk\n", + "Priority:%d extents:%d across:%lluk %s%s\n", nr_good_pages<<(PAGE_SHIFT-10), name, p->prio, - nr_extents, (unsigned long long)span<<(PAGE_SHIFT-10)); + nr_extents, (unsigned long long)span<<(PAGE_SHIFT-10), + (p->flags & SWP_SOLIDSTATE) ? "SS" : "", + (p->flags & SWP_DISCARDABLE) ? "D" : ""); /* insert swap space into swap_list: */ prev = -1; diff --git a/mm/tiny-shmem.c b/mm/tiny-shmem.c deleted file mode 100644 index 3e67d575ee6e..000000000000 --- a/mm/tiny-shmem.c +++ /dev/null @@ -1,134 +0,0 @@ -/* - * tiny-shmem.c: simple shmemfs and tmpfs using ramfs code - * - * Matt Mackall <mpm@selenic.com> January, 2004 - * derived from mm/shmem.c and fs/ramfs/inode.c - * - * This is intended for small system where the benefits of the full - * shmem code (swap-backed and resource-limited) are outweighed by - * their complexity. On systems without swap this code should be - * effectively equivalent, but much lighter weight. - */ - -#include <linux/fs.h> -#include <linux/init.h> -#include <linux/vfs.h> -#include <linux/mount.h> -#include <linux/file.h> -#include <linux/mm.h> -#include <linux/module.h> -#include <linux/swap.h> -#include <linux/ramfs.h> - -static struct file_system_type tmpfs_fs_type = { - .name = "tmpfs", - .get_sb = ramfs_get_sb, - .kill_sb = kill_litter_super, -}; - -static struct vfsmount *shm_mnt; - -static int __init init_tmpfs(void) -{ - BUG_ON(register_filesystem(&tmpfs_fs_type) != 0); - - shm_mnt = kern_mount(&tmpfs_fs_type); - BUG_ON(IS_ERR(shm_mnt)); - - return 0; -} -module_init(init_tmpfs) - -/** - * shmem_file_setup - get an unlinked file living in tmpfs - * @name: name for dentry (to be seen in /proc/<pid>/maps - * @size: size to be set for the file - * @flags: vm_flags - */ -struct file *shmem_file_setup(char *name, loff_t size, unsigned long flags) -{ - int error; - struct file *file; - struct inode *inode; - struct dentry *dentry, *root; - struct qstr this; - - if (IS_ERR(shm_mnt)) - return (void *)shm_mnt; - - error = -ENOMEM; - this.name = name; - this.len = strlen(name); - this.hash = 0; /* will go */ - root = shm_mnt->mnt_root; - dentry = d_alloc(root, &this); - if (!dentry) - goto put_memory; - - error = -ENFILE; - file = get_empty_filp(); - if (!file) - goto put_dentry; - - error = -ENOSPC; - inode = ramfs_get_inode(root->d_sb, S_IFREG | S_IRWXUGO, 0); - if (!inode) - goto close_file; - - d_instantiate(dentry, inode); - inode->i_size = size; - inode->i_nlink = 0; /* It is unlinked */ - init_file(file, shm_mnt, dentry, FMODE_WRITE | FMODE_READ, - &ramfs_file_operations); - -#ifndef CONFIG_MMU - error = ramfs_nommu_expand_for_mapping(inode, size); - if (error) - goto close_file; -#endif - return file; - -close_file: - put_filp(file); -put_dentry: - dput(dentry); -put_memory: - return ERR_PTR(error); -} -EXPORT_SYMBOL_GPL(shmem_file_setup); - -/** - * shmem_zero_setup - setup a shared anonymous mapping - * @vma: the vma to be mmapped is prepared by do_mmap_pgoff - */ -int shmem_zero_setup(struct vm_area_struct *vma) -{ - struct file *file; - loff_t size = vma->vm_end - vma->vm_start; - - file = shmem_file_setup("dev/zero", size, vma->vm_flags); - if (IS_ERR(file)) - return PTR_ERR(file); - - if (vma->vm_file) - fput(vma->vm_file); - vma->vm_file = file; - vma->vm_ops = &generic_file_vm_ops; - return 0; -} - -int shmem_unuse(swp_entry_t entry, struct page *page) -{ - return 0; -} - -#ifndef CONFIG_MMU -unsigned long shmem_get_unmapped_area(struct file *file, - unsigned long addr, - unsigned long len, - unsigned long pgoff, - unsigned long flags) -{ - return ramfs_nommu_get_unmapped_area(file, addr, len, pgoff, flags); -} -#endif diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 7465f22fec0c..c5db9a7264d9 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -14,6 +14,7 @@ #include <linux/highmem.h> #include <linux/slab.h> #include <linux/spinlock.h> +#include <linux/mutex.h> #include <linux/interrupt.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> @@ -381,8 +382,9 @@ found: goto retry; } if (printk_ratelimit()) - printk(KERN_WARNING "vmap allocation failed: " - "use vmalloc=<size> to increase size.\n"); + printk(KERN_WARNING + "vmap allocation for size %lu failed: " + "use vmalloc=<size> to increase size.\n", size); return ERR_PTR(-EBUSY); } @@ -432,6 +434,27 @@ static void unmap_vmap_area(struct vmap_area *va) vunmap_page_range(va->va_start, va->va_end); } +static void vmap_debug_free_range(unsigned long start, unsigned long end) +{ + /* + * Unmap page tables and force a TLB flush immediately if + * CONFIG_DEBUG_PAGEALLOC is set. This catches use after free + * bugs similarly to those in linear kernel virtual address + * space after a page has been freed. + * + * All the lazy freeing logic is still retained, in order to + * minimise intrusiveness of this debugging feature. + * + * This is going to be *slow* (linear kernel virtual address + * debugging doesn't do a broadcast TLB flush so it is a lot + * faster). + */ +#ifdef CONFIG_DEBUG_PAGEALLOC + vunmap_page_range(start, end); + flush_tlb_kernel_range(start, end); +#endif +} + /* * lazy_max_pages is the maximum amount of virtual address space we gather up * before attempting to purge with a TLB flush. @@ -472,7 +495,7 @@ static atomic_t vmap_lazy_nr = ATOMIC_INIT(0); static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end, int sync, int force_flush) { - static DEFINE_SPINLOCK(purge_lock); + static DEFINE_MUTEX(purge_lock); LIST_HEAD(valist); struct vmap_area *va; int nr = 0; @@ -483,10 +506,10 @@ static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end, * the case that isn't actually used at the moment anyway. */ if (!sync && !force_flush) { - if (!spin_trylock(&purge_lock)) + if (!mutex_trylock(&purge_lock)) return; } else - spin_lock(&purge_lock); + mutex_lock(&purge_lock); rcu_read_lock(); list_for_each_entry_rcu(va, &vmap_area_list, list) { @@ -518,7 +541,7 @@ static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end, __free_vmap_area(va); spin_unlock(&vmap_area_lock); } - spin_unlock(&purge_lock); + mutex_unlock(&purge_lock); } /* @@ -912,6 +935,7 @@ void vm_unmap_ram(const void *mem, unsigned int count) BUG_ON(addr & (PAGE_SIZE-1)); debug_check_no_locks_freed(mem, size); + vmap_debug_free_range(addr, addr+size); if (likely(count <= VMAP_MAX_ALLOC)) vb_free(mem, size); @@ -1128,6 +1152,8 @@ struct vm_struct *remove_vm_area(const void *addr) if (va && va->flags & VM_VM_AREA) { struct vm_struct *vm = va->private; struct vm_struct *tmp, **p; + + vmap_debug_free_range(va->va_start, va->va_end); free_unmap_vmap_area(va); vm->size -= PAGE_SIZE; @@ -1375,7 +1401,8 @@ void *vmalloc_user(unsigned long size) struct vm_struct *area; void *ret; - ret = __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, PAGE_KERNEL); + ret = __vmalloc_node(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, + PAGE_KERNEL, -1, __builtin_return_address(0)); if (ret) { area = find_vm_area(ret); area->flags |= VM_USERMAP; @@ -1420,7 +1447,8 @@ EXPORT_SYMBOL(vmalloc_node); void *vmalloc_exec(unsigned long size) { - return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC); + return __vmalloc_node(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC, + -1, __builtin_return_address(0)); } #if defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA32) @@ -1440,7 +1468,8 @@ void *vmalloc_exec(unsigned long size) */ void *vmalloc_32(unsigned long size) { - return __vmalloc(size, GFP_VMALLOC32, PAGE_KERNEL); + return __vmalloc_node(size, GFP_VMALLOC32, PAGE_KERNEL, + -1, __builtin_return_address(0)); } EXPORT_SYMBOL(vmalloc_32); @@ -1456,7 +1485,8 @@ void *vmalloc_32_user(unsigned long size) struct vm_struct *area; void *ret; - ret = __vmalloc(size, GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL); + ret = __vmalloc_node(size, GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL, + -1, __builtin_return_address(0)); if (ret) { area = find_vm_area(ret); area->flags |= VM_USERMAP; diff --git a/mm/vmscan.c b/mm/vmscan.c index d196f46c8808..b07c48b09a93 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -52,6 +52,9 @@ struct scan_control { /* Incremented by the number of inactive pages that were scanned */ unsigned long nr_scanned; + /* Number of pages freed so far during a call to shrink_zones() */ + unsigned long nr_reclaimed; + /* This context's GFP mask */ gfp_t gfp_mask; @@ -617,7 +620,6 @@ static unsigned long shrink_page_list(struct list_head *page_list, referenced && page_mapping_inuse(page)) goto activate_locked; -#ifdef CONFIG_SWAP /* * Anonymous process memory has backing store? * Try to allocate it some swap space here. @@ -625,20 +627,10 @@ static unsigned long shrink_page_list(struct list_head *page_list, if (PageAnon(page) && !PageSwapCache(page)) { if (!(sc->gfp_mask & __GFP_IO)) goto keep_locked; - switch (try_to_munlock(page)) { - case SWAP_FAIL: /* shouldn't happen */ - case SWAP_AGAIN: - goto keep_locked; - case SWAP_MLOCK: - goto cull_mlocked; - case SWAP_SUCCESS: - ; /* fall thru'; add to swap cache */ - } - if (!add_to_swap(page, GFP_ATOMIC)) + if (!add_to_swap(page)) goto activate_locked; may_enter_fs = 1; } -#endif /* CONFIG_SWAP */ mapping = page_mapping(page); @@ -752,6 +744,8 @@ free_it: continue; cull_mlocked: + if (PageSwapCache(page)) + try_to_free_swap(page); unlock_page(page); putback_lru_page(page); continue; @@ -759,7 +753,7 @@ cull_mlocked: activate_locked: /* Not a candidate for swapping, so reclaim swap space. */ if (PageSwapCache(page) && vm_swap_full()) - remove_exclusive_swap_page_ref(page); + try_to_free_swap(page); VM_BUG_ON(PageActive(page)); SetPageActive(page); pgactivate++; @@ -1173,11 +1167,6 @@ static inline void note_zone_scanning_priority(struct zone *zone, int priority) zone->prev_priority = priority; } -static inline int zone_is_near_oom(struct zone *zone) -{ - return zone->pages_scanned >= (zone_lru_pages(zone) * 3); -} - /* * This moves pages from the active list to the inactive list. * @@ -1248,6 +1237,13 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, list_add(&page->lru, &l_inactive); } + /* + * Move the pages to the [file or anon] inactive list. + */ + pagevec_init(&pvec, 1); + pgmoved = 0; + lru = LRU_BASE + file * LRU_FILE; + spin_lock_irq(&zone->lru_lock); /* * Count referenced pages from currently used mappings as @@ -1255,15 +1251,9 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, * This helps balance scan pressure between file and anonymous * pages in get_scan_ratio. */ - zone->recent_rotated[!!file] += pgmoved; - - /* - * Move the pages to the [file or anon] inactive list. - */ - pagevec_init(&pvec, 1); + if (scan_global_lru(sc)) + zone->recent_rotated[!!file] += pgmoved; - pgmoved = 0; - lru = LRU_BASE + file * LRU_FILE; while (!list_empty(&l_inactive)) { page = lru_to_page(&l_inactive); prefetchw_prev_lru_page(page, &l_inactive, flags); @@ -1336,12 +1326,6 @@ static void get_scan_ratio(struct zone *zone, struct scan_control *sc, unsigned long anon_prio, file_prio; unsigned long ap, fp; - anon = zone_page_state(zone, NR_ACTIVE_ANON) + - zone_page_state(zone, NR_INACTIVE_ANON); - file = zone_page_state(zone, NR_ACTIVE_FILE) + - zone_page_state(zone, NR_INACTIVE_FILE); - free = zone_page_state(zone, NR_FREE_PAGES); - /* If we have no swap space, do not bother scanning anon pages. */ if (nr_swap_pages <= 0) { percent[0] = 0; @@ -1349,6 +1333,12 @@ static void get_scan_ratio(struct zone *zone, struct scan_control *sc, return; } + anon = zone_page_state(zone, NR_ACTIVE_ANON) + + zone_page_state(zone, NR_INACTIVE_ANON); + file = zone_page_state(zone, NR_ACTIVE_FILE) + + zone_page_state(zone, NR_INACTIVE_FILE); + free = zone_page_state(zone, NR_FREE_PAGES); + /* If we have very few page cache pages, force-scan anon pages. */ if (unlikely(file + free <= zone->pages_high)) { percent[0] = 100; @@ -1408,14 +1398,15 @@ static void get_scan_ratio(struct zone *zone, struct scan_control *sc, /* * This is a basic per-zone page freer. Used by both kswapd and direct reclaim. */ -static unsigned long shrink_zone(int priority, struct zone *zone, +static void shrink_zone(int priority, struct zone *zone, struct scan_control *sc) { unsigned long nr[NR_LRU_LISTS]; unsigned long nr_to_scan; - unsigned long nr_reclaimed = 0; unsigned long percent[2]; /* anon @ 0; file @ 1 */ enum lru_list l; + unsigned long nr_reclaimed = sc->nr_reclaimed; + unsigned long swap_cluster_max = sc->swap_cluster_max; get_scan_ratio(zone, sc, percent); @@ -1431,7 +1422,7 @@ static unsigned long shrink_zone(int priority, struct zone *zone, } zone->lru[l].nr_scan += scan; nr[l] = zone->lru[l].nr_scan; - if (nr[l] >= sc->swap_cluster_max) + if (nr[l] >= swap_cluster_max) zone->lru[l].nr_scan = 0; else nr[l] = 0; @@ -1450,16 +1441,28 @@ static unsigned long shrink_zone(int priority, struct zone *zone, nr[LRU_INACTIVE_FILE]) { for_each_evictable_lru(l) { if (nr[l]) { - nr_to_scan = min(nr[l], - (unsigned long)sc->swap_cluster_max); + nr_to_scan = min(nr[l], swap_cluster_max); nr[l] -= nr_to_scan; nr_reclaimed += shrink_list(l, nr_to_scan, - zone, sc, priority); + zone, sc, priority); } } + /* + * On large memory systems, scan >> priority can become + * really large. This is fine for the starting priority; + * we want to put equal scanning pressure on each zone. + * However, if the VM has a harder time of freeing pages, + * with multiple processes reclaiming pages, the total + * freeing target can get unreasonably large. + */ + if (nr_reclaimed > swap_cluster_max && + priority < DEF_PRIORITY && !current_is_kswapd()) + break; } + sc->nr_reclaimed = nr_reclaimed; + /* * Even if we did not try to evict anon pages at all, we want to * rebalance the anon lru active/inactive ratio. @@ -1470,7 +1473,6 @@ static unsigned long shrink_zone(int priority, struct zone *zone, shrink_active_list(SWAP_CLUSTER_MAX, zone, sc, priority, 0); throttle_vm_writeout(sc->gfp_mask); - return nr_reclaimed; } /* @@ -1484,16 +1486,13 @@ static unsigned long shrink_zone(int priority, struct zone *zone, * b) The zones may be over pages_high but they must go *over* pages_high to * satisfy the `incremental min' zone defense algorithm. * - * Returns the number of reclaimed pages. - * * If a zone is deemed to be full of pinned pages then just give it a light * scan then give up on it. */ -static unsigned long shrink_zones(int priority, struct zonelist *zonelist, +static void shrink_zones(int priority, struct zonelist *zonelist, struct scan_control *sc) { enum zone_type high_zoneidx = gfp_zone(sc->gfp_mask); - unsigned long nr_reclaimed = 0; struct zoneref *z; struct zone *zone; @@ -1524,10 +1523,8 @@ static unsigned long shrink_zones(int priority, struct zonelist *zonelist, priority); } - nr_reclaimed += shrink_zone(priority, zone, sc); + shrink_zone(priority, zone, sc); } - - return nr_reclaimed; } /* @@ -1552,7 +1549,6 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, int priority; unsigned long ret = 0; unsigned long total_scanned = 0; - unsigned long nr_reclaimed = 0; struct reclaim_state *reclaim_state = current->reclaim_state; unsigned long lru_pages = 0; struct zoneref *z; @@ -1580,7 +1576,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, sc->nr_scanned = 0; if (!priority) disable_swap_token(); - nr_reclaimed += shrink_zones(priority, zonelist, sc); + shrink_zones(priority, zonelist, sc); /* * Don't shrink slabs when reclaiming memory from * over limit cgroups @@ -1588,13 +1584,13 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, if (scan_global_lru(sc)) { shrink_slab(sc->nr_scanned, sc->gfp_mask, lru_pages); if (reclaim_state) { - nr_reclaimed += reclaim_state->reclaimed_slab; + sc->nr_reclaimed += reclaim_state->reclaimed_slab; reclaim_state->reclaimed_slab = 0; } } total_scanned += sc->nr_scanned; - if (nr_reclaimed >= sc->swap_cluster_max) { - ret = nr_reclaimed; + if (sc->nr_reclaimed >= sc->swap_cluster_max) { + ret = sc->nr_reclaimed; goto out; } @@ -1617,7 +1613,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, } /* top priority shrink_zones still had more to do? don't OOM, then */ if (!sc->all_unreclaimable && scan_global_lru(sc)) - ret = nr_reclaimed; + ret = sc->nr_reclaimed; out: /* * Now that we've scanned all the zones at this priority level, note @@ -1712,7 +1708,6 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order) int priority; int i; unsigned long total_scanned; - unsigned long nr_reclaimed; struct reclaim_state *reclaim_state = current->reclaim_state; struct scan_control sc = { .gfp_mask = GFP_KERNEL, @@ -1731,7 +1726,7 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order) loop_again: total_scanned = 0; - nr_reclaimed = 0; + sc.nr_reclaimed = 0; sc.may_writepage = !laptop_mode; count_vm_event(PAGEOUTRUN); @@ -1817,11 +1812,11 @@ loop_again: */ if (!zone_watermark_ok(zone, order, 8*zone->pages_high, end_zone, 0)) - nr_reclaimed += shrink_zone(priority, zone, &sc); + shrink_zone(priority, zone, &sc); reclaim_state->reclaimed_slab = 0; nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL, lru_pages); - nr_reclaimed += reclaim_state->reclaimed_slab; + sc.nr_reclaimed += reclaim_state->reclaimed_slab; total_scanned += sc.nr_scanned; if (zone_is_all_unreclaimable(zone)) continue; @@ -1835,7 +1830,7 @@ loop_again: * even in laptop mode */ if (total_scanned > SWAP_CLUSTER_MAX * 2 && - total_scanned > nr_reclaimed + nr_reclaimed / 2) + total_scanned > sc.nr_reclaimed + sc.nr_reclaimed / 2) sc.may_writepage = 1; } if (all_zones_ok) @@ -1853,7 +1848,7 @@ loop_again: * matches the direct reclaim path behaviour in terms of impact * on zone->*_priority. */ - if (nr_reclaimed >= SWAP_CLUSTER_MAX) + if (sc.nr_reclaimed >= SWAP_CLUSTER_MAX) break; } out: @@ -1872,10 +1867,27 @@ out: try_to_freeze(); + /* + * Fragmentation may mean that the system cannot be + * rebalanced for high-order allocations in all zones. + * At this point, if nr_reclaimed < SWAP_CLUSTER_MAX, + * it means the zones have been fully scanned and are still + * not balanced. For high-order allocations, there is + * little point trying all over again as kswapd may + * infinite loop. + * + * Instead, recheck all watermarks at order-0 as they + * are the most important. If watermarks are ok, kswapd will go + * back to sleep. High-order users can still perform direct + * reclaim if they wish. + */ + if (sc.nr_reclaimed < SWAP_CLUSTER_MAX) + order = sc.order = 0; + goto loop_again; } - return nr_reclaimed; + return sc.nr_reclaimed; } /* @@ -2227,7 +2239,6 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) struct task_struct *p = current; struct reclaim_state reclaim_state; int priority; - unsigned long nr_reclaimed = 0; struct scan_control sc = { .may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE), .may_swap = !!(zone_reclaim_mode & RECLAIM_SWAP), @@ -2260,9 +2271,9 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) priority = ZONE_RECLAIM_PRIORITY; do { note_zone_scanning_priority(zone, priority); - nr_reclaimed += shrink_zone(priority, zone, &sc); + shrink_zone(priority, zone, &sc); priority--; - } while (priority >= 0 && nr_reclaimed < nr_pages); + } while (priority >= 0 && sc.nr_reclaimed < nr_pages); } slab_reclaimable = zone_page_state(zone, NR_SLAB_RECLAIMABLE); @@ -2286,13 +2297,13 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) * Update nr_reclaimed by the number of slab pages we * reclaimed from this zone. */ - nr_reclaimed += slab_reclaimable - + sc.nr_reclaimed += slab_reclaimable - zone_page_state(zone, NR_SLAB_RECLAIMABLE); } p->reclaim_state = NULL; current->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE); - return nr_reclaimed >= nr_pages; + return sc.nr_reclaimed >= nr_pages; } int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) @@ -2472,7 +2483,7 @@ void scan_mapping_unevictable_pages(struct address_space *mapping) * back onto @zone's unevictable list. */ #define SCAN_UNEVICTABLE_BATCH_SIZE 16UL /* arbitrary lock hold batch size */ -void scan_zone_unevictable_pages(struct zone *zone) +static void scan_zone_unevictable_pages(struct zone *zone) { struct list_head *l_unevictable = &zone->lru[LRU_UNEVICTABLE].list; unsigned long scan; @@ -2514,7 +2525,7 @@ void scan_zone_unevictable_pages(struct zone *zone) * that has possibly/probably made some previously unevictable pages * evictable. */ -void scan_all_zones_unevictable_pages(void) +static void scan_all_zones_unevictable_pages(void) { struct zone *zone; diff --git a/samples/firmware_class/firmware_sample_driver.c b/samples/firmware_class/firmware_sample_driver.c index 11114f389c49..219a29896603 100644 --- a/samples/firmware_class/firmware_sample_driver.c +++ b/samples/firmware_class/firmware_sample_driver.c @@ -100,7 +100,7 @@ static void sample_probe_async(void) " request_firmware_nowait failed\n"); } -static int sample_init(void) +static int __init sample_init(void) { device_initialize(&ghost_device); /* since there is no real hardware insertion I just call the diff --git a/samples/kobject/kobject-example.c b/samples/kobject/kobject-example.c index 08d0d3ff3263..8d9b55a12023 100644 --- a/samples/kobject/kobject-example.c +++ b/samples/kobject/kobject-example.c @@ -101,7 +101,7 @@ static struct attribute_group attr_group = { static struct kobject *example_kobj; -static int example_init(void) +static int __init example_init(void) { int retval; @@ -126,7 +126,7 @@ static int example_init(void) return retval; } -static void example_exit(void) +static void __exit example_exit(void) { kobject_put(example_kobj); } diff --git a/samples/kobject/kset-example.c b/samples/kobject/kset-example.c index 7395c0bbae18..45b7d56fb541 100644 --- a/samples/kobject/kset-example.c +++ b/samples/kobject/kset-example.c @@ -229,7 +229,7 @@ static void destroy_foo_obj(struct foo_obj *foo) kobject_put(&foo->kobj); } -static int example_init(void) +static int __init example_init(void) { /* * Create a kset with the name of "kset_example", @@ -264,7 +264,7 @@ foo_error: return -EINVAL; } -static void example_exit(void) +static void __exit example_exit(void) { destroy_foo_obj(baz_obj); destroy_foo_obj(bar_obj); diff --git a/samples/markers/marker-example.c b/samples/markers/marker-example.c index e90dc5d04392..e9cd9c0bc84f 100644 --- a/samples/markers/marker-example.c +++ b/samples/markers/marker-example.c @@ -30,7 +30,7 @@ static struct file_operations mark_ops = { .open = my_open, }; -static int example_init(void) +static int __init example_init(void) { printk(KERN_ALERT "example init\n"); pentry_example = proc_create("marker-example", 0444, NULL, &mark_ops); @@ -39,7 +39,7 @@ static int example_init(void) return 0; } -static void example_exit(void) +static void __exit example_exit(void) { printk(KERN_ALERT "example exit\n"); remove_proc_entry("marker-example", NULL); diff --git a/samples/tracepoints/tracepoint-probe-sample.c b/samples/tracepoints/tracepoint-probe-sample.c index e3a964889dc7..9e60eb6ca2d8 100644 --- a/samples/tracepoints/tracepoint-probe-sample.c +++ b/samples/tracepoints/tracepoint-probe-sample.c @@ -28,7 +28,7 @@ static void probe_subsys_eventb(void) printk(KERN_INFO "Event B is encountered\n"); } -int __init tp_sample_trace_init(void) +static int __init tp_sample_trace_init(void) { int ret; @@ -42,7 +42,7 @@ int __init tp_sample_trace_init(void) module_init(tp_sample_trace_init); -void __exit tp_sample_trace_exit(void) +static void __exit tp_sample_trace_exit(void) { unregister_trace_subsys_eventb(probe_subsys_eventb); unregister_trace_subsys_event(probe_subsys_event); diff --git a/samples/tracepoints/tracepoint-probe-sample2.c b/samples/tracepoints/tracepoint-probe-sample2.c index 685a5acb4562..be2a960573f1 100644 --- a/samples/tracepoints/tracepoint-probe-sample2.c +++ b/samples/tracepoints/tracepoint-probe-sample2.c @@ -18,7 +18,7 @@ static void probe_subsys_event(struct inode *inode, struct file *file) inode->i_ino); } -int __init tp_sample_trace_init(void) +static int __init tp_sample_trace_init(void) { int ret; @@ -30,7 +30,7 @@ int __init tp_sample_trace_init(void) module_init(tp_sample_trace_init); -void __exit tp_sample_trace_exit(void) +static void __exit tp_sample_trace_exit(void) { unregister_trace_subsys_event(probe_subsys_event); tracepoint_synchronize_unregister(); diff --git a/samples/tracepoints/tracepoint-sample.c b/samples/tracepoints/tracepoint-sample.c index 00d169792a3e..68d5dc0310e4 100644 --- a/samples/tracepoints/tracepoint-sample.c +++ b/samples/tracepoints/tracepoint-sample.c @@ -32,7 +32,7 @@ static struct file_operations mark_ops = { .open = my_open, }; -static int example_init(void) +static int __init example_init(void) { printk(KERN_ALERT "example init\n"); pentry_example = proc_create("tracepoint-example", 0444, NULL, @@ -42,7 +42,7 @@ static int example_init(void) return 0; } -static void example_exit(void) +static void __exit example_exit(void) { printk(KERN_ALERT "example exit\n"); remove_proc_entry("tracepoint-example", NULL); diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index f88bb3e21cda..7bed4ed2c519 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -1,7 +1,8 @@ #!/usr/bin/perl -w # (c) 2001, Dave Jones. <davej@redhat.com> (the file handling bit) # (c) 2005, Joel Schopp <jschopp@austin.ibm.com> (the ugly bit) -# (c) 2007, Andy Whitcroft <apw@uk.ibm.com> (new conditions, test suite, etc) +# (c) 2007,2008, Andy Whitcroft <apw@uk.ibm.com> (new conditions, test suite) +# (c) 2008, Andy Whitcroft <apw@canonical.com> # Licensed under the terms of the GNU GPL License version 2 use strict; @@ -9,7 +10,7 @@ use strict; my $P = $0; $P =~ s@.*/@@g; -my $V = '0.24'; +my $V = '0.26'; use Getopt::Long qw(:config no_auto_abbrev); @@ -68,7 +69,9 @@ my $dbg_possible = 0; my $dbg_type = 0; my $dbg_attr = 0; for my $key (keys %debug) { - eval "\${dbg_$key} = '$debug{$key}';" + ## no critic + eval "\${dbg_$key} = '$debug{$key}';"; + die "$@" if ($@); } if ($terse) { @@ -116,7 +119,8 @@ our $Attribute = qr{ __(?:mem|cpu|dev|)(?:initdata|init)| ____cacheline_aligned| ____cacheline_aligned_in_smp| - ____cacheline_internodealigned_in_smp + ____cacheline_internodealigned_in_smp| + __weak }x; our $Modifier; our $Inline = qr{inline|__always_inline|noinline}; @@ -125,6 +129,7 @@ our $Lval = qr{$Ident(?:$Member)*}; our $Constant = qr{(?:[0-9]+|0x[0-9a-fA-F]+)[UL]*}; our $Assignment = qr{(?:\*\=|/=|%=|\+=|-=|<<=|>>=|&=|\^=|\|=|=)}; +our $Compare = qr{<=|>=|==|!=|<|>}; our $Operators = qr{ <=|>=|==|!=| =>|->|<<|>>|<|>|!|~| @@ -190,7 +195,7 @@ sub build_types { }x; $Type = qr{ $NonptrType - (?:\s*\*+\s*const|\s*\*+|(?:\s*\[\s*\])+)? + (?:[\s\*]+\s*const|[\s\*]+|(?:\s*\[\s*\])+)? (?:\s+$Inline|\s+$Modifier)* }x; $Declare = qr{(?:$Storage\s+)?$Type}; @@ -203,9 +208,9 @@ my @dep_includes = (); my @dep_functions = (); my $removal = "Documentation/feature-removal-schedule.txt"; if ($tree && -f "$root/$removal") { - open(REMOVE, "<$root/$removal") || + open(my $REMOVE, '<', "$root/$removal") || die "$P: $removal: open failed - $!\n"; - while (<REMOVE>) { + while (<$REMOVE>) { if (/^Check:\s+(.*\S)/) { for my $entry (split(/[, ]+/, $1)) { if ($entry =~ m@include/(.*)@) { @@ -217,17 +222,21 @@ if ($tree && -f "$root/$removal") { } } } + close($REMOVE); } my @rawlines = (); my @lines = (); my $vname; for my $filename (@ARGV) { + my $FILE; if ($file) { - open(FILE, "diff -u /dev/null $filename|") || + open($FILE, '-|', "diff -u /dev/null $filename") || die "$P: $filename: diff failed - $!\n"; + } elsif ($filename eq '-') { + open($FILE, '<&STDIN'); } else { - open(FILE, "<$filename") || + open($FILE, '<', "$filename") || die "$P: $filename: open failed - $!\n"; } if ($filename eq '-') { @@ -235,11 +244,11 @@ for my $filename (@ARGV) { } else { $vname = $filename; } - while (<FILE>) { + while (<$FILE>) { chomp; push(@rawlines, $_); } - close(FILE); + close($FILE); if (!process($filename)) { $exit = 1; } @@ -366,7 +375,7 @@ sub sanitise_line { } } - #print "SQ:$sanitise_quote\n"; + #print "c<$c> SQ<$sanitise_quote>\n"; if ($off != 0 && $sanitise_quote eq '*/' && $c ne "\t") { substr($res, $off, 1, $;); } elsif ($off != 0 && $sanitise_quote && $c ne "\t") { @@ -402,6 +411,7 @@ sub ctx_statement_block { my $type = ''; my $level = 0; + my @stack = ([$type, $level]); my $p; my $c; my $len = 0; @@ -433,6 +443,16 @@ sub ctx_statement_block { $remainder = substr($blk, $off); #warn "CSB: c<$c> type<$type> level<$level> remainder<$remainder> coff_set<$coff_set>\n"; + + # Handle nested #if/#else. + if ($remainder =~ /^#\s*(?:ifndef|ifdef|if)\s/) { + push(@stack, [ $type, $level ]); + } elsif ($remainder =~ /^#\s*(?:else|elif)\b/) { + ($type, $level) = @{$stack[$#stack - 1]}; + } elsif ($remainder =~ /^#\s*endif\b/) { + ($type, $level) = @{pop(@stack)}; + } + # Statement ends at the ';' or a close '}' at the # outermost level. if ($level == 0 && $c eq ';') { @@ -579,11 +599,22 @@ sub ctx_block_get { my @res = (); my $level = 0; + my @stack = ($level); for ($line = $start; $remain > 0; $line++) { next if ($rawlines[$line] =~ /^-/); $remain--; $blk .= $rawlines[$line]; + + # Handle nested #if/#else. + if ($rawlines[$line] =~ /^.\s*#\s*(?:ifndef|ifdef|if)\s/) { + push(@stack, $level); + } elsif ($rawlines[$line] =~ /^.\s*#\s*(?:else|elif)\b/) { + $level = $stack[$#stack - 1]; + } elsif ($rawlines[$line] =~ /^.\s*#\s*endif\b/) { + $level = pop(@stack); + } + foreach my $c (split(//, $rawlines[$line])) { ##print "C<$c>L<$level><$open$close>O<$off>\n"; if ($off > 0) { @@ -843,11 +874,11 @@ sub annotate_values { $type = 'V'; $av_pending = 'V'; - } elsif ($cur =~ /^($Ident\s*):/) { - if ($type eq 'E') { - $av_pend_colon = 'L'; - } elsif ($type eq 'T') { + } elsif ($cur =~ /^($Ident\s*):(?:\s*\d+\s*(,|=|;))?/) { + if (defined $2 && $type eq 'C' || $type eq 'T') { $av_pend_colon = 'B'; + } elsif ($type eq 'E') { + $av_pend_colon = 'L'; } print "IDENT_COLON($1,$type>$av_pend_colon)\n" if ($dbg_values > 1); $type = 'V'; @@ -865,6 +896,10 @@ sub annotate_values { $type = 'E'; $av_pend_colon = 'O'; + } elsif ($cur =~/^(,)/) { + print "COMMA($1)\n" if ($dbg_values > 1); + $type = 'C'; + } elsif ($cur =~ /^(\?)/o) { print "QUESTION($1)\n" if ($dbg_values > 1); $type = 'N'; @@ -880,7 +915,7 @@ sub annotate_values { } $av_pend_colon = 'O'; - } elsif ($cur =~ /^(;|\[)/o) { + } elsif ($cur =~ /^(\[)/o) { print "CLOSE($1)\n" if ($dbg_values > 1); $type = 'N'; @@ -1051,6 +1086,7 @@ sub process { my $in_comment = 0; my $comment_edge = 0; my $first_line = 0; + my $p1_prefix = ''; my $prev_values = 'E'; @@ -1097,9 +1133,12 @@ sub process { $rawlines[$ln - 1] =~ /^-/); $cnt--; #print "RAW<$rawlines[$ln - 1]>\n"; - ($edge) = (defined $rawlines[$ln - 1] && - $rawlines[$ln - 1] =~ m@(/\*|\*/)@); - last if (defined $edge); + last if (!defined $rawlines[$ln - 1]); + if ($rawlines[$ln - 1] =~ m@(/\*|\*/)@ && + $rawlines[$ln - 1] !~ m@"[^"]*(?:/\*|\*/)[^"]*"@) { + ($edge) = $1; + last; + } } if (defined $edge && $edge eq '*/') { $in_comment = 1; @@ -1109,7 +1148,7 @@ sub process { # is the start of a diff block and this line starts # ' *' then it is very likely a comment. if (!defined $edge && - $rawlines[$linenr] =~ m@^.\s* \*(?:\s|$)@) + $rawlines[$linenr] =~ m@^.\s*(?:\*\*+| \*)(?:\s|$)@) { $in_comment = 1; } @@ -1196,7 +1235,12 @@ sub process { # extract the filename as it passes if ($line=~/^\+\+\+\s+(\S+)/) { $realfile = $1; - $realfile =~ s@^[^/]*/@@; + $realfile =~ s@^([^/]*)/@@; + + $p1_prefix = $1; + if ($tree && $p1_prefix ne '' && -e "$root/$p1_prefix") { + WARN("patch prefix '$p1_prefix' exists, appears to be a -p0 patch\n"); + } if ($realfile =~ m@^include/asm/@) { ERROR("do not modify files in include/asm, change architecture specific files in include/asm-<architecture>\n" . "$here$rawline\n"); @@ -1336,7 +1380,7 @@ sub process { } # any (foo ... *) is a pointer cast, and foo is a type - while ($s =~ /\(($Ident)(?:\s+$Sparse)*\s*\*+\s*\)/sg) { + while ($s =~ /\(($Ident)(?:\s+$Sparse)*[\s\*]+\s*\)/sg) { possible($1, "C:" . $s); } @@ -1594,7 +1638,7 @@ sub process { $herecurr); } # check for static initialisers. - if ($line =~ /\s*static\s.*=\s*(0|NULL|false)\s*;/) { + if ($line =~ /\bstatic\s.*=\s*(0|NULL|false)\s*;/) { ERROR("do not initialise statics to 0 or NULL\n" . $herecurr); } @@ -1602,7 +1646,7 @@ sub process { # check for new typedefs, only function parameters and sparse annotations # make sense. if ($line =~ /\btypedef\s/ && - $line !~ /\btypedef\s+$Type\s+\(\s*\*?$Ident\s*\)\s*\(/ && + $line !~ /\btypedef\s+$Type\s*\(\s*\*?$Ident\s*\)\s*\(/ && $line !~ /\btypedef\s+$Type\s+$Ident\s*\(/ && $line !~ /\b$typeTypedefs\b/ && $line !~ /\b__bitwise(?:__|)\b/) { @@ -1610,21 +1654,39 @@ sub process { } # * goes on variable not on type - if ($line =~ m{\($NonptrType(\*+)(?:\s+const)?\)}) { - ERROR("\"(foo$1)\" should be \"(foo $1)\"\n" . - $herecurr); + # (char*[ const]) + if ($line =~ m{\($NonptrType(\s*\*[\s\*]*(?:$Modifier\s*)*)\)}) { + my ($from, $to) = ($1, $1); - } elsif ($line =~ m{\($NonptrType\s+(\*+)(?!\s+const)\s+\)}) { - ERROR("\"(foo $1 )\" should be \"(foo $1)\"\n" . - $herecurr); + # Should start with a space. + $to =~ s/^(\S)/ $1/; + # Should not end with a space. + $to =~ s/\s+$//; + # '*'s should not have spaces between. + while ($to =~ s/(.)\s\*/$1\*/) { + } - } elsif ($line =~ m{\b$NonptrType(\*+)(?:\s+(?:$Attribute|$Sparse))?\s+[A-Za-z\d_]+}) { - ERROR("\"foo$1 bar\" should be \"foo $1bar\"\n" . - $herecurr); + #print "from<$from> to<$to>\n"; + if ($from ne $to) { + ERROR("\"(foo$from)\" should be \"(foo$to)\"\n" . $herecurr); + } + } elsif ($line =~ m{\b$NonptrType(\s*\*[\s\*]*(?:$Modifier\s*)?)($Ident)}) { + my ($from, $to, $ident) = ($1, $1, $2); - } elsif ($line =~ m{\b$NonptrType\s+(\*+)(?!\s+(?:$Attribute|$Sparse))\s+[A-Za-z\d_]+}) { - ERROR("\"foo $1 bar\" should be \"foo $1bar\"\n" . - $herecurr); + # Should start with a space. + $to =~ s/^(\S)/ $1/; + # Should not end with a space. + $to =~ s/\s+$//; + # '*'s should not have spaces between. + while ($to =~ s/(.)\s\*/$1\*/) { + } + # Modifiers should have spaces. + $to =~ s/(\b$Modifier$)/$1 /; + + #print "from<$from> to<$to>\n"; + if ($from ne $to) { + ERROR("\"foo${from}bar\" should be \"foo${to}bar\"\n" . $herecurr); + } } # # no BUG() or BUG_ON() @@ -1759,7 +1821,7 @@ sub process { $c = 'C' if ($elements[$n + 2] =~ /^$;/); $c = 'B' if ($elements[$n + 2] =~ /^(\)|\]|;)/); $c = 'O' if ($elements[$n + 2] eq ''); - $c = 'E' if ($elements[$n + 2] =~ /\s*\\$/); + $c = 'E' if ($elements[$n + 2] =~ /^\s*\\$/); } else { $c = 'E'; } @@ -1950,9 +2012,9 @@ sub process { my $spacing = $1; my $value = $2; - # Flatten any parentheses and braces + # Flatten any parentheses $value =~ s/\)\(/\) \(/g; - while ($value =~ s/\([^\(\)]*\)/1/) { + while ($value !~ /(?:$Ident|-?$Constant)\s*$Compare\s*(?:$Ident|-?$Constant)/ && $value =~ s/\([^\(\)]*\)/1/) { } if ($value =~ /^(?:$Ident|-?$Constant)$/) { @@ -1992,7 +2054,7 @@ sub process { $line =~ /\b(?:if|while|for)\s*\(/ && $line !~ /^.\s*#/) { my ($s, $c) = ($stat, $cond); - if ($c =~ /\bif\s*\(.*[^<>!=]=[^=].*/) { + if ($c =~ /\bif\s*\(.*[^<>!=]=[^=].*/s) { ERROR("do not use assignment in if condition\n" . $herecurr); } @@ -2167,9 +2229,10 @@ sub process { MODULE_PARAM_DESC| DECLARE_PER_CPU| DEFINE_PER_CPU| - __typeof__\( + __typeof__\(| + \.$Ident\s*=\s* }x; - #print "REST<$rest>\n"; + #print "REST<$rest> dstat<$dstat>\n"; if ($rest ne '') { if ($rest !~ /while\s*\(/ && $dstat !~ /$exceptions/) @@ -2189,6 +2252,15 @@ sub process { } } +# make sure symbols are always wrapped with VMLINUX_SYMBOL() ... +# all assignments may have only one of the following with an assignment: +# . +# ALIGN(...) +# VMLINUX_SYMBOL(...) + if ($realfile eq 'vmlinux.lds.h' && $line =~ /(?:(?:^|\s)$Ident\s*=|=\s*$Ident(?:\s|$))/) { + WARN("vmlinux.lds.h needs VMLINUX_SYMBOL() around C-visible symbols\n" . $herecurr); + } + # check for redundant bracing round if etc if ($line =~ /(^.*)\bif\b/ && $1 !~ /else\s*$/) { my ($level, $endln, @chunks) = @@ -2443,6 +2515,11 @@ sub process { if ($line =~ /^.\s*__initcall\s*\(/) { WARN("please use device_initcall() instead of __initcall()\n" . $herecurr); } +# check for struct file_operations, ensure they are const. + if ($line =~ /\bstruct\s+file_operations\b/ && + $line !~ /\bconst\b/) { + WARN("struct file_operations should normally be const\n" . $herecurr); + } # use of NR_CPUS is usually wrong # ignore definitions of NR_CPUS and usage to define arrays as likely right @@ -2466,6 +2543,15 @@ sub process { last; } } + +# whine mightly about in_atomic + if ($line =~ /\bin_atomic\s*\(/) { + if ($realfile =~ m@^drivers/@) { + ERROR("do not use in_atomic in drivers\n" . $herecurr); + } else { + WARN("use of in_atomic() is incorrect outside core kernel code\n" . $herecurr); + } + } } # If we have no input at all, then there is nothing to report on diff --git a/scripts/markup_oops.pl b/scripts/markup_oops.pl new file mode 100644 index 000000000000..700a7a654a3f --- /dev/null +++ b/scripts/markup_oops.pl @@ -0,0 +1,162 @@ +#!/usr/bin/perl -w + +# Copyright 2008, Intel Corporation +# +# This file is part of the Linux kernel +# +# This program file is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation; version 2 of the License. +# +# Authors: +# Arjan van de Ven <arjan@linux.intel.com> + + +my $vmlinux_name = $ARGV[0]; + +# +# Step 1: Parse the oops to find the EIP value +# + +my $target = "0"; +while (<STDIN>) { + if ($_ =~ /EIP: 0060:\[\<([a-z0-9]+)\>\]/) { + $target = $1; + } +} + +if ($target =~ /^f8/) { + print "This script does not work on modules ... \n"; + exit; +} + +if ($target eq "0") { + print "No oops found!\n"; + print "Usage: \n"; + print " dmesg | perl scripts/markup_oops.pl vmlinux\n"; + exit; +} + +my $counter = 0; +my $state = 0; +my $center = 0; +my @lines; + +sub InRange { + my ($address, $target) = @_; + my $ad = "0x".$address; + my $ta = "0x".$target; + my $delta = hex($ad) - hex($ta); + + if (($delta > -4096) && ($delta < 4096)) { + return 1; + } + return 0; +} + + + +# first, parse the input into the lines array, but to keep size down, +# we only do this for 4Kb around the sweet spot + +my $filename; + +open(FILE, "objdump -dS $vmlinux_name |") || die "Cannot start objdump"; + +while (<FILE>) { + my $line = $_; + chomp($line); + if ($state == 0) { + if ($line =~ /^([a-f0-9]+)\:/) { + if (InRange($1, $target)) { + $state = 1; + } + } + } else { + if ($line =~ /^([a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9]+)\:/) { + my $val = $1; + if (!InRange($val, $target)) { + last; + } + if ($val eq $target) { + $center = $counter; + } + } + $lines[$counter] = $line; + + $counter = $counter + 1; + } +} + +close(FILE); + +if ($counter == 0) { + print "No matching code found \n"; + exit; +} + +if ($center == 0) { + print "No matching code found \n"; + exit; +} + +my $start; +my $finish; +my $codelines = 0; +my $binarylines = 0; +# now we go up and down in the array to find how much we want to print + +$start = $center; + +while ($start > 1) { + $start = $start - 1; + my $line = $lines[$start]; + if ($line =~ /^([a-f0-9]+)\:/) { + $binarylines = $binarylines + 1; + } else { + $codelines = $codelines + 1; + } + if ($codelines > 10) { + last; + } + if ($binarylines > 20) { + last; + } +} + + +$finish = $center; +$codelines = 0; +$binarylines = 0; +while ($finish < $counter) { + $finish = $finish + 1; + my $line = $lines[$finish]; + if ($line =~ /^([a-f0-9]+)\:/) { + $binarylines = $binarylines + 1; + } else { + $codelines = $codelines + 1; + } + if ($codelines > 10) { + last; + } + if ($binarylines > 20) { + last; + } +} + + +my $i; + +my $fulltext = ""; +$i = $start; +while ($i < $finish) { + if ($i == $center) { + $fulltext = $fulltext . "*$lines[$i] <----- faulting instruction\n"; + } else { + $fulltext = $fulltext . " $lines[$i]\n"; + } + $i = $i +1; +} + +print $fulltext; + diff --git a/sound/core/sound.c b/sound/core/sound.c index 44a69bb8d4f0..7872a02f6ca9 100644 --- a/sound/core/sound.c +++ b/sound/core/sound.c @@ -152,6 +152,10 @@ static int __snd_open(struct inode *inode, struct file *file) } old_fops = file->f_op; file->f_op = fops_get(mptr->f_ops); + if (file->f_op == NULL) { + file->f_op = old_fops; + return -ENODEV; + } if (file->f_op->open) err = file->f_op->open(inode, file); if (err) { |