diff options
339 files changed, 6882 insertions, 4754 deletions
diff --git a/Documentation/ABI/testing/sysfs-driver-xen-blkback b/Documentation/ABI/testing/sysfs-driver-xen-blkback new file mode 100644 index 000000000000..8bb43b66eb55 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-driver-xen-blkback @@ -0,0 +1,17 @@ +What: /sys/module/xen_blkback/parameters/max_buffer_pages +Date: March 2013 +KernelVersion: 3.11 +Contact: Roger Pau Monné <roger.pau@citrix.com> +Description: + Maximum number of free pages to keep in each block + backend buffer. + +What: /sys/module/xen_blkback/parameters/max_persistent_grants +Date: March 2013 +KernelVersion: 3.11 +Contact: Roger Pau Monné <roger.pau@citrix.com> +Description: + Maximum number of grants to map persistently in + blkback. If the frontend tries to use more than + max_persistent_grants, the LRU kicks in and starts + removing 5% of max_persistent_grants every 100ms. diff --git a/Documentation/ABI/testing/sysfs-driver-xen-blkfront b/Documentation/ABI/testing/sysfs-driver-xen-blkfront new file mode 100644 index 000000000000..c0a6cb7eb314 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-driver-xen-blkfront @@ -0,0 +1,10 @@ +What: /sys/module/xen_blkfront/parameters/max +Date: June 2013 +KernelVersion: 3.11 +Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Description: + Maximum number of segments that the frontend will negotiate + with the backend for indirect descriptors. The default value + is 32 - higher value means more potential throughput but more + memory usage. The backend picks the minimum of the frontend + and its default backend value. 
diff --git a/Documentation/bcache.txt b/Documentation/bcache.txt index c3365f26b2d9..32b6c3189d98 100644 --- a/Documentation/bcache.txt +++ b/Documentation/bcache.txt @@ -46,29 +46,33 @@ you format your backing devices and cache device at the same time, you won't have to manually attach: make-bcache -B /dev/sda /dev/sdb -C /dev/sdc -To make bcache devices known to the kernel, echo them to /sys/fs/bcache/register: +bcache-tools now ships udev rules, and bcache devices are known to the kernel +immediately. Without udev, you can manually register devices like this: echo /dev/sdb > /sys/fs/bcache/register echo /dev/sdc > /sys/fs/bcache/register -To register your bcache devices automatically, you could add something like -this to an init script: +Registering the backing device makes the bcache device show up in /dev; you can +now format it and use it as normal. But the first time using a new bcache +device, it'll be running in passthrough mode until you attach it to a cache. +See the section on attaching. - echo /dev/sd* > /sys/fs/bcache/register_quiet +The devices show up as: -It'll look for bcache superblocks and ignore everything that doesn't have one. + /dev/bcache<N> -Registering the backing device makes the bcache show up in /dev; you can now -format it and use it as normal. But the first time using a new bcache device, -it'll be running in passthrough mode until you attach it to a cache. See the -section on attaching. +As well as (with udev): -The devices show up at /dev/bcacheN, and can be controlled via sysfs from -/sys/block/bcacheN/bcache: + /dev/bcache/by-uuid/<uuid> + /dev/bcache/by-label/<label> + +To get started: mkfs.ext4 /dev/bcache0 mount /dev/bcache0 /mnt +You can control bcache devices through sysfs at /sys/block/bcache<N>/bcache . + Cache devices are managed as sets; multiple caches per set isn't supported yet but will allow for mirroring of metadata and dirty data in the future. 
Your new cache set shows up as /sys/fs/bcache/<UUID> @@ -80,11 +84,11 @@ must be attached to your cache set to enable caching. Attaching a backing device to a cache set is done thusly, with the UUID of the cache set in /sys/fs/bcache: - echo <UUID> > /sys/block/bcache0/bcache/attach + echo <CSET-UUID> > /sys/block/bcache0/bcache/attach This only has to be done once. The next time you reboot, just reregister all your bcache devices. If a backing device has data in a cache somewhere, the -/dev/bcache# device won't be created until the cache shows up - particularly +/dev/bcache<N> device won't be created until the cache shows up - particularly important if you have writeback caching turned on. If you're booting up and your cache device is gone and never coming back, you @@ -191,6 +195,9 @@ want for getting the best possible numbers when benchmarking. SYSFS - BACKING DEVICE: +Available at /sys/block/<bdev>/bcache, /sys/block/bcache*/bcache and +(if attached) /sys/fs/bcache/<cset-uuid>/bdev* + attach Echo the UUID of a cache set to this file to enable caching. @@ -300,6 +307,8 @@ cache_readaheads SYSFS - CACHE SET: +Available at /sys/fs/bcache/<cset-uuid> + average_key_size Average data per key in the btree. @@ -390,6 +399,8 @@ trigger_gc SYSFS - CACHE DEVICE: +Available at /sys/block/<cdev>/bcache + block_size Minimum granularity of writes - should match hardware sector size. diff --git a/Documentation/devicetree/bindings/clock/imx27-clock.txt b/Documentation/devicetree/bindings/clock/imx27-clock.txt index ab1a56e9de9d..7a2070393732 100644 --- a/Documentation/devicetree/bindings/clock/imx27-clock.txt +++ b/Documentation/devicetree/bindings/clock/imx27-clock.txt @@ -98,6 +98,7 @@ clocks and IDs. 
fpm 83 mpll_osc_sel 84 mpll_sel 85 + spll_gate 86 Examples: diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt index d5a79caec147..366ce9b87240 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.txt +++ b/Documentation/devicetree/bindings/vendor-prefixes.txt @@ -26,6 +26,7 @@ est ESTeem Wireless Modems fsl Freescale Semiconductor GEFanuc GE Fanuc Intelligent Platforms Embedded Systems, Inc. gef GE Fanuc Intelligent Platforms Embedded Systems, Inc. +hisilicon Hisilicon Limited. hp Hewlett Packard ibm International Business Machines (IBM) idt Integrated Device Technologies, Inc. @@ -43,6 +44,7 @@ nxp NXP Semiconductors onnn ON Semiconductor Corp. picochip Picochip Ltd powervr PowerVR (deprecated, use img) +qca Qualcomm Atheros, Inc. qcom Qualcomm, Inc. ralink Mediatek/Ralink Technology Corp. ramtron Ramtron International diff --git a/Documentation/ja_JP/HOWTO b/Documentation/ja_JP/HOWTO index 050d37fe6d40..8148a47fc70e 100644 --- a/Documentation/ja_JP/HOWTO +++ b/Documentation/ja_JP/HOWTO @@ -11,14 +11,14 @@ for non English (read: Japanese) speakers and is not intended as a fork. So if you have any comments or updates for this file, please try to update the original English file first. 
-Last Updated: 2011/03/31 +Last Updated: 2013/07/19 ================================== これは、 -linux-2.6.38/Documentation/HOWTO +linux-3.10/Documentation/HOWTO の和訳です。 -翻訳団体: JF プロジェクト < http://www.linux.or.jp/JF/ > -翻訳日: 2011/3/28 +翻訳団体: JF プロジェクト < http://linuxjf.sourceforge.jp/ > +翻訳日: 2013/7/19 翻訳者: Tsugikazu Shibata <tshibata at ab dot jp dot nec dot com> 校正者: 松倉さん <nbh--mats at nifty dot com> 小林 雅典さん (Masanori Kobayasi) <zap03216 at nifty dot ne dot jp> @@ -245,7 +245,7 @@ Linux カーネルソースツリーの中に含まれる、きれいにし、 自己参照方式で、索引がついた web 形式で、ソースコードを参照することが できます。この最新の素晴しいカーネルコードのリポジトリは以下で見つかり ます- - http://sosdg.org/~qiyong/lxr/ + http://lxr.linux.no/+trees 開発プロセス ----------------------- @@ -253,24 +253,24 @@ Linux カーネルソースツリーの中に含まれる、きれいにし、 Linux カーネルの開発プロセスは現在幾つかの異なるメインカーネル「ブラン チ」と多数のサブシステム毎のカーネルブランチから構成されます。 これらのブランチとは- - - メインの 2.6.x カーネルツリー - - 2.6.x.y -stable カーネルツリー - - 2.6.x -git カーネルパッチ + - メインの 3.x カーネルツリー + - 3.x.y -stable カーネルツリー + - 3.x -git カーネルパッチ - サブシステム毎のカーネルツリーとパッチ - - 統合テストのための 2.6.x -next カーネルツリー + - 統合テストのための 3.x -next カーネルツリー -2.6.x カーネルツリー +3.x カーネルツリー ----------------- -2.6.x カーネルは Linus Torvalds によってメンテナンスされ、kernel.org -の pub/linux/kernel/v2.6/ ディレクトリに存在します。この開発プロセスは +3.x カーネルは Linus Torvalds によってメンテナンスされ、kernel.org +の pub/linux/kernel/v3.x/ ディレクトリに存在します。この開発プロセスは 以下のとおり- - 新しいカーネルがリリースされた直後に、2週間の特別期間が設けられ、 この期間中に、メンテナ達は Linus に大きな差分を送ることができます。 このような差分は通常 -next カーネルに数週間含まれてきたパッチです。 大きな変更は git(カーネルのソース管理ツール、詳細は - http://git-scm.com/ 参照) を使って送るのが好ましいやり方ですが、パッ + http://git-scm.com/ 参照) を使って送るのが好ましいやり方ですが、パッ チファイルの形式のまま送るのでも十分です。 - 2週間後、-rc1 カーネルがリリースされ、この後にはカーネル全体の安定 @@ -302,20 +302,20 @@ Andrew Morton が Linux-kernel メーリングリストにカーネルリリー 実に認識されたバグの状況によりリリースされるのであり、前もって決めら れた計画によってリリースされるものではないからです。」 -2.6.x.y -stable カーネルツリー +3.x.y -stable カーネルツリー --------------------------- -バージョン番号が4つの数字に分かれているカーネルは -stable カーネルです。 -これには、2.6.x カーネルで見つかったセキュリティ問題や重大な後戻りに対 +バージョン番号が3つの数字に分かれているカーネルは -stable カーネルです。 +これには、3.x カーネルで見つかったセキュリティ問題や重大な後戻りに対 する比較的小さい重要な修正が含まれます。 
これは、開発/実験的バージョンのテストに協力することに興味が無く、 最新の安定したカーネルを使いたいユーザに推奨するブランチです。 -もし、2.6.x.y カーネルが存在しない場合には、番号が一番大きい 2.6.x が +もし、3.x.y カーネルが存在しない場合には、番号が一番大きい 3.x が 最新の安定版カーネルです。 -2.6.x.y は "stable" チーム <stable@kernel.org> でメンテされており、必 +3.x.y は "stable" チーム <stable@kernel.org> でメンテされており、必 要に応じてリリースされます。通常のリリース期間は 2週間毎ですが、差し迫っ た問題がなければもう少し長くなることもあります。セキュリティ関連の問題 の場合はこれに対してだいたいの場合、すぐにリリースがされます。 @@ -324,7 +324,7 @@ Andrew Morton が Linux-kernel メーリングリストにカーネルリリー イルにはどのような種類の変更が -stable ツリーに受け入れ可能か、またリ リースプロセスがどう動くかが記述されています。 -2.6.x -git パッチ +3.x -git パッチ ------------------ git リポジトリで管理されているLinus のカーネルツリーの毎日のスナップ @@ -358,14 +358,14 @@ quilt シリーズとして公開されているパッチキューも使われ をつけることができます。大部分のこれらの patchwork のサイトは http://patchwork.kernel.org/ でリストされています。 -統合テストのための 2.6.x -next カーネルツリー +統合テストのための 3.x -next カーネルツリー --------------------------------------------- -サブシステムツリーの更新内容がメインラインの 2.6.x ツリーにマージされ +サブシステムツリーの更新内容がメインラインの 3.x ツリーにマージされ る前に、それらは統合テストされる必要があります。この目的のため、実質的 に全サブシステムツリーからほぼ毎日プルされてできる特別なテスト用のリ ポジトリが存在します- - http://git.kernel.org/?p=linux/kernel/git/sfr/linux-next.git + http://git.kernel.org/?p=linux/kernel/git/next/linux-next.git http://linux.f-seidel.de/linux-next/pmwiki/ このやり方によって、-next カーネルは次のマージ機会でどんなものがメイン diff --git a/MAINTAINERS b/MAINTAINERS index bf61e04291ab..a26b10e52aea 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1642,7 +1642,7 @@ S: Maintained F: drivers/net/hamradio/baycom* BCACHE (BLOCK LAYER CACHE) -M: Kent Overstreet <koverstreet@google.com> +M: Kent Overstreet <kmo@daterainc.com> L: linux-bcache@vger.kernel.org W: http://bcache.evilpiepirate.org S: Maintained: @@ -3346,7 +3346,7 @@ F: Documentation/firmware_class/ F: drivers/base/firmware*.c F: include/linux/firmware.h -FLASHSYSTEM DRIVER (IBM FlashSystem 70/80 PCI SSD Flash Card) +FLASH ADAPTER DRIVER (IBM Flash Adapter 900GB Full Height PCI Flash Card) M: Joshua Morris <josh.h.morris@us.ibm.com> M: Philip Kelleher <pjk1939@linux.vnet.ibm.com> S: Maintained @@ -3622,11 +3622,9 @@ F: drivers/isdn/gigaset/ F: 
include/uapi/linux/gigaset_dev.h GPIO SUBSYSTEM -M: Grant Likely <grant.likely@linaro.org> M: Linus Walleij <linus.walleij@linaro.org> S: Maintained L: linux-gpio@vger.kernel.org -T: git git://git.secretlab.ca/git/linux-2.6.git F: Documentation/gpio.txt F: drivers/gpio/ F: include/linux/gpio* @@ -4472,8 +4470,6 @@ F: drivers/irqchip/ IRQ DOMAINS (IRQ NUMBER MAPPING LIBRARY) M: Benjamin Herrenschmidt <benh@kernel.crashing.org> -M: Grant Likely <grant.likely@linaro.org> -T: git git://git.secretlab.ca/git/linux-2.6.git irqdomain/next S: Maintained F: Documentation/IRQ-domain.txt F: include/linux/irqdomain.h @@ -4990,7 +4986,7 @@ F: arch/powerpc/platforms/44x/ LINUX FOR POWERPC EMBEDDED XILINX VIRTEX L: linuxppc-dev@lists.ozlabs.org -S: Unmaintained +S: Orphan F: arch/powerpc/*/*virtex* F: arch/powerpc/*/*/*virtex* @@ -5886,7 +5882,7 @@ OMAP DEVICE TREE SUPPORT M: Benoît Cousson <b-cousson@ti.com> M: Tony Lindgren <tony@atomide.com> L: linux-omap@vger.kernel.org -L: devicetree-discuss@lists.ozlabs.org (moderated for non-subscribers) +L: devicetree@vger.kernel.org S: Maintained F: arch/arm/boot/dts/*omap* F: arch/arm/boot/dts/*am3* @@ -6050,17 +6046,28 @@ F: drivers/i2c/busses/i2c-ocores.c OPEN FIRMWARE AND FLATTENED DEVICE TREE M: Grant Likely <grant.likely@linaro.org> M: Rob Herring <rob.herring@calxeda.com> -L: devicetree-discuss@lists.ozlabs.org (moderated for non-subscribers) +L: devicetree@vger.kernel.org W: http://fdt.secretlab.ca T: git git://git.secretlab.ca/git/linux-2.6.git S: Maintained -F: Documentation/devicetree -F: drivers/of +F: drivers/of/ F: include/linux/of*.h -F: scripts/dtc +F: scripts/dtc/ K: of_get_property K: of_match_table +OPEN FIRMWARE AND FLATTENED DEVICE TREE BINDINGS +M: Rob Herring <rob.herring@calxeda.com> +M: Pawel Moll <pawel.moll@arm.com> +M: Mark Rutland <mark.rutland@arm.com> +M: Stephen Warren <swarren@wwwdotorg.org> +M: Ian Campbell <ian.campbell@citrix.com> +L: devicetree@vger.kernel.org +S: Maintained +F: 
Documentation/devicetree/ +F: arch/*/boot/dts/ +F: include/dt-bindings/ + OPENRISC ARCHITECTURE M: Jonas Bonn <jonas@southpole.se> W: http://openrisc.net @@ -7746,7 +7753,6 @@ F: drivers/clk/spear/ SPI SUBSYSTEM M: Mark Brown <broonie@kernel.org> -M: Grant Likely <grant.likely@linaro.org> L: linux-spi@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/broonie/spi.git Q: http://patchwork.kernel.org/project/spi-devel-general/list/ @@ -7812,7 +7818,7 @@ F: drivers/staging/asus_oled/ STAGING - COMEDI M: Ian Abbott <abbotti@mev.co.uk> -M: Mori Hess <fmhess@users.sourceforge.net> +M: H Hartley Sweeten <hsweeten@visionengravers.com> S: Odd Fixes F: drivers/staging/comedi/ @@ -9288,7 +9294,7 @@ S: Maintained F: drivers/net/ethernet/xilinx/xilinx_axienet* XILINX SYSTEMACE DRIVER -S: Unmaintained +S: Orphan F: drivers/block/xsysace.c XILINX UARTLITE SERIAL DRIVER @@ -1,7 +1,7 @@ VERSION = 3 PATCHLEVEL = 11 SUBLEVEL = 0 -EXTRAVERSION = -rc2 +EXTRAVERSION = -rc3 NAME = Linux for Workgroups # *DOCUMENTATION* diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index 837a1f2d8b96..082d9b4b5472 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -15,6 +15,7 @@ config ALPHA select ARCH_WANT_OPTIONAL_GPIOLIB select ARCH_WANT_IPC_PARSE_VERSION select ARCH_HAVE_NMI_SAFE_CMPXCHG + select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE select GENERIC_SMP_IDLE_THREAD select GENERIC_CMOS_UPDATE select GENERIC_STRNCPY_FROM_USER diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h index c2cbe4fc391c..78b03ef39f6f 100644 --- a/arch/alpha/include/asm/atomic.h +++ b/arch/alpha/include/asm/atomic.h @@ -186,17 +186,24 @@ static __inline__ long atomic64_sub_return(long i, atomic64_t * v) */ static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u) { - int c, old; - c = atomic_read(v); - for (;;) { - if (unlikely(c == (u))) - break; - old = atomic_cmpxchg((v), c, c + (a)); - if (likely(old == c)) - break; - c = old; - } - return c; + int c, new, 
old; + smp_mb(); + __asm__ __volatile__( + "1: ldl_l %[old],%[mem]\n" + " cmpeq %[old],%[u],%[c]\n" + " addl %[old],%[a],%[new]\n" + " bne %[c],2f\n" + " stl_c %[new],%[mem]\n" + " beq %[new],3f\n" + "2:\n" + ".subsection 2\n" + "3: br 1b\n" + ".previous" + : [old] "=&r"(old), [new] "=&r"(new), [c] "=&r"(c) + : [mem] "m"(*v), [a] "rI"(a), [u] "rI"((long)u) + : "memory"); + smp_mb(); + return old; } @@ -207,21 +214,56 @@ static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u) * @u: ...unless v is equal to u. * * Atomically adds @a to @v, so long as it was not @u. - * Returns the old value of @v. + * Returns true iff @v was not @u. */ static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u) { - long c, old; - c = atomic64_read(v); - for (;;) { - if (unlikely(c == (u))) - break; - old = atomic64_cmpxchg((v), c, c + (a)); - if (likely(old == c)) - break; - c = old; - } - return c != (u); + long c, tmp; + smp_mb(); + __asm__ __volatile__( + "1: ldq_l %[tmp],%[mem]\n" + " cmpeq %[tmp],%[u],%[c]\n" + " addq %[tmp],%[a],%[tmp]\n" + " bne %[c],2f\n" + " stq_c %[tmp],%[mem]\n" + " beq %[tmp],3f\n" + "2:\n" + ".subsection 2\n" + "3: br 1b\n" + ".previous" + : [tmp] "=&r"(tmp), [c] "=&r"(c) + : [mem] "m"(*v), [a] "rI"(a), [u] "rI"(u) + : "memory"); + smp_mb(); + return !c; +} + +/* + * atomic64_dec_if_positive - decrement by 1 if old value positive + * @v: pointer of type atomic64_t + * + * The function returns the old value of *v minus 1, even if + * the atomic variable, v, was not decremented. 
+ */ +static inline long atomic64_dec_if_positive(atomic64_t *v) +{ + long old, tmp; + smp_mb(); + __asm__ __volatile__( + "1: ldq_l %[old],%[mem]\n" + " subq %[old],1,%[tmp]\n" + " ble %[old],2f\n" + " stq_c %[tmp],%[mem]\n" + " beq %[tmp],3f\n" + "2:\n" + ".subsection 2\n" + "3: br 1b\n" + ".previous" + : [old] "=&r"(old), [tmp] "=&r"(tmp) + : [mem] "m"(*v) + : "memory"); + smp_mb(); + return old - 1; } #define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) diff --git a/arch/alpha/include/asm/param.h b/arch/alpha/include/asm/param.h index bf46af51941b..a5b68b268bcf 100644 --- a/arch/alpha/include/asm/param.h +++ b/arch/alpha/include/asm/param.h @@ -3,7 +3,9 @@ #include <uapi/asm/param.h> -#define HZ CONFIG_HZ -#define USER_HZ HZ -# define CLOCKS_PER_SEC HZ /* frequency at which times() counts */ +# undef HZ +# define HZ CONFIG_HZ +# define USER_HZ 1024 +# define CLOCKS_PER_SEC USER_HZ /* frequency at which times() counts */ + #endif /* _ASM_ALPHA_PARAM_H */ diff --git a/arch/alpha/include/asm/spinlock.h b/arch/alpha/include/asm/spinlock.h index 3bba21e41b81..37b570d01202 100644 --- a/arch/alpha/include/asm/spinlock.h +++ b/arch/alpha/include/asm/spinlock.h @@ -168,8 +168,4 @@ static inline void arch_write_unlock(arch_rwlock_t * lock) #define arch_read_lock_flags(lock, flags) arch_read_lock(lock) #define arch_write_lock_flags(lock, flags) arch_write_lock(lock) -#define arch_spin_relax(lock) cpu_relax() -#define arch_read_relax(lock) cpu_relax() -#define arch_write_relax(lock) cpu_relax() - #endif /* _ALPHA_SPINLOCK_H */ diff --git a/arch/alpha/include/asm/unistd.h b/arch/alpha/include/asm/unistd.h index 43baee17acdf..f2c94402e2c8 100644 --- a/arch/alpha/include/asm/unistd.h +++ b/arch/alpha/include/asm/unistd.h @@ -3,8 +3,7 @@ #include <uapi/asm/unistd.h> - -#define NR_SYSCALLS 506 +#define NR_SYSCALLS 508 #define __ARCH_WANT_OLD_READDIR #define __ARCH_WANT_STAT64 diff --git a/arch/alpha/include/uapi/asm/param.h b/arch/alpha/include/uapi/asm/param.h index 
29daed819ebd..dbcd9834af6d 100644 --- a/arch/alpha/include/uapi/asm/param.h +++ b/arch/alpha/include/uapi/asm/param.h @@ -1,13 +1,7 @@ #ifndef _UAPI_ASM_ALPHA_PARAM_H #define _UAPI_ASM_ALPHA_PARAM_H -/* ??? Gross. I don't want to parameterize this, and supposedly the - hardware ignores reprogramming. We also need userland buy-in to the - change in HZ, since this is visible in the wait4 resources etc. */ - -#ifndef __KERNEL__ #define HZ 1024 -#endif #define EXEC_PAGESIZE 8192 @@ -17,5 +11,4 @@ #define MAXHOSTNAMELEN 64 /* max length of hostname */ - #endif /* _UAPI_ASM_ALPHA_PARAM_H */ diff --git a/arch/alpha/include/uapi/asm/unistd.h b/arch/alpha/include/uapi/asm/unistd.h index 801d28bcea51..53ae7bb1bfd1 100644 --- a/arch/alpha/include/uapi/asm/unistd.h +++ b/arch/alpha/include/uapi/asm/unistd.h @@ -467,5 +467,7 @@ #define __NR_sendmmsg 503 #define __NR_process_vm_readv 504 #define __NR_process_vm_writev 505 +#define __NR_kcmp 506 +#define __NR_finit_module 507 #endif /* _UAPI_ALPHA_UNISTD_H */ diff --git a/arch/alpha/kernel/entry.S b/arch/alpha/kernel/entry.S index f62a994ef126..a969b95ee5ac 100644 --- a/arch/alpha/kernel/entry.S +++ b/arch/alpha/kernel/entry.S @@ -12,11 +12,32 @@ .text .set noat + .cfi_sections .debug_frame /* Stack offsets. */ #define SP_OFF 184 #define SWITCH_STACK_SIZE 320 +.macro CFI_START_OSF_FRAME func + .align 4 + .globl \func + .type \func,@function +\func: + .cfi_startproc simple + .cfi_return_column 64 + .cfi_def_cfa $sp, 48 + .cfi_rel_offset 64, 8 + .cfi_rel_offset $gp, 16 + .cfi_rel_offset $16, 24 + .cfi_rel_offset $17, 32 + .cfi_rel_offset $18, 40 +.endm + +.macro CFI_END_OSF_FRAME func + .cfi_endproc + .size \func, . - \func +.endm + /* * This defines the normal kernel pt-regs layout. * @@ -27,100 +48,158 @@ * the palcode-provided values are available to the signal handler. 
*/ -#define SAVE_ALL \ - subq $sp, SP_OFF, $sp; \ - stq $0, 0($sp); \ - stq $1, 8($sp); \ - stq $2, 16($sp); \ - stq $3, 24($sp); \ - stq $4, 32($sp); \ - stq $28, 144($sp); \ - lda $2, alpha_mv; \ - stq $5, 40($sp); \ - stq $6, 48($sp); \ - stq $7, 56($sp); \ - stq $8, 64($sp); \ - stq $19, 72($sp); \ - stq $20, 80($sp); \ - stq $21, 88($sp); \ - ldq $2, HAE_CACHE($2); \ - stq $22, 96($sp); \ - stq $23, 104($sp); \ - stq $24, 112($sp); \ - stq $25, 120($sp); \ - stq $26, 128($sp); \ - stq $27, 136($sp); \ - stq $2, 152($sp); \ - stq $16, 160($sp); \ - stq $17, 168($sp); \ +.macro SAVE_ALL + subq $sp, SP_OFF, $sp + .cfi_adjust_cfa_offset SP_OFF + stq $0, 0($sp) + stq $1, 8($sp) + stq $2, 16($sp) + stq $3, 24($sp) + stq $4, 32($sp) + stq $28, 144($sp) + .cfi_rel_offset $0, 0 + .cfi_rel_offset $1, 8 + .cfi_rel_offset $2, 16 + .cfi_rel_offset $3, 24 + .cfi_rel_offset $4, 32 + .cfi_rel_offset $28, 144 + lda $2, alpha_mv + stq $5, 40($sp) + stq $6, 48($sp) + stq $7, 56($sp) + stq $8, 64($sp) + stq $19, 72($sp) + stq $20, 80($sp) + stq $21, 88($sp) + ldq $2, HAE_CACHE($2) + stq $22, 96($sp) + stq $23, 104($sp) + stq $24, 112($sp) + stq $25, 120($sp) + stq $26, 128($sp) + stq $27, 136($sp) + stq $2, 152($sp) + stq $16, 160($sp) + stq $17, 168($sp) stq $18, 176($sp) + .cfi_rel_offset $5, 40 + .cfi_rel_offset $6, 48 + .cfi_rel_offset $7, 56 + .cfi_rel_offset $8, 64 + .cfi_rel_offset $19, 72 + .cfi_rel_offset $20, 80 + .cfi_rel_offset $21, 88 + .cfi_rel_offset $22, 96 + .cfi_rel_offset $23, 104 + .cfi_rel_offset $24, 112 + .cfi_rel_offset $25, 120 + .cfi_rel_offset $26, 128 + .cfi_rel_offset $27, 136 +.endm -#define RESTORE_ALL \ - lda $19, alpha_mv; \ - ldq $0, 0($sp); \ - ldq $1, 8($sp); \ - ldq $2, 16($sp); \ - ldq $3, 24($sp); \ - ldq $21, 152($sp); \ - ldq $20, HAE_CACHE($19); \ - ldq $4, 32($sp); \ - ldq $5, 40($sp); \ - ldq $6, 48($sp); \ - ldq $7, 56($sp); \ - subq $20, $21, $20; \ - ldq $8, 64($sp); \ - beq $20, 99f; \ - ldq $20, HAE_REG($19); \ - stq $21, 
HAE_CACHE($19); \ - stq $21, 0($20); \ -99:; \ - ldq $19, 72($sp); \ - ldq $20, 80($sp); \ - ldq $21, 88($sp); \ - ldq $22, 96($sp); \ - ldq $23, 104($sp); \ - ldq $24, 112($sp); \ - ldq $25, 120($sp); \ - ldq $26, 128($sp); \ - ldq $27, 136($sp); \ - ldq $28, 144($sp); \ +.macro RESTORE_ALL + lda $19, alpha_mv + ldq $0, 0($sp) + ldq $1, 8($sp) + ldq $2, 16($sp) + ldq $3, 24($sp) + ldq $21, 152($sp) + ldq $20, HAE_CACHE($19) + ldq $4, 32($sp) + ldq $5, 40($sp) + ldq $6, 48($sp) + ldq $7, 56($sp) + subq $20, $21, $20 + ldq $8, 64($sp) + beq $20, 99f + ldq $20, HAE_REG($19) + stq $21, HAE_CACHE($19) + stq $21, 0($20) +99: ldq $19, 72($sp) + ldq $20, 80($sp) + ldq $21, 88($sp) + ldq $22, 96($sp) + ldq $23, 104($sp) + ldq $24, 112($sp) + ldq $25, 120($sp) + ldq $26, 128($sp) + ldq $27, 136($sp) + ldq $28, 144($sp) addq $sp, SP_OFF, $sp + .cfi_restore $0 + .cfi_restore $1 + .cfi_restore $2 + .cfi_restore $3 + .cfi_restore $4 + .cfi_restore $5 + .cfi_restore $6 + .cfi_restore $7 + .cfi_restore $8 + .cfi_restore $19 + .cfi_restore $20 + .cfi_restore $21 + .cfi_restore $22 + .cfi_restore $23 + .cfi_restore $24 + .cfi_restore $25 + .cfi_restore $26 + .cfi_restore $27 + .cfi_restore $28 + .cfi_adjust_cfa_offset -SP_OFF +.endm + +.macro DO_SWITCH_STACK + bsr $1, do_switch_stack + .cfi_adjust_cfa_offset SWITCH_STACK_SIZE + .cfi_rel_offset $9, 0 + .cfi_rel_offset $10, 8 + .cfi_rel_offset $11, 16 + .cfi_rel_offset $12, 24 + .cfi_rel_offset $13, 32 + .cfi_rel_offset $14, 40 + .cfi_rel_offset $15, 48 + /* We don't really care about the FP registers for debugging. */ +.endm + +.macro UNDO_SWITCH_STACK + bsr $1, undo_switch_stack + .cfi_restore $9 + .cfi_restore $10 + .cfi_restore $11 + .cfi_restore $12 + .cfi_restore $13 + .cfi_restore $14 + .cfi_restore $15 + .cfi_adjust_cfa_offset -SWITCH_STACK_SIZE +.endm /* * Non-syscall kernel entry points. 
*/ - .align 4 - .globl entInt - .ent entInt -entInt: +CFI_START_OSF_FRAME entInt SAVE_ALL lda $8, 0x3fff lda $26, ret_from_sys_call bic $sp, $8, $8 mov $sp, $19 jsr $31, do_entInt -.end entInt +CFI_END_OSF_FRAME entInt - .align 4 - .globl entArith - .ent entArith -entArith: +CFI_START_OSF_FRAME entArith SAVE_ALL lda $8, 0x3fff lda $26, ret_from_sys_call bic $sp, $8, $8 mov $sp, $18 jsr $31, do_entArith -.end entArith +CFI_END_OSF_FRAME entArith - .align 4 - .globl entMM - .ent entMM -entMM: +CFI_START_OSF_FRAME entMM SAVE_ALL /* save $9 - $15 so the inline exception code can manipulate them. */ subq $sp, 56, $sp + .cfi_adjust_cfa_offset 56 stq $9, 0($sp) stq $10, 8($sp) stq $11, 16($sp) @@ -128,6 +207,13 @@ entMM: stq $13, 32($sp) stq $14, 40($sp) stq $15, 48($sp) + .cfi_rel_offset $9, 0 + .cfi_rel_offset $10, 8 + .cfi_rel_offset $11, 16 + .cfi_rel_offset $12, 24 + .cfi_rel_offset $13, 32 + .cfi_rel_offset $14, 40 + .cfi_rel_offset $15, 48 addq $sp, 56, $19 /* handle the fault */ lda $8, 0x3fff @@ -142,28 +228,33 @@ entMM: ldq $14, 40($sp) ldq $15, 48($sp) addq $sp, 56, $sp + .cfi_restore $9 + .cfi_restore $10 + .cfi_restore $11 + .cfi_restore $12 + .cfi_restore $13 + .cfi_restore $14 + .cfi_restore $15 + .cfi_adjust_cfa_offset -56 /* finish up the syscall as normal. 
*/ br ret_from_sys_call -.end entMM +CFI_END_OSF_FRAME entMM - .align 4 - .globl entIF - .ent entIF -entIF: +CFI_START_OSF_FRAME entIF SAVE_ALL lda $8, 0x3fff lda $26, ret_from_sys_call bic $sp, $8, $8 mov $sp, $17 jsr $31, do_entIF -.end entIF +CFI_END_OSF_FRAME entIF - .align 4 - .globl entUna - .ent entUna -entUna: +CFI_START_OSF_FRAME entUna lda $sp, -256($sp) + .cfi_adjust_cfa_offset 256 stq $0, 0($sp) + .cfi_rel_offset $0, 0 + .cfi_remember_state ldq $0, 256($sp) /* get PS */ stq $1, 8($sp) stq $2, 16($sp) @@ -195,6 +286,32 @@ entUna: stq $28, 224($sp) mov $sp, $19 stq $gp, 232($sp) + .cfi_rel_offset $1, 1*8 + .cfi_rel_offset $2, 2*8 + .cfi_rel_offset $3, 3*8 + .cfi_rel_offset $4, 4*8 + .cfi_rel_offset $5, 5*8 + .cfi_rel_offset $6, 6*8 + .cfi_rel_offset $7, 7*8 + .cfi_rel_offset $8, 8*8 + .cfi_rel_offset $9, 9*8 + .cfi_rel_offset $10, 10*8 + .cfi_rel_offset $11, 11*8 + .cfi_rel_offset $12, 12*8 + .cfi_rel_offset $13, 13*8 + .cfi_rel_offset $14, 14*8 + .cfi_rel_offset $15, 15*8 + .cfi_rel_offset $19, 19*8 + .cfi_rel_offset $20, 20*8 + .cfi_rel_offset $21, 21*8 + .cfi_rel_offset $22, 22*8 + .cfi_rel_offset $23, 23*8 + .cfi_rel_offset $24, 24*8 + .cfi_rel_offset $25, 25*8 + .cfi_rel_offset $26, 26*8 + .cfi_rel_offset $27, 27*8 + .cfi_rel_offset $28, 28*8 + .cfi_rel_offset $29, 29*8 lda $8, 0x3fff stq $31, 248($sp) bic $sp, $8, $8 @@ -228,16 +345,45 @@ entUna: ldq $28, 224($sp) ldq $gp, 232($sp) lda $sp, 256($sp) + .cfi_restore $1 + .cfi_restore $2 + .cfi_restore $3 + .cfi_restore $4 + .cfi_restore $5 + .cfi_restore $6 + .cfi_restore $7 + .cfi_restore $8 + .cfi_restore $9 + .cfi_restore $10 + .cfi_restore $11 + .cfi_restore $12 + .cfi_restore $13 + .cfi_restore $14 + .cfi_restore $15 + .cfi_restore $19 + .cfi_restore $20 + .cfi_restore $21 + .cfi_restore $22 + .cfi_restore $23 + .cfi_restore $24 + .cfi_restore $25 + .cfi_restore $26 + .cfi_restore $27 + .cfi_restore $28 + .cfi_restore $29 + .cfi_adjust_cfa_offset -256 call_pal PAL_rti -.end entUna .align 4 - .ent 
entUnaUser entUnaUser: + .cfi_restore_state ldq $0, 0($sp) /* restore original $0 */ lda $sp, 256($sp) /* pop entUna's stack frame */ + .cfi_restore $0 + .cfi_adjust_cfa_offset -256 SAVE_ALL /* setup normal kernel stack */ lda $sp, -56($sp) + .cfi_adjust_cfa_offset 56 stq $9, 0($sp) stq $10, 8($sp) stq $11, 16($sp) @@ -245,6 +391,13 @@ entUnaUser: stq $13, 32($sp) stq $14, 40($sp) stq $15, 48($sp) + .cfi_rel_offset $9, 0 + .cfi_rel_offset $10, 8 + .cfi_rel_offset $11, 16 + .cfi_rel_offset $12, 24 + .cfi_rel_offset $13, 32 + .cfi_rel_offset $14, 40 + .cfi_rel_offset $15, 48 lda $8, 0x3fff addq $sp, 56, $19 bic $sp, $8, $8 @@ -257,20 +410,25 @@ entUnaUser: ldq $14, 40($sp) ldq $15, 48($sp) lda $sp, 56($sp) + .cfi_restore $9 + .cfi_restore $10 + .cfi_restore $11 + .cfi_restore $12 + .cfi_restore $13 + .cfi_restore $14 + .cfi_restore $15 + .cfi_adjust_cfa_offset -56 br ret_from_sys_call -.end entUnaUser +CFI_END_OSF_FRAME entUna - .align 4 - .globl entDbg - .ent entDbg -entDbg: +CFI_START_OSF_FRAME entDbg SAVE_ALL lda $8, 0x3fff lda $26, ret_from_sys_call bic $sp, $8, $8 mov $sp, $16 jsr $31, do_entDbg -.end entDbg +CFI_END_OSF_FRAME entDbg /* * The system call entry point is special. 
Most importantly, it looks @@ -285,8 +443,12 @@ entDbg: .align 4 .globl entSys - .globl ret_from_sys_call - .ent entSys + .type entSys, @function + .cfi_startproc simple + .cfi_return_column 64 + .cfi_def_cfa $sp, 48 + .cfi_rel_offset 64, 8 + .cfi_rel_offset $gp, 16 entSys: SAVE_ALL lda $8, 0x3fff @@ -300,6 +462,9 @@ entSys: stq $17, SP_OFF+32($sp) s8addq $0, $5, $5 stq $18, SP_OFF+40($sp) + .cfi_rel_offset $16, SP_OFF+24 + .cfi_rel_offset $17, SP_OFF+32 + .cfi_rel_offset $18, SP_OFF+40 blbs $3, strace beq $4, 1f ldq $27, 0($5) @@ -310,6 +475,7 @@ entSys: stq $31, 72($sp) /* a3=0 => no error */ .align 4 + .globl ret_from_sys_call ret_from_sys_call: cmovne $26, 0, $18 /* $18 = 0 => non-restartable */ ldq $0, SP_OFF($sp) @@ -324,10 +490,12 @@ ret_to_user: and $17, _TIF_WORK_MASK, $2 bne $2, work_pending restore_all: + .cfi_remember_state RESTORE_ALL call_pal PAL_rti ret_to_kernel: + .cfi_restore_state lda $16, 7 call_pal PAL_swpipl br restore_all @@ -356,7 +524,6 @@ $ret_success: stq $0, 0($sp) stq $31, 72($sp) /* a3=0 => no error */ br ret_from_sys_call -.end entSys /* * Do all cleanup when returning from all interrupts and system calls. @@ -370,7 +537,7 @@ $ret_success: */ .align 4 - .ent work_pending + .type work_pending, @function work_pending: and $17, _TIF_NOTIFY_RESUME | _TIF_SIGPENDING, $2 bne $2, $work_notifysig @@ -387,23 +554,22 @@ $work_resched: $work_notifysig: mov $sp, $16 - bsr $1, do_switch_stack + DO_SWITCH_STACK jsr $26, do_work_pending - bsr $1, undo_switch_stack + UNDO_SWITCH_STACK br restore_all -.end work_pending /* * PTRACE syscall handler */ .align 4 - .ent strace + .type strace, @function strace: /* set up signal stack, call syscall_trace */ - bsr $1, do_switch_stack + DO_SWITCH_STACK jsr $26, syscall_trace_enter /* returns the syscall number */ - bsr $1, undo_switch_stack + UNDO_SWITCH_STACK /* get the arguments back.. 
*/ ldq $16, SP_OFF+24($sp) @@ -431,9 +597,9 @@ ret_from_straced: $strace_success: stq $0, 0($sp) /* save return value */ - bsr $1, do_switch_stack + DO_SWITCH_STACK jsr $26, syscall_trace_leave - bsr $1, undo_switch_stack + UNDO_SWITCH_STACK br $31, ret_from_sys_call .align 3 @@ -447,26 +613,31 @@ $strace_error: stq $0, 0($sp) stq $1, 72($sp) /* a3 for return */ - bsr $1, do_switch_stack + DO_SWITCH_STACK mov $18, $9 /* save old syscall number */ mov $19, $10 /* save old a3 */ jsr $26, syscall_trace_leave mov $9, $18 mov $10, $19 - bsr $1, undo_switch_stack + UNDO_SWITCH_STACK mov $31, $26 /* tell "ret_from_sys_call" we can restart */ br ret_from_sys_call -.end strace +CFI_END_OSF_FRAME entSys /* * Save and restore the switch stack -- aka the balance of the user context. */ .align 4 - .ent do_switch_stack + .type do_switch_stack, @function + .cfi_startproc simple + .cfi_return_column 64 + .cfi_def_cfa $sp, 0 + .cfi_register 64, $1 do_switch_stack: lda $sp, -SWITCH_STACK_SIZE($sp) + .cfi_adjust_cfa_offset SWITCH_STACK_SIZE stq $9, 0($sp) stq $10, 8($sp) stq $11, 16($sp) @@ -510,10 +681,14 @@ do_switch_stack: stt $f0, 312($sp) # save fpcr in slot of $f31 ldt $f0, 64($sp) # dont let "do_switch_stack" change fp state. ret $31, ($1), 1 -.end do_switch_stack + .cfi_endproc + .size do_switch_stack, .-do_switch_stack .align 4 - .ent undo_switch_stack + .type undo_switch_stack, @function + .cfi_startproc simple + .cfi_def_cfa $sp, 0 + .cfi_register 64, $1 undo_switch_stack: ldq $9, 0($sp) ldq $10, 8($sp) @@ -558,7 +733,8 @@ undo_switch_stack: ldt $f30, 304($sp) lda $sp, SWITCH_STACK_SIZE($sp) ret $31, ($1), 1 -.end undo_switch_stack + .cfi_endproc + .size undo_switch_stack, .-undo_switch_stack /* * The meat of the context switch code. 
@@ -566,17 +742,18 @@ undo_switch_stack: .align 4 .globl alpha_switch_to - .ent alpha_switch_to + .type alpha_switch_to, @function + .cfi_startproc alpha_switch_to: - .prologue 0 - bsr $1, do_switch_stack + DO_SWITCH_STACK call_pal PAL_swpctx lda $8, 0x3fff - bsr $1, undo_switch_stack + UNDO_SWITCH_STACK bic $sp, $8, $8 mov $17, $0 ret -.end alpha_switch_to + .cfi_endproc + .size alpha_switch_to, .-alpha_switch_to /* * New processes begin life here. diff --git a/arch/alpha/kernel/irq_alpha.c b/arch/alpha/kernel/irq_alpha.c index f433fc11877a..28e4429596f3 100644 --- a/arch/alpha/kernel/irq_alpha.c +++ b/arch/alpha/kernel/irq_alpha.c @@ -236,7 +236,7 @@ void __init init_rtc_irq(void) { irq_set_chip_and_handler_name(RTC_IRQ, &dummy_irq_chip, - handle_simple_irq, "RTC"); + handle_percpu_irq, "RTC"); setup_irq(RTC_IRQ, &timer_irqaction); } diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c index 53b18a620e1c..9dbbcb3b9146 100644 --- a/arch/alpha/kernel/smp.c +++ b/arch/alpha/kernel/smp.c @@ -264,9 +264,10 @@ recv_secondary_console_msg(void) if (cnt <= 0 || cnt >= 80) strcpy(buf, "<<< BOGUS MSG >>>"); else { - cp1 = (char *) &cpu->ipc_buffer[11]; + cp1 = (char *) &cpu->ipc_buffer[1]; cp2 = buf; - strcpy(cp2, cp1); + memcpy(cp2, cp1, cnt); + cp2[cnt] = '\0'; while ((cp2 = strchr(cp2, '\r')) != 0) { *cp2 = ' '; diff --git a/arch/alpha/kernel/sys_dp264.c b/arch/alpha/kernel/sys_dp264.c index 5bf401f7ea97..6c35159bc00e 100644 --- a/arch/alpha/kernel/sys_dp264.c +++ b/arch/alpha/kernel/sys_dp264.c @@ -190,9 +190,6 @@ static struct irq_chip clipper_irq_type = { static void dp264_device_interrupt(unsigned long vector) { -#if 1 - printk("dp264_device_interrupt: NOT IMPLEMENTED YET!!\n"); -#else unsigned long pld; unsigned int i; @@ -210,12 +207,7 @@ dp264_device_interrupt(unsigned long vector) isa_device_interrupt(vector); else handle_irq(16 + i); -#if 0 - TSUNAMI_cchip->dir0.csr = 1UL << i; mb(); - tmp = TSUNAMI_cchip->dir0.csr; -#endif } -#endif } static void diff 
--git a/arch/alpha/kernel/sys_marvel.c b/arch/alpha/kernel/sys_marvel.c index 407accc80877..c92e389ff219 100644 --- a/arch/alpha/kernel/sys_marvel.c +++ b/arch/alpha/kernel/sys_marvel.c @@ -317,8 +317,9 @@ marvel_init_irq(void) } static int -marvel_map_irq(struct pci_dev *dev, u8 slot, u8 pin) +marvel_map_irq(const struct pci_dev *cdev, u8 slot, u8 pin) { + struct pci_dev *dev = (struct pci_dev *)cdev; struct pci_controller *hose = dev->sysdata; struct io7_port *io7_port = hose->sysdata; struct io7 *io7 = io7_port->io7; diff --git a/arch/alpha/kernel/systbls.S b/arch/alpha/kernel/systbls.S index 4284ec798ec9..dca9b3fb0071 100644 --- a/arch/alpha/kernel/systbls.S +++ b/arch/alpha/kernel/systbls.S @@ -524,6 +524,8 @@ sys_call_table: .quad sys_sendmmsg .quad sys_process_vm_readv .quad sys_process_vm_writev /* 505 */ + .quad sys_kcmp + .quad sys_finit_module .size sys_call_table, . - sys_call_table .type sys_call_table, @object diff --git a/arch/alpha/kernel/time.c b/arch/alpha/kernel/time.c index e336694ca042..ea3395036556 100644 --- a/arch/alpha/kernel/time.c +++ b/arch/alpha/kernel/time.c @@ -105,9 +105,7 @@ void arch_irq_work_raise(void) static inline __u32 rpcc(void) { - __u32 result; - asm volatile ("rpcc %0" : "=r"(result)); - return result; + return __builtin_alpha_rpcc(); } int update_persistent_clock(struct timespec now) diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c index be1fba334bd0..bd0665cdc840 100644 --- a/arch/alpha/kernel/traps.c +++ b/arch/alpha/kernel/traps.c @@ -66,8 +66,8 @@ dik_show_regs(struct pt_regs *regs, unsigned long *r9_15) { printk("pc = [<%016lx>] ra = [<%016lx>] ps = %04lx %s\n", regs->pc, regs->r26, regs->ps, print_tainted()); - print_symbol("pc is at %s\n", regs->pc); - print_symbol("ra is at %s\n", regs->r26 ); + printk("pc is at %pSR\n", (void *)regs->pc); + printk("ra is at %pSR\n", (void *)regs->r26); printk("v0 = %016lx t0 = %016lx t1 = %016lx\n", regs->r0, regs->r1, regs->r2); printk("t2 = %016lx t3 = %016lx 
t4 = %016lx\n", @@ -132,9 +132,7 @@ dik_show_trace(unsigned long *sp) continue; if (tmp >= (unsigned long) &_etext) continue; - printk("[<%lx>]", tmp); - print_symbol(" %s", tmp); - printk("\n"); + printk("[<%lx>] %pSR\n", tmp, (void *)tmp); if (i > 40) { printk(" ..."); break; diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index ba412e02ec0c..37c0f4e978d4 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1600,8 +1600,7 @@ config LOCAL_TIMERS config ARCH_NR_GPIO int default 1024 if ARCH_SHMOBILE || ARCH_TEGRA - default 512 if SOC_OMAP5 - default 512 if ARCH_KEYSTONE + default 512 if ARCH_EXYNOS || ARCH_KEYSTONE || SOC_OMAP5 default 392 if ARCH_U8500 default 352 if ARCH_VT8500 default 288 if ARCH_SUNXI diff --git a/arch/arm/boot/dts/atlas6.dtsi b/arch/arm/boot/dts/atlas6.dtsi index 9866cd736dee..a0f2721ea583 100644 --- a/arch/arm/boot/dts/atlas6.dtsi +++ b/arch/arm/boot/dts/atlas6.dtsi @@ -485,6 +485,12 @@ sirf,function = "usp0"; }; }; + usp0_uart_nostreamctrl_pins_a: usp0@1 { + usp0 { + sirf,pins = "usp0_uart_nostreamctrl_grp"; + sirf,function = "usp0_uart_nostreamctrl"; + }; + }; usp1_pins_a: usp1@0 { usp1 { sirf,pins = "usp1grp"; @@ -515,16 +521,16 @@ sirf,function = "pulse_count"; }; }; - cko0_rst_pins_a: cko0_rst@0 { - cko0_rst { - sirf,pins = "cko0_rstgrp"; - sirf,function = "cko0_rst"; + cko0_pins_a: cko0@0 { + cko0 { + sirf,pins = "cko0grp"; + sirf,function = "cko0"; }; }; - cko1_rst_pins_a: cko1_rst@0 { - cko1_rst { - sirf,pins = "cko1_rstgrp"; - sirf,function = "cko1_rst"; + cko1_pins_a: cko1@0 { + cko1 { + sirf,pins = "cko1grp"; + sirf,function = "cko1"; }; }; }; diff --git a/arch/arm/boot/dts/imx28-apx4devkit.dts b/arch/arm/boot/dts/imx28-apx4devkit.dts index 43bf3c796cba..0e7fed47bd8d 100644 --- a/arch/arm/boot/dts/imx28-apx4devkit.dts +++ b/arch/arm/boot/dts/imx28-apx4devkit.dts @@ -147,7 +147,7 @@ reg = <0x0a>; VDDA-supply = <&reg_3p3v>; VDDIO-supply = <&reg_3p3v>; - + clocks = <&saif0>; }; pcf8563: rtc@51 { diff --git
a/arch/arm/boot/dts/imx28-evk.dts b/arch/arm/boot/dts/imx28-evk.dts index 1f0d38d7b16f..e035f4664b97 100644 --- a/arch/arm/boot/dts/imx28-evk.dts +++ b/arch/arm/boot/dts/imx28-evk.dts @@ -195,7 +195,7 @@ reg = <0x0a>; VDDA-supply = <&reg_3p3v>; VDDIO-supply = <&reg_3p3v>; - + clocks = <&saif0>; }; at24@51 { diff --git a/arch/arm/boot/dts/imx28-m28evk.dts b/arch/arm/boot/dts/imx28-m28evk.dts index 880df2f13be8..44d9da57736e 100644 --- a/arch/arm/boot/dts/imx28-m28evk.dts +++ b/arch/arm/boot/dts/imx28-m28evk.dts @@ -184,7 +184,7 @@ reg = <0x0a>; VDDA-supply = <&reg_3p3v>; VDDIO-supply = <&reg_3p3v>; - + clocks = <&saif0>; }; eeprom: eeprom@51 { diff --git a/arch/arm/boot/dts/imx28.dtsi b/arch/arm/boot/dts/imx28.dtsi index 6a8acb01b1d3..9524a0571281 100644 --- a/arch/arm/boot/dts/imx28.dtsi +++ b/arch/arm/boot/dts/imx28.dtsi @@ -837,6 +837,7 @@ compatible = "fsl,imx28-saif"; reg = <0x80042000 0x2000>; interrupts = <59 80>; + #clock-cells = <0>; clocks = <&clks 53>; dmas = <&dma_apbx 4>; dma-names = "rx-tx"; diff --git a/arch/arm/boot/dts/imx51-babbage.dts b/arch/arm/boot/dts/imx51-babbage.dts index 6dd9486c755b..ad3471ca17c7 100644 --- a/arch/arm/boot/dts/imx51-babbage.dts +++ b/arch/arm/boot/dts/imx51-babbage.dts @@ -61,6 +61,16 @@ mux-int-port = <2>; mux-ext-port = <3>; }; + + clocks { + clk_26M: codec_clock { + compatible = "fixed-clock"; + reg=<0>; + #clock-cells = <0>; + clock-frequency = <26000000>; + gpios = <&gpio4 26 1>; + }; + }; }; &esdhc1 { @@ -229,6 +239,7 @@ MX51_PAD_EIM_A27__GPIO2_21 0x5 MX51_PAD_CSPI1_SS0__GPIO4_24 0x85 MX51_PAD_CSPI1_SS1__GPIO4_25 0x85 + MX51_PAD_CSPI1_RDY__GPIO4_26 0x80000000 >; }; }; @@ -255,7 +266,7 @@ sgtl5000: codec@0a { compatible = "fsl,sgtl5000"; reg = <0x0a>; - clock-frequency = <26000000>; + clocks = <&clk_26M>; VDDA-supply = <&vdig_reg>; VDDIO-supply = <&vvideo_reg>; }; diff --git a/arch/arm/boot/dts/imx53-mba53.dts b/arch/arm/boot/dts/imx53-mba53.dts index aaa33bc99f78..a63090267941 100644 --- a/arch/arm/boot/dts/imx53-mba53.dts +++
b/arch/arm/boot/dts/imx53-mba53.dts @@ -27,7 +27,7 @@ backlight { compatible = "pwm-backlight"; - pwms = <&pwm2 0 50000 0 0>; + pwms = <&pwm2 0 50000>; brightness-levels = <0 24 28 32 36 40 44 48 52 56 60 64 68 72 76 80 84 88 92 96 100>; default-brightness-level = <10>; enable-gpios = <&gpio7 7 0>; diff --git a/arch/arm/boot/dts/imx53.dtsi b/arch/arm/boot/dts/imx53.dtsi index 3895fbba8fce..569aa9f2c4ed 100644 --- a/arch/arm/boot/dts/imx53.dtsi +++ b/arch/arm/boot/dts/imx53.dtsi @@ -725,15 +725,15 @@ uart1 { pinctrl_uart1_1: uart1grp-1 { fsl,pins = < - MX53_PAD_CSI0_DAT10__UART1_TXD_MUX 0x1c5 - MX53_PAD_CSI0_DAT11__UART1_RXD_MUX 0x1c5 + MX53_PAD_CSI0_DAT10__UART1_TXD_MUX 0x1e4 + MX53_PAD_CSI0_DAT11__UART1_RXD_MUX 0x1e4 >; }; pinctrl_uart1_2: uart1grp-2 { fsl,pins = < - MX53_PAD_PATA_DIOW__UART1_TXD_MUX 0x1c5 - MX53_PAD_PATA_DMACK__UART1_RXD_MUX 0x1c5 + MX53_PAD_PATA_DIOW__UART1_TXD_MUX 0x1e4 + MX53_PAD_PATA_DMACK__UART1_RXD_MUX 0x1e4 >; }; @@ -748,8 +748,8 @@ uart2 { pinctrl_uart2_1: uart2grp-1 { fsl,pins = < - MX53_PAD_PATA_BUFFER_EN__UART2_RXD_MUX 0x1c5 - MX53_PAD_PATA_DMARQ__UART2_TXD_MUX 0x1c5 + MX53_PAD_PATA_BUFFER_EN__UART2_RXD_MUX 0x1e4 + MX53_PAD_PATA_DMARQ__UART2_TXD_MUX 0x1e4 >; }; @@ -766,17 +766,17 @@ uart3 { pinctrl_uart3_1: uart3grp-1 { fsl,pins = < - MX53_PAD_PATA_CS_0__UART3_TXD_MUX 0x1c5 - MX53_PAD_PATA_CS_1__UART3_RXD_MUX 0x1c5 - MX53_PAD_PATA_DA_1__UART3_CTS 0x1c5 - MX53_PAD_PATA_DA_2__UART3_RTS 0x1c5 + MX53_PAD_PATA_CS_0__UART3_TXD_MUX 0x1e4 + MX53_PAD_PATA_CS_1__UART3_RXD_MUX 0x1e4 + MX53_PAD_PATA_DA_1__UART3_CTS 0x1e4 + MX53_PAD_PATA_DA_2__UART3_RTS 0x1e4 >; }; pinctrl_uart3_2: uart3grp-2 { fsl,pins = < - MX53_PAD_PATA_CS_0__UART3_TXD_MUX 0x1c5 - MX53_PAD_PATA_CS_1__UART3_RXD_MUX 0x1c5 + MX53_PAD_PATA_CS_0__UART3_TXD_MUX 0x1e4 + MX53_PAD_PATA_CS_1__UART3_RXD_MUX 0x1e4 >; }; @@ -785,8 +785,8 @@ uart4 { pinctrl_uart4_1: uart4grp-1 { fsl,pins = < - MX53_PAD_KEY_COL0__UART4_TXD_MUX 0x1c5 - MX53_PAD_KEY_ROW0__UART4_RXD_MUX 0x1c5 + 
MX53_PAD_KEY_COL0__UART4_TXD_MUX 0x1e4 + MX53_PAD_KEY_ROW0__UART4_RXD_MUX 0x1e4 >; }; }; @@ -794,8 +794,8 @@ uart5 { pinctrl_uart5_1: uart5grp-1 { fsl,pins = < - MX53_PAD_KEY_COL1__UART5_TXD_MUX 0x1c5 - MX53_PAD_KEY_ROW1__UART5_RXD_MUX 0x1c5 + MX53_PAD_KEY_COL1__UART5_TXD_MUX 0x1e4 + MX53_PAD_KEY_ROW1__UART5_RXD_MUX 0x1e4 >; }; }; diff --git a/arch/arm/boot/dts/prima2.dtsi b/arch/arm/boot/dts/prima2.dtsi index 05e9489cf95c..bbeb623fc2c6 100644 --- a/arch/arm/boot/dts/prima2.dtsi +++ b/arch/arm/boot/dts/prima2.dtsi @@ -515,16 +515,16 @@ sirf,function = "pulse_count"; }; }; - cko0_rst_pins_a: cko0_rst@0 { - cko0_rst { - sirf,pins = "cko0_rstgrp"; - sirf,function = "cko0_rst"; + cko0_pins_a: cko0@0 { + cko0 { + sirf,pins = "cko0grp"; + sirf,function = "cko0"; }; }; - cko1_rst_pins_a: cko1_rst@0 { - cko1_rst { - sirf,pins = "cko1_rstgrp"; - sirf,function = "cko1_rst"; + cko1_pins_a: cko1@0 { + cko1 { + sirf,pins = "cko1grp"; + sirf,function = "cko1"; }; }; }; diff --git a/arch/arm/boot/dts/stih416-pinctrl.dtsi b/arch/arm/boot/dts/stih416-pinctrl.dtsi index 957b21a71b4b..0f246c979262 100644 --- a/arch/arm/boot/dts/stih416-pinctrl.dtsi +++ b/arch/arm/boot/dts/stih416-pinctrl.dtsi @@ -166,6 +166,15 @@ reg = <0x9000 0x100>; st,bank-name = "PIO31"; }; + + serial2-oe { + pinctrl_serial2_oe: serial2-1 { + st,pins { + output-enable = <&PIO11 3 ALT2 OUT>; + }; + }; + }; + }; pin-controller-rear { @@ -218,7 +227,6 @@ st,pins { tx = <&PIO17 4 ALT2 OUT>; rx = <&PIO17 5 ALT2 IN>; - output-enable = <&PIO11 3 ALT2 OUT>; }; }; }; diff --git a/arch/arm/boot/dts/stih416.dtsi b/arch/arm/boot/dts/stih416.dtsi index 3cecd9689a49..1a0326ea7d07 100644 --- a/arch/arm/boot/dts/stih416.dtsi +++ b/arch/arm/boot/dts/stih416.dtsi @@ -79,7 +79,7 @@ interrupts = <0 197 0>; clocks = <&CLK_S_ICN_REG_0>; pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_serial2>; + pinctrl-0 = <&pinctrl_serial2 &pinctrl_serial2_oe>; }; /* SBC_UART1 */ diff --git a/arch/arm/boot/dts/twl4030.dtsi 
b/arch/arm/boot/dts/twl4030.dtsi index b3034da00a37..ae6a17aed9ee 100644 --- a/arch/arm/boot/dts/twl4030.dtsi +++ b/arch/arm/boot/dts/twl4030.dtsi @@ -47,6 +47,12 @@ regulator-max-microvolt = <3150000>; }; + vmmc2: regulator-vmmc2 { + compatible = "ti,twl4030-vmmc2"; + regulator-min-microvolt = <1850000>; + regulator-max-microvolt = <3150000>; + }; + vusb1v5: regulator-vusb1v5 { compatible = "ti,twl4030-vusb1v5"; }; diff --git a/arch/arm/boot/dts/vf610.dtsi b/arch/arm/boot/dts/vf610.dtsi index e1eb7dadda80..67d929cf9804 100644 --- a/arch/arm/boot/dts/vf610.dtsi +++ b/arch/arm/boot/dts/vf610.dtsi @@ -442,8 +442,8 @@ compatible = "fsl,mvf600-fec"; reg = <0x400d0000 0x1000>; interrupts = <0 78 0x04>; - clocks = <&clks VF610_CLK_ENET>, - <&clks VF610_CLK_ENET>, + clocks = <&clks VF610_CLK_ENET0>, + <&clks VF610_CLK_ENET0>, <&clks VF610_CLK_ENET>; clock-names = "ipg", "ahb", "ptp"; status = "disabled"; @@ -453,8 +453,8 @@ compatible = "fsl,mvf600-fec"; reg = <0x400d1000 0x1000>; interrupts = <0 79 0x04>; - clocks = <&clks VF610_CLK_ENET>, - <&clks VF610_CLK_ENET>, + clocks = <&clks VF610_CLK_ENET1>, + <&clks VF610_CLK_ENET1>, <&clks VF610_CLK_ENET>; clock-names = "ipg", "ahb", "ptp"; status = "disabled"; diff --git a/arch/arm/common/edma.c b/arch/arm/common/edma.c index a432e6c1dac1..39ad030ac0c7 100644 --- a/arch/arm/common/edma.c +++ b/arch/arm/common/edma.c @@ -26,7 +26,6 @@ #include <linux/io.h> #include <linux/slab.h> #include <linux/edma.h> -#include <linux/err.h> #include <linux/of_address.h> #include <linux/of_device.h> #include <linux/of_dma.h> diff --git a/arch/arm/configs/da8xx_omapl_defconfig b/arch/arm/configs/da8xx_omapl_defconfig index 7c868139bdb0..1571bea48bed 100644 --- a/arch/arm/configs/da8xx_omapl_defconfig +++ b/arch/arm/configs/da8xx_omapl_defconfig @@ -102,6 +102,8 @@ CONFIG_SND_SOC=m CONFIG_SND_DAVINCI_SOC=m # CONFIG_HID_SUPPORT is not set # CONFIG_USB_SUPPORT is not set +CONFIG_DMADEVICES=y +CONFIG_TI_EDMA=y CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y 
CONFIG_XFS_FS=m diff --git a/arch/arm/configs/davinci_all_defconfig b/arch/arm/configs/davinci_all_defconfig index c86fd75e181a..ab2f7378352c 100644 --- a/arch/arm/configs/davinci_all_defconfig +++ b/arch/arm/configs/davinci_all_defconfig @@ -162,6 +162,8 @@ CONFIG_LEDS_TRIGGERS=y CONFIG_LEDS_TRIGGER_TIMER=m CONFIG_LEDS_TRIGGER_HEARTBEAT=m CONFIG_RTC_CLASS=y +CONFIG_DMADEVICES=y +CONFIG_TI_EDMA=y CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y CONFIG_XFS_FS=m diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index fe0bdc361d2c..6e572c64cf5a 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -53,6 +53,7 @@ CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y +CONFIG_OMAP_OCP2SCP=y CONFIG_BLK_DEV_SD=y CONFIG_ATA=y CONFIG_SATA_AHCI_PLATFORM=y @@ -61,6 +62,7 @@ CONFIG_SATA_MV=y CONFIG_NETDEVICES=y CONFIG_SUN4I_EMAC=y CONFIG_NET_CALXEDA_XGMAC=y +CONFIG_KS8851=y CONFIG_SMSC911X=y CONFIG_STMMAC_ETH=y CONFIG_MDIO_SUN4I=y @@ -89,6 +91,7 @@ CONFIG_I2C_DESIGNWARE_PLATFORM=y CONFIG_I2C_SIRF=y CONFIG_I2C_TEGRA=y CONFIG_SPI=y +CONFIG_SPI_OMAP24XX=y CONFIG_SPI_PL022=y CONFIG_SPI_SIRF=y CONFIG_SPI_TEGRA114=y @@ -111,11 +114,12 @@ CONFIG_FB_SIMPLE=y CONFIG_USB=y CONFIG_USB_XHCI_HCD=y CONFIG_USB_EHCI_HCD=y -CONFIG_USB_EHCI_MXC=y CONFIG_USB_EHCI_TEGRA=y CONFIG_USB_EHCI_HCD_PLATFORM=y CONFIG_USB_ISP1760_HCD=y CONFIG_USB_STORAGE=y +CONFIG_USB_CHIPIDEA=y +CONFIG_USB_CHIPIDEA_HOST=y CONFIG_AB8500_USB=y CONFIG_NOP_USB_XCEIV=y CONFIG_OMAP_USB2=y diff --git a/arch/arm/configs/nhk8815_defconfig b/arch/arm/configs/nhk8815_defconfig index 35f8cf299fa2..263ae3869e32 100644 --- a/arch/arm/configs/nhk8815_defconfig +++ b/arch/arm/configs/nhk8815_defconfig @@ -1,6 +1,8 @@ # CONFIG_LOCALVERSION_AUTO is not set # CONFIG_SWAP is not set CONFIG_SYSVIPC=y +CONFIG_NO_HZ_IDLE=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 @@ -48,7 +50,6 @@ 
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_MTD=y CONFIG_MTD_TESTS=m CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_NAND_ECC_SMC=y CONFIG_MTD_NAND=y @@ -94,8 +95,10 @@ CONFIG_I2C_GPIO=y CONFIG_I2C_NOMADIK=y CONFIG_DEBUG_GPIO=y # CONFIG_HWMON is not set +CONFIG_REGULATOR=y CONFIG_MMC=y -CONFIG_MMC_CLKGATE=y +CONFIG_MMC_UNSAFE_RESUME=y +# CONFIG_MMC_BLOCK_BOUNCE is not set CONFIG_MMC_ARMMMCI=y CONFIG_NEW_LEDS=y CONFIG_LEDS_CLASS=y diff --git a/arch/arm/mach-davinci/board-dm365-evm.c b/arch/arm/mach-davinci/board-dm365-evm.c index afbc439f11d4..4cdb61c54459 100644 --- a/arch/arm/mach-davinci/board-dm365-evm.c +++ b/arch/arm/mach-davinci/board-dm365-evm.c @@ -505,7 +505,7 @@ static struct vpbe_output dm365evm_vpbe_outputs[] = { /* * Amplifiers on the board */ -struct ths7303_platform_data ths7303_pdata = { +static struct ths7303_platform_data ths7303_pdata = { .ch_1 = 3, .ch_2 = 3, .ch_3 = 3, diff --git a/arch/arm/mach-davinci/dm355.c b/arch/arm/mach-davinci/dm355.c index 42ef53f62c6c..86100d179694 100644 --- a/arch/arm/mach-davinci/dm355.c +++ b/arch/arm/mach-davinci/dm355.c @@ -860,7 +860,7 @@ static struct platform_device dm355_vpbe_display = { }, }; -struct venc_platform_data dm355_venc_pdata = { +static struct venc_platform_data dm355_venc_pdata = { .setup_pinmux = dm355_vpbe_setup_pinmux, .setup_clock = dm355_venc_setup_clock, }; diff --git a/arch/arm/mach-davinci/dm365.c b/arch/arm/mach-davinci/dm365.c index fa7af5eda52d..dad28029ba9b 100644 --- a/arch/arm/mach-davinci/dm365.c +++ b/arch/arm/mach-davinci/dm365.c @@ -1349,7 +1349,7 @@ static struct platform_device dm365_vpbe_display = { }, }; -struct venc_platform_data dm365_venc_pdata = { +static struct venc_platform_data dm365_venc_pdata = { .setup_pinmux = dm365_vpbe_setup_pinmux, .setup_clock = dm365_venc_setup_clock, }; diff --git a/arch/arm/mach-exynos/Kconfig b/arch/arm/mach-exynos/Kconfig index 855d4a7b462d..5952e68c76c4 100644 --- a/arch/arm/mach-exynos/Kconfig +++ 
b/arch/arm/mach-exynos/Kconfig @@ -92,6 +92,7 @@ config SOC_EXYNOS5440 bool "SAMSUNG EXYNOS5440" default y depends on ARCH_EXYNOS5 + select ARCH_DMA_ADDR_T_64BIT if ARM_LPAE select ARCH_HAS_OPP select HAVE_ARM_ARCH_TIMER select AUTO_ZRELADDR diff --git a/arch/arm/mach-exynos/Makefile b/arch/arm/mach-exynos/Makefile index e970a7a4e278..53696154aead 100644 --- a/arch/arm/mach-exynos/Makefile +++ b/arch/arm/mach-exynos/Makefile @@ -14,7 +14,7 @@ obj- := obj-$(CONFIG_ARCH_EXYNOS) += common.o -obj-$(CONFIG_PM) += pm.o +obj-$(CONFIG_S5P_PM) += pm.o obj-$(CONFIG_PM_GENERIC_DOMAINS) += pm_domains.o obj-$(CONFIG_CPU_IDLE) += cpuidle.o diff --git a/arch/arm/mach-exynos/common.c b/arch/arm/mach-exynos/common.c index 164685bd25c8..ba95e5db2501 100644 --- a/arch/arm/mach-exynos/common.c +++ b/arch/arm/mach-exynos/common.c @@ -58,7 +58,6 @@ static const char name_exynos5440[] = "EXYNOS5440"; static void exynos4_map_io(void); static void exynos5_map_io(void); -static void exynos5440_map_io(void); static int exynos_init(void); static struct cpu_table cpu_ids[] __initdata = { @@ -95,7 +94,6 @@ static struct cpu_table cpu_ids[] __initdata = { }, { .idcode = EXYNOS5440_SOC_ID, .idmask = EXYNOS5_SOC_MASK, - .map_io = exynos5440_map_io, .init = exynos_init, .name = name_exynos5440, }, @@ -150,11 +148,6 @@ static struct map_desc exynos4_iodesc[] __initdata = { .length = SZ_64K, .type = MT_DEVICE, }, { - .virtual = (unsigned long)S3C_VA_UART, - .pfn = __phys_to_pfn(EXYNOS4_PA_UART), - .length = SZ_512K, - .type = MT_DEVICE, - }, { .virtual = (unsigned long)S5P_VA_CMU, .pfn = __phys_to_pfn(EXYNOS4_PA_CMU), .length = SZ_128K, @@ -268,20 +261,6 @@ static struct map_desc exynos5_iodesc[] __initdata = { .pfn = __phys_to_pfn(EXYNOS5_PA_PMU), .length = SZ_64K, .type = MT_DEVICE, - }, { - .virtual = (unsigned long)S3C_VA_UART, - .pfn = __phys_to_pfn(EXYNOS5_PA_UART), - .length = SZ_512K, - .type = MT_DEVICE, - }, -}; - -static struct map_desc exynos5440_iodesc0[] __initdata = { - { - .virtual = 
(unsigned long)S3C_VA_UART, - .pfn = __phys_to_pfn(EXYNOS5440_PA_UART0), - .length = SZ_512K, - .type = MT_DEVICE, }, }; @@ -388,11 +367,6 @@ static void __init exynos5_map_io(void) iotable_init(exynos5250_iodesc, ARRAY_SIZE(exynos5250_iodesc)); } -static void __init exynos5440_map_io(void) -{ - iotable_init(exynos5440_iodesc0, ARRAY_SIZE(exynos5440_iodesc0)); -} - void __init exynos_init_time(void) { of_clk_init(NULL); diff --git a/arch/arm/mach-exynos/common.h b/arch/arm/mach-exynos/common.h index 3e156bcddcb4..972490fc09d6 100644 --- a/arch/arm/mach-exynos/common.h +++ b/arch/arm/mach-exynos/common.h @@ -97,6 +97,5 @@ struct exynos_pmu_conf { }; extern void exynos_sys_powerdown_conf(enum sys_powerdown mode); -extern void s3c_cpu_resume(void); #endif /* __ARCH_ARM_MACH_EXYNOS_COMMON_H */ diff --git a/arch/arm/mach-exynos/cpuidle.c b/arch/arm/mach-exynos/cpuidle.c index 17a18ff3d71e..225ee8431c72 100644 --- a/arch/arm/mach-exynos/cpuidle.c +++ b/arch/arm/mach-exynos/cpuidle.c @@ -25,6 +25,7 @@ #include <mach/regs-pmu.h> #include <plat/cpu.h> +#include <plat/pm.h> #include "common.h" diff --git a/arch/arm/mach-exynos/include/mach/memory.h b/arch/arm/mach-exynos/include/mach/memory.h index 374ef2cf7152..2a4cdb7cb326 100644 --- a/arch/arm/mach-exynos/include/mach/memory.h +++ b/arch/arm/mach-exynos/include/mach/memory.h @@ -15,8 +15,13 @@ #define PLAT_PHYS_OFFSET UL(0x40000000) +#ifndef CONFIG_ARM_LPAE /* Maximum of 256MiB in one bank */ #define MAX_PHYSMEM_BITS 32 #define SECTION_SIZE_BITS 28 +#else +#define MAX_PHYSMEM_BITS 36 +#define SECTION_SIZE_BITS 31 +#endif #endif /* __ASM_ARCH_MEMORY_H */ diff --git a/arch/arm/mach-exynos/pm.c b/arch/arm/mach-exynos/pm.c index 41c20692a13f..c679db577269 100644 --- a/arch/arm/mach-exynos/pm.c +++ b/arch/arm/mach-exynos/pm.c @@ -217,6 +217,9 @@ static __init int exynos_pm_drvinit(void) struct clk *pll_base; unsigned int tmp; + if (soc_is_exynos5440()) + return 0; + s3c_pm_init(); /* All wakeup disable */ @@ -340,6 +343,9 @@ 
static struct syscore_ops exynos_pm_syscore_ops = { static __init int exynos_pm_syscore_init(void) { + if (soc_is_exynos5440()) + return 0; + register_syscore_ops(&exynos_pm_syscore_ops); return 0; } diff --git a/arch/arm/mach-footbridge/dc21285.c b/arch/arm/mach-footbridge/dc21285.c index a7cd2cf5e08d..3490a24f969e 100644 --- a/arch/arm/mach-footbridge/dc21285.c +++ b/arch/arm/mach-footbridge/dc21285.c @@ -276,8 +276,6 @@ int __init dc21285_setup(int nr, struct pci_sys_data *sys) sys->mem_offset = DC21285_PCI_MEM; - pci_ioremap_io(0, DC21285_PCI_IO); - pci_add_resource_offset(&sys->resources, &res[0], sys->mem_offset); pci_add_resource_offset(&sys->resources, &res[1], sys->mem_offset); diff --git a/arch/arm/mach-highbank/highbank.c b/arch/arm/mach-highbank/highbank.c index dc5d6becd8c7..88815795fe26 100644 --- a/arch/arm/mach-highbank/highbank.c +++ b/arch/arm/mach-highbank/highbank.c @@ -115,6 +115,7 @@ static int highbank_platform_notifier(struct notifier_block *nb, { struct resource *res; int reg = -1; + u32 val; struct device *dev = __dev; if (event != BUS_NOTIFY_ADD_DEVICE) @@ -141,10 +142,10 @@ static int highbank_platform_notifier(struct notifier_block *nb, return NOTIFY_DONE; if (of_property_read_bool(dev->of_node, "dma-coherent")) { - writel(0xff31, sregs_base + reg); + val = readl(sregs_base + reg); + writel(val | 0xff01, sregs_base + reg); set_dma_ops(dev, &arm_coherent_dma_ops); - } else - writel(0, sregs_base + reg); + } return NOTIFY_OK; } diff --git a/arch/arm/mach-imx/clk-imx6q.c b/arch/arm/mach-imx/clk-imx6q.c index 4282e99f5ca1..86567d980b07 100644 --- a/arch/arm/mach-imx/clk-imx6q.c +++ b/arch/arm/mach-imx/clk-imx6q.c @@ -199,7 +199,8 @@ static const char *pcie_axi_sels[] = { "axi", "ahb", }; static const char *ssi_sels[] = { "pll3_pfd2_508m", "pll3_pfd3_454m", "pll4_post_div", }; static const char *usdhc_sels[] = { "pll2_pfd2_396m", "pll2_pfd0_352m", }; static const char *enfc_sels[] = { "pll2_pfd0_352m", "pll2_bus", "pll3_usb_otg", 
"pll2_pfd2_396m", }; -static const char *emi_sels[] = { "axi", "pll3_usb_otg", "pll2_pfd2_396m", "pll2_pfd0_352m", }; +static const char *emi_sels[] = { "pll2_pfd2_396m", "pll3_usb_otg", "axi", "pll2_pfd0_352m", }; +static const char *emi_slow_sels[] = { "axi", "pll3_usb_otg", "pll2_pfd2_396m", "pll2_pfd0_352m", }; static const char *vdo_axi_sels[] = { "axi", "ahb", }; static const char *vpu_axi_sels[] = { "axi", "pll2_pfd2_396m", "pll2_pfd0_352m", }; static const char *cko1_sels[] = { "pll3_usb_otg", "pll2_bus", "pll1_sys", "pll5_video_div", @@ -392,7 +393,7 @@ static void __init imx6q_clocks_init(struct device_node *ccm_node) clk[usdhc4_sel] = imx_clk_mux("usdhc4_sel", base + 0x1c, 19, 1, usdhc_sels, ARRAY_SIZE(usdhc_sels)); clk[enfc_sel] = imx_clk_mux("enfc_sel", base + 0x2c, 16, 2, enfc_sels, ARRAY_SIZE(enfc_sels)); clk[emi_sel] = imx_clk_mux("emi_sel", base + 0x1c, 27, 2, emi_sels, ARRAY_SIZE(emi_sels)); - clk[emi_slow_sel] = imx_clk_mux("emi_slow_sel", base + 0x1c, 29, 2, emi_sels, ARRAY_SIZE(emi_sels)); + clk[emi_slow_sel] = imx_clk_mux("emi_slow_sel", base + 0x1c, 29, 2, emi_slow_sels, ARRAY_SIZE(emi_slow_sels)); clk[vdo_axi_sel] = imx_clk_mux("vdo_axi_sel", base + 0x18, 11, 1, vdo_axi_sels, ARRAY_SIZE(vdo_axi_sels)); clk[vpu_axi_sel] = imx_clk_mux("vpu_axi_sel", base + 0x18, 14, 2, vpu_axi_sels, ARRAY_SIZE(vpu_axi_sels)); clk[cko1_sel] = imx_clk_mux("cko1_sel", base + 0x60, 0, 4, cko1_sels, ARRAY_SIZE(cko1_sels)); diff --git a/arch/arm/mach-imx/clk-vf610.c b/arch/arm/mach-imx/clk-vf610.c index d617c0b7c809..b169a396d93b 100644 --- a/arch/arm/mach-imx/clk-vf610.c +++ b/arch/arm/mach-imx/clk-vf610.c @@ -183,6 +183,8 @@ static void __init vf610_clocks_init(struct device_node *ccm_node) clk[VF610_CLK_ENET_TS_SEL] = imx_clk_mux("enet_ts_sel", CCM_CSCMR2, 0, 3, enet_ts_sels, 7); clk[VF610_CLK_ENET] = imx_clk_gate("enet", "enet_sel", CCM_CSCDR1, 24); clk[VF610_CLK_ENET_TS] = imx_clk_gate("enet_ts", "enet_ts_sel", CCM_CSCDR1, 23); + clk[VF610_CLK_ENET0] = 
imx_clk_gate2("enet0", "ipg_bus", CCM_CCGR9, CCM_CCGRx_CGn(0)); + clk[VF610_CLK_ENET1] = imx_clk_gate2("enet1", "ipg_bus", CCM_CCGR9, CCM_CCGRx_CGn(1)); clk[VF610_CLK_PIT] = imx_clk_gate2("pit", "ipg_bus", CCM_CCGR1, CCM_CCGRx_CGn(7)); diff --git a/arch/arm/mach-imx/mx27.h b/arch/arm/mach-imx/mx27.h index e074616d54ca..8a65f192e7f3 100644 --- a/arch/arm/mach-imx/mx27.h +++ b/arch/arm/mach-imx/mx27.h @@ -135,7 +135,7 @@ #define MX27_INT_GPT4 (NR_IRQS_LEGACY + 4) #define MX27_INT_RTIC (NR_IRQS_LEGACY + 5) #define MX27_INT_CSPI3 (NR_IRQS_LEGACY + 6) -#define MX27_INT_SDHC (NR_IRQS_LEGACY + 7) +#define MX27_INT_MSHC (NR_IRQS_LEGACY + 7) #define MX27_INT_GPIO (NR_IRQS_LEGACY + 8) #define MX27_INT_SDHC3 (NR_IRQS_LEGACY + 9) #define MX27_INT_SDHC2 (NR_IRQS_LEGACY + 10) diff --git a/arch/arm/mach-keystone/keystone.c b/arch/arm/mach-keystone/keystone.c index fe4d9ff93a7e..b661c5c2870a 100644 --- a/arch/arm/mach-keystone/keystone.c +++ b/arch/arm/mach-keystone/keystone.c @@ -49,7 +49,7 @@ static const char *keystone_match[] __initconst = { NULL, }; -void keystone_restart(char mode, const char *cmd) +void keystone_restart(enum reboot_mode mode, const char *cmd) { u32 val; diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig index 627fa7e41fba..3eed0006d189 100644 --- a/arch/arm/mach-omap2/Kconfig +++ b/arch/arm/mach-omap2/Kconfig @@ -62,7 +62,7 @@ config SOC_OMAP5 select HAVE_SMP select COMMON_CLK select HAVE_ARM_ARCH_TIMER - select ARM_ERRATA_798181 + select ARM_ERRATA_798181 if SMP config SOC_AM33XX bool "AM33XX support" diff --git a/arch/arm/mach-omap2/board-generic.c b/arch/arm/mach-omap2/board-generic.c index e5fbfed69aa2..be5d005ebad2 100644 --- a/arch/arm/mach-omap2/board-generic.c +++ b/arch/arm/mach-omap2/board-generic.c @@ -15,6 +15,7 @@ #include <linux/of_irq.h> #include <linux/of_platform.h> #include <linux/irqdomain.h> +#include <linux/clk.h> #include <asm/mach/arch.h> @@ -35,6 +36,21 @@ static struct of_device_id omap_dt_match_table[] 
__initdata = { { } }; +/* + * Create alias for USB host PHY clock. + * Remove this when clock phandle can be provided via DT + */ +static void __init legacy_init_ehci_clk(char *clkname) +{ + int ret; + + ret = clk_add_alias("main_clk", NULL, clkname, NULL); + if (ret) { + pr_err("%s:Failed to add main_clk alias to %s :%d\n", + __func__, clkname, ret); + } +} + static void __init omap_generic_init(void) { omap_sdrc_init(NULL, NULL); @@ -45,10 +61,15 @@ static void __init omap_generic_init(void) * HACK: call display setup code for selected boards to enable omapdss. * This will be removed when omapdss supports DT. */ - if (of_machine_is_compatible("ti,omap4-panda")) + if (of_machine_is_compatible("ti,omap4-panda")) { omap4_panda_display_init_of(); + legacy_init_ehci_clk("auxclk3_ck"); + + } else if (of_machine_is_compatible("ti,omap4-sdp")) omap_4430sdp_display_init_of(); + else if (of_machine_is_compatible("ti,omap5-uevm")) + legacy_init_ehci_clk("auxclk1_ck"); } #ifdef CONFIG_SOC_OMAP2420 diff --git a/arch/arm/mach-pxa/em-x270.c b/arch/arm/mach-pxa/em-x270.c index f6726bb4eb95..3a3362fa793e 100644 --- a/arch/arm/mach-pxa/em-x270.c +++ b/arch/arm/mach-pxa/em-x270.c @@ -477,16 +477,24 @@ static int em_x270_usb_hub_init(void) /* USB Hub power-on and reset */ gpio_direction_output(usb_hub_reset, 1); gpio_direction_output(GPIO9_USB_VBUS_EN, 0); - regulator_enable(em_x270_usb_ldo); + err = regulator_enable(em_x270_usb_ldo); + if (err) + goto err_free_rst_gpio; + gpio_set_value(usb_hub_reset, 0); gpio_set_value(usb_hub_reset, 1); regulator_disable(em_x270_usb_ldo); - regulator_enable(em_x270_usb_ldo); + err = regulator_enable(em_x270_usb_ldo); + if (err) + goto err_free_rst_gpio; + gpio_set_value(usb_hub_reset, 0); gpio_set_value(GPIO9_USB_VBUS_EN, 1); return 0; +err_free_rst_gpio: + gpio_free(usb_hub_reset); err_free_vbus_gpio: gpio_free(GPIO9_USB_VBUS_EN); err_free_usb_ldo: @@ -592,7 +600,7 @@ err_irq: return err; } -static void em_x270_mci_setpower(struct device *dev, 
unsigned int vdd) +static int em_x270_mci_setpower(struct device *dev, unsigned int vdd) { struct pxamci_platform_data* p_d = dev->platform_data; @@ -600,10 +608,11 @@ static void em_x270_mci_setpower(struct device *dev, unsigned int vdd) int vdd_uV = (2000 + (vdd - __ffs(MMC_VDD_20_21)) * 100) * 1000; regulator_set_voltage(em_x270_sdio_ldo, vdd_uV, vdd_uV); - regulator_enable(em_x270_sdio_ldo); + return regulator_enable(em_x270_sdio_ldo); } else { regulator_disable(em_x270_sdio_ldo); } + return 0; } static void em_x270_mci_exit(struct device *dev, void *data) diff --git a/arch/arm/mach-pxa/mainstone.c b/arch/arm/mach-pxa/mainstone.c index d2c652318376..dd70343c8708 100644 --- a/arch/arm/mach-pxa/mainstone.c +++ b/arch/arm/mach-pxa/mainstone.c @@ -408,7 +408,7 @@ static int mainstone_mci_init(struct device *dev, irq_handler_t mstone_detect_in return err; } -static void mainstone_mci_setpower(struct device *dev, unsigned int vdd) +static int mainstone_mci_setpower(struct device *dev, unsigned int vdd) { struct pxamci_platform_data* p_d = dev->platform_data; @@ -420,6 +420,7 @@ static void mainstone_mci_setpower(struct device *dev, unsigned int vdd) printk(KERN_DEBUG "%s: off\n", __func__); MST_MSCWR1 &= ~MST_MSCWR1_MMC_ON; } + return 0; } static void mainstone_mci_exit(struct device *dev, void *data) diff --git a/arch/arm/mach-pxa/pcm990-baseboard.c b/arch/arm/mach-pxa/pcm990-baseboard.c index fb7f1d1627dc..13e5b00eae90 100644 --- a/arch/arm/mach-pxa/pcm990-baseboard.c +++ b/arch/arm/mach-pxa/pcm990-baseboard.c @@ -335,7 +335,7 @@ static int pcm990_mci_init(struct device *dev, irq_handler_t mci_detect_int, return err; } -static void pcm990_mci_setpower(struct device *dev, unsigned int vdd) +static int pcm990_mci_setpower(struct device *dev, unsigned int vdd) { struct pxamci_platform_data *p_d = dev->platform_data; u8 val; @@ -348,6 +348,7 @@ static void pcm990_mci_setpower(struct device *dev, unsigned int vdd) val &= ~PCM990_CTRL_MMC2PWR; 
pcm990_cpld_writeb(PCM990_CTRL_MMC2PWR, PCM990_CTRL_REG5); + return 0; } static void pcm990_mci_exit(struct device *dev, void *data) diff --git a/arch/arm/mach-pxa/poodle.c b/arch/arm/mach-pxa/poodle.c index 711d37e26bd8..aedf053a1de5 100644 --- a/arch/arm/mach-pxa/poodle.c +++ b/arch/arm/mach-pxa/poodle.c @@ -258,7 +258,7 @@ err_free_2: return err; } -static void poodle_mci_setpower(struct device *dev, unsigned int vdd) +static int poodle_mci_setpower(struct device *dev, unsigned int vdd) { struct pxamci_platform_data* p_d = dev->platform_data; @@ -270,6 +270,8 @@ static void poodle_mci_setpower(struct device *dev, unsigned int vdd) gpio_set_value(POODLE_GPIO_SD_PWR1, 0); gpio_set_value(POODLE_GPIO_SD_PWR, 0); } + + return 0; } static void poodle_mci_exit(struct device *dev, void *data) diff --git a/arch/arm/mach-pxa/spitz.c b/arch/arm/mach-pxa/spitz.c index 2125df0444e7..4c29173026e8 100644 --- a/arch/arm/mach-pxa/spitz.c +++ b/arch/arm/mach-pxa/spitz.c @@ -598,7 +598,7 @@ static inline void spitz_spi_init(void) {} * NOTE: The card detect interrupt isn't debounced so we delay it by 250ms to * give the card a chance to fully insert/eject. */ -static void spitz_mci_setpower(struct device *dev, unsigned int vdd) +static int spitz_mci_setpower(struct device *dev, unsigned int vdd) { struct pxamci_platform_data* p_d = dev->platform_data; @@ -606,6 +606,8 @@ static void spitz_mci_setpower(struct device *dev, unsigned int vdd) spitz_card_pwr_ctrl(SCOOP_CPR_SD_3V, SCOOP_CPR_SD_3V); else spitz_card_pwr_ctrl(SCOOP_CPR_SD_3V, 0x0); + + return 0; } static struct pxamci_platform_data spitz_mci_platform_data = { diff --git a/arch/arm/mach-pxa/stargate2.c b/arch/arm/mach-pxa/stargate2.c index 88fde43c948c..62aea3e835f3 100644 --- a/arch/arm/mach-pxa/stargate2.c +++ b/arch/arm/mach-pxa/stargate2.c @@ -734,9 +734,10 @@ static int stargate2_mci_init(struct device *dev, * * Very simple control. 
Either it is on or off and is controlled by * a gpio pin */ -static void stargate2_mci_setpower(struct device *dev, unsigned int vdd) +static int stargate2_mci_setpower(struct device *dev, unsigned int vdd) { gpio_set_value(SG2_SD_POWER_ENABLE, !!vdd); + return 0; } static void stargate2_mci_exit(struct device *dev, void *data) diff --git a/arch/arm/mach-s3c24xx/clock-s3c2410.c b/arch/arm/mach-s3c24xx/clock-s3c2410.c index 34fffdf6fc1d..564553694b54 100644 --- a/arch/arm/mach-s3c24xx/clock-s3c2410.c +++ b/arch/arm/mach-s3c24xx/clock-s3c2410.c @@ -119,66 +119,101 @@ static struct clk init_clocks_off[] = { } }; -static struct clk init_clocks[] = { - { - .name = "lcd", - .parent = &clk_h, - .enable = s3c2410_clkcon_enable, - .ctrlbit = S3C2410_CLKCON_LCDC, - }, { - .name = "gpio", - .parent = &clk_p, - .enable = s3c2410_clkcon_enable, - .ctrlbit = S3C2410_CLKCON_GPIO, - }, { - .name = "usb-host", - .parent = &clk_h, - .enable = s3c2410_clkcon_enable, - .ctrlbit = S3C2410_CLKCON_USBH, - }, { - .name = "usb-device", - .parent = &clk_h, - .enable = s3c2410_clkcon_enable, - .ctrlbit = S3C2410_CLKCON_USBD, - }, { - .name = "timers", - .parent = &clk_p, - .enable = s3c2410_clkcon_enable, - .ctrlbit = S3C2410_CLKCON_PWMT, - }, { - .name = "uart", - .devname = "s3c2410-uart.0", - .parent = &clk_p, - .enable = s3c2410_clkcon_enable, - .ctrlbit = S3C2410_CLKCON_UART0, - }, { - .name = "uart", - .devname = "s3c2410-uart.1", - .parent = &clk_p, - .enable = s3c2410_clkcon_enable, - .ctrlbit = S3C2410_CLKCON_UART1, - }, { - .name = "uart", - .devname = "s3c2410-uart.2", - .parent = &clk_p, - .enable = s3c2410_clkcon_enable, - .ctrlbit = S3C2410_CLKCON_UART2, - }, { - .name = "rtc", - .parent = &clk_p, - .enable = s3c2410_clkcon_enable, - .ctrlbit = S3C2410_CLKCON_RTC, - }, { - .name = "watchdog", - .parent = &clk_p, - .ctrlbit = 0, - }, { - .name = "usb-bus-host", - .parent = &clk_usb_bus, - }, { - .name = "usb-bus-gadget", - .parent = &clk_usb_bus, - }, +static struct clk clk_lcd 
= { + .name = "lcd", + .parent = &clk_h, + .enable = s3c2410_clkcon_enable, + .ctrlbit = S3C2410_CLKCON_LCDC, +}; + +static struct clk clk_gpio = { + .name = "gpio", + .parent = &clk_p, + .enable = s3c2410_clkcon_enable, + .ctrlbit = S3C2410_CLKCON_GPIO, +}; + +static struct clk clk_usb_host = { + .name = "usb-host", + .parent = &clk_h, + .enable = s3c2410_clkcon_enable, + .ctrlbit = S3C2410_CLKCON_USBH, +}; + +static struct clk clk_usb_device = { + .name = "usb-device", + .parent = &clk_h, + .enable = s3c2410_clkcon_enable, + .ctrlbit = S3C2410_CLKCON_USBD, +}; + +static struct clk clk_timers = { + .name = "timers", + .parent = &clk_p, + .enable = s3c2410_clkcon_enable, + .ctrlbit = S3C2410_CLKCON_PWMT, +}; + +struct clk s3c24xx_clk_uart0 = { + .name = "uart", + .devname = "s3c2410-uart.0", + .parent = &clk_p, + .enable = s3c2410_clkcon_enable, + .ctrlbit = S3C2410_CLKCON_UART0, +}; + +struct clk s3c24xx_clk_uart1 = { + .name = "uart", + .devname = "s3c2410-uart.1", + .parent = &clk_p, + .enable = s3c2410_clkcon_enable, + .ctrlbit = S3C2410_CLKCON_UART1, +}; + +struct clk s3c24xx_clk_uart2 = { + .name = "uart", + .devname = "s3c2410-uart.2", + .parent = &clk_p, + .enable = s3c2410_clkcon_enable, + .ctrlbit = S3C2410_CLKCON_UART2, +}; + +static struct clk clk_rtc = { + .name = "rtc", + .parent = &clk_p, + .enable = s3c2410_clkcon_enable, + .ctrlbit = S3C2410_CLKCON_RTC, +}; + +static struct clk clk_watchdog = { + .name = "watchdog", + .parent = &clk_p, + .ctrlbit = 0, +}; + +static struct clk clk_usb_bus_host = { + .name = "usb-bus-host", + .parent = &clk_usb_bus, +}; + +static struct clk clk_usb_bus_gadget = { + .name = "usb-bus-gadget", + .parent = &clk_usb_bus, +}; + +static struct clk *init_clocks[] = { + &clk_lcd, + &clk_gpio, + &clk_usb_host, + &clk_usb_device, + &clk_timers, + &s3c24xx_clk_uart0, + &s3c24xx_clk_uart1, + &s3c24xx_clk_uart2, + &clk_rtc, + &clk_watchdog, + &clk_usb_bus_host, + &clk_usb_bus_gadget, }; /* s3c2410_baseclk_add() @@ -195,7 +230,6 @@ 
int __init s3c2410_baseclk_add(void) { unsigned long clkslow = __raw_readl(S3C2410_CLKSLOW); unsigned long clkcon = __raw_readl(S3C2410_CLKCON); - struct clk *clkp; struct clk *xtal; int ret; int ptr; @@ -207,8 +241,9 @@ int __init s3c2410_baseclk_add(void) /* register clocks from clock array */ - clkp = init_clocks; - for (ptr = 0; ptr < ARRAY_SIZE(init_clocks); ptr++, clkp++) { + for (ptr = 0; ptr < ARRAY_SIZE(init_clocks); ptr++) { + struct clk *clkp = init_clocks[ptr]; + /* ensure that we note the clock state */ clkp->usage = clkcon & clkp->ctrlbit ? 1 : 0; diff --git a/arch/arm/mach-s3c24xx/clock-s3c2440.c b/arch/arm/mach-s3c24xx/clock-s3c2440.c index 1069b5680826..aaf006d1d6dc 100644 --- a/arch/arm/mach-s3c24xx/clock-s3c2440.c +++ b/arch/arm/mach-s3c24xx/clock-s3c2440.c @@ -166,6 +166,9 @@ static struct clk_lookup s3c2440_clk_lookup[] = { CLKDEV_INIT(NULL, "clk_uart_baud1", &s3c24xx_uclk), CLKDEV_INIT(NULL, "clk_uart_baud2", &clk_p), CLKDEV_INIT(NULL, "clk_uart_baud3", &s3c2440_clk_fclk_n), + CLKDEV_INIT("s3c2440-uart.0", "uart", &s3c24xx_clk_uart0), + CLKDEV_INIT("s3c2440-uart.1", "uart", &s3c24xx_clk_uart1), + CLKDEV_INIT("s3c2440-uart.2", "uart", &s3c24xx_clk_uart2), CLKDEV_INIT("s3c2440-camif", "camera", &s3c2440_clk_cam_upll), }; diff --git a/arch/arm/mach-sti/Kconfig b/arch/arm/mach-sti/Kconfig index d04e3bfe1918..835833e3c4f8 100644 --- a/arch/arm/mach-sti/Kconfig +++ b/arch/arm/mach-sti/Kconfig @@ -11,8 +11,9 @@ menuconfig ARCH_STI select HAVE_SMP select HAVE_ARM_SCU if SMP select ARCH_REQUIRE_GPIOLIB - select ARM_ERRATA_720789 select ARM_ERRATA_754322 + select ARM_ERRATA_764369 + select ARM_ERRATA_775420 select PL310_ERRATA_753970 if CACHE_PL310 select PL310_ERRATA_769419 if CACHE_PL310 help diff --git a/arch/arm/mach-zynq/common.c b/arch/arm/mach-zynq/common.c index 5b799c29886e..5f252569c689 100644 --- a/arch/arm/mach-zynq/common.c +++ b/arch/arm/mach-zynq/common.c @@ -91,7 +91,7 @@ static void __init zynq_map_io(void) zynq_scu_map_io(); } -static 
void zynq_system_reset(char mode, const char *cmd) +static void zynq_system_reset(enum reboot_mode mode, const char *cmd) { zynq_slcr_system_reset(); } diff --git a/arch/arm/plat-samsung/Kconfig b/arch/arm/plat-samsung/Kconfig index 3dc5cbea86cc..a5b5ff6e68d2 100644 --- a/arch/arm/plat-samsung/Kconfig +++ b/arch/arm/plat-samsung/Kconfig @@ -29,6 +29,13 @@ config PLAT_S5P help Base platform code for Samsung's S5P series SoC. +config SAMSUNG_PM + bool + depends on PM && (PLAT_S3C24XX || ARCH_S3C64XX || ARCH_S5P64X0 || S5P_PM) + default y + help + Base platform power management code for samsung code + if PLAT_SAMSUNG # boot configurations diff --git a/arch/arm/plat-samsung/Makefile b/arch/arm/plat-samsung/Makefile index 98d07d8fc7a7..199bbe304d02 100644 --- a/arch/arm/plat-samsung/Makefile +++ b/arch/arm/plat-samsung/Makefile @@ -51,7 +51,7 @@ obj-$(CONFIG_SAMSUNG_DMADEV) += dma-ops.o # PM support -obj-$(CONFIG_PM) += pm.o +obj-$(CONFIG_SAMSUNG_PM) += pm.o obj-$(CONFIG_SAMSUNG_PM_GPIO) += pm-gpio.o obj-$(CONFIG_SAMSUNG_PM_CHECK) += pm-check.o diff --git a/arch/arm/plat-samsung/include/plat/clock.h b/arch/arm/plat-samsung/include/plat/clock.h index a62753dc15ba..df45d6edc98d 100644 --- a/arch/arm/plat-samsung/include/plat/clock.h +++ b/arch/arm/plat-samsung/include/plat/clock.h @@ -83,6 +83,11 @@ extern struct clk clk_ext; extern struct clksrc_clk clk_epllref; extern struct clksrc_clk clk_esysclk; +/* S3C24XX UART clocks */ +extern struct clk s3c24xx_clk_uart0; +extern struct clk s3c24xx_clk_uart1; +extern struct clk s3c24xx_clk_uart2; + /* S3C64XX specific clocks */ extern struct clk clk_h2; extern struct clk clk_27m; diff --git a/arch/arm/plat-samsung/include/plat/pm.h b/arch/arm/plat-samsung/include/plat/pm.h index 5d47ca35cabd..6bc1a8f471e3 100644 --- a/arch/arm/plat-samsung/include/plat/pm.h +++ b/arch/arm/plat-samsung/include/plat/pm.h @@ -19,7 +19,7 @@ struct device; -#ifdef CONFIG_PM +#ifdef CONFIG_SAMSUNG_PM extern __init int s3c_pm_init(void); extern __init 
int s3c64xx_pm_init(void); @@ -58,8 +58,6 @@ extern unsigned char pm_uart_udivslot; /* true to save UART UDIVSLOT */ /* from sleep.S */ -extern void s3c_cpu_resume(void); - extern int s3c2410_cpu_suspend(unsigned long); /* sleep save info */ @@ -106,12 +104,14 @@ extern void s3c_pm_do_save(struct sleep_save *ptr, int count); extern void s3c_pm_do_restore(struct sleep_save *ptr, int count); extern void s3c_pm_do_restore_core(struct sleep_save *ptr, int count); -#ifdef CONFIG_PM +#ifdef CONFIG_SAMSUNG_PM extern int s3c_irq_wake(struct irq_data *data, unsigned int state); extern int s3c_irqext_wake(struct irq_data *data, unsigned int state); +extern void s3c_cpu_resume(void); #else #define s3c_irq_wake NULL #define s3c_irqext_wake NULL +#define s3c_cpu_resume NULL #endif /* PM debug functions */ diff --git a/arch/arm/plat-samsung/pm.c b/arch/arm/plat-samsung/pm.c index ea3613642451..d0c23010b693 100644 --- a/arch/arm/plat-samsung/pm.c +++ b/arch/arm/plat-samsung/pm.c @@ -80,7 +80,7 @@ unsigned char pm_uart_udivslot; #ifdef CONFIG_SAMSUNG_PM_DEBUG -static struct pm_uart_save uart_save[CONFIG_SERIAL_SAMSUNG_UARTS]; +static struct pm_uart_save uart_save; static void s3c_pm_save_uart(unsigned int uart, struct pm_uart_save *save) { @@ -101,11 +101,7 @@ static void s3c_pm_save_uart(unsigned int uart, struct pm_uart_save *save) static void s3c_pm_save_uarts(void) { - struct pm_uart_save *save = uart_save; - unsigned int uart; - - for (uart = 0; uart < CONFIG_SERIAL_SAMSUNG_UARTS; uart++, save++) - s3c_pm_save_uart(uart, save); + s3c_pm_save_uart(CONFIG_DEBUG_S3C_UART, &uart_save); } static void s3c_pm_restore_uart(unsigned int uart, struct pm_uart_save *save) @@ -126,11 +122,7 @@ static void s3c_pm_restore_uart(unsigned int uart, struct pm_uart_save *save) static void s3c_pm_restore_uarts(void) { - struct pm_uart_save *save = uart_save; - unsigned int uart; - - for (uart = 0; uart < CONFIG_SERIAL_SAMSUNG_UARTS; uart++, save++) - s3c_pm_restore_uart(uart, save); + 
s3c_pm_restore_uart(CONFIG_DEBUG_S3C_UART, &uart_save); } #else static void s3c_pm_save_uarts(void) { } diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 3659e460071d..23a3c4791d86 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -24,10 +24,10 @@ #include <linux/compiler.h> #ifndef CONFIG_ARM64_64K_PAGES -#define THREAD_SIZE_ORDER 1 +#define THREAD_SIZE_ORDER 2 #endif -#define THREAD_SIZE 8192 +#define THREAD_SIZE 16384 #define THREAD_START_SP (THREAD_SIZE - 16) #ifndef __ASSEMBLY__ diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index 439827271e3d..26e310c54344 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -21,6 +21,7 @@ #define BOOT_CPU_MODE_EL2 (0x0e12b007) #ifndef __ASSEMBLY__ +#include <asm/cacheflush.h> /* * __boot_cpu_mode records what mode CPUs were booted in. @@ -36,9 +37,20 @@ extern u32 __boot_cpu_mode[2]; void __hyp_set_vectors(phys_addr_t phys_vector_base); phys_addr_t __hyp_get_vectors(void); +static inline void sync_boot_mode(void) +{ + /* + * As secondaries write to __boot_cpu_mode with caches disabled, we + * must flush the corresponding cache entries to ensure the visibility + * of their writes. 
+ */ + __flush_dcache_area(__boot_cpu_mode, sizeof(__boot_cpu_mode)); +} + /* Reports the availability of HYP mode */ static inline bool is_hyp_mode_available(void) { + sync_boot_mode(); return (__boot_cpu_mode[0] == BOOT_CPU_MODE_EL2 && __boot_cpu_mode[1] == BOOT_CPU_MODE_EL2); } @@ -46,6 +58,7 @@ static inline bool is_hyp_mode_available(void) /* Check if the bootloader has booted CPUs in different modes */ static inline bool is_hyp_mode_mismatched(void) { + sync_boot_mode(); return __boot_cpu_mode[0] != __boot_cpu_mode[1]; } diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 1d1314280a03..6ad781b21c08 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -121,7 +121,7 @@ .macro get_thread_info, rd mov \rd, sp - and \rd, \rd, #~((1 << 13) - 1) // top of 8K stack + and \rd, \rd, #~(THREAD_SIZE - 1) // top of stack .endm /* diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 1788bf6b471f..57fb55c44c90 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -81,7 +81,7 @@ void soft_restart(unsigned long addr) void (*pm_power_off)(void); EXPORT_SYMBOL_GPL(pm_power_off); -void (*arm_pm_restart)(char str, const char *cmd); +void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd); EXPORT_SYMBOL_GPL(arm_pm_restart); void arch_cpu_idle_prepare(void) diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 09a8743143f3..d3e5e9bc8f94 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -55,6 +55,8 @@ struct device_node; #define EEH_PE_RECOVERING (1 << 1) /* Recovering PE */ #define EEH_PE_PHB_DEAD (1 << 2) /* Dead PHB */ +#define EEH_PE_KEEP (1 << 8) /* Keep PE on hotplug */ + struct eeh_pe { int type; /* PE type: PHB/Bus/Device */ int state; /* PE EEH dependent mode */ @@ -72,8 +74,8 @@ struct eeh_pe { struct list_head child; /* Child PEs */ }; -#define eeh_pe_for_each_dev(pe, edev) \ - list_for_each_entry(edev, 
&pe->edevs, list) +#define eeh_pe_for_each_dev(pe, edev, tmp) \ + list_for_each_entry_safe(edev, tmp, &pe->edevs, list) /* * The struct is used to trace EEH state for the associated @@ -82,7 +84,13 @@ struct eeh_pe { * another tree except the currently existing tree of PCI * buses and PCI devices */ -#define EEH_DEV_IRQ_DISABLED (1<<0) /* Interrupt disabled */ +#define EEH_DEV_BRIDGE (1 << 0) /* PCI bridge */ +#define EEH_DEV_ROOT_PORT (1 << 1) /* PCIe root port */ +#define EEH_DEV_DS_PORT (1 << 2) /* Downstream port */ +#define EEH_DEV_IRQ_DISABLED (1 << 3) /* Interrupt disabled */ +#define EEH_DEV_DISCONNECTED (1 << 4) /* Removing from PE */ + +#define EEH_DEV_SYSFS (1 << 8) /* Sysfs created */ struct eeh_dev { int mode; /* EEH mode */ @@ -90,11 +98,13 @@ struct eeh_dev { int config_addr; /* Config address */ int pe_config_addr; /* PE config address */ u32 config_space[16]; /* Saved PCI config space */ + u8 pcie_cap; /* Saved PCIe capability */ struct eeh_pe *pe; /* Associated PE */ struct list_head list; /* Form link list in the PE */ struct pci_controller *phb; /* Associated PHB */ struct device_node *dn; /* Associated device node */ struct pci_dev *pdev; /* Associated PCI device */ + struct pci_bus *bus; /* PCI bus for partial hotplug */ }; static inline struct device_node *eeh_dev_to_of_node(struct eeh_dev *edev) @@ -193,8 +203,10 @@ int eeh_phb_pe_create(struct pci_controller *phb); struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb); struct eeh_pe *eeh_pe_get(struct eeh_dev *edev); int eeh_add_to_parent_pe(struct eeh_dev *edev); -int eeh_rmv_from_parent_pe(struct eeh_dev *edev, int purge_pe); +int eeh_rmv_from_parent_pe(struct eeh_dev *edev); void eeh_pe_update_time_stamp(struct eeh_pe *pe); +void *eeh_pe_traverse(struct eeh_pe *root, + eeh_traverse_func fn, void *flag); void *eeh_pe_dev_traverse(struct eeh_pe *root, eeh_traverse_func fn, void *flag); void eeh_pe_restore_bars(struct eeh_pe *pe); @@ -209,10 +221,12 @@ unsigned long 
eeh_check_failure(const volatile void __iomem *token, unsigned long val); int eeh_dev_check_failure(struct eeh_dev *edev); void eeh_addr_cache_build(void); +void eeh_add_device_early(struct device_node *); void eeh_add_device_tree_early(struct device_node *); +void eeh_add_device_late(struct pci_dev *); void eeh_add_device_tree_late(struct pci_bus *); void eeh_add_sysfs_files(struct pci_bus *); -void eeh_remove_bus_device(struct pci_dev *, int); +void eeh_remove_device(struct pci_dev *); /** * EEH_POSSIBLE_ERROR() -- test for possible MMIO failure. @@ -252,13 +266,17 @@ static inline unsigned long eeh_check_failure(const volatile void __iomem *token static inline void eeh_addr_cache_build(void) { } +static inline void eeh_add_device_early(struct device_node *dn) { } + static inline void eeh_add_device_tree_early(struct device_node *dn) { } +static inline void eeh_add_device_late(struct pci_dev *dev) { } + static inline void eeh_add_device_tree_late(struct pci_bus *bus) { } static inline void eeh_add_sysfs_files(struct pci_bus *bus) { } -static inline void eeh_remove_bus_device(struct pci_dev *dev, int purge_pe) { } +static inline void eeh_remove_device(struct pci_dev *dev) { } #define EEH_POSSIBLE_ERROR(val, type) (0) #define EEH_IO_ERROR_VALUE(size) (-1UL) diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index ba713f166fa5..10be1dd01c6b 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -96,10 +96,11 @@ static inline bool arch_irqs_disabled(void) #endif #define hard_irq_disable() do { \ - u8 _was_enabled = get_paca()->soft_enabled; \ + u8 _was_enabled; \ __hard_irq_disable(); \ - get_paca()->soft_enabled = 0; \ - get_paca()->irq_happened |= PACA_IRQ_HARD_DIS; \ + _was_enabled = local_paca->soft_enabled; \ + local_paca->soft_enabled = 0; \ + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; \ if (_was_enabled) \ trace_hardirqs_off(); \ } while(0) diff --git a/arch/powerpc/include/asm/module.h 
b/arch/powerpc/include/asm/module.h index c1df590ec444..49fa55bfbac4 100644 --- a/arch/powerpc/include/asm/module.h +++ b/arch/powerpc/include/asm/module.h @@ -82,10 +82,9 @@ struct exception_table_entry; void sort_ex_table(struct exception_table_entry *start, struct exception_table_entry *finish); -#ifdef CONFIG_MODVERSIONS +#if defined(CONFIG_MODVERSIONS) && defined(CONFIG_PPC64) #define ARCH_RELOCATES_KCRCTAB - -extern const unsigned long reloc_start[]; +#define reloc_start PHYSICAL_START #endif #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_MODULE_H */ diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index 2c1d8cb9b265..32d0d2018faf 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -209,7 +209,6 @@ static inline struct eeh_dev *of_node_to_eeh_dev(struct device_node *dn) extern struct pci_bus *pcibios_find_pci_bus(struct device_node *dn); /** Remove all of the PCI devices under this bus */ -extern void __pcibios_remove_pci_devices(struct pci_bus *bus, int purge_pe); extern void pcibios_remove_pci_devices(struct pci_bus *bus); /** Discover new pci devices under this bus, and add them */ diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 5d7d9c2a5473..a6840e4e24f7 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -1088,7 +1088,8 @@ #define PVR_970MP 0x0044 #define PVR_970GX 0x0045 #define PVR_POWER7p 0x004A -#define PVR_POWER8 0x004B +#define PVR_POWER8E 0x004B +#define PVR_POWER8 0x004D #define PVR_BE 0x0070 #define PVR_PA6T 0x0090 diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 2a45d0f04385..22973a74df73 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -494,9 +494,27 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_restore = __restore_cpu_power7, .platform = "power7+", }, - { /* Power8 */ + { /* Power8E */ .pvr_mask = 0xffff0000, 
.pvr_value = 0x004b0000, + .cpu_name = "POWER8E (raw)", + .cpu_features = CPU_FTRS_POWER8, + .cpu_user_features = COMMON_USER_POWER8, + .cpu_user_features2 = COMMON_USER2_POWER8, + .mmu_features = MMU_FTRS_POWER8, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 6, + .pmc_type = PPC_PMC_IBM, + .oprofile_cpu_type = "ppc64/power8", + .oprofile_type = PPC_OPROFILE_INVALID, + .cpu_setup = __setup_cpu_power8, + .cpu_restore = __restore_cpu_power8, + .platform = "power8", + }, + { /* Power8 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x004d0000, .cpu_name = "POWER8 (raw)", .cpu_features = CPU_FTRS_POWER8, .cpu_user_features = COMMON_USER_POWER8, diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 39954fe941b8..ea9414c8088d 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -231,7 +231,7 @@ static size_t eeh_gather_pci_data(struct eeh_dev *edev, char * buf, size_t len) void eeh_slot_error_detail(struct eeh_pe *pe, int severity) { size_t loglen = 0; - struct eeh_dev *edev; + struct eeh_dev *edev, *tmp; bool valid_cfg_log = true; /* @@ -251,7 +251,7 @@ void eeh_slot_error_detail(struct eeh_pe *pe, int severity) eeh_pe_restore_bars(pe); pci_regs_buf[0] = 0; - eeh_pe_for_each_dev(pe, edev) { + eeh_pe_for_each_dev(pe, edev, tmp) { loglen += eeh_gather_pci_data(edev, pci_regs_buf + loglen, EEH_PCI_REGS_LOG_LEN - loglen); } @@ -499,8 +499,6 @@ unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned lon } eeh_dev_check_failure(edev); - - pci_dev_put(eeh_dev_to_pci_dev(edev)); return val; } @@ -838,7 +836,7 @@ core_initcall_sync(eeh_init); * on the CEC architecture, type of the device, on earlier boot * command-line arguments & etc. 
*/ -static void eeh_add_device_early(struct device_node *dn) +void eeh_add_device_early(struct device_node *dn) { struct pci_controller *phb; @@ -886,7 +884,7 @@ EXPORT_SYMBOL_GPL(eeh_add_device_tree_early); * This routine must be used to complete EEH initialization for PCI * devices that were added after system boot (e.g. hotplug, dlpar). */ -static void eeh_add_device_late(struct pci_dev *dev) +void eeh_add_device_late(struct pci_dev *dev) { struct device_node *dn; struct eeh_dev *edev; @@ -902,9 +900,23 @@ static void eeh_add_device_late(struct pci_dev *dev) pr_debug("EEH: Already referenced !\n"); return; } - WARN_ON(edev->pdev); - pci_dev_get(dev); + /* + * The EEH cache might not be removed correctly because of + * unbalanced kref to the device during unplug time, which + * relies on pcibios_release_device(). So we have to remove + * that here explicitly. + */ + if (edev->pdev) { + eeh_rmv_from_parent_pe(edev); + eeh_addr_cache_rmv_dev(edev->pdev); + eeh_sysfs_remove_device(edev->pdev); + edev->mode &= ~EEH_DEV_SYSFS; + + edev->pdev = NULL; + dev->dev.archdata.edev = NULL; + } + edev->pdev = dev; dev->dev.archdata.edev = edev; @@ -967,7 +979,6 @@ EXPORT_SYMBOL_GPL(eeh_add_sysfs_files); /** * eeh_remove_device - Undo EEH setup for the indicated pci device * @dev: pci device to be removed - * @purge_pe: remove the PE or not * * This routine should be called when a device is removed from * a running system (e.g. by hotplug or dlpar). It unregisters @@ -975,7 +986,7 @@ EXPORT_SYMBOL_GPL(eeh_add_sysfs_files); * this device will no longer be detected after this call; thus, * i/o errors affecting this slot may leave this device unusable. 
*/ -static void eeh_remove_device(struct pci_dev *dev, int purge_pe) +void eeh_remove_device(struct pci_dev *dev) { struct eeh_dev *edev; @@ -986,42 +997,29 @@ static void eeh_remove_device(struct pci_dev *dev, int purge_pe) /* Unregister the device with the EEH/PCI address search system */ pr_debug("EEH: Removing device %s\n", pci_name(dev)); - if (!edev || !edev->pdev) { + if (!edev || !edev->pdev || !edev->pe) { pr_debug("EEH: Not referenced !\n"); return; } + + /* + * During the hotplug for EEH error recovery, we need the EEH + * device attached to the parent PE in order for BAR restore + * a bit later. So we keep it for BAR restore and remove it + * from the parent PE during the BAR restore. + */ edev->pdev = NULL; dev->dev.archdata.edev = NULL; - pci_dev_put(dev); + if (!(edev->pe->state & EEH_PE_KEEP)) + eeh_rmv_from_parent_pe(edev); + else + edev->mode |= EEH_DEV_DISCONNECTED; - eeh_rmv_from_parent_pe(edev, purge_pe); eeh_addr_cache_rmv_dev(dev); eeh_sysfs_remove_device(dev); + edev->mode &= ~EEH_DEV_SYSFS; } -/** - * eeh_remove_bus_device - Undo EEH setup for the indicated PCI device - * @dev: PCI device - * @purge_pe: remove the corresponding PE or not - * - * This routine must be called when a device is removed from the - * running system through hotplug or dlpar. The corresponding - * PCI address cache will be removed. 
- */ -void eeh_remove_bus_device(struct pci_dev *dev, int purge_pe) -{ - struct pci_bus *bus = dev->subordinate; - struct pci_dev *child, *tmp; - - eeh_remove_device(dev, purge_pe); - - if (bus && dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { - list_for_each_entry_safe(child, tmp, &bus->devices, bus_list) - eeh_remove_bus_device(child, purge_pe); - } -} -EXPORT_SYMBOL_GPL(eeh_remove_bus_device); - static int proc_eeh_show(struct seq_file *m, void *v) { if (0 == eeh_subsystem_enabled) { diff --git a/arch/powerpc/kernel/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c index f9ac1232a746..e8c9fd546a5c 100644 --- a/arch/powerpc/kernel/eeh_cache.c +++ b/arch/powerpc/kernel/eeh_cache.c @@ -68,16 +68,12 @@ static inline struct eeh_dev *__eeh_addr_cache_get_device(unsigned long addr) struct pci_io_addr_range *piar; piar = rb_entry(n, struct pci_io_addr_range, rb_node); - if (addr < piar->addr_lo) { + if (addr < piar->addr_lo) n = n->rb_left; - } else { - if (addr > piar->addr_hi) { - n = n->rb_right; - } else { - pci_dev_get(piar->pcidev); - return piar->edev; - } - } + else if (addr > piar->addr_hi) + n = n->rb_right; + else + return piar->edev; } return NULL; @@ -156,7 +152,6 @@ eeh_addr_cache_insert(struct pci_dev *dev, unsigned long alo, if (!piar) return NULL; - pci_dev_get(dev); piar->addr_lo = alo; piar->addr_hi = ahi; piar->edev = pci_dev_to_eeh_dev(dev); @@ -250,7 +245,6 @@ restart: if (piar->pcidev == dev) { rb_erase(n, &pci_io_addr_cache_root.rb_root); - pci_dev_put(piar->pcidev); kfree(piar); goto restart; } @@ -302,12 +296,10 @@ void eeh_addr_cache_build(void) if (!edev) continue; - pci_dev_get(dev); /* matching put is in eeh_remove_device() */ dev->dev.archdata.edev = edev; edev->pdev = dev; eeh_addr_cache_insert_dev(dev); - eeh_sysfs_add_device(dev); } diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 2b1ce17cae50..36bed5a12750 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -143,10 
+143,14 @@ static void eeh_disable_irq(struct pci_dev *dev) static void eeh_enable_irq(struct pci_dev *dev) { struct eeh_dev *edev = pci_dev_to_eeh_dev(dev); + struct irq_desc *desc; if ((edev->mode) & EEH_DEV_IRQ_DISABLED) { edev->mode &= ~EEH_DEV_IRQ_DISABLED; - enable_irq(dev->irq); + + desc = irq_to_desc(dev->irq); + if (desc && desc->depth > 0) + enable_irq(dev->irq); } } @@ -338,6 +342,54 @@ static void *eeh_report_failure(void *data, void *userdata) return NULL; } +static void *eeh_rmv_device(void *data, void *userdata) +{ + struct pci_driver *driver; + struct eeh_dev *edev = (struct eeh_dev *)data; + struct pci_dev *dev = eeh_dev_to_pci_dev(edev); + int *removed = (int *)userdata; + + /* + * Actually, we should remove the PCI bridges as well. + * However, that's lots of complexity to do that, + * particularly some of devices under the bridge might + * support EEH. So we just care about PCI devices for + * simplicity here. + */ + if (!dev || (dev->hdr_type & PCI_HEADER_TYPE_BRIDGE)) + return NULL; + driver = eeh_pcid_get(dev); + if (driver && driver->err_handler) + return NULL; + + /* Remove it from PCI subsystem */ + pr_debug("EEH: Removing %s without EEH sensitive driver\n", + pci_name(dev)); + edev->bus = dev->bus; + edev->mode |= EEH_DEV_DISCONNECTED; + (*removed)++; + + pci_stop_and_remove_bus_device(dev); + + return NULL; +} + +static void *eeh_pe_detach_dev(void *data, void *userdata) +{ + struct eeh_pe *pe = (struct eeh_pe *)data; + struct eeh_dev *edev, *tmp; + + eeh_pe_for_each_dev(pe, edev, tmp) { + if (!(edev->mode & EEH_DEV_DISCONNECTED)) + continue; + + edev->mode &= ~(EEH_DEV_DISCONNECTED | EEH_DEV_IRQ_DISABLED); + eeh_rmv_from_parent_pe(edev); + } + + return NULL; +} + /** * eeh_reset_device - Perform actual reset of a pci slot * @pe: EEH PE @@ -349,8 +401,9 @@ static void *eeh_report_failure(void *data, void *userdata) */ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) { + struct pci_bus *frozen_bus = eeh_pe_bus_get(pe); 
struct timeval tstamp; - int cnt, rc; + int cnt, rc, removed = 0; /* pcibios will clear the counter; save the value */ cnt = pe->freeze_count; @@ -362,8 +415,11 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) * devices are expected to be attached soon when calling * into pcibios_add_pci_devices(). */ + eeh_pe_state_mark(pe, EEH_PE_KEEP); if (bus) - __pcibios_remove_pci_devices(bus, 0); + pcibios_remove_pci_devices(bus); + else if (frozen_bus) + eeh_pe_dev_traverse(pe, eeh_rmv_device, &removed); /* Reset the pci controller. (Asserts RST#; resets config space). * Reconfigure bridges and devices. Don't try to bring the system @@ -384,9 +440,24 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) * potentially weird things happen. */ if (bus) { + pr_info("EEH: Sleep 5s ahead of complete hotplug\n"); ssleep(5); + + /* + * The EEH device is still connected with its parent + * PE. We should disconnect it so the binding can be + * rebuilt when adding PCI devices. + */ + eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL); pcibios_add_pci_devices(bus); + } else if (frozen_bus && removed) { + pr_info("EEH: Sleep 5s ahead of partial hotplug\n"); + ssleep(5); + + eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL); + pcibios_add_pci_devices(frozen_bus); } + eeh_pe_state_clear(pe, EEH_PE_KEEP); pe->tstamp = tstamp; pe->freeze_count = cnt; diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index 016588a6f5ed..f9450537e335 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -149,8 +149,8 @@ static struct eeh_pe *eeh_pe_next(struct eeh_pe *pe, * callback returns something other than NULL, or no more PEs * to be traversed. 
*/ -static void *eeh_pe_traverse(struct eeh_pe *root, - eeh_traverse_func fn, void *flag) +void *eeh_pe_traverse(struct eeh_pe *root, + eeh_traverse_func fn, void *flag) { struct eeh_pe *pe; void *ret; @@ -176,7 +176,7 @@ void *eeh_pe_dev_traverse(struct eeh_pe *root, eeh_traverse_func fn, void *flag) { struct eeh_pe *pe; - struct eeh_dev *edev; + struct eeh_dev *edev, *tmp; void *ret; if (!root) { @@ -186,7 +186,7 @@ void *eeh_pe_dev_traverse(struct eeh_pe *root, /* Traverse root PE */ for (pe = root; pe; pe = eeh_pe_next(pe, root)) { - eeh_pe_for_each_dev(pe, edev) { + eeh_pe_for_each_dev(pe, edev, tmp) { ret = fn(edev, flag); if (ret) return ret; @@ -333,7 +333,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) while (parent) { if (!(parent->type & EEH_PE_INVALID)) break; - parent->type &= ~EEH_PE_INVALID; + parent->type &= ~(EEH_PE_INVALID | EEH_PE_KEEP); parent = parent->parent; } pr_debug("EEH: Add %s to Device PE#%x, Parent PE#%x\n", @@ -397,21 +397,20 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) /** * eeh_rmv_from_parent_pe - Remove one EEH device from the associated PE * @edev: EEH device - * @purge_pe: remove PE or not * * The PE hierarchy tree might be changed when doing PCI hotplug. * Also, the PCI devices or buses could be removed from the system * during EEH recovery. So we have to call the function remove the * corresponding PE accordingly if necessary. 
*/ -int eeh_rmv_from_parent_pe(struct eeh_dev *edev, int purge_pe) +int eeh_rmv_from_parent_pe(struct eeh_dev *edev) { struct eeh_pe *pe, *parent, *child; int cnt; if (!edev->pe) { - pr_warning("%s: No PE found for EEH device %s\n", - __func__, edev->dn->full_name); + pr_debug("%s: No PE found for EEH device %s\n", + __func__, edev->dn->full_name); return -EEXIST; } @@ -431,7 +430,7 @@ int eeh_rmv_from_parent_pe(struct eeh_dev *edev, int purge_pe) if (pe->type & EEH_PE_PHB) break; - if (purge_pe) { + if (!(pe->state & EEH_PE_KEEP)) { if (list_empty(&pe->edevs) && list_empty(&pe->child_list)) { list_del(&pe->child); @@ -502,7 +501,7 @@ static void *__eeh_pe_state_mark(void *data, void *flag) { struct eeh_pe *pe = (struct eeh_pe *)data; int state = *((int *)flag); - struct eeh_dev *tmp; + struct eeh_dev *edev, *tmp; struct pci_dev *pdev; /* @@ -512,8 +511,8 @@ static void *__eeh_pe_state_mark(void *data, void *flag) * the PCI device driver. */ pe->state |= state; - eeh_pe_for_each_dev(pe, tmp) { - pdev = eeh_dev_to_pci_dev(tmp); + eeh_pe_for_each_dev(pe, edev, tmp) { + pdev = eeh_dev_to_pci_dev(edev); if (pdev) pdev->error_state = pci_channel_io_frozen; } @@ -579,7 +578,7 @@ void eeh_pe_state_clear(struct eeh_pe *pe, int state) * blocked on normal path during the stage. So we need utilize * eeh operations, which is always permitted. 
*/ -static void eeh_bridge_check_link(struct pci_dev *pdev, +static void eeh_bridge_check_link(struct eeh_dev *edev, struct device_node *dn) { int cap; @@ -590,16 +589,17 @@ static void eeh_bridge_check_link(struct pci_dev *pdev, * We only check root port and downstream ports of * PCIe switches */ - if (!pci_is_pcie(pdev) || - (pci_pcie_type(pdev) != PCI_EXP_TYPE_ROOT_PORT && - pci_pcie_type(pdev) != PCI_EXP_TYPE_DOWNSTREAM)) + if (!(edev->mode & (EEH_DEV_ROOT_PORT | EEH_DEV_DS_PORT))) return; - pr_debug("%s: Check PCIe link for %s ...\n", - __func__, pci_name(pdev)); + pr_debug("%s: Check PCIe link for %04x:%02x:%02x.%01x ...\n", + __func__, edev->phb->global_number, + edev->config_addr >> 8, + PCI_SLOT(edev->config_addr & 0xFF), + PCI_FUNC(edev->config_addr & 0xFF)); /* Check slot status */ - cap = pdev->pcie_cap; + cap = edev->pcie_cap; eeh_ops->read_config(dn, cap + PCI_EXP_SLTSTA, 2, &val); if (!(val & PCI_EXP_SLTSTA_PDS)) { pr_debug(" No card in the slot (0x%04x) !\n", val); @@ -653,8 +653,7 @@ static void eeh_bridge_check_link(struct pci_dev *pdev, #define BYTE_SWAP(OFF) (8*((OFF)/4)+3-(OFF)) #define SAVED_BYTE(OFF) (((u8 *)(edev->config_space))[BYTE_SWAP(OFF)]) -static void eeh_restore_bridge_bars(struct pci_dev *pdev, - struct eeh_dev *edev, +static void eeh_restore_bridge_bars(struct eeh_dev *edev, struct device_node *dn) { int i; @@ -680,7 +679,7 @@ static void eeh_restore_bridge_bars(struct pci_dev *pdev, eeh_ops->write_config(dn, PCI_COMMAND, 4, edev->config_space[1]); /* Check the PCIe link is ready */ - eeh_bridge_check_link(pdev, dn); + eeh_bridge_check_link(edev, dn); } static void eeh_restore_device_bars(struct eeh_dev *edev, @@ -729,19 +728,12 @@ static void eeh_restore_device_bars(struct eeh_dev *edev, */ static void *eeh_restore_one_device_bars(void *data, void *flag) { - struct pci_dev *pdev = NULL; struct eeh_dev *edev = (struct eeh_dev *)data; struct device_node *dn = eeh_dev_to_of_node(edev); - /* Trace the PCI bridge */ - if 
(eeh_probe_mode_dev()) { - pdev = eeh_dev_to_pci_dev(edev); - if (pdev->hdr_type != PCI_HEADER_TYPE_BRIDGE) - pdev = NULL; - } - - if (pdev) - eeh_restore_bridge_bars(pdev, edev, dn); + /* Do special restore for bridges */ + if (edev->mode & EEH_DEV_BRIDGE) + eeh_restore_bridge_bars(edev, dn); else eeh_restore_device_bars(edev, dn); diff --git a/arch/powerpc/kernel/eeh_sysfs.c b/arch/powerpc/kernel/eeh_sysfs.c index e7ae3484918c..5d753d4f2c75 100644 --- a/arch/powerpc/kernel/eeh_sysfs.c +++ b/arch/powerpc/kernel/eeh_sysfs.c @@ -56,19 +56,40 @@ EEH_SHOW_ATTR(eeh_pe_config_addr, pe_config_addr, "0x%x"); void eeh_sysfs_add_device(struct pci_dev *pdev) { + struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev); int rc=0; + if (edev && (edev->mode & EEH_DEV_SYSFS)) + return; + rc += device_create_file(&pdev->dev, &dev_attr_eeh_mode); rc += device_create_file(&pdev->dev, &dev_attr_eeh_config_addr); rc += device_create_file(&pdev->dev, &dev_attr_eeh_pe_config_addr); if (rc) printk(KERN_WARNING "EEH: Unable to create sysfs entries\n"); + else if (edev) + edev->mode |= EEH_DEV_SYSFS; } void eeh_sysfs_remove_device(struct pci_dev *pdev) { + struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev); + + /* + * The parent directory might have been removed. We needn't + * continue for that case. 
+ */ + if (!pdev->dev.kobj.sd) { + if (edev) + edev->mode &= ~EEH_DEV_SYSFS; + return; + } + device_remove_file(&pdev->dev, &dev_attr_eeh_mode); device_remove_file(&pdev->dev, &dev_attr_eeh_config_addr); device_remove_file(&pdev->dev, &dev_attr_eeh_pe_config_addr); + + if (edev) + edev->mode &= ~EEH_DEV_SYSFS; } diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index f46914a0f33e..7d22a675fe1a 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -1462,6 +1462,8 @@ void pcibios_finish_adding_to_bus(struct pci_bus *bus) /* Allocate bus and devices resources */ pcibios_allocate_bus_resources(bus); pcibios_claim_one_bus(bus); + if (!pci_has_flag(PCI_PROBE_ONLY)) + pci_assign_unassigned_bus_resources(bus); /* Fixup EEH */ eeh_add_device_tree_late(bus); diff --git a/arch/powerpc/kernel/pci-hotplug.c b/arch/powerpc/kernel/pci-hotplug.c index 3f608800c06b..c1e17ae68a08 100644 --- a/arch/powerpc/kernel/pci-hotplug.c +++ b/arch/powerpc/kernel/pci-hotplug.c @@ -22,45 +22,40 @@ #include <asm/eeh.h> /** - * __pcibios_remove_pci_devices - remove all devices under this bus + * pcibios_release_device - release PCI device + * @dev: PCI device + * + * The function is called before releasing the indicated PCI device. + */ +void pcibios_release_device(struct pci_dev *dev) +{ + eeh_remove_device(dev); +} + +/** + * pcibios_remove_pci_devices - remove all devices under this bus * @bus: the indicated PCI bus - * @purge_pe: destroy the PE on removal of PCI devices * * Remove all of the PCI devices under this bus both from the * linux pci device tree, and from the powerpc EEH address cache. - * By default, the corresponding PE will be destroied during the - * normal PCI hotplug path. For PCI hotplug during EEH recovery, - * the corresponding PE won't be destroied and deallocated. 
*/ -void __pcibios_remove_pci_devices(struct pci_bus *bus, int purge_pe) +void pcibios_remove_pci_devices(struct pci_bus *bus) { struct pci_dev *dev, *tmp; struct pci_bus *child_bus; /* First go down child busses */ list_for_each_entry(child_bus, &bus->children, node) - __pcibios_remove_pci_devices(child_bus, purge_pe); + pcibios_remove_pci_devices(child_bus); pr_debug("PCI: Removing devices on bus %04x:%02x\n", pci_domain_nr(bus), bus->number); list_for_each_entry_safe(dev, tmp, &bus->devices, bus_list) { - pr_debug(" * Removing %s...\n", pci_name(dev)); - eeh_remove_bus_device(dev, purge_pe); + pr_debug(" Removing %s...\n", pci_name(dev)); pci_stop_and_remove_bus_device(dev); } } -/** - * pcibios_remove_pci_devices - remove all devices under this bus - * @bus: the indicated PCI bus - * - * Remove all of the PCI devices under this bus both from the - * linux pci device tree, and from the powerpc EEH address cache. - */ -void pcibios_remove_pci_devices(struct pci_bus *bus) -{ - __pcibios_remove_pci_devices(bus, 1); -} EXPORT_SYMBOL_GPL(pcibios_remove_pci_devices); /** @@ -76,7 +71,7 @@ EXPORT_SYMBOL_GPL(pcibios_remove_pci_devices); */ void pcibios_add_pci_devices(struct pci_bus * bus) { - int slotno, num, mode, pass, max; + int slotno, mode, pass, max; struct pci_dev *dev; struct device_node *dn = pci_bus_to_OF_node(bus); @@ -90,11 +85,15 @@ void pcibios_add_pci_devices(struct pci_bus * bus) /* use ofdt-based probe */ of_rescan_bus(dn, bus); } else if (mode == PCI_PROBE_NORMAL) { - /* use legacy probe */ + /* + * Use legacy probe. In the partial hotplug case, we + * probably have grandchildren devices unplugged. So + * we don't check the return value from pci_scan_slot() in + * order for fully rescan all the way down to pick them up. + * They can have been removed during partial hotplug. 
+ */ slotno = PCI_SLOT(PCI_DN(dn->child)->devfn); - num = pci_scan_slot(bus, PCI_DEVFN(slotno, 0)); - if (!num) - return; + pci_scan_slot(bus, PCI_DEVFN(slotno, 0)); pcibios_setup_bus_devices(bus); max = bus->busn_res.start; for (pass = 0; pass < 2; pass++) { diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c index 6b0ba5854d99..15d9105323bf 100644 --- a/arch/powerpc/kernel/pci_of_scan.c +++ b/arch/powerpc/kernel/pci_of_scan.c @@ -230,11 +230,14 @@ void of_scan_pci_bridge(struct pci_dev *dev) return; } - bus = pci_add_new_bus(dev->bus, dev, busrange[0]); + bus = pci_find_bus(pci_domain_nr(dev->bus), busrange[0]); if (!bus) { - printk(KERN_ERR "Failed to create pci bus for %s\n", - node->full_name); - return; + bus = pci_add_new_bus(dev->bus, dev, busrange[0]); + if (!bus) { + printk(KERN_ERR "Failed to create pci bus for %s\n", + node->full_name); + return; + } } bus->primary = dev->bus->number; @@ -292,6 +295,38 @@ void of_scan_pci_bridge(struct pci_dev *dev) } EXPORT_SYMBOL(of_scan_pci_bridge); +static struct pci_dev *of_scan_pci_dev(struct pci_bus *bus, + struct device_node *dn) +{ + struct pci_dev *dev = NULL; + const u32 *reg; + int reglen, devfn; + + pr_debug(" * %s\n", dn->full_name); + if (!of_device_is_available(dn)) + return NULL; + + reg = of_get_property(dn, "reg", ®len); + if (reg == NULL || reglen < 20) + return NULL; + devfn = (reg[0] >> 8) & 0xff; + + /* Check if the PCI device is already there */ + dev = pci_get_slot(bus, devfn); + if (dev) { + pci_dev_put(dev); + return dev; + } + + /* create a new pci_dev for this device */ + dev = of_create_pci_dev(dn, bus, devfn); + if (!dev) + return NULL; + + pr_debug(" dev header type: %x\n", dev->hdr_type); + return dev; +} + /** * __of_scan_bus - given a PCI bus node, setup bus and scan for child devices * @node: device tree node for the PCI bus @@ -302,8 +337,6 @@ static void __of_scan_bus(struct device_node *node, struct pci_bus *bus, int rescan_existing) { struct 
device_node *child; - const u32 *reg; - int reglen, devfn; struct pci_dev *dev; pr_debug("of_scan_bus(%s) bus no %d...\n", @@ -311,16 +344,7 @@ static void __of_scan_bus(struct device_node *node, struct pci_bus *bus, /* Scan direct children */ for_each_child_of_node(node, child) { - pr_debug(" * %s\n", child->full_name); - if (!of_device_is_available(child)) - continue; - reg = of_get_property(child, "reg", ®len); - if (reg == NULL || reglen < 20) - continue; - devfn = (reg[0] >> 8) & 0xff; - - /* create a new pci_dev for this device */ - dev = of_create_pci_dev(child, bus, devfn); + dev = of_scan_pci_dev(bus, child); if (!dev) continue; pr_debug(" dev header type: %x\n", dev->hdr_type); diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 5eccda9fd33f..607902424e73 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -644,7 +644,8 @@ unsigned char ibm_architecture_vec[] = { W(0xfffe0000), W(0x003a0000), /* POWER5/POWER5+ */ W(0xffff0000), W(0x003e0000), /* POWER6 */ W(0xffff0000), W(0x003f0000), /* POWER7 */ - W(0xffff0000), W(0x004b0000), /* POWER8 */ + W(0xffff0000), W(0x004b0000), /* POWER8E */ + W(0xffff0000), W(0x004d0000), /* POWER8 */ W(0xffffffff), W(0x0f000004), /* all 2.07-compliant */ W(0xffffffff), W(0x0f000003), /* all 2.06-compliant */ W(0xffffffff), W(0x0f000002), /* all 2.05-compliant */ @@ -706,7 +707,7 @@ unsigned char ibm_architecture_vec[] = { * must match by the macro below. Update the definition if * the structure layout changes. */ -#define IBM_ARCH_VEC_NRCORES_OFFSET 117 +#define IBM_ARCH_VEC_NRCORES_OFFSET 125 W(NR_CPUS), /* number of cores supported */ 0, 0, diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 654e479802f2..f096e72262f4 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -38,9 +38,6 @@ jiffies = jiffies_64 + 4; #endif SECTIONS { - . = 0; - reloc_start = .; - . 
= KERNELBASE; /* diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index 3f0c30ae4791..c33d939120c9 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -43,6 +43,7 @@ static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize) { unsigned long va; unsigned int penc; + unsigned long sllp; /* * We need 14 to 65 bits of va for a tlibe of 4K page @@ -64,7 +65,9 @@ static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize) /* clear out bits after (52) [0....52.....63] */ va &= ~((1ul << (64 - 52)) - 1); va |= ssize << 8; - va |= mmu_psize_defs[apsize].sllp << 6; + sllp = ((mmu_psize_defs[apsize].sllp & SLB_VSID_L) >> 6) | + ((mmu_psize_defs[apsize].sllp & SLB_VSID_LP) >> 4); + va |= sllp << 5; asm volatile(ASM_FTR_IFCLR("tlbie %0,0", PPC_TLBIE(%1,%0), %2) : : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206) : "memory"); @@ -98,6 +101,7 @@ static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize) { unsigned long va; unsigned int penc; + unsigned long sllp; /* VPN_SHIFT can be atmost 12 */ va = vpn << VPN_SHIFT; @@ -113,7 +117,9 @@ static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize) /* clear out bits after(52) [0....52.....63] */ va &= ~((1ul << (64 - 52)) - 1); va |= ssize << 8; - va |= mmu_psize_defs[apsize].sllp << 6; + sllp = ((mmu_psize_defs[apsize].sllp & SLB_VSID_L) >> 6) | + ((mmu_psize_defs[apsize].sllp & SLB_VSID_LP) >> 4); + va |= sllp << 5; asm volatile(".long 0x7c000224 | (%0 << 11) | (0 << 21)" : : "r"(va) : "memory"); break; @@ -554,6 +560,7 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot, seg_off |= vpi << shift; } *vpn = vsid << (SID_SHIFT - VPN_SHIFT) | seg_off >> VPN_SHIFT; + break; case MMU_SEGSIZE_1T: /* We only have 40 - 23 bits of seg_off in avpn */ seg_off = (avpn & 0x1ffff) << 23; @@ -563,6 +570,7 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot, seg_off |= 
vpi << shift; } *vpn = vsid << (SID_SHIFT_1T - VPN_SHIFT) | seg_off >> VPN_SHIFT; + break; default: *vpn = size = 0; } diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index a3985aee77fe..24a45f91c65f 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -1252,8 +1252,11 @@ nocheck: ret = 0; out: - if (has_branch_stack(event)) + if (has_branch_stack(event)) { power_pmu_bhrb_enable(event); + cpuhw->bhrb_filter = ppmu->bhrb_filter_map( + event->attr.branch_sample_type); + } perf_pmu_enable(event->pmu); local_irq_restore(flags); diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c index 96a64d6a8bdf..7466374d2787 100644 --- a/arch/powerpc/perf/power8-pmu.c +++ b/arch/powerpc/perf/power8-pmu.c @@ -561,18 +561,13 @@ static int power8_generic_events[] = { static u64 power8_bhrb_filter_map(u64 branch_sample_type) { u64 pmu_bhrb_filter = 0; - u64 br_privilege = branch_sample_type & ONLY_PLM; - /* BHRB and regular PMU events share the same prvillege state + /* BHRB and regular PMU events share the same privilege state * filter configuration. BHRB is always recorded along with a - * regular PMU event. So privilege state filter criteria for BHRB - * and the companion PMU events has to be the same. As a default - * "perf record" tool sets all privillege bits ON when no filter - * criteria is provided in the command line. So as along as all - * privillege bits are ON or they are OFF, we are good to go. + * regular PMU event. As the privilege state filter is handled + * in the basic PMC configuration of the accompanying regular + * PMU event, we ignore any separate BHRB specific request. 
*/ - if ((br_privilege != 7) && (br_privilege != 0)) - return -1; /* No branch filter requested */ if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY) @@ -621,10 +616,19 @@ static struct power_pmu power8_pmu = { static int __init init_power8_pmu(void) { + int rc; + if (!cur_cpu_spec->oprofile_cpu_type || strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power8")) return -ENODEV; - return register_power_pmu(&power8_pmu); + rc = register_power_pmu(&power8_pmu); + if (rc) + return rc; + + /* Tell userspace that EBB is supported */ + cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_EBB; + + return 0; } early_initcall(init_power8_pmu); diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 969cce73055a..79663d26e6ea 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -114,7 +114,7 @@ static int powernv_eeh_dev_probe(struct pci_dev *dev, void *flag) * the root bridge. So it's not reasonable to continue * the probing. 
*/ - if (!dn || !edev) + if (!dn || !edev || edev->pe) return 0; /* Skip for PCI-ISA bridge */ @@ -122,8 +122,19 @@ static int powernv_eeh_dev_probe(struct pci_dev *dev, void *flag) return 0; /* Initialize eeh device */ - edev->class_code = dev->class; - edev->mode = 0; + edev->class_code = dev->class; + edev->mode &= 0xFFFFFF00; + if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) + edev->mode |= EEH_DEV_BRIDGE; + if (pci_is_pcie(dev)) { + edev->pcie_cap = pci_pcie_cap(dev); + + if (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT) + edev->mode |= EEH_DEV_ROOT_PORT; + else if (pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM) + edev->mode |= EEH_DEV_DS_PORT; + } + edev->config_addr = ((dev->bus->number << 8) | dev->devfn); edev->pe_config_addr = phb->bdfn_to_pe(phb, dev->bus, dev->devfn & 0xff); diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 49b57b9f835d..d8140b125e62 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1266,7 +1266,7 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np, opal_pci_set_pe(phb_id, 0, 0, 7, 1, 1 , OPAL_MAP_PE); } -void pnv_pci_init_ioda2_phb(struct device_node *np) +void __init pnv_pci_init_ioda2_phb(struct device_node *np) { pnv_pci_init_ioda_phb(np, 0, PNV_PHB_IODA2); } diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig index 1bd3399146ed..62b4f8025de0 100644 --- a/arch/powerpc/platforms/pseries/Kconfig +++ b/arch/powerpc/platforms/pseries/Kconfig @@ -19,7 +19,6 @@ config PPC_PSERIES select ZLIB_DEFLATE select PPC_DOORBELL select HAVE_CONTEXT_TRACKING - select HOTPLUG if SMP select HOTPLUG_CPU if SMP default y diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index b456b157d33d..7fbc25b1813f 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -133,6 +133,48 @@ static int 
pseries_eeh_init(void) return 0; } +static int pseries_eeh_cap_start(struct device_node *dn) +{ + struct pci_dn *pdn = PCI_DN(dn); + u32 status; + + if (!pdn) + return 0; + + rtas_read_config(pdn, PCI_STATUS, 2, &status); + if (!(status & PCI_STATUS_CAP_LIST)) + return 0; + + return PCI_CAPABILITY_LIST; +} + + +static int pseries_eeh_find_cap(struct device_node *dn, int cap) +{ + struct pci_dn *pdn = PCI_DN(dn); + int pos = pseries_eeh_cap_start(dn); + int cnt = 48; /* Maximal number of capabilities */ + u32 id; + + if (!pos) + return 0; + + while (cnt--) { + rtas_read_config(pdn, pos, 1, &pos); + if (pos < 0x40) + break; + pos &= ~3; + rtas_read_config(pdn, pos + PCI_CAP_LIST_ID, 1, &id); + if (id == 0xff) + break; + if (id == cap) + return pos; + pos += PCI_CAP_LIST_NEXT; + } + + return 0; +} + /** * pseries_eeh_of_probe - EEH probe on the given device * @dn: OF node @@ -146,14 +188,16 @@ static void *pseries_eeh_of_probe(struct device_node *dn, void *flag) { struct eeh_dev *edev; struct eeh_pe pe; + struct pci_dn *pdn = PCI_DN(dn); const u32 *class_code, *vendor_id, *device_id; const u32 *regs; + u32 pcie_flags; int enable = 0; int ret; /* Retrieve OF node and eeh device */ edev = of_node_to_eeh_dev(dn); - if (!of_device_is_available(dn)) + if (edev->pe || !of_device_is_available(dn)) return NULL; /* Retrieve class/vendor/device IDs */ @@ -167,9 +211,26 @@ static void *pseries_eeh_of_probe(struct device_node *dn, void *flag) if (dn->type && !strcmp(dn->type, "isa")) return NULL; - /* Update class code and mode of eeh device */ + /* + * Update class code and mode of eeh device. We need + * correctly reflects that current device is root port + * or PCIe switch downstream port. 
+ */ edev->class_code = *class_code; - edev->mode = 0; + edev->pcie_cap = pseries_eeh_find_cap(dn, PCI_CAP_ID_EXP); + edev->mode &= 0xFFFFFF00; + if ((edev->class_code >> 8) == PCI_CLASS_BRIDGE_PCI) { + edev->mode |= EEH_DEV_BRIDGE; + if (edev->pcie_cap) { + rtas_read_config(pdn, edev->pcie_cap + PCI_EXP_FLAGS, + 2, &pcie_flags); + pcie_flags = (pcie_flags & PCI_EXP_FLAGS_TYPE) >> 4; + if (pcie_flags == PCI_EXP_TYPE_ROOT_PORT) + edev->mode |= EEH_DEV_ROOT_PORT; + else if (pcie_flags == PCI_EXP_TYPE_DOWNSTREAM) + edev->mode |= EEH_DEV_DS_PORT; + } + } /* Retrieve the device address */ regs = of_get_property(dn, "reg", NULL); diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 02d6e21619bb..8bad880bd177 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -146,7 +146,7 @@ static long pSeries_lpar_hpte_insert(unsigned long hpte_group, flags = 0; /* Make pHyp happy */ - if ((rflags & _PAGE_NO_CACHE) & !(rflags & _PAGE_WRITETHRU)) + if ((rflags & _PAGE_NO_CACHE) && !(rflags & _PAGE_WRITETHRU)) hpte_r &= ~_PAGE_COHERENT; if (firmware_has_feature(FW_FEATURE_XCMO) && !(hpte_r & HPTE_R_N)) flags |= H_COALESCE_CAND; diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 7b3cbde8c783..721c0586b284 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -287,6 +287,9 @@ static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs) unsigned long *savep; struct rtas_error_log *h, *errhdr = NULL; + /* Mask top two bits */ + regs->gpr[3] &= ~(0x3UL << 62); + if (!VALID_FWNMI_BUFFER(regs->gpr[3])) { printk(KERN_ERR "FWNMI: corrupt r3 0x%016lx\n", regs->gpr[3]); return NULL; diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index 7d6ba9db1be9..6c63c358a7e6 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -27,7 +27,6 @@ obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o 
obj-$(CONFIG_CRYPTO_CRC32_PCLMUL) += crc32-pclmul.o obj-$(CONFIG_CRYPTO_SHA256_SSSE3) += sha256-ssse3.o obj-$(CONFIG_CRYPTO_SHA512_SSSE3) += sha512-ssse3.o -obj-$(CONFIG_CRYPTO_CRCT10DIF_PCLMUL) += crct10dif-pclmul.o # These modules require assembler to support AVX. ifeq ($(avx_supported),yes) @@ -82,4 +81,3 @@ crc32c-intel-$(CONFIG_64BIT) += crc32c-pcl-intel-asm_64.o crc32-pclmul-y := crc32-pclmul_asm.o crc32-pclmul_glue.o sha256-ssse3-y := sha256-ssse3-asm.o sha256-avx-asm.o sha256-avx2-asm.o sha256_ssse3_glue.o sha512-ssse3-y := sha512-ssse3-asm.o sha512-avx-asm.o sha512-avx2-asm.o sha512_ssse3_glue.o -crct10dif-pclmul-y := crct10dif-pcl-asm_64.o crct10dif-pclmul_glue.o diff --git a/arch/x86/crypto/crct10dif-pcl-asm_64.S b/arch/x86/crypto/crct10dif-pcl-asm_64.S deleted file mode 100644 index 35e97569d05f..000000000000 --- a/arch/x86/crypto/crct10dif-pcl-asm_64.S +++ /dev/null @@ -1,643 +0,0 @@ -######################################################################## -# Implement fast CRC-T10DIF computation with SSE and PCLMULQDQ instructions -# -# Copyright (c) 2013, Intel Corporation -# -# Authors: -# Erdinc Ozturk <erdinc.ozturk@intel.com> -# Vinodh Gopal <vinodh.gopal@intel.com> -# James Guilford <james.guilford@intel.com> -# Tim Chen <tim.c.chen@linux.intel.com> -# -# This software is available to you under a choice of one of two -# licenses. You may choose to be licensed under the terms of the GNU -# General Public License (GPL) Version 2, available from the file -# COPYING in the main directory of this source tree, or the -# OpenIB.org BSD license below: -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. 
-# -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the -# distribution. -# -# * Neither the name of the Intel Corporation nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# -# THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-######################################################################## -# Function API: -# UINT16 crc_t10dif_pcl( -# UINT16 init_crc, //initial CRC value, 16 bits -# const unsigned char *buf, //buffer pointer to calculate CRC on -# UINT64 len //buffer length in bytes (64-bit data) -# ); -# -# Reference paper titled "Fast CRC Computation for Generic -# Polynomials Using PCLMULQDQ Instruction" -# URL: http://www.intel.com/content/dam/www/public/us/en/documents -# /white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf -# -# - -#include <linux/linkage.h> - -.text - -#define arg1 %rdi -#define arg2 %rsi -#define arg3 %rdx - -#define arg1_low32 %edi - -ENTRY(crc_t10dif_pcl) -.align 16 - - # adjust the 16-bit initial_crc value, scale it to 32 bits - shl $16, arg1_low32 - - # Allocate Stack Space - mov %rsp, %rcx - sub $16*2, %rsp - # align stack to 16 byte boundary - and $~(0x10 - 1), %rsp - - # check if smaller than 256 - cmp $256, arg3 - - # for sizes less than 128, we can't fold 64B at a time... - jl _less_than_128 - - - # load the initial crc value - movd arg1_low32, %xmm10 # initial crc - - # crc value does not need to be byte-reflected, but it needs - # to be moved to the high part of the register. - # because data will be byte-reflected and will align with - # initial crc at correct place. 
- pslldq $12, %xmm10 - - movdqa SHUF_MASK(%rip), %xmm11 - # receive the initial 64B data, xor the initial crc value - movdqu 16*0(arg2), %xmm0 - movdqu 16*1(arg2), %xmm1 - movdqu 16*2(arg2), %xmm2 - movdqu 16*3(arg2), %xmm3 - movdqu 16*4(arg2), %xmm4 - movdqu 16*5(arg2), %xmm5 - movdqu 16*6(arg2), %xmm6 - movdqu 16*7(arg2), %xmm7 - - pshufb %xmm11, %xmm0 - # XOR the initial_crc value - pxor %xmm10, %xmm0 - pshufb %xmm11, %xmm1 - pshufb %xmm11, %xmm2 - pshufb %xmm11, %xmm3 - pshufb %xmm11, %xmm4 - pshufb %xmm11, %xmm5 - pshufb %xmm11, %xmm6 - pshufb %xmm11, %xmm7 - - movdqa rk3(%rip), %xmm10 #xmm10 has rk3 and rk4 - #imm value of pclmulqdq instruction - #will determine which constant to use - - ################################################################# - # we subtract 256 instead of 128 to save one instruction from the loop - sub $256, arg3 - - # at this section of the code, there is 64*x+y (0<=y<64) bytes of - # buffer. The _fold_64_B_loop will fold 64B at a time - # until we have 64+y Bytes of buffer - - - # fold 64B at a time. 
This section of the code folds 4 xmm - # registers in parallel -_fold_64_B_loop: - - # update the buffer pointer - add $128, arg2 # buf += 64# - - movdqu 16*0(arg2), %xmm9 - movdqu 16*1(arg2), %xmm12 - pshufb %xmm11, %xmm9 - pshufb %xmm11, %xmm12 - movdqa %xmm0, %xmm8 - movdqa %xmm1, %xmm13 - pclmulqdq $0x0 , %xmm10, %xmm0 - pclmulqdq $0x11, %xmm10, %xmm8 - pclmulqdq $0x0 , %xmm10, %xmm1 - pclmulqdq $0x11, %xmm10, %xmm13 - pxor %xmm9 , %xmm0 - xorps %xmm8 , %xmm0 - pxor %xmm12, %xmm1 - xorps %xmm13, %xmm1 - - movdqu 16*2(arg2), %xmm9 - movdqu 16*3(arg2), %xmm12 - pshufb %xmm11, %xmm9 - pshufb %xmm11, %xmm12 - movdqa %xmm2, %xmm8 - movdqa %xmm3, %xmm13 - pclmulqdq $0x0, %xmm10, %xmm2 - pclmulqdq $0x11, %xmm10, %xmm8 - pclmulqdq $0x0, %xmm10, %xmm3 - pclmulqdq $0x11, %xmm10, %xmm13 - pxor %xmm9 , %xmm2 - xorps %xmm8 , %xmm2 - pxor %xmm12, %xmm3 - xorps %xmm13, %xmm3 - - movdqu 16*4(arg2), %xmm9 - movdqu 16*5(arg2), %xmm12 - pshufb %xmm11, %xmm9 - pshufb %xmm11, %xmm12 - movdqa %xmm4, %xmm8 - movdqa %xmm5, %xmm13 - pclmulqdq $0x0, %xmm10, %xmm4 - pclmulqdq $0x11, %xmm10, %xmm8 - pclmulqdq $0x0, %xmm10, %xmm5 - pclmulqdq $0x11, %xmm10, %xmm13 - pxor %xmm9 , %xmm4 - xorps %xmm8 , %xmm4 - pxor %xmm12, %xmm5 - xorps %xmm13, %xmm5 - - movdqu 16*6(arg2), %xmm9 - movdqu 16*7(arg2), %xmm12 - pshufb %xmm11, %xmm9 - pshufb %xmm11, %xmm12 - movdqa %xmm6 , %xmm8 - movdqa %xmm7 , %xmm13 - pclmulqdq $0x0 , %xmm10, %xmm6 - pclmulqdq $0x11, %xmm10, %xmm8 - pclmulqdq $0x0 , %xmm10, %xmm7 - pclmulqdq $0x11, %xmm10, %xmm13 - pxor %xmm9 , %xmm6 - xorps %xmm8 , %xmm6 - pxor %xmm12, %xmm7 - xorps %xmm13, %xmm7 - - sub $128, arg3 - - # check if there is another 64B in the buffer to be able to fold - jge _fold_64_B_loop - ################################################################## - - - add $128, arg2 - # at this point, the buffer pointer is pointing at the last y Bytes - # of the buffer the 64B of folded data is in 4 of the xmm - # registers: xmm0, xmm1, xmm2, xmm3 - - - # fold the 8 
xmm registers to 1 xmm register with different constants - - movdqa rk9(%rip), %xmm10 - movdqa %xmm0, %xmm8 - pclmulqdq $0x11, %xmm10, %xmm0 - pclmulqdq $0x0 , %xmm10, %xmm8 - pxor %xmm8, %xmm7 - xorps %xmm0, %xmm7 - - movdqa rk11(%rip), %xmm10 - movdqa %xmm1, %xmm8 - pclmulqdq $0x11, %xmm10, %xmm1 - pclmulqdq $0x0 , %xmm10, %xmm8 - pxor %xmm8, %xmm7 - xorps %xmm1, %xmm7 - - movdqa rk13(%rip), %xmm10 - movdqa %xmm2, %xmm8 - pclmulqdq $0x11, %xmm10, %xmm2 - pclmulqdq $0x0 , %xmm10, %xmm8 - pxor %xmm8, %xmm7 - pxor %xmm2, %xmm7 - - movdqa rk15(%rip), %xmm10 - movdqa %xmm3, %xmm8 - pclmulqdq $0x11, %xmm10, %xmm3 - pclmulqdq $0x0 , %xmm10, %xmm8 - pxor %xmm8, %xmm7 - xorps %xmm3, %xmm7 - - movdqa rk17(%rip), %xmm10 - movdqa %xmm4, %xmm8 - pclmulqdq $0x11, %xmm10, %xmm4 - pclmulqdq $0x0 , %xmm10, %xmm8 - pxor %xmm8, %xmm7 - pxor %xmm4, %xmm7 - - movdqa rk19(%rip), %xmm10 - movdqa %xmm5, %xmm8 - pclmulqdq $0x11, %xmm10, %xmm5 - pclmulqdq $0x0 , %xmm10, %xmm8 - pxor %xmm8, %xmm7 - xorps %xmm5, %xmm7 - - movdqa rk1(%rip), %xmm10 #xmm10 has rk1 and rk2 - #imm value of pclmulqdq instruction - #will determine which constant to use - movdqa %xmm6, %xmm8 - pclmulqdq $0x11, %xmm10, %xmm6 - pclmulqdq $0x0 , %xmm10, %xmm8 - pxor %xmm8, %xmm7 - pxor %xmm6, %xmm7 - - - # instead of 64, we add 48 to the loop counter to save 1 instruction - # from the loop instead of a cmp instruction, we use the negative - # flag with the jl instruction - add $128-16, arg3 - jl _final_reduction_for_128 - - # now we have 16+y bytes left to reduce. 16 Bytes is in register xmm7 - # and the rest is in memory. 
We can fold 16 bytes at a time if y>=16 - # continue folding 16B at a time - -_16B_reduction_loop: - movdqa %xmm7, %xmm8 - pclmulqdq $0x11, %xmm10, %xmm7 - pclmulqdq $0x0 , %xmm10, %xmm8 - pxor %xmm8, %xmm7 - movdqu (arg2), %xmm0 - pshufb %xmm11, %xmm0 - pxor %xmm0 , %xmm7 - add $16, arg2 - sub $16, arg3 - # instead of a cmp instruction, we utilize the flags with the - # jge instruction equivalent of: cmp arg3, 16-16 - # check if there is any more 16B in the buffer to be able to fold - jge _16B_reduction_loop - - #now we have 16+z bytes left to reduce, where 0<= z < 16. - #first, we reduce the data in the xmm7 register - - -_final_reduction_for_128: - # check if any more data to fold. If not, compute the CRC of - # the final 128 bits - add $16, arg3 - je _128_done - - # here we are getting data that is less than 16 bytes. - # since we know that there was data before the pointer, we can - # offset the input pointer before the actual point, to receive - # exactly 16 bytes. after that the registers need to be adjusted. 
-_get_last_two_xmms: - movdqa %xmm7, %xmm2 - - movdqu -16(arg2, arg3), %xmm1 - pshufb %xmm11, %xmm1 - - # get rid of the extra data that was loaded before - # load the shift constant - lea pshufb_shf_table+16(%rip), %rax - sub arg3, %rax - movdqu (%rax), %xmm0 - - # shift xmm2 to the left by arg3 bytes - pshufb %xmm0, %xmm2 - - # shift xmm7 to the right by 16-arg3 bytes - pxor mask1(%rip), %xmm0 - pshufb %xmm0, %xmm7 - pblendvb %xmm2, %xmm1 #xmm0 is implicit - - # fold 16 Bytes - movdqa %xmm1, %xmm2 - movdqa %xmm7, %xmm8 - pclmulqdq $0x11, %xmm10, %xmm7 - pclmulqdq $0x0 , %xmm10, %xmm8 - pxor %xmm8, %xmm7 - pxor %xmm2, %xmm7 - -_128_done: - # compute crc of a 128-bit value - movdqa rk5(%rip), %xmm10 # rk5 and rk6 in xmm10 - movdqa %xmm7, %xmm0 - - #64b fold - pclmulqdq $0x1, %xmm10, %xmm7 - pslldq $8 , %xmm0 - pxor %xmm0, %xmm7 - - #32b fold - movdqa %xmm7, %xmm0 - - pand mask2(%rip), %xmm0 - - psrldq $12, %xmm7 - pclmulqdq $0x10, %xmm10, %xmm7 - pxor %xmm0, %xmm7 - - #barrett reduction -_barrett: - movdqa rk7(%rip), %xmm10 # rk7 and rk8 in xmm10 - movdqa %xmm7, %xmm0 - pclmulqdq $0x01, %xmm10, %xmm7 - pslldq $4, %xmm7 - pclmulqdq $0x11, %xmm10, %xmm7 - - pslldq $4, %xmm7 - pxor %xmm0, %xmm7 - pextrd $1, %xmm7, %eax - -_cleanup: - # scale the result back to 16 bits - shr $16, %eax - mov %rcx, %rsp - ret - -######################################################################## - -.align 16 -_less_than_128: - - # check if there is enough buffer to be able to fold 16B at a time - cmp $32, arg3 - jl _less_than_32 - movdqa SHUF_MASK(%rip), %xmm11 - - # now if there is, load the constants - movdqa rk1(%rip), %xmm10 # rk1 and rk2 in xmm10 - - movd arg1_low32, %xmm0 # get the initial crc value - pslldq $12, %xmm0 # align it to its correct place - movdqu (arg2), %xmm7 # load the plaintext - pshufb %xmm11, %xmm7 # byte-reflect the plaintext - pxor %xmm0, %xmm7 - - - # update the buffer pointer - add $16, arg2 - - # update the counter. 
subtract 32 instead of 16 to save one - # instruction from the loop - sub $32, arg3 - - jmp _16B_reduction_loop - - -.align 16 -_less_than_32: - # mov initial crc to the return value. this is necessary for - # zero-length buffers. - mov arg1_low32, %eax - test arg3, arg3 - je _cleanup - - movdqa SHUF_MASK(%rip), %xmm11 - - movd arg1_low32, %xmm0 # get the initial crc value - pslldq $12, %xmm0 # align it to its correct place - - cmp $16, arg3 - je _exact_16_left - jl _less_than_16_left - - movdqu (arg2), %xmm7 # load the plaintext - pshufb %xmm11, %xmm7 # byte-reflect the plaintext - pxor %xmm0 , %xmm7 # xor the initial crc value - add $16, arg2 - sub $16, arg3 - movdqa rk1(%rip), %xmm10 # rk1 and rk2 in xmm10 - jmp _get_last_two_xmms - - -.align 16 -_less_than_16_left: - # use stack space to load data less than 16 bytes, zero-out - # the 16B in memory first. - - pxor %xmm1, %xmm1 - mov %rsp, %r11 - movdqa %xmm1, (%r11) - - cmp $4, arg3 - jl _only_less_than_4 - - # backup the counter value - mov arg3, %r9 - cmp $8, arg3 - jl _less_than_8_left - - # load 8 Bytes - mov (arg2), %rax - mov %rax, (%r11) - add $8, %r11 - sub $8, arg3 - add $8, arg2 -_less_than_8_left: - - cmp $4, arg3 - jl _less_than_4_left - - # load 4 Bytes - mov (arg2), %eax - mov %eax, (%r11) - add $4, %r11 - sub $4, arg3 - add $4, arg2 -_less_than_4_left: - - cmp $2, arg3 - jl _less_than_2_left - - # load 2 Bytes - mov (arg2), %ax - mov %ax, (%r11) - add $2, %r11 - sub $2, arg3 - add $2, arg2 -_less_than_2_left: - cmp $1, arg3 - jl _zero_left - - # load 1 Byte - mov (arg2), %al - mov %al, (%r11) -_zero_left: - movdqa (%rsp), %xmm7 - pshufb %xmm11, %xmm7 - pxor %xmm0 , %xmm7 # xor the initial crc value - - # shl r9, 4 - lea pshufb_shf_table+16(%rip), %rax - sub %r9, %rax - movdqu (%rax), %xmm0 - pxor mask1(%rip), %xmm0 - - pshufb %xmm0, %xmm7 - jmp _128_done - -.align 16 -_exact_16_left: - movdqu (arg2), %xmm7 - pshufb %xmm11, %xmm7 - pxor %xmm0 , %xmm7 # xor the initial crc value - - jmp _128_done - 
-_only_less_than_4: - cmp $3, arg3 - jl _only_less_than_3 - - # load 3 Bytes - mov (arg2), %al - mov %al, (%r11) - - mov 1(arg2), %al - mov %al, 1(%r11) - - mov 2(arg2), %al - mov %al, 2(%r11) - - movdqa (%rsp), %xmm7 - pshufb %xmm11, %xmm7 - pxor %xmm0 , %xmm7 # xor the initial crc value - - psrldq $5, %xmm7 - - jmp _barrett -_only_less_than_3: - cmp $2, arg3 - jl _only_less_than_2 - - # load 2 Bytes - mov (arg2), %al - mov %al, (%r11) - - mov 1(arg2), %al - mov %al, 1(%r11) - - movdqa (%rsp), %xmm7 - pshufb %xmm11, %xmm7 - pxor %xmm0 , %xmm7 # xor the initial crc value - - psrldq $6, %xmm7 - - jmp _barrett -_only_less_than_2: - - # load 1 Byte - mov (arg2), %al - mov %al, (%r11) - - movdqa (%rsp), %xmm7 - pshufb %xmm11, %xmm7 - pxor %xmm0 , %xmm7 # xor the initial crc value - - psrldq $7, %xmm7 - - jmp _barrett - -ENDPROC(crc_t10dif_pcl) - -.data - -# precomputed constants -# these constants are precomputed from the poly: -# 0x8bb70000 (0x8bb7 scaled to 32 bits) -.align 16 -# Q = 0x18BB70000 -# rk1 = 2^(32*3) mod Q << 32 -# rk2 = 2^(32*5) mod Q << 32 -# rk3 = 2^(32*15) mod Q << 32 -# rk4 = 2^(32*17) mod Q << 32 -# rk5 = 2^(32*3) mod Q << 32 -# rk6 = 2^(32*2) mod Q << 32 -# rk7 = floor(2^64/Q) -# rk8 = Q -rk1: -.quad 0x2d56000000000000 -rk2: -.quad 0x06df000000000000 -rk3: -.quad 0x9d9d000000000000 -rk4: -.quad 0x7cf5000000000000 -rk5: -.quad 0x2d56000000000000 -rk6: -.quad 0x1368000000000000 -rk7: -.quad 0x00000001f65a57f8 -rk8: -.quad 0x000000018bb70000 - -rk9: -.quad 0xceae000000000000 -rk10: -.quad 0xbfd6000000000000 -rk11: -.quad 0x1e16000000000000 -rk12: -.quad 0x713c000000000000 -rk13: -.quad 0xf7f9000000000000 -rk14: -.quad 0x80a6000000000000 -rk15: -.quad 0x044c000000000000 -rk16: -.quad 0xe658000000000000 -rk17: -.quad 0xad18000000000000 -rk18: -.quad 0xa497000000000000 -rk19: -.quad 0x6ee3000000000000 -rk20: -.quad 0xe7b5000000000000 - - - -mask1: -.octa 0x80808080808080808080808080808080 -mask2: -.octa 0x00000000FFFFFFFFFFFFFFFFFFFFFFFF - -SHUF_MASK: 
-.octa 0x000102030405060708090A0B0C0D0E0F - -pshufb_shf_table: -# use these values for shift constants for the pshufb instruction -# different alignments result in values as shown: -# DDQ 0x008f8e8d8c8b8a898887868584838281 # shl 15 (16-1) / shr1 -# DDQ 0x01008f8e8d8c8b8a8988878685848382 # shl 14 (16-3) / shr2 -# DDQ 0x0201008f8e8d8c8b8a89888786858483 # shl 13 (16-4) / shr3 -# DDQ 0x030201008f8e8d8c8b8a898887868584 # shl 12 (16-4) / shr4 -# DDQ 0x04030201008f8e8d8c8b8a8988878685 # shl 11 (16-5) / shr5 -# DDQ 0x0504030201008f8e8d8c8b8a89888786 # shl 10 (16-6) / shr6 -# DDQ 0x060504030201008f8e8d8c8b8a898887 # shl 9 (16-7) / shr7 -# DDQ 0x07060504030201008f8e8d8c8b8a8988 # shl 8 (16-8) / shr8 -# DDQ 0x0807060504030201008f8e8d8c8b8a89 # shl 7 (16-9) / shr9 -# DDQ 0x090807060504030201008f8e8d8c8b8a # shl 6 (16-10) / shr10 -# DDQ 0x0a090807060504030201008f8e8d8c8b # shl 5 (16-11) / shr11 -# DDQ 0x0b0a090807060504030201008f8e8d8c # shl 4 (16-12) / shr12 -# DDQ 0x0c0b0a090807060504030201008f8e8d # shl 3 (16-13) / shr13 -# DDQ 0x0d0c0b0a090807060504030201008f8e # shl 2 (16-14) / shr14 -# DDQ 0x0e0d0c0b0a090807060504030201008f # shl 1 (16-15) / shr15 -.octa 0x8f8e8d8c8b8a89888786858483828100 -.octa 0x000e0d0c0b0a09080706050403020100 diff --git a/arch/x86/crypto/crct10dif-pclmul_glue.c b/arch/x86/crypto/crct10dif-pclmul_glue.c deleted file mode 100644 index 7845d7fd54c0..000000000000 --- a/arch/x86/crypto/crct10dif-pclmul_glue.c +++ /dev/null @@ -1,151 +0,0 @@ -/* - * Cryptographic API. - * - * T10 Data Integrity Field CRC16 Crypto Transform using PCLMULQDQ Instructions - * - * Copyright (C) 2013 Intel Corporation - * Author: Tim Chen <tim.c.chen@linux.intel.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -#include <linux/types.h> -#include <linux/module.h> -#include <linux/crc-t10dif.h> -#include <crypto/internal/hash.h> -#include <linux/init.h> -#include <linux/string.h> -#include <linux/kernel.h> -#include <asm/i387.h> -#include <asm/cpufeature.h> -#include <asm/cpu_device_id.h> - -asmlinkage __u16 crc_t10dif_pcl(__u16 crc, const unsigned char *buf, - size_t len); - -struct chksum_desc_ctx { - __u16 crc; -}; - -/* - * Steps through buffer one byte at at time, calculates reflected - * crc using table. 
- */ - -static int chksum_init(struct shash_desc *desc) -{ - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - - ctx->crc = 0; - - return 0; -} - -static int chksum_update(struct shash_desc *desc, const u8 *data, - unsigned int length) -{ - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - - if (irq_fpu_usable()) { - kernel_fpu_begin(); - ctx->crc = crc_t10dif_pcl(ctx->crc, data, length); - kernel_fpu_end(); - } else - ctx->crc = crc_t10dif_generic(ctx->crc, data, length); - return 0; -} - -static int chksum_final(struct shash_desc *desc, u8 *out) -{ - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - - *(__u16 *)out = ctx->crc; - return 0; -} - -static int __chksum_finup(__u16 *crcp, const u8 *data, unsigned int len, - u8 *out) -{ - if (irq_fpu_usable()) { - kernel_fpu_begin(); - *(__u16 *)out = crc_t10dif_pcl(*crcp, data, len); - kernel_fpu_end(); - } else - *(__u16 *)out = crc_t10dif_generic(*crcp, data, len); - return 0; -} - -static int chksum_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - - return __chksum_finup(&ctx->crc, data, len, out); -} - -static int chksum_digest(struct shash_desc *desc, const u8 *data, - unsigned int length, u8 *out) -{ - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - - return __chksum_finup(&ctx->crc, data, length, out); -} - -static struct shash_alg alg = { - .digestsize = CRC_T10DIF_DIGEST_SIZE, - .init = chksum_init, - .update = chksum_update, - .final = chksum_final, - .finup = chksum_finup, - .digest = chksum_digest, - .descsize = sizeof(struct chksum_desc_ctx), - .base = { - .cra_name = "crct10dif", - .cra_driver_name = "crct10dif-pclmul", - .cra_priority = 200, - .cra_blocksize = CRC_T10DIF_BLOCK_SIZE, - .cra_module = THIS_MODULE, - } -}; - -static const struct x86_cpu_id crct10dif_cpu_id[] = { - X86_FEATURE_MATCH(X86_FEATURE_PCLMULQDQ), - {} -}; -MODULE_DEVICE_TABLE(x86cpu, crct10dif_cpu_id); - -static int __init 
crct10dif_intel_mod_init(void) -{ - if (!x86_match_cpu(crct10dif_cpu_id)) - return -ENODEV; - - return crypto_register_shash(&alg); -} - -static void __exit crct10dif_intel_mod_fini(void) -{ - crypto_unregister_shash(&alg); -} - -module_init(crct10dif_intel_mod_init); -module_exit(crct10dif_intel_mod_fini); - -MODULE_AUTHOR("Tim Chen <tim.c.chen@linux.intel.com>"); -MODULE_DESCRIPTION("T10 DIF CRC calculation accelerated with PCLMULQDQ."); -MODULE_LICENSE("GPL"); - -MODULE_ALIAS("crct10dif"); -MODULE_ALIAS("crct10dif-pclmul"); diff --git a/crypto/Kconfig b/crypto/Kconfig index 69ce573f1224..aca01164f002 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -376,25 +376,6 @@ config CRYPTO_CRC32_PCLMUL which will enable any routine to use the CRC-32-IEEE 802.3 checksum and gain better performance as compared with the table implementation. -config CRYPTO_CRCT10DIF - tristate "CRCT10DIF algorithm" - select CRYPTO_HASH - help - CRC T10 Data Integrity Field computation is being cast as - a crypto transform. This allows for faster crc t10 diff - transforms to be used if they are available. - -config CRYPTO_CRCT10DIF_PCLMUL - tristate "CRCT10DIF PCLMULQDQ hardware acceleration" - depends on X86 && 64BIT && CRC_T10DIF - select CRYPTO_HASH - help - For x86_64 processors with SSE4.2 and PCLMULQDQ supported, - CRC T10 DIF PCLMULQDQ computation can be hardware - accelerated PCLMULQDQ instruction. This option will create - 'crct10dif-plcmul' module, which is faster when computing the - crct10dif checksum as compared with the generic table implementation. 
- config CRYPTO_GHASH tristate "GHASH digest algorithm" select CRYPTO_GF128MUL diff --git a/crypto/Makefile b/crypto/Makefile index 2d5ed08a239f..2ba0df2f908f 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -83,7 +83,6 @@ obj-$(CONFIG_CRYPTO_ZLIB) += zlib.o obj-$(CONFIG_CRYPTO_MICHAEL_MIC) += michael_mic.o obj-$(CONFIG_CRYPTO_CRC32C) += crc32c.o obj-$(CONFIG_CRYPTO_CRC32) += crc32.o -obj-$(CONFIG_CRYPTO_CRCT10DIF) += crct10dif.o obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o authencesn.o obj-$(CONFIG_CRYPTO_LZO) += lzo.o obj-$(CONFIG_CRYPTO_LZ4) += lz4.o diff --git a/crypto/crct10dif.c b/crypto/crct10dif.c deleted file mode 100644 index 92aca96d6b98..000000000000 --- a/crypto/crct10dif.c +++ /dev/null @@ -1,178 +0,0 @@ -/* - * Cryptographic API. - * - * T10 Data Integrity Field CRC16 Crypto Transform - * - * Copyright (c) 2007 Oracle Corporation. All rights reserved. - * Written by Martin K. Petersen <martin.petersen@oracle.com> - * Copyright (C) 2013 Intel Corporation - * Author: Tim Chen <tim.c.chen@linux.intel.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - */ - -#include <linux/types.h> -#include <linux/module.h> -#include <linux/crc-t10dif.h> -#include <crypto/internal/hash.h> -#include <linux/init.h> -#include <linux/string.h> -#include <linux/kernel.h> - -struct chksum_desc_ctx { - __u16 crc; -}; - -/* Table generated using the following polynomium: - * x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x + 1 - * gt: 0x8bb7 - */ -static const __u16 t10_dif_crc_table[256] = { - 0x0000, 0x8BB7, 0x9CD9, 0x176E, 0xB205, 0x39B2, 0x2EDC, 0xA56B, - 0xEFBD, 0x640A, 0x7364, 0xF8D3, 0x5DB8, 0xD60F, 0xC161, 0x4AD6, - 0x54CD, 0xDF7A, 0xC814, 0x43A3, 0xE6C8, 0x6D7F, 0x7A11, 0xF1A6, - 0xBB70, 0x30C7, 0x27A9, 0xAC1E, 0x0975, 0x82C2, 0x95AC, 0x1E1B, - 0xA99A, 0x222D, 0x3543, 0xBEF4, 0x1B9F, 0x9028, 0x8746, 0x0CF1, - 0x4627, 0xCD90, 0xDAFE, 0x5149, 0xF422, 0x7F95, 0x68FB, 0xE34C, - 0xFD57, 0x76E0, 0x618E, 0xEA39, 0x4F52, 0xC4E5, 0xD38B, 0x583C, - 0x12EA, 0x995D, 0x8E33, 0x0584, 0xA0EF, 0x2B58, 0x3C36, 0xB781, - 0xD883, 0x5334, 0x445A, 0xCFED, 0x6A86, 0xE131, 0xF65F, 0x7DE8, - 0x373E, 0xBC89, 0xABE7, 0x2050, 0x853B, 0x0E8C, 0x19E2, 0x9255, - 0x8C4E, 0x07F9, 0x1097, 0x9B20, 0x3E4B, 0xB5FC, 0xA292, 0x2925, - 0x63F3, 0xE844, 0xFF2A, 0x749D, 0xD1F6, 0x5A41, 0x4D2F, 0xC698, - 0x7119, 0xFAAE, 0xEDC0, 0x6677, 0xC31C, 0x48AB, 0x5FC5, 0xD472, - 0x9EA4, 0x1513, 0x027D, 0x89CA, 0x2CA1, 0xA716, 0xB078, 0x3BCF, - 0x25D4, 0xAE63, 0xB90D, 0x32BA, 0x97D1, 0x1C66, 0x0B08, 0x80BF, - 0xCA69, 0x41DE, 0x56B0, 0xDD07, 0x786C, 0xF3DB, 0xE4B5, 0x6F02, - 0x3AB1, 0xB106, 0xA668, 0x2DDF, 0x88B4, 0x0303, 0x146D, 0x9FDA, - 0xD50C, 0x5EBB, 0x49D5, 0xC262, 0x6709, 0xECBE, 0xFBD0, 0x7067, - 0x6E7C, 0xE5CB, 0xF2A5, 0x7912, 0xDC79, 0x57CE, 0x40A0, 0xCB17, - 0x81C1, 0x0A76, 0x1D18, 0x96AF, 0x33C4, 0xB873, 0xAF1D, 0x24AA, - 0x932B, 0x189C, 0x0FF2, 0x8445, 0x212E, 0xAA99, 0xBDF7, 0x3640, - 0x7C96, 0xF721, 0xE04F, 0x6BF8, 0xCE93, 0x4524, 0x524A, 0xD9FD, - 0xC7E6, 0x4C51, 0x5B3F, 0xD088, 0x75E3, 0xFE54, 0xE93A, 0x628D, - 0x285B, 0xA3EC, 0xB482, 0x3F35, 
0x9A5E, 0x11E9, 0x0687, 0x8D30, - 0xE232, 0x6985, 0x7EEB, 0xF55C, 0x5037, 0xDB80, 0xCCEE, 0x4759, - 0x0D8F, 0x8638, 0x9156, 0x1AE1, 0xBF8A, 0x343D, 0x2353, 0xA8E4, - 0xB6FF, 0x3D48, 0x2A26, 0xA191, 0x04FA, 0x8F4D, 0x9823, 0x1394, - 0x5942, 0xD2F5, 0xC59B, 0x4E2C, 0xEB47, 0x60F0, 0x779E, 0xFC29, - 0x4BA8, 0xC01F, 0xD771, 0x5CC6, 0xF9AD, 0x721A, 0x6574, 0xEEC3, - 0xA415, 0x2FA2, 0x38CC, 0xB37B, 0x1610, 0x9DA7, 0x8AC9, 0x017E, - 0x1F65, 0x94D2, 0x83BC, 0x080B, 0xAD60, 0x26D7, 0x31B9, 0xBA0E, - 0xF0D8, 0x7B6F, 0x6C01, 0xE7B6, 0x42DD, 0xC96A, 0xDE04, 0x55B3 -}; - -__u16 crc_t10dif_generic(__u16 crc, const unsigned char *buffer, size_t len) -{ - unsigned int i; - - for (i = 0 ; i < len ; i++) - crc = (crc << 8) ^ t10_dif_crc_table[((crc >> 8) ^ buffer[i]) & 0xff]; - - return crc; -} -EXPORT_SYMBOL(crc_t10dif_generic); - -/* - * Steps through buffer one byte at at time, calculates reflected - * crc using table. - */ - -static int chksum_init(struct shash_desc *desc) -{ - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - - ctx->crc = 0; - - return 0; -} - -static int chksum_update(struct shash_desc *desc, const u8 *data, - unsigned int length) -{ - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - - ctx->crc = crc_t10dif_generic(ctx->crc, data, length); - return 0; -} - -static int chksum_final(struct shash_desc *desc, u8 *out) -{ - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - - *(__u16 *)out = ctx->crc; - return 0; -} - -static int __chksum_finup(__u16 *crcp, const u8 *data, unsigned int len, - u8 *out) -{ - *(__u16 *)out = crc_t10dif_generic(*crcp, data, len); - return 0; -} - -static int chksum_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - - return __chksum_finup(&ctx->crc, data, len, out); -} - -static int chksum_digest(struct shash_desc *desc, const u8 *data, - unsigned int length, u8 *out) -{ - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - - return 
__chksum_finup(&ctx->crc, data, length, out); -} - -static struct shash_alg alg = { - .digestsize = CRC_T10DIF_DIGEST_SIZE, - .init = chksum_init, - .update = chksum_update, - .final = chksum_final, - .finup = chksum_finup, - .digest = chksum_digest, - .descsize = sizeof(struct chksum_desc_ctx), - .base = { - .cra_name = "crct10dif", - .cra_driver_name = "crct10dif-generic", - .cra_priority = 100, - .cra_blocksize = CRC_T10DIF_BLOCK_SIZE, - .cra_module = THIS_MODULE, - } -}; - -static int __init crct10dif_mod_init(void) -{ - int ret; - - ret = crypto_register_shash(&alg); - return ret; -} - -static void __exit crct10dif_mod_fini(void) -{ - crypto_unregister_shash(&alg); -} - -module_init(crct10dif_mod_init); -module_exit(crct10dif_mod_fini); - -MODULE_AUTHOR("Tim Chen <tim.c.chen@linux.intel.com>"); -MODULE_DESCRIPTION("T10 DIF CRC calculation."); -MODULE_LICENSE("GPL"); diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index 25a5934f0e50..66d254ce0d11 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -1174,10 +1174,6 @@ static int do_test(int m) ret += tcrypt_test("ghash"); break; - case 47: - ret += tcrypt_test("crct10dif"); - break; - case 100: ret += tcrypt_test("hmac(md5)"); break; @@ -1502,10 +1498,6 @@ static int do_test(int m) test_hash_speed("crc32c", sec, generic_hash_speed_template); if (mode > 300 && mode < 400) break; - case 320: - test_hash_speed("crct10dif", sec, generic_hash_speed_template); - if (mode > 300 && mode < 400) break; - case 399: break; diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 2f00607039e2..ecddf921a9db 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -2046,16 +2046,6 @@ static const struct alg_test_desc alg_test_descs[] = { } } }, { - .alg = "crct10dif", - .test = alg_test_hash, - .fips_allowed = 1, - .suite = { - .hash = { - .vecs = crct10dif_tv_template, - .count = CRCT10DIF_TEST_VECTORS - } - } - }, { .alg = "cryptd(__driver-cbc-aes-aesni)", .test = alg_test_null, .fips_allowed = 1, diff --git a/crypto/testmgr.h 
b/crypto/testmgr.h index 7d44aa3d6b44..1e701bc075b9 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -450,39 +450,6 @@ static struct hash_testvec rmd320_tv_template[] = { } }; -#define CRCT10DIF_TEST_VECTORS 3 -static struct hash_testvec crct10dif_tv_template[] = { - { - .plaintext = "abc", - .psize = 3, -#ifdef __LITTLE_ENDIAN - .digest = "\x3b\x44", -#else - .digest = "\x44\x3b", -#endif - }, { - .plaintext = "1234567890123456789012345678901234567890" - "123456789012345678901234567890123456789", - .psize = 79, -#ifdef __LITTLE_ENDIAN - .digest = "\x70\x4b", -#else - .digest = "\x4b\x70", -#endif - }, { - .plaintext = - "abcddddddddddddddddddddddddddddddddddddddddddddddddddddd", - .psize = 56, -#ifdef __LITTLE_ENDIAN - .digest = "\xe3\x9c", -#else - .digest = "\x9c\xe3", -#endif - .np = 2, - .tap = { 28, 28 } - } -}; - /* * SHA1 test vectors from from FIPS PUB 180-1 * Long vector from CAVS 5.0 diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index 227aca77ee1e..5da44e81dd4d 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -169,10 +169,8 @@ int acpi_create_platform_device(struct acpi_device *adev, -------------------------------------------------------------------------- */ #if defined(CONFIG_ACPI_VIDEO) || defined(CONFIG_ACPI_VIDEO_MODULE) bool acpi_video_backlight_quirks(void); -bool acpi_video_verify_backlight_support(void); #else static inline bool acpi_video_backlight_quirks(void) { return false; } -static inline bool acpi_video_verify_backlight_support(void) { return false; } #endif #endif /* _ACPI_INTERNAL_H_ */ diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c index 6dd237e79b4f..0ec434d2586d 100644 --- a/drivers/acpi/video.c +++ b/drivers/acpi/video.c @@ -911,7 +911,7 @@ static void acpi_video_device_find_cap(struct acpi_video_device *device) if (acpi_video_init_brightness(device)) return; - if (acpi_video_verify_backlight_support()) { + if (acpi_video_backlight_support()) { struct backlight_properties 
props; struct pci_dev *pdev; acpi_handle acpi_parent; @@ -1366,8 +1366,8 @@ acpi_video_switch_brightness(struct acpi_video_device *device, int event) unsigned long long level_current, level_next; int result = -EINVAL; - /* no warning message if acpi_backlight=vendor or a quirk is used */ - if (!acpi_video_verify_backlight_support()) + /* no warning message if acpi_backlight=vendor is used */ + if (!acpi_video_backlight_support()) return 0; if (!device->brightness) @@ -1875,46 +1875,6 @@ static int acpi_video_bus_remove(struct acpi_device *device) return 0; } -static acpi_status video_unregister_backlight(acpi_handle handle, u32 lvl, - void *context, void **rv) -{ - struct acpi_device *acpi_dev; - struct acpi_video_bus *video; - struct acpi_video_device *dev, *next; - - if (acpi_bus_get_device(handle, &acpi_dev)) - return AE_OK; - - if (acpi_match_device_ids(acpi_dev, video_device_ids)) - return AE_OK; - - video = acpi_driver_data(acpi_dev); - if (!video) - return AE_OK; - - acpi_video_bus_stop_devices(video); - mutex_lock(&video->device_list_lock); - list_for_each_entry_safe(dev, next, &video->video_device_list, entry) { - if (dev->backlight) { - backlight_device_unregister(dev->backlight); - dev->backlight = NULL; - kfree(dev->brightness->levels); - kfree(dev->brightness); - } - if (dev->cooling_dev) { - sysfs_remove_link(&dev->dev->dev.kobj, - "thermal_cooling"); - sysfs_remove_link(&dev->cooling_dev->device.kobj, - "device"); - thermal_cooling_device_unregister(dev->cooling_dev); - dev->cooling_dev = NULL; - } - } - mutex_unlock(&video->device_list_lock); - acpi_video_bus_start_devices(video); - return AE_OK; -} - static int __init is_i740(struct pci_dev *dev) { if (dev->device == 0x00D1) @@ -1946,25 +1906,14 @@ static int __init intel_opregion_present(void) return opregion; } -int __acpi_video_register(bool backlight_quirks) +int acpi_video_register(void) { - bool no_backlight; - int result; - - no_backlight = backlight_quirks ? 
acpi_video_backlight_quirks() : false; - + int result = 0; if (register_count) { /* - * If acpi_video_register() has been called already, don't try - * to register acpi_video_bus, but unregister backlight devices - * if no backlight support is requested. + * if the function of acpi_video_register is already called, + * don't register the acpi_vide_bus again and return no error. */ - if (no_backlight) - acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT, - ACPI_UINT32_MAX, - video_unregister_backlight, - NULL, NULL, NULL); - return 0; } @@ -1980,7 +1929,7 @@ int __acpi_video_register(bool backlight_quirks) return 0; } -EXPORT_SYMBOL(__acpi_video_register); +EXPORT_SYMBOL(acpi_video_register); void acpi_video_unregister(void) { diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index 826e52def080..c3397748ba46 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -238,12 +238,7 @@ static void acpi_video_caps_check(void) bool acpi_video_backlight_quirks(void) { - if (acpi_gbl_osi_data >= ACPI_OSI_WIN_8) { - acpi_video_caps_check(); - acpi_video_support |= ACPI_VIDEO_SKIP_BACKLIGHT; - return true; - } - return false; + return acpi_gbl_osi_data >= ACPI_OSI_WIN_8; } EXPORT_SYMBOL(acpi_video_backlight_quirks); @@ -291,14 +286,6 @@ int acpi_video_backlight_support(void) } EXPORT_SYMBOL(acpi_video_backlight_support); -/* For the ACPI video driver use only. */ -bool acpi_video_verify_backlight_support(void) -{ - return (acpi_video_support & ACPI_VIDEO_SKIP_BACKLIGHT) ? - false : acpi_video_backlight_support(); -} -EXPORT_SYMBOL(acpi_video_verify_backlight_support); - /* * Use acpi_backlight=vendor/video to force that backlight switching * is processed by vendor specific acpi drivers or video.ko driver. diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index 80dc988f01e4..4e737728aee2 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -97,6 +97,15 @@ config SATA_AHCI_PLATFORM If unsure, say N. 
+config AHCI_IMX + tristate "Freescale i.MX AHCI SATA support" + depends on SATA_AHCI_PLATFORM && MFD_SYSCON + help + This option enables support for the Freescale i.MX SoC's + onboard AHCI SATA. + + If unsure, say N. + config SATA_FSL tristate "Freescale 3.0Gbps SATA support" depends on FSL_SOC @@ -107,7 +116,7 @@ config SATA_FSL If unsure, say N. config SATA_INIC162X - tristate "Initio 162x SATA support" + tristate "Initio 162x SATA support (Very Experimental)" depends on PCI help This option enables support for Initio 162x Serial ATA. diff --git a/drivers/ata/Makefile b/drivers/ata/Makefile index c04d0fd038a3..46518c622460 100644 --- a/drivers/ata/Makefile +++ b/drivers/ata/Makefile @@ -10,6 +10,7 @@ obj-$(CONFIG_SATA_INIC162X) += sata_inic162x.o obj-$(CONFIG_SATA_SIL24) += sata_sil24.o obj-$(CONFIG_SATA_DWC) += sata_dwc_460ex.o obj-$(CONFIG_SATA_HIGHBANK) += sata_highbank.o libahci.o +obj-$(CONFIG_AHCI_IMX) += ahci_imx.o # SFF w/ custom DMA obj-$(CONFIG_PDC_ADMA) += pdc_adma.o diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 5064f3ea20f1..db4380d70031 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -1146,11 +1146,18 @@ int ahci_host_activate(struct ata_host *host, int irq, unsigned int n_msis) return rc; for (i = 0; i < host->n_ports; i++) { + const char* desc; struct ahci_port_priv *pp = host->ports[i]->private_data; + /* pp is NULL for dummy ports */ + if (pp) + desc = pp->irq_desc; + else + desc = dev_driver_string(host->dev); + rc = devm_request_threaded_irq(host->dev, irq + i, ahci_hw_interrupt, ahci_thread_fn, IRQF_SHARED, - pp->irq_desc, host->ports[i]); + desc, host->ports[i]); if (rc) goto out_free_irqs; } diff --git a/drivers/ata/ahci_imx.c b/drivers/ata/ahci_imx.c new file mode 100644 index 000000000000..58debb0acc3a --- /dev/null +++ b/drivers/ata/ahci_imx.c @@ -0,0 +1,236 @@ +/* + * Freescale IMX AHCI SATA platform driver + * Copyright 2013 Freescale Semiconductor, Inc. 
+ * + * based on the AHCI SATA platform driver by Jeff Garzik and Anton Vorontsov + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/regmap.h> +#include <linux/ahci_platform.h> +#include <linux/of_device.h> +#include <linux/mfd/syscon.h> +#include <linux/mfd/syscon/imx6q-iomuxc-gpr.h> +#include "ahci.h" + +enum { + HOST_TIMER1MS = 0xe0, /* Timer 1-ms */ +}; + +struct imx_ahci_priv { + struct platform_device *ahci_pdev; + struct clk *sata_ref_clk; + struct clk *ahb_clk; + struct regmap *gpr; +}; + +static int imx6q_sata_init(struct device *dev, void __iomem *mmio) +{ + int ret = 0; + unsigned int reg_val; + struct imx_ahci_priv *imxpriv = dev_get_drvdata(dev->parent); + + imxpriv->gpr = + syscon_regmap_lookup_by_compatible("fsl,imx6q-iomuxc-gpr"); + if (IS_ERR(imxpriv->gpr)) { + dev_err(dev, "failed to find fsl,imx6q-iomux-gpr regmap\n"); + return PTR_ERR(imxpriv->gpr); + } + + ret = clk_prepare_enable(imxpriv->sata_ref_clk); + if (ret < 0) { + dev_err(dev, "prepare-enable sata_ref clock err:%d\n", ret); + return ret; + } + + /* + * set PHY Paremeters, two steps to configure the GPR13, + * one write for rest of parameters, mask of first write + * is 0x07fffffd, and the other one write for setting + * the mpll_clk_en. 
+ */ + regmap_update_bits(imxpriv->gpr, 0x34, IMX6Q_GPR13_SATA_RX_EQ_VAL_MASK + | IMX6Q_GPR13_SATA_RX_LOS_LVL_MASK + | IMX6Q_GPR13_SATA_RX_DPLL_MODE_MASK + | IMX6Q_GPR13_SATA_SPD_MODE_MASK + | IMX6Q_GPR13_SATA_MPLL_SS_EN + | IMX6Q_GPR13_SATA_TX_ATTEN_MASK + | IMX6Q_GPR13_SATA_TX_BOOST_MASK + | IMX6Q_GPR13_SATA_TX_LVL_MASK + | IMX6Q_GPR13_SATA_TX_EDGE_RATE + , IMX6Q_GPR13_SATA_RX_EQ_VAL_3_0_DB + | IMX6Q_GPR13_SATA_RX_LOS_LVL_SATA2M + | IMX6Q_GPR13_SATA_RX_DPLL_MODE_2P_4F + | IMX6Q_GPR13_SATA_SPD_MODE_3P0G + | IMX6Q_GPR13_SATA_MPLL_SS_EN + | IMX6Q_GPR13_SATA_TX_ATTEN_9_16 + | IMX6Q_GPR13_SATA_TX_BOOST_3_33_DB + | IMX6Q_GPR13_SATA_TX_LVL_1_025_V); + regmap_update_bits(imxpriv->gpr, 0x34, IMX6Q_GPR13_SATA_MPLL_CLK_EN, + IMX6Q_GPR13_SATA_MPLL_CLK_EN); + usleep_range(100, 200); + + /* + * Configure the HWINIT bits of the HOST_CAP and HOST_PORTS_IMPL, + * and IP vendor specific register HOST_TIMER1MS. + * Configure CAP_SSS (support stagered spin up). + * Implement the port0. + * Get the ahb clock rate, and configure the TIMER1MS register. 
+ */ + reg_val = readl(mmio + HOST_CAP); + if (!(reg_val & HOST_CAP_SSS)) { + reg_val |= HOST_CAP_SSS; + writel(reg_val, mmio + HOST_CAP); + } + reg_val = readl(mmio + HOST_PORTS_IMPL); + if (!(reg_val & 0x1)) { + reg_val |= 0x1; + writel(reg_val, mmio + HOST_PORTS_IMPL); + } + + reg_val = clk_get_rate(imxpriv->ahb_clk) / 1000; + writel(reg_val, mmio + HOST_TIMER1MS); + + return 0; +} + +static void imx6q_sata_exit(struct device *dev) +{ + struct imx_ahci_priv *imxpriv = dev_get_drvdata(dev->parent); + + regmap_update_bits(imxpriv->gpr, 0x34, IMX6Q_GPR13_SATA_MPLL_CLK_EN, + !IMX6Q_GPR13_SATA_MPLL_CLK_EN); + clk_disable_unprepare(imxpriv->sata_ref_clk); +} + +static struct ahci_platform_data imx6q_sata_pdata = { + .init = imx6q_sata_init, + .exit = imx6q_sata_exit, +}; + +static const struct of_device_id imx_ahci_of_match[] = { + { .compatible = "fsl,imx6q-ahci", .data = &imx6q_sata_pdata}, + {}, +}; +MODULE_DEVICE_TABLE(of, imx_ahci_of_match); + +static int imx_ahci_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct resource *mem, *irq, res[2]; + const struct of_device_id *of_id; + const struct ahci_platform_data *pdata = NULL; + struct imx_ahci_priv *imxpriv; + struct device *ahci_dev; + struct platform_device *ahci_pdev; + int ret; + + imxpriv = devm_kzalloc(dev, sizeof(*imxpriv), GFP_KERNEL); + if (!imxpriv) { + dev_err(dev, "can't alloc ahci_host_priv\n"); + return -ENOMEM; + } + + ahci_pdev = platform_device_alloc("ahci", -1); + if (!ahci_pdev) + return -ENODEV; + + ahci_dev = &ahci_pdev->dev; + ahci_dev->parent = dev; + + imxpriv->ahb_clk = devm_clk_get(dev, "ahb"); + if (IS_ERR(imxpriv->ahb_clk)) { + dev_err(dev, "can't get ahb clock.\n"); + ret = PTR_ERR(imxpriv->ahb_clk); + goto err_out; + } + + imxpriv->sata_ref_clk = devm_clk_get(dev, "sata_ref"); + if (IS_ERR(imxpriv->sata_ref_clk)) { + dev_err(dev, "can't get sata_ref clock.\n"); + ret = PTR_ERR(imxpriv->sata_ref_clk); + goto err_out; + } + + imxpriv->ahci_pdev = 
ahci_pdev; + platform_set_drvdata(pdev, imxpriv); + + of_id = of_match_device(imx_ahci_of_match, dev); + if (of_id) { + pdata = of_id->data; + } else { + ret = -EINVAL; + goto err_out; + } + + mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); + irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0); + if (!mem || !irq) { + dev_err(dev, "no mmio/irq resource\n"); + ret = -ENOMEM; + goto err_out; + } + + res[0] = *mem; + res[1] = *irq; + + ahci_dev->coherent_dma_mask = DMA_BIT_MASK(32); + ahci_dev->dma_mask = &ahci_dev->coherent_dma_mask; + ahci_dev->of_node = dev->of_node; + + ret = platform_device_add_resources(ahci_pdev, res, 2); + if (ret) + goto err_out; + + ret = platform_device_add_data(ahci_pdev, pdata, sizeof(*pdata)); + if (ret) + goto err_out; + + ret = platform_device_add(ahci_pdev); + if (ret) { +err_out: + platform_device_put(ahci_pdev); + return ret; + } + + return 0; +} + +static int imx_ahci_remove(struct platform_device *pdev) +{ + struct imx_ahci_priv *imxpriv = platform_get_drvdata(pdev); + struct platform_device *ahci_pdev = imxpriv->ahci_pdev; + + platform_device_unregister(ahci_pdev); + return 0; +} + +static struct platform_driver imx_ahci_driver = { + .probe = imx_ahci_probe, + .remove = imx_ahci_remove, + .driver = { + .name = "ahci-imx", + .owner = THIS_MODULE, + .of_match_table = imx_ahci_of_match, + }, +}; +module_platform_driver(imx_ahci_driver); + +MODULE_DESCRIPTION("Freescale i.MX AHCI SATA platform driver"); +MODULE_AUTHOR("Richard Zhu <Hong-Xing.Zhu@freescale.com>"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("ahci:imx"); diff --git a/drivers/ata/ata_piix.c b/drivers/ata/ata_piix.c index b52a10c8eeb9..513ad7ed0c99 100644 --- a/drivers/ata/ata_piix.c +++ b/drivers/ata/ata_piix.c @@ -330,7 +330,7 @@ static const struct pci_device_id piix_pci_tbl[] = { /* SATA Controller IDE (Wellsburg) */ { 0x8086, 0x8d00, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata_snb }, /* SATA Controller IDE (Wellsburg) */ - { 0x8086, 0x8d08, PCI_ANY_ID, PCI_ANY_ID, 0, 
0, ich8_2port_sata }, + { 0x8086, 0x8d08, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_2port_sata_snb }, /* SATA Controller IDE (Wellsburg) */ { 0x8086, 0x8d60, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata_snb }, /* SATA Controller IDE (Wellsburg) */ diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 83c08907e042..b1e880a3c3da 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -206,8 +206,10 @@ static ssize_t ata_scsi_park_store(struct device *device, unsigned long flags; int rc; - rc = strict_strtol(buf, 10, &input); - if (rc || input < -2) + rc = kstrtol(buf, 10, &input); + if (rc) + return rc; + if (input < -2) return -EINVAL; if (input > ATA_TMOUT_MAX_PARK) { rc = -EOVERFLOW; diff --git a/drivers/ata/sata_inic162x.c b/drivers/ata/sata_inic162x.c index e45131748248..5c54d957370a 100644 --- a/drivers/ata/sata_inic162x.c +++ b/drivers/ata/sata_inic162x.c @@ -6,6 +6,18 @@ * * This file is released under GPL v2. * + * **** WARNING **** + * + * This driver never worked properly and unfortunately data corruption is + * relatively common. There isn't anyone working on the driver and there's + * no support from the vendor. Do not use this driver in any production + * environment. + * + * http://thread.gmane.org/gmane.linux.debian.devel.bugs.rc/378525/focus=54491 + * https://bugzilla.kernel.org/show_bug.cgi?id=60565 + * + * ***************** + * * This controller is eccentric and easily locks up if something isn't * right. Documentation is available at initio's website but it only * documents registers (not programming model). 
@@ -807,6 +819,8 @@ static int inic_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) ata_print_version_once(&pdev->dev, DRV_VERSION); + dev_alert(&pdev->dev, "inic162x support is broken with common data corruption issues and will be disabled by default, contact linux-ide@vger.kernel.org if in production use\n"); + /* alloc host */ host = ata_host_alloc_pinfo(&pdev->dev, ppi, NR_PORTS); hpriv = devm_kzalloc(&pdev->dev, sizeof(*hpriv), GFP_KERNEL); diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index b81ddfea1da0..e07a5fd58ad7 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -532,11 +532,11 @@ config BLK_DEV_RBD If unsure, say N. config BLK_DEV_RSXX - tristate "IBM FlashSystem 70/80 PCIe SSD Device Driver" + tristate "IBM Flash Adapter 900GB Full Height PCIe Device Driver" depends on PCI help Device driver for IBM's high speed PCIe SSD - storage devices: FlashSystem-70 and FlashSystem-80. + storage device: Flash Adapter 900GB Full Height. To compile this driver as a module, choose M here: the module will be called rsxx. 
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 6608076dc39e..28c73ca320a8 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -659,6 +659,27 @@ void drbd_al_shrink(struct drbd_conf *mdev) wake_up(&mdev->al_wait); } +int drbd_initialize_al(struct drbd_conf *mdev, void *buffer) +{ + struct al_transaction_on_disk *al = buffer; + struct drbd_md *md = &mdev->ldev->md; + sector_t al_base = md->md_offset + md->al_offset; + int al_size_4k = md->al_stripes * md->al_stripe_size_4k; + int i; + + memset(al, 0, 4096); + al->magic = cpu_to_be32(DRBD_AL_MAGIC); + al->transaction_type = cpu_to_be16(AL_TR_INITIALIZED); + al->crc32c = cpu_to_be32(crc32c(0, al, 4096)); + + for (i = 0; i < al_size_4k; i++) { + int err = drbd_md_sync_page_io(mdev, mdev->ldev, al_base + i * 8, WRITE); + if (err) + return err; + } + return 0; +} + static int w_update_odbm(struct drbd_work *w, int unused) { struct update_odbm_work *udw = container_of(w, struct update_odbm_work, w); diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index f943aacfdad8..2d7f608d181c 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -832,6 +832,7 @@ struct drbd_tconn { /* is a resource from the config file */ unsigned susp_nod:1; /* IO suspended because no data */ unsigned susp_fen:1; /* IO suspended because fence peer handler runs */ struct mutex cstate_mutex; /* Protects graceful disconnects */ + unsigned int connect_cnt; /* Inc each time a connection is established */ unsigned long flags; struct net_conf *net_conf; /* content protected by rcu */ @@ -1132,6 +1133,7 @@ extern void drbd_mdev_cleanup(struct drbd_conf *mdev); void drbd_print_uuids(struct drbd_conf *mdev, const char *text); extern void conn_md_sync(struct drbd_tconn *tconn); +extern void drbd_md_write(struct drbd_conf *mdev, void *buffer); extern void drbd_md_sync(struct drbd_conf *mdev); extern int drbd_md_read(struct drbd_conf 
*mdev, struct drbd_backing_dev *bdev); extern void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local); @@ -1466,8 +1468,16 @@ extern void drbd_suspend_io(struct drbd_conf *mdev); extern void drbd_resume_io(struct drbd_conf *mdev); extern char *ppsize(char *buf, unsigned long long size); extern sector_t drbd_new_dev_size(struct drbd_conf *, struct drbd_backing_dev *, sector_t, int); -enum determine_dev_size { dev_size_error = -1, unchanged = 0, shrunk = 1, grew = 2 }; -extern enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *, enum dds_flags) __must_hold(local); +enum determine_dev_size { + DS_ERROR_SHRINK = -3, + DS_ERROR_SPACE_MD = -2, + DS_ERROR = -1, + DS_UNCHANGED = 0, + DS_SHRUNK = 1, + DS_GREW = 2 +}; +extern enum determine_dev_size +drbd_determine_dev_size(struct drbd_conf *, enum dds_flags, struct resize_parms *) __must_hold(local); extern void resync_after_online_grow(struct drbd_conf *); extern void drbd_reconsider_max_bio_size(struct drbd_conf *mdev); extern enum drbd_state_rv drbd_set_role(struct drbd_conf *mdev, @@ -1633,6 +1643,7 @@ extern int __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, #define drbd_set_out_of_sync(mdev, sector, size) \ __drbd_set_out_of_sync(mdev, sector, size, __FILE__, __LINE__) extern void drbd_al_shrink(struct drbd_conf *mdev); +extern int drbd_initialize_al(struct drbd_conf *, void *); /* drbd_nl.c */ /* state info broadcast */ diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index a5dca6affcbb..55635edf563b 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2762,8 +2762,6 @@ int __init drbd_init(void) /* * allocate all necessary structs */ - err = -ENOMEM; - init_waitqueue_head(&drbd_pp_wait); drbd_proc = NULL; /* play safe for drbd_cleanup */ @@ -2773,6 +2771,7 @@ int __init drbd_init(void) if (err) goto fail; + err = -ENOMEM; drbd_proc = proc_create_data("drbd", S_IFREG | S_IRUGO , NULL, &drbd_proc_fops, 
NULL); if (!drbd_proc) { printk(KERN_ERR "drbd: unable to register proc file\n"); @@ -2803,7 +2802,6 @@ int __init drbd_init(void) fail: drbd_cleanup(); if (err == -ENOMEM) - /* currently always the case */ printk(KERN_ERR "drbd: ran out of memory\n"); else printk(KERN_ERR "drbd: initialization failure\n"); @@ -2881,34 +2879,14 @@ struct meta_data_on_disk { u8 reserved_u8[4096 - (7*8 + 10*4)]; } __packed; -/** - * drbd_md_sync() - Writes the meta data super block if the MD_DIRTY flag bit is set - * @mdev: DRBD device. - */ -void drbd_md_sync(struct drbd_conf *mdev) + + +void drbd_md_write(struct drbd_conf *mdev, void *b) { - struct meta_data_on_disk *buffer; + struct meta_data_on_disk *buffer = b; sector_t sector; int i; - /* Don't accidentally change the DRBD meta data layout. */ - BUILD_BUG_ON(UI_SIZE != 4); - BUILD_BUG_ON(sizeof(struct meta_data_on_disk) != 4096); - - del_timer(&mdev->md_sync_timer); - /* timer may be rearmed by drbd_md_mark_dirty() now. */ - if (!test_and_clear_bit(MD_DIRTY, &mdev->flags)) - return; - - /* We use here D_FAILED and not D_ATTACHING because we try to write - * metadata even if we detach due to a disk failure! */ - if (!get_ldev_if_state(mdev, D_FAILED)) - return; - - buffer = drbd_md_get_buffer(mdev); - if (!buffer) - goto out; - memset(buffer, 0, sizeof(*buffer)); buffer->la_size_sect = cpu_to_be64(drbd_get_capacity(mdev->this_bdev)); @@ -2937,6 +2915,35 @@ void drbd_md_sync(struct drbd_conf *mdev) dev_err(DEV, "meta data update failed!\n"); drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR); } +} + +/** + * drbd_md_sync() - Writes the meta data super block if the MD_DIRTY flag bit is set + * @mdev: DRBD device. + */ +void drbd_md_sync(struct drbd_conf *mdev) +{ + struct meta_data_on_disk *buffer; + + /* Don't accidentally change the DRBD meta data layout. 
*/ + BUILD_BUG_ON(UI_SIZE != 4); + BUILD_BUG_ON(sizeof(struct meta_data_on_disk) != 4096); + + del_timer(&mdev->md_sync_timer); + /* timer may be rearmed by drbd_md_mark_dirty() now. */ + if (!test_and_clear_bit(MD_DIRTY, &mdev->flags)) + return; + + /* We use here D_FAILED and not D_ATTACHING because we try to write + * metadata even if we detach due to a disk failure! */ + if (!get_ldev_if_state(mdev, D_FAILED)) + return; + + buffer = drbd_md_get_buffer(mdev); + if (!buffer) + goto out; + + drbd_md_write(mdev, buffer); /* Update mdev->ldev->md.la_size_sect, * since we updated it on metadata. */ diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 9e3f441e7e84..8cc1e640f485 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -417,6 +417,7 @@ static enum drbd_fencing_p highest_fencing_policy(struct drbd_tconn *tconn) bool conn_try_outdate_peer(struct drbd_tconn *tconn) { + unsigned int connect_cnt; union drbd_state mask = { }; union drbd_state val = { }; enum drbd_fencing_p fp; @@ -428,6 +429,10 @@ bool conn_try_outdate_peer(struct drbd_tconn *tconn) return false; } + spin_lock_irq(&tconn->req_lock); + connect_cnt = tconn->connect_cnt; + spin_unlock_irq(&tconn->req_lock); + fp = highest_fencing_policy(tconn); switch (fp) { case FP_NOT_AVAIL: @@ -492,8 +497,14 @@ bool conn_try_outdate_peer(struct drbd_tconn *tconn) here, because we might were able to re-establish the connection in the meantime. 
*/ spin_lock_irq(&tconn->req_lock); - if (tconn->cstate < C_WF_REPORT_PARAMS && !test_bit(STATE_SENT, &tconn->flags)) - _conn_request_state(tconn, mask, val, CS_VERBOSE); + if (tconn->cstate < C_WF_REPORT_PARAMS && !test_bit(STATE_SENT, &tconn->flags)) { + if (tconn->connect_cnt != connect_cnt) + /* In case the connection was established and droped + while the fence-peer handler was running, ignore it */ + conn_info(tconn, "Ignoring fence-peer exit code\n"); + else + _conn_request_state(tconn, mask, val, CS_VERBOSE); + } spin_unlock_irq(&tconn->req_lock); return conn_highest_pdsk(tconn) <= D_OUTDATED; @@ -816,15 +827,20 @@ void drbd_resume_io(struct drbd_conf *mdev) * Returns 0 on success, negative return values indicate errors. * You should call drbd_md_sync() after calling this function. */ -enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds_flags flags) __must_hold(local) +enum determine_dev_size +drbd_determine_dev_size(struct drbd_conf *mdev, enum dds_flags flags, struct resize_parms *rs) __must_hold(local) { sector_t prev_first_sect, prev_size; /* previous meta location */ sector_t la_size_sect, u_size; + struct drbd_md *md = &mdev->ldev->md; + u32 prev_al_stripe_size_4k; + u32 prev_al_stripes; sector_t size; char ppb[10]; + void *buffer; int md_moved, la_size_changed; - enum determine_dev_size rv = unchanged; + enum determine_dev_size rv = DS_UNCHANGED; /* race: * application request passes inc_ap_bio, @@ -836,6 +852,11 @@ enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds * still lock the act_log to not trigger ASSERTs there. 
*/ drbd_suspend_io(mdev); + buffer = drbd_md_get_buffer(mdev); /* Lock meta-data IO */ + if (!buffer) { + drbd_resume_io(mdev); + return DS_ERROR; + } /* no wait necessary anymore, actually we could assert that */ wait_event(mdev->al_wait, lc_try_lock(mdev->act_log)); @@ -844,7 +865,17 @@ enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds prev_size = mdev->ldev->md.md_size_sect; la_size_sect = mdev->ldev->md.la_size_sect; - /* TODO: should only be some assert here, not (re)init... */ + if (rs) { + /* rs is non NULL if we should change the AL layout only */ + + prev_al_stripes = md->al_stripes; + prev_al_stripe_size_4k = md->al_stripe_size_4k; + + md->al_stripes = rs->al_stripes; + md->al_stripe_size_4k = rs->al_stripe_size / 4; + md->al_size_4k = (u64)rs->al_stripes * rs->al_stripe_size / 4; + } + drbd_md_set_sector_offsets(mdev, mdev->ldev); rcu_read_lock(); @@ -852,6 +883,21 @@ enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds rcu_read_unlock(); size = drbd_new_dev_size(mdev, mdev->ldev, u_size, flags & DDSF_FORCED); + if (size < la_size_sect) { + if (rs && u_size == 0) { + /* Remove "rs &&" later. This check should always be active, but + right now the receiver expects the permissive behavior */ + dev_warn(DEV, "Implicit shrink not allowed. " + "Use --size=%llus for explicit shrink.\n", + (unsigned long long)size); + rv = DS_ERROR_SHRINK; + } + if (u_size > size) + rv = DS_ERROR_SPACE_MD; + if (rv != DS_UNCHANGED) + goto err_out; + } + if (drbd_get_capacity(mdev->this_bdev) != size || drbd_bm_capacity(mdev) != size) { int err; @@ -867,7 +913,7 @@ enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds "Leaving size unchanged at size = %lu KB\n", (unsigned long)size); } - rv = dev_size_error; + rv = DS_ERROR; } /* racy, see comments above. 
*/ drbd_set_my_capacity(mdev, size); @@ -875,38 +921,57 @@ enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds dev_info(DEV, "size = %s (%llu KB)\n", ppsize(ppb, size>>1), (unsigned long long)size>>1); } - if (rv == dev_size_error) - goto out; + if (rv <= DS_ERROR) + goto err_out; la_size_changed = (la_size_sect != mdev->ldev->md.la_size_sect); md_moved = prev_first_sect != drbd_md_first_sector(mdev->ldev) || prev_size != mdev->ldev->md.md_size_sect; - if (la_size_changed || md_moved) { - int err; + if (la_size_changed || md_moved || rs) { + u32 prev_flags; drbd_al_shrink(mdev); /* All extents inactive. */ + + prev_flags = md->flags; + md->flags &= ~MDF_PRIMARY_IND; + drbd_md_write(mdev, buffer); + dev_info(DEV, "Writing the whole bitmap, %s\n", la_size_changed && md_moved ? "size changed and md moved" : la_size_changed ? "size changed" : "md moved"); /* next line implicitly does drbd_suspend_io()+drbd_resume_io() */ - err = drbd_bitmap_io(mdev, md_moved ? &drbd_bm_write_all : &drbd_bm_write, - "size changed", BM_LOCKED_MASK); - if (err) { - rv = dev_size_error; - goto out; - } - drbd_md_mark_dirty(mdev); + drbd_bitmap_io(mdev, md_moved ? 
&drbd_bm_write_all : &drbd_bm_write, + "size changed", BM_LOCKED_MASK); + drbd_initialize_al(mdev, buffer); + + md->flags = prev_flags; + drbd_md_write(mdev, buffer); + + if (rs) + dev_info(DEV, "Changed AL layout to al-stripes = %d, al-stripe-size-kB = %d\n", + md->al_stripes, md->al_stripe_size_4k * 4); } if (size > la_size_sect) - rv = grew; + rv = DS_GREW; if (size < la_size_sect) - rv = shrunk; -out: + rv = DS_SHRUNK; + + if (0) { + err_out: + if (rs) { + md->al_stripes = prev_al_stripes; + md->al_stripe_size_4k = prev_al_stripe_size_4k; + md->al_size_4k = (u64)prev_al_stripes * prev_al_stripe_size_4k; + + drbd_md_set_sector_offsets(mdev, mdev->ldev); + } + } lc_unlock(mdev->act_log); wake_up(&mdev->al_wait); + drbd_md_put_buffer(mdev); drbd_resume_io(mdev); return rv; @@ -1607,11 +1672,11 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) !drbd_md_test_flag(mdev->ldev, MDF_CONNECTED_IND)) set_bit(USE_DEGR_WFC_T, &mdev->flags); - dd = drbd_determine_dev_size(mdev, 0); - if (dd == dev_size_error) { + dd = drbd_determine_dev_size(mdev, 0, NULL); + if (dd <= DS_ERROR) { retcode = ERR_NOMEM_BITMAP; goto force_diskless_dec; - } else if (dd == grew) + } else if (dd == DS_GREW) set_bit(RESYNC_AFTER_NEG, &mdev->flags); if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC) || @@ -2305,6 +2370,7 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info) struct drbd_conf *mdev; enum drbd_ret_code retcode; enum determine_dev_size dd; + bool change_al_layout = false; enum dds_flags ddsf; sector_t u_size; int err; @@ -2315,31 +2381,33 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info) if (retcode != NO_ERROR) goto fail; + mdev = adm_ctx.mdev; + if (!get_ldev(mdev)) { + retcode = ERR_NO_DISK; + goto fail; + } + memset(&rs, 0, sizeof(struct resize_parms)); + rs.al_stripes = mdev->ldev->md.al_stripes; + rs.al_stripe_size = mdev->ldev->md.al_stripe_size_4k * 4; if (info->attrs[DRBD_NLA_RESIZE_PARMS]) { err = resize_parms_from_attrs(&rs, 
info); if (err) { retcode = ERR_MANDATORY_TAG; drbd_msg_put_info(from_attrs_err_to_txt(err)); - goto fail; + goto fail_ldev; } } - mdev = adm_ctx.mdev; if (mdev->state.conn > C_CONNECTED) { retcode = ERR_RESIZE_RESYNC; - goto fail; + goto fail_ldev; } if (mdev->state.role == R_SECONDARY && mdev->state.peer == R_SECONDARY) { retcode = ERR_NO_PRIMARY; - goto fail; - } - - if (!get_ldev(mdev)) { - retcode = ERR_NO_DISK; - goto fail; + goto fail_ldev; } if (rs.no_resync && mdev->tconn->agreed_pro_version < 93) { @@ -2358,6 +2426,28 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info) } } + if (mdev->ldev->md.al_stripes != rs.al_stripes || + mdev->ldev->md.al_stripe_size_4k != rs.al_stripe_size / 4) { + u32 al_size_k = rs.al_stripes * rs.al_stripe_size; + + if (al_size_k > (16 * 1024 * 1024)) { + retcode = ERR_MD_LAYOUT_TOO_BIG; + goto fail_ldev; + } + + if (al_size_k < MD_32kB_SECT/2) { + retcode = ERR_MD_LAYOUT_TOO_SMALL; + goto fail_ldev; + } + + if (mdev->state.conn != C_CONNECTED) { + retcode = ERR_MD_LAYOUT_CONNECTED; + goto fail_ldev; + } + + change_al_layout = true; + } + if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev)) mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev); @@ -2373,16 +2463,22 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info) } ddsf = (rs.resize_force ? DDSF_FORCED : 0) | (rs.no_resync ? DDSF_NO_RESYNC : 0); - dd = drbd_determine_dev_size(mdev, ddsf); + dd = drbd_determine_dev_size(mdev, ddsf, change_al_layout ? 
&rs : NULL); drbd_md_sync(mdev); put_ldev(mdev); - if (dd == dev_size_error) { + if (dd == DS_ERROR) { retcode = ERR_NOMEM_BITMAP; goto fail; + } else if (dd == DS_ERROR_SPACE_MD) { + retcode = ERR_MD_LAYOUT_NO_FIT; + goto fail; + } else if (dd == DS_ERROR_SHRINK) { + retcode = ERR_IMPLICIT_SHRINK; + goto fail; } if (mdev->state.conn == C_CONNECTED) { - if (dd == grew) + if (dd == DS_GREW) set_bit(RESIZE_PENDING, &mdev->flags); drbd_send_uuids(mdev); @@ -2658,7 +2754,6 @@ int nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev, const struct sib_info *sib) { struct state_info *si = NULL; /* for sizeof(si->member); */ - struct net_conf *nc; struct nlattr *nla; int got_ldev; int err = 0; @@ -2688,13 +2783,19 @@ int nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev, goto nla_put_failure; rcu_read_lock(); - if (got_ldev) - if (disk_conf_to_skb(skb, rcu_dereference(mdev->ldev->disk_conf), exclude_sensitive)) - goto nla_put_failure; + if (got_ldev) { + struct disk_conf *disk_conf; - nc = rcu_dereference(mdev->tconn->net_conf); - if (nc) - err = net_conf_to_skb(skb, nc, exclude_sensitive); + disk_conf = rcu_dereference(mdev->ldev->disk_conf); + err = disk_conf_to_skb(skb, disk_conf, exclude_sensitive); + } + if (!err) { + struct net_conf *nc; + + nc = rcu_dereference(mdev->tconn->net_conf); + if (nc) + err = net_conf_to_skb(skb, nc, exclude_sensitive); + } rcu_read_unlock(); if (err) goto nla_put_failure; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 4222affff488..cc29cd3bf78b 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1039,6 +1039,8 @@ randomize: rcu_read_lock(); idr_for_each_entry(&tconn->volumes, mdev, vnr) { kref_get(&mdev->kref); + rcu_read_unlock(); + /* Prevent a race between resync-handshake and * being promoted to Primary. 
* @@ -1049,8 +1051,6 @@ randomize: mutex_lock(mdev->state_mutex); mutex_unlock(mdev->state_mutex); - rcu_read_unlock(); - if (discard_my_data) set_bit(DISCARD_MY_DATA, &mdev->flags); else @@ -3545,7 +3545,7 @@ static int receive_sizes(struct drbd_tconn *tconn, struct packet_info *pi) { struct drbd_conf *mdev; struct p_sizes *p = pi->data; - enum determine_dev_size dd = unchanged; + enum determine_dev_size dd = DS_UNCHANGED; sector_t p_size, p_usize, my_usize; int ldsc = 0; /* local disk size changed */ enum dds_flags ddsf; @@ -3617,9 +3617,9 @@ static int receive_sizes(struct drbd_tconn *tconn, struct packet_info *pi) ddsf = be16_to_cpu(p->dds_flags); if (get_ldev(mdev)) { - dd = drbd_determine_dev_size(mdev, ddsf); + dd = drbd_determine_dev_size(mdev, ddsf, NULL); put_ldev(mdev); - if (dd == dev_size_error) + if (dd == DS_ERROR) return -EIO; drbd_md_sync(mdev); } else { @@ -3647,7 +3647,7 @@ static int receive_sizes(struct drbd_tconn *tconn, struct packet_info *pi) drbd_send_sizes(mdev, 0, ddsf); } if (test_and_clear_bit(RESIZE_PENDING, &mdev->flags) || - (dd == grew && mdev->state.conn == C_CONNECTED)) { + (dd == DS_GREW && mdev->state.conn == C_CONNECTED)) { if (mdev->state.pdsk >= D_INCONSISTENT && mdev->state.disk >= D_INCONSISTENT) { if (ddsf & DDSF_NO_RESYNC) diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 90c5be2b1d30..216d47b7e88b 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1115,8 +1115,10 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, drbd_thread_restart_nowait(&mdev->tconn->receiver); /* Resume AL writing if we get a connection */ - if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) + if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) { drbd_resume_al(mdev); + mdev->tconn->connect_cnt++; + } /* remember last attach time so request_timer_fn() won't * kill newly established sessions while we are still trying to thaw diff --git a/drivers/block/rsxx/core.c 
b/drivers/block/rsxx/core.c index 5af21f2db29c..6e85e21445eb 100644 --- a/drivers/block/rsxx/core.c +++ b/drivers/block/rsxx/core.c @@ -31,6 +31,8 @@ #include <linux/slab.h> #include <linux/bitops.h> #include <linux/delay.h> +#include <linux/debugfs.h> +#include <linux/seq_file.h> #include <linux/genhd.h> #include <linux/idr.h> @@ -39,8 +41,9 @@ #include "rsxx_cfg.h" #define NO_LEGACY 0 +#define SYNC_START_TIMEOUT (10 * 60) /* 10 minutes */ -MODULE_DESCRIPTION("IBM FlashSystem 70/80 PCIe SSD Device Driver"); +MODULE_DESCRIPTION("IBM Flash Adapter 900GB Full Height Device Driver"); MODULE_AUTHOR("Joshua Morris/Philip Kelleher, IBM"); MODULE_LICENSE("GPL"); MODULE_VERSION(DRIVER_VERSION); @@ -49,9 +52,282 @@ static unsigned int force_legacy = NO_LEGACY; module_param(force_legacy, uint, 0444); MODULE_PARM_DESC(force_legacy, "Force the use of legacy type PCI interrupts"); +static unsigned int sync_start = 1; +module_param(sync_start, uint, 0444); +MODULE_PARM_DESC(sync_start, "On by Default: Driver load will not complete " + "until the card startup has completed."); + static DEFINE_IDA(rsxx_disk_ida); static DEFINE_SPINLOCK(rsxx_ida_lock); +/* --------------------Debugfs Setup ------------------- */ + +struct rsxx_cram { + u32 f_pos; + u32 offset; + void *i_private; +}; + +static int rsxx_attr_pci_regs_show(struct seq_file *m, void *p) +{ + struct rsxx_cardinfo *card = m->private; + + seq_printf(m, "HWID 0x%08x\n", + ioread32(card->regmap + HWID)); + seq_printf(m, "SCRATCH 0x%08x\n", + ioread32(card->regmap + SCRATCH)); + seq_printf(m, "IER 0x%08x\n", + ioread32(card->regmap + IER)); + seq_printf(m, "IPR 0x%08x\n", + ioread32(card->regmap + IPR)); + seq_printf(m, "CREG_CMD 0x%08x\n", + ioread32(card->regmap + CREG_CMD)); + seq_printf(m, "CREG_ADD 0x%08x\n", + ioread32(card->regmap + CREG_ADD)); + seq_printf(m, "CREG_CNT 0x%08x\n", + ioread32(card->regmap + CREG_CNT)); + seq_printf(m, "CREG_STAT 0x%08x\n", + ioread32(card->regmap + CREG_STAT)); + seq_printf(m, 
"CREG_DATA0 0x%08x\n", + ioread32(card->regmap + CREG_DATA0)); + seq_printf(m, "CREG_DATA1 0x%08x\n", + ioread32(card->regmap + CREG_DATA1)); + seq_printf(m, "CREG_DATA2 0x%08x\n", + ioread32(card->regmap + CREG_DATA2)); + seq_printf(m, "CREG_DATA3 0x%08x\n", + ioread32(card->regmap + CREG_DATA3)); + seq_printf(m, "CREG_DATA4 0x%08x\n", + ioread32(card->regmap + CREG_DATA4)); + seq_printf(m, "CREG_DATA5 0x%08x\n", + ioread32(card->regmap + CREG_DATA5)); + seq_printf(m, "CREG_DATA6 0x%08x\n", + ioread32(card->regmap + CREG_DATA6)); + seq_printf(m, "CREG_DATA7 0x%08x\n", + ioread32(card->regmap + CREG_DATA7)); + seq_printf(m, "INTR_COAL 0x%08x\n", + ioread32(card->regmap + INTR_COAL)); + seq_printf(m, "HW_ERROR 0x%08x\n", + ioread32(card->regmap + HW_ERROR)); + seq_printf(m, "DEBUG0 0x%08x\n", + ioread32(card->regmap + PCI_DEBUG0)); + seq_printf(m, "DEBUG1 0x%08x\n", + ioread32(card->regmap + PCI_DEBUG1)); + seq_printf(m, "DEBUG2 0x%08x\n", + ioread32(card->regmap + PCI_DEBUG2)); + seq_printf(m, "DEBUG3 0x%08x\n", + ioread32(card->regmap + PCI_DEBUG3)); + seq_printf(m, "DEBUG4 0x%08x\n", + ioread32(card->regmap + PCI_DEBUG4)); + seq_printf(m, "DEBUG5 0x%08x\n", + ioread32(card->regmap + PCI_DEBUG5)); + seq_printf(m, "DEBUG6 0x%08x\n", + ioread32(card->regmap + PCI_DEBUG6)); + seq_printf(m, "DEBUG7 0x%08x\n", + ioread32(card->regmap + PCI_DEBUG7)); + seq_printf(m, "RECONFIG 0x%08x\n", + ioread32(card->regmap + PCI_RECONFIG)); + + return 0; +} + +static int rsxx_attr_stats_show(struct seq_file *m, void *p) +{ + struct rsxx_cardinfo *card = m->private; + int i; + + for (i = 0; i < card->n_targets; i++) { + seq_printf(m, "Ctrl %d CRC Errors = %d\n", + i, card->ctrl[i].stats.crc_errors); + seq_printf(m, "Ctrl %d Hard Errors = %d\n", + i, card->ctrl[i].stats.hard_errors); + seq_printf(m, "Ctrl %d Soft Errors = %d\n", + i, card->ctrl[i].stats.soft_errors); + seq_printf(m, "Ctrl %d Writes Issued = %d\n", + i, card->ctrl[i].stats.writes_issued); + seq_printf(m, "Ctrl %d 
Writes Failed = %d\n", + i, card->ctrl[i].stats.writes_failed); + seq_printf(m, "Ctrl %d Reads Issued = %d\n", + i, card->ctrl[i].stats.reads_issued); + seq_printf(m, "Ctrl %d Reads Failed = %d\n", + i, card->ctrl[i].stats.reads_failed); + seq_printf(m, "Ctrl %d Reads Retried = %d\n", + i, card->ctrl[i].stats.reads_retried); + seq_printf(m, "Ctrl %d Discards Issued = %d\n", + i, card->ctrl[i].stats.discards_issued); + seq_printf(m, "Ctrl %d Discards Failed = %d\n", + i, card->ctrl[i].stats.discards_failed); + seq_printf(m, "Ctrl %d DMA SW Errors = %d\n", + i, card->ctrl[i].stats.dma_sw_err); + seq_printf(m, "Ctrl %d DMA HW Faults = %d\n", + i, card->ctrl[i].stats.dma_hw_fault); + seq_printf(m, "Ctrl %d DMAs Cancelled = %d\n", + i, card->ctrl[i].stats.dma_cancelled); + seq_printf(m, "Ctrl %d SW Queue Depth = %d\n", + i, card->ctrl[i].stats.sw_q_depth); + seq_printf(m, "Ctrl %d HW Queue Depth = %d\n", + i, atomic_read(&card->ctrl[i].stats.hw_q_depth)); + } + + return 0; +} + +static int rsxx_attr_stats_open(struct inode *inode, struct file *file) +{ + return single_open(file, rsxx_attr_stats_show, inode->i_private); +} + +static int rsxx_attr_pci_regs_open(struct inode *inode, struct file *file) +{ + return single_open(file, rsxx_attr_pci_regs_show, inode->i_private); +} + +static ssize_t rsxx_cram_read(struct file *fp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + struct rsxx_cram *info = fp->private_data; + struct rsxx_cardinfo *card = info->i_private; + char *buf; + int st; + + buf = kzalloc(sizeof(*buf) * cnt, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + info->f_pos = (u32)*ppos + info->offset; + + st = rsxx_creg_read(card, CREG_ADD_CRAM + info->f_pos, cnt, buf, 1); + if (st) + return st; + + st = copy_to_user(ubuf, buf, cnt); + if (st) + return st; + + info->offset += cnt; + + kfree(buf); + + return cnt; +} + +static ssize_t rsxx_cram_write(struct file *fp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + struct rsxx_cram *info = 
fp->private_data; + struct rsxx_cardinfo *card = info->i_private; + char *buf; + int st; + + buf = kzalloc(sizeof(*buf) * cnt, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + st = copy_from_user(buf, ubuf, cnt); + if (st) + return st; + + info->f_pos = (u32)*ppos + info->offset; + + st = rsxx_creg_write(card, CREG_ADD_CRAM + info->f_pos, cnt, buf, 1); + if (st) + return st; + + info->offset += cnt; + + kfree(buf); + + return cnt; +} + +static int rsxx_cram_open(struct inode *inode, struct file *file) +{ + struct rsxx_cram *info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) + return -ENOMEM; + + info->i_private = inode->i_private; + info->f_pos = file->f_pos; + file->private_data = info; + + return 0; +} + +static int rsxx_cram_release(struct inode *inode, struct file *file) +{ + struct rsxx_cram *info = file->private_data; + + if (!info) + return 0; + + kfree(info); + file->private_data = NULL; + + return 0; +} + +static const struct file_operations debugfs_cram_fops = { + .owner = THIS_MODULE, + .open = rsxx_cram_open, + .read = rsxx_cram_read, + .write = rsxx_cram_write, + .release = rsxx_cram_release, +}; + +static const struct file_operations debugfs_stats_fops = { + .owner = THIS_MODULE, + .open = rsxx_attr_stats_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static const struct file_operations debugfs_pci_regs_fops = { + .owner = THIS_MODULE, + .open = rsxx_attr_pci_regs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static void rsxx_debugfs_dev_new(struct rsxx_cardinfo *card) +{ + struct dentry *debugfs_stats; + struct dentry *debugfs_pci_regs; + struct dentry *debugfs_cram; + + card->debugfs_dir = debugfs_create_dir(card->gendisk->disk_name, NULL); + if (IS_ERR_OR_NULL(card->debugfs_dir)) + goto failed_debugfs_dir; + + debugfs_stats = debugfs_create_file("stats", S_IRUGO, + card->debugfs_dir, card, + &debugfs_stats_fops); + if (IS_ERR_OR_NULL(debugfs_stats)) + goto 
failed_debugfs_stats; + + debugfs_pci_regs = debugfs_create_file("pci_regs", S_IRUGO, + card->debugfs_dir, card, + &debugfs_pci_regs_fops); + if (IS_ERR_OR_NULL(debugfs_pci_regs)) + goto failed_debugfs_pci_regs; + + debugfs_cram = debugfs_create_file("cram", S_IRUGO | S_IWUSR, + card->debugfs_dir, card, + &debugfs_cram_fops); + if (IS_ERR_OR_NULL(debugfs_cram)) + goto failed_debugfs_cram; + + return; +failed_debugfs_cram: + debugfs_remove(debugfs_pci_regs); +failed_debugfs_pci_regs: + debugfs_remove(debugfs_stats); +failed_debugfs_stats: + debugfs_remove(card->debugfs_dir); +failed_debugfs_dir: + card->debugfs_dir = NULL; +} + /*----------------- Interrupt Control & Handling -------------------*/ static void rsxx_mask_interrupts(struct rsxx_cardinfo *card) @@ -163,12 +439,13 @@ static irqreturn_t rsxx_isr(int irq, void *pdata) } if (isr & CR_INTR_CREG) { - schedule_work(&card->creg_ctrl.done_work); + queue_work(card->creg_ctrl.creg_wq, + &card->creg_ctrl.done_work); handled++; } if (isr & CR_INTR_EVENT) { - schedule_work(&card->event_work); + queue_work(card->event_wq, &card->event_work); rsxx_disable_ier_and_isr(card, CR_INTR_EVENT); handled++; } @@ -329,7 +606,7 @@ static int rsxx_eeh_frozen(struct pci_dev *dev) int i; int st; - dev_warn(&dev->dev, "IBM FlashSystem PCI: preparing for slot reset.\n"); + dev_warn(&dev->dev, "IBM Flash Adapter PCI: preparing for slot reset.\n"); card->eeh_state = 1; rsxx_mask_interrupts(card); @@ -367,15 +644,26 @@ static void rsxx_eeh_failure(struct pci_dev *dev) { struct rsxx_cardinfo *card = pci_get_drvdata(dev); int i; + int cnt = 0; - dev_err(&dev->dev, "IBM FlashSystem PCI: disabling failed card.\n"); + dev_err(&dev->dev, "IBM Flash Adapter PCI: disabling failed card.\n"); card->eeh_state = 1; + card->halt = 1; - for (i = 0; i < card->n_targets; i++) - del_timer_sync(&card->ctrl[i].activity_timer); + for (i = 0; i < card->n_targets; i++) { + spin_lock_bh(&card->ctrl[i].queue_lock); + cnt = 
rsxx_cleanup_dma_queue(&card->ctrl[i], + &card->ctrl[i].queue); + spin_unlock_bh(&card->ctrl[i].queue_lock); + + cnt += rsxx_dma_cancel(&card->ctrl[i]); - rsxx_eeh_cancel_dmas(card); + if (cnt) + dev_info(CARD_TO_DEV(card), + "Freed %d queued DMAs on channel %d\n", + cnt, card->ctrl[i].id); + } } static int rsxx_eeh_fifo_flush_poll(struct rsxx_cardinfo *card) @@ -432,7 +720,7 @@ static pci_ers_result_t rsxx_slot_reset(struct pci_dev *dev) int st; dev_warn(&dev->dev, - "IBM FlashSystem PCI: recovering from slot reset.\n"); + "IBM Flash Adapter PCI: recovering from slot reset.\n"); st = pci_enable_device(dev); if (st) @@ -485,7 +773,7 @@ static pci_ers_result_t rsxx_slot_reset(struct pci_dev *dev) &card->ctrl[i].issue_dma_work); } - dev_info(&dev->dev, "IBM FlashSystem PCI: recovery complete.\n"); + dev_info(&dev->dev, "IBM Flash Adapter PCI: recovery complete.\n"); return PCI_ERS_RESULT_RECOVERED; @@ -528,6 +816,7 @@ static int rsxx_pci_probe(struct pci_dev *dev, { struct rsxx_cardinfo *card; int st; + unsigned int sync_timeout; dev_info(&dev->dev, "PCI-Flash SSD discovered\n"); @@ -610,7 +899,11 @@ static int rsxx_pci_probe(struct pci_dev *dev, } /************* Setup Processor Command Interface *************/ - rsxx_creg_setup(card); + st = rsxx_creg_setup(card); + if (st) { + dev_err(CARD_TO_DEV(card), "Failed to setup creg interface.\n"); + goto failed_creg_setup; + } spin_lock_irq(&card->irq_lock); rsxx_enable_ier_and_isr(card, CR_INTR_CREG); @@ -650,6 +943,12 @@ static int rsxx_pci_probe(struct pci_dev *dev, } /************* Setup Card Event Handler *************/ + card->event_wq = create_singlethread_workqueue(DRIVER_NAME"_event"); + if (!card->event_wq) { + dev_err(CARD_TO_DEV(card), "Failed card event setup.\n"); + goto failed_event_handler; + } + INIT_WORK(&card->event_work, card_event_handler); st = rsxx_setup_dev(card); @@ -676,6 +975,33 @@ static int rsxx_pci_probe(struct pci_dev *dev, if (st) dev_crit(CARD_TO_DEV(card), "Failed issuing card 
startup\n"); + if (sync_start) { + sync_timeout = SYNC_START_TIMEOUT; + + dev_info(CARD_TO_DEV(card), + "Waiting for card to startup\n"); + + do { + ssleep(1); + sync_timeout--; + + rsxx_get_card_state(card, &card->state); + } while (sync_timeout && + (card->state == CARD_STATE_STARTING)); + + if (card->state == CARD_STATE_STARTING) { + dev_warn(CARD_TO_DEV(card), + "Card startup timed out\n"); + card->size8 = 0; + } else { + dev_info(CARD_TO_DEV(card), + "card state: %s\n", + rsxx_card_state_to_str(card->state)); + st = rsxx_get_card_size8(card, &card->size8); + if (st) + card->size8 = 0; + } + } } else if (card->state == CARD_STATE_GOOD || card->state == CARD_STATE_RD_ONLY_FAULT) { st = rsxx_get_card_size8(card, &card->size8); @@ -685,12 +1011,21 @@ static int rsxx_pci_probe(struct pci_dev *dev, rsxx_attach_dev(card); + /************* Setup Debugfs *************/ + rsxx_debugfs_dev_new(card); + return 0; failed_create_dev: + destroy_workqueue(card->event_wq); + card->event_wq = NULL; +failed_event_handler: rsxx_dma_destroy(card); failed_dma_setup: failed_compatiblity_check: + destroy_workqueue(card->creg_ctrl.creg_wq); + card->creg_ctrl.creg_wq = NULL; +failed_creg_setup: spin_lock_irq(&card->irq_lock); rsxx_disable_ier_and_isr(card, CR_INTR_ALL); spin_unlock_irq(&card->irq_lock); @@ -756,6 +1091,8 @@ static void rsxx_pci_remove(struct pci_dev *dev) /* Prevent work_structs from re-queuing themselves. */ card->halt = 1; + debugfs_remove_recursive(card->debugfs_dir); + free_irq(dev->irq, card); if (!force_legacy) diff --git a/drivers/block/rsxx/cregs.c b/drivers/block/rsxx/cregs.c index 4b5c020a0a65..926dce9c452f 100644 --- a/drivers/block/rsxx/cregs.c +++ b/drivers/block/rsxx/cregs.c @@ -431,6 +431,15 @@ static int __issue_creg_rw(struct rsxx_cardinfo *card, *hw_stat = completion.creg_status; if (completion.st) { + /* + * This read is needed to verify that there has not been any + * extreme errors that might have occurred, i.e. EEH. 
The + * function iowrite32 will not detect EEH errors, so it is + * necessary that we recover if such an error is the reason + * for the timeout. This is a dummy read. + */ + ioread32(card->regmap + SCRATCH); + dev_warn(CARD_TO_DEV(card), "creg command failed(%d x%08x)\n", completion.st, addr); @@ -727,6 +736,11 @@ int rsxx_creg_setup(struct rsxx_cardinfo *card) { card->creg_ctrl.active_cmd = NULL; + card->creg_ctrl.creg_wq = + create_singlethread_workqueue(DRIVER_NAME"_creg"); + if (!card->creg_ctrl.creg_wq) + return -ENOMEM; + INIT_WORK(&card->creg_ctrl.done_work, creg_cmd_done); mutex_init(&card->creg_ctrl.reset_lock); INIT_LIST_HEAD(&card->creg_ctrl.queue); diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c index 4346d17d2949..d7af441880be 100644 --- a/drivers/block/rsxx/dev.c +++ b/drivers/block/rsxx/dev.c @@ -155,7 +155,8 @@ static void bio_dma_done_cb(struct rsxx_cardinfo *card, atomic_set(&meta->error, 1); if (atomic_dec_and_test(&meta->pending_dmas)) { - disk_stats_complete(card, meta->bio, meta->start_time); + if (!card->eeh_state && card->gendisk) + disk_stats_complete(card, meta->bio, meta->start_time); bio_endio(meta->bio, atomic_read(&meta->error) ? -EIO : 0); kmem_cache_free(bio_meta_pool, meta); @@ -170,6 +171,12 @@ static void rsxx_make_request(struct request_queue *q, struct bio *bio) might_sleep(); + if (!card) + goto req_err; + + if (bio->bi_sector + (bio->bi_size >> 9) > get_capacity(card->gendisk)) + goto req_err; + if (unlikely(card->halt)) { st = -EFAULT; goto req_err; @@ -196,7 +203,8 @@ static void rsxx_make_request(struct request_queue *q, struct bio *bio) atomic_set(&bio_meta->pending_dmas, 0); bio_meta->start_time = jiffies; - disk_stats_start(card, bio); + if (!unlikely(card->halt)) + disk_stats_start(card, bio); dev_dbg(CARD_TO_DEV(card), "BIO[%c]: meta: %p addr8: x%llx size: %d\n", bio_data_dir(bio) ? 
'W' : 'R', bio_meta, @@ -225,24 +233,6 @@ static bool rsxx_discard_supported(struct rsxx_cardinfo *card) return (pci_rev >= RSXX_DISCARD_SUPPORT); } -static unsigned short rsxx_get_logical_block_size( - struct rsxx_cardinfo *card) -{ - u32 capabilities = 0; - int st; - - st = rsxx_get_card_capabilities(card, &capabilities); - if (st) - dev_warn(CARD_TO_DEV(card), - "Failed reading card capabilities register\n"); - - /* Earlier firmware did not have support for 512 byte accesses */ - if (capabilities & CARD_CAP_SUBPAGE_WRITES) - return 512; - else - return RSXX_HW_BLK_SIZE; -} - int rsxx_attach_dev(struct rsxx_cardinfo *card) { mutex_lock(&card->dev_lock); @@ -305,7 +295,7 @@ int rsxx_setup_dev(struct rsxx_cardinfo *card) return -ENOMEM; } - blk_size = rsxx_get_logical_block_size(card); + blk_size = card->config.data.block_size; blk_queue_make_request(card->queue, rsxx_make_request); blk_queue_bounce_limit(card->queue, BLK_BOUNCE_ANY); @@ -347,6 +337,7 @@ void rsxx_destroy_dev(struct rsxx_cardinfo *card) card->gendisk = NULL; blk_cleanup_queue(card->queue); + card->queue->queuedata = NULL; unregister_blkdev(card->major, DRIVER_NAME); } diff --git a/drivers/block/rsxx/dma.c b/drivers/block/rsxx/dma.c index 0607513cfb41..bed32f16b084 100644 --- a/drivers/block/rsxx/dma.c +++ b/drivers/block/rsxx/dma.c @@ -245,6 +245,22 @@ static void rsxx_complete_dma(struct rsxx_dma_ctrl *ctrl, kmem_cache_free(rsxx_dma_pool, dma); } +int rsxx_cleanup_dma_queue(struct rsxx_dma_ctrl *ctrl, + struct list_head *q) +{ + struct rsxx_dma *dma; + struct rsxx_dma *tmp; + int cnt = 0; + + list_for_each_entry_safe(dma, tmp, q, list) { + list_del(&dma->list); + rsxx_complete_dma(ctrl, dma, DMA_CANCELLED); + cnt++; + } + + return cnt; +} + static void rsxx_requeue_dma(struct rsxx_dma_ctrl *ctrl, struct rsxx_dma *dma) { @@ -252,9 +268,10 @@ static void rsxx_requeue_dma(struct rsxx_dma_ctrl *ctrl, * Requeued DMAs go to the front of the queue so they are issued * first. 
*/ - spin_lock(&ctrl->queue_lock); + spin_lock_bh(&ctrl->queue_lock); + ctrl->stats.sw_q_depth++; list_add(&dma->list, &ctrl->queue); - spin_unlock(&ctrl->queue_lock); + spin_unlock_bh(&ctrl->queue_lock); } static void rsxx_handle_dma_error(struct rsxx_dma_ctrl *ctrl, @@ -329,6 +346,7 @@ static void rsxx_handle_dma_error(struct rsxx_dma_ctrl *ctrl, static void dma_engine_stalled(unsigned long data) { struct rsxx_dma_ctrl *ctrl = (struct rsxx_dma_ctrl *)data; + int cnt; if (atomic_read(&ctrl->stats.hw_q_depth) == 0 || unlikely(ctrl->card->eeh_state)) @@ -349,18 +367,28 @@ static void dma_engine_stalled(unsigned long data) "DMA channel %d has stalled, faulting interface.\n", ctrl->id); ctrl->card->dma_fault = 1; + + /* Clean up the DMA queue */ + spin_lock(&ctrl->queue_lock); + cnt = rsxx_cleanup_dma_queue(ctrl, &ctrl->queue); + spin_unlock(&ctrl->queue_lock); + + cnt += rsxx_dma_cancel(ctrl); + + if (cnt) + dev_info(CARD_TO_DEV(ctrl->card), + "Freed %d queued DMAs on channel %d\n", + cnt, ctrl->id); } } -static void rsxx_issue_dmas(struct work_struct *work) +static void rsxx_issue_dmas(struct rsxx_dma_ctrl *ctrl) { - struct rsxx_dma_ctrl *ctrl; struct rsxx_dma *dma; int tag; int cmds_pending = 0; struct hw_cmd *hw_cmd_buf; - ctrl = container_of(work, struct rsxx_dma_ctrl, issue_dma_work); hw_cmd_buf = ctrl->cmd.buf; if (unlikely(ctrl->card->halt) || @@ -368,22 +396,22 @@ static void rsxx_issue_dmas(struct work_struct *work) return; while (1) { - spin_lock(&ctrl->queue_lock); + spin_lock_bh(&ctrl->queue_lock); if (list_empty(&ctrl->queue)) { - spin_unlock(&ctrl->queue_lock); + spin_unlock_bh(&ctrl->queue_lock); break; } - spin_unlock(&ctrl->queue_lock); + spin_unlock_bh(&ctrl->queue_lock); tag = pop_tracker(ctrl->trackers); if (tag == -1) break; - spin_lock(&ctrl->queue_lock); + spin_lock_bh(&ctrl->queue_lock); dma = list_entry(ctrl->queue.next, struct rsxx_dma, list); list_del(&dma->list); ctrl->stats.sw_q_depth--; - spin_unlock(&ctrl->queue_lock); + 
spin_unlock_bh(&ctrl->queue_lock); /* * This will catch any DMAs that slipped in right before the @@ -440,9 +468,8 @@ static void rsxx_issue_dmas(struct work_struct *work) } } -static void rsxx_dma_done(struct work_struct *work) +static void rsxx_dma_done(struct rsxx_dma_ctrl *ctrl) { - struct rsxx_dma_ctrl *ctrl; struct rsxx_dma *dma; unsigned long flags; u16 count; @@ -450,7 +477,6 @@ static void rsxx_dma_done(struct work_struct *work) u8 tag; struct hw_status *hw_st_buf; - ctrl = container_of(work, struct rsxx_dma_ctrl, dma_done_work); hw_st_buf = ctrl->status.buf; if (unlikely(ctrl->card->halt) || @@ -520,33 +546,32 @@ static void rsxx_dma_done(struct work_struct *work) rsxx_enable_ier(ctrl->card, CR_INTR_DMA(ctrl->id)); spin_unlock_irqrestore(&ctrl->card->irq_lock, flags); - spin_lock(&ctrl->queue_lock); + spin_lock_bh(&ctrl->queue_lock); if (ctrl->stats.sw_q_depth) queue_work(ctrl->issue_wq, &ctrl->issue_dma_work); - spin_unlock(&ctrl->queue_lock); + spin_unlock_bh(&ctrl->queue_lock); } -static int rsxx_cleanup_dma_queue(struct rsxx_cardinfo *card, - struct list_head *q) +static void rsxx_schedule_issue(struct work_struct *work) { - struct rsxx_dma *dma; - struct rsxx_dma *tmp; - int cnt = 0; + struct rsxx_dma_ctrl *ctrl; - list_for_each_entry_safe(dma, tmp, q, list) { - list_del(&dma->list); + ctrl = container_of(work, struct rsxx_dma_ctrl, issue_dma_work); - if (dma->dma_addr) - pci_unmap_page(card->dev, dma->dma_addr, - get_dma_size(dma), - (dma->cmd == HW_CMD_BLK_WRITE) ? 
- PCI_DMA_TODEVICE : - PCI_DMA_FROMDEVICE); - kmem_cache_free(rsxx_dma_pool, dma); - cnt++; - } + mutex_lock(&ctrl->work_lock); + rsxx_issue_dmas(ctrl); + mutex_unlock(&ctrl->work_lock); +} - return cnt; +static void rsxx_schedule_done(struct work_struct *work) +{ + struct rsxx_dma_ctrl *ctrl; + + ctrl = container_of(work, struct rsxx_dma_ctrl, dma_done_work); + + mutex_lock(&ctrl->work_lock); + rsxx_dma_done(ctrl); + mutex_unlock(&ctrl->work_lock); } static int rsxx_queue_discard(struct rsxx_cardinfo *card, @@ -698,10 +723,10 @@ int rsxx_dma_queue_bio(struct rsxx_cardinfo *card, for (i = 0; i < card->n_targets; i++) { if (!list_empty(&dma_list[i])) { - spin_lock(&card->ctrl[i].queue_lock); + spin_lock_bh(&card->ctrl[i].queue_lock); card->ctrl[i].stats.sw_q_depth += dma_cnt[i]; list_splice_tail(&dma_list[i], &card->ctrl[i].queue); - spin_unlock(&card->ctrl[i].queue_lock); + spin_unlock_bh(&card->ctrl[i].queue_lock); queue_work(card->ctrl[i].issue_wq, &card->ctrl[i].issue_dma_work); @@ -711,8 +736,11 @@ int rsxx_dma_queue_bio(struct rsxx_cardinfo *card, return 0; bvec_err: - for (i = 0; i < card->n_targets; i++) - rsxx_cleanup_dma_queue(card, &dma_list[i]); + for (i = 0; i < card->n_targets; i++) { + spin_lock_bh(&card->ctrl[i].queue_lock); + rsxx_cleanup_dma_queue(&card->ctrl[i], &dma_list[i]); + spin_unlock_bh(&card->ctrl[i].queue_lock); + } return st; } @@ -780,6 +808,7 @@ static int rsxx_dma_ctrl_init(struct pci_dev *dev, spin_lock_init(&ctrl->trackers->lock); spin_lock_init(&ctrl->queue_lock); + mutex_init(&ctrl->work_lock); INIT_LIST_HEAD(&ctrl->queue); setup_timer(&ctrl->activity_timer, dma_engine_stalled, @@ -793,8 +822,8 @@ static int rsxx_dma_ctrl_init(struct pci_dev *dev, if (!ctrl->done_wq) return -ENOMEM; - INIT_WORK(&ctrl->issue_dma_work, rsxx_issue_dmas); - INIT_WORK(&ctrl->dma_done_work, rsxx_dma_done); + INIT_WORK(&ctrl->issue_dma_work, rsxx_schedule_issue); + INIT_WORK(&ctrl->dma_done_work, rsxx_schedule_done); st = rsxx_hw_buffers_init(dev, ctrl); 
if (st) @@ -918,13 +947,30 @@ failed_dma_setup: return st; } +int rsxx_dma_cancel(struct rsxx_dma_ctrl *ctrl) +{ + struct rsxx_dma *dma; + int i; + int cnt = 0; + + /* Clean up issued DMAs */ + for (i = 0; i < RSXX_MAX_OUTSTANDING_CMDS; i++) { + dma = get_tracker_dma(ctrl->trackers, i); + if (dma) { + atomic_dec(&ctrl->stats.hw_q_depth); + rsxx_complete_dma(ctrl, dma, DMA_CANCELLED); + push_tracker(ctrl->trackers, i); + cnt++; + } + } + + return cnt; +} void rsxx_dma_destroy(struct rsxx_cardinfo *card) { struct rsxx_dma_ctrl *ctrl; - struct rsxx_dma *dma; - int i, j; - int cnt = 0; + int i; for (i = 0; i < card->n_targets; i++) { ctrl = &card->ctrl[i]; @@ -943,33 +989,11 @@ void rsxx_dma_destroy(struct rsxx_cardinfo *card) del_timer_sync(&ctrl->activity_timer); /* Clean up the DMA queue */ - spin_lock(&ctrl->queue_lock); - cnt = rsxx_cleanup_dma_queue(card, &ctrl->queue); - spin_unlock(&ctrl->queue_lock); - - if (cnt) - dev_info(CARD_TO_DEV(card), - "Freed %d queued DMAs on channel %d\n", - cnt, i); - - /* Clean up issued DMAs */ - for (j = 0; j < RSXX_MAX_OUTSTANDING_CMDS; j++) { - dma = get_tracker_dma(ctrl->trackers, j); - if (dma) { - pci_unmap_page(card->dev, dma->dma_addr, - get_dma_size(dma), - (dma->cmd == HW_CMD_BLK_WRITE) ? 
- PCI_DMA_TODEVICE : - PCI_DMA_FROMDEVICE); - kmem_cache_free(rsxx_dma_pool, dma); - cnt++; - } - } + spin_lock_bh(&ctrl->queue_lock); + rsxx_cleanup_dma_queue(ctrl, &ctrl->queue); + spin_unlock_bh(&ctrl->queue_lock); - if (cnt) - dev_info(CARD_TO_DEV(card), - "Freed %d pending DMAs on channel %d\n", - cnt, i); + rsxx_dma_cancel(ctrl); vfree(ctrl->trackers); @@ -1013,7 +1037,7 @@ int rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card) cnt++; } - spin_lock(&card->ctrl[i].queue_lock); + spin_lock_bh(&card->ctrl[i].queue_lock); list_splice(&issued_dmas[i], &card->ctrl[i].queue); atomic_sub(cnt, &card->ctrl[i].stats.hw_q_depth); @@ -1028,7 +1052,7 @@ int rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card) PCI_DMA_TODEVICE : PCI_DMA_FROMDEVICE); } - spin_unlock(&card->ctrl[i].queue_lock); + spin_unlock_bh(&card->ctrl[i].queue_lock); } kfree(issued_dmas); @@ -1036,30 +1060,13 @@ int rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card) return 0; } -void rsxx_eeh_cancel_dmas(struct rsxx_cardinfo *card) -{ - struct rsxx_dma *dma; - struct rsxx_dma *tmp; - int i; - - for (i = 0; i < card->n_targets; i++) { - spin_lock(&card->ctrl[i].queue_lock); - list_for_each_entry_safe(dma, tmp, &card->ctrl[i].queue, list) { - list_del(&dma->list); - - rsxx_complete_dma(&card->ctrl[i], dma, DMA_CANCELLED); - } - spin_unlock(&card->ctrl[i].queue_lock); - } -} - int rsxx_eeh_remap_dmas(struct rsxx_cardinfo *card) { struct rsxx_dma *dma; int i; for (i = 0; i < card->n_targets; i++) { - spin_lock(&card->ctrl[i].queue_lock); + spin_lock_bh(&card->ctrl[i].queue_lock); list_for_each_entry(dma, &card->ctrl[i].queue, list) { dma->dma_addr = pci_map_page(card->dev, dma->page, dma->pg_off, get_dma_size(dma), @@ -1067,12 +1074,12 @@ int rsxx_eeh_remap_dmas(struct rsxx_cardinfo *card) PCI_DMA_TODEVICE : PCI_DMA_FROMDEVICE); if (!dma->dma_addr) { - spin_unlock(&card->ctrl[i].queue_lock); + spin_unlock_bh(&card->ctrl[i].queue_lock); kmem_cache_free(rsxx_dma_pool, dma); return -ENOMEM; } } - 
spin_unlock(&card->ctrl[i].queue_lock); + spin_unlock_bh(&card->ctrl[i].queue_lock); } return 0; diff --git a/drivers/block/rsxx/rsxx_priv.h b/drivers/block/rsxx/rsxx_priv.h index 382e8bf5c03b..5ad5055a4104 100644 --- a/drivers/block/rsxx/rsxx_priv.h +++ b/drivers/block/rsxx/rsxx_priv.h @@ -39,6 +39,7 @@ #include <linux/vmalloc.h> #include <linux/timer.h> #include <linux/ioctl.h> +#include <linux/delay.h> #include "rsxx.h" #include "rsxx_cfg.h" @@ -114,6 +115,7 @@ struct rsxx_dma_ctrl { struct timer_list activity_timer; struct dma_tracker_list *trackers; struct rsxx_dma_stats stats; + struct mutex work_lock; }; struct rsxx_cardinfo { @@ -134,6 +136,7 @@ struct rsxx_cardinfo { spinlock_t lock; bool active; struct creg_cmd *active_cmd; + struct workqueue_struct *creg_wq; struct work_struct done_work; struct list_head queue; unsigned int q_depth; @@ -154,6 +157,7 @@ struct rsxx_cardinfo { int buf_len; } log; + struct workqueue_struct *event_wq; struct work_struct event_work; unsigned int state; u64 size8; @@ -181,6 +185,8 @@ struct rsxx_cardinfo { int n_targets; struct rsxx_dma_ctrl *ctrl; + + struct dentry *debugfs_dir; }; enum rsxx_pci_regmap { @@ -283,6 +289,7 @@ enum rsxx_creg_addr { CREG_ADD_CAPABILITIES = 0x80001050, CREG_ADD_LOG = 0x80002000, CREG_ADD_NUM_TARGETS = 0x80003000, + CREG_ADD_CRAM = 0xA0000000, CREG_ADD_CONFIG = 0xB0000000, }; @@ -372,6 +379,8 @@ typedef void (*rsxx_dma_cb)(struct rsxx_cardinfo *card, int rsxx_dma_setup(struct rsxx_cardinfo *card); void rsxx_dma_destroy(struct rsxx_cardinfo *card); int rsxx_dma_init(void); +int rsxx_cleanup_dma_queue(struct rsxx_dma_ctrl *ctrl, struct list_head *q); +int rsxx_dma_cancel(struct rsxx_dma_ctrl *ctrl); void rsxx_dma_cleanup(void); void rsxx_dma_queue_reset(struct rsxx_cardinfo *card); int rsxx_dma_configure(struct rsxx_cardinfo *card); @@ -382,7 +391,6 @@ int rsxx_dma_queue_bio(struct rsxx_cardinfo *card, void *cb_data); int rsxx_hw_buffers_init(struct pci_dev *dev, struct rsxx_dma_ctrl *ctrl); int 
rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card); -void rsxx_eeh_cancel_dmas(struct rsxx_cardinfo *card); int rsxx_eeh_remap_dmas(struct rsxx_cardinfo *card); /***** cregs.c *****/ diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index dd5b2fed97e9..bf4b9d282c04 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -50,110 +50,118 @@ #include "common.h" /* - * These are rather arbitrary. They are fairly large because adjacent requests - * pulled from a communication ring are quite likely to end up being part of - * the same scatter/gather request at the disc. + * Maximum number of unused free pages to keep in the internal buffer. + * Setting this to a value too low will reduce memory used in each backend, + * but can have a performance penalty. * - * ** TRY INCREASING 'xen_blkif_reqs' IF WRITE SPEEDS SEEM TOO LOW ** - * - * This will increase the chances of being able to write whole tracks. - * 64 should be enough to keep us competitive with Linux. + * A sane value is xen_blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST, but can + * be set to a lower value that might degrade performance on some intensive + * IO workloads. */ -static int xen_blkif_reqs = 64; -module_param_named(reqs, xen_blkif_reqs, int, 0); -MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate"); -/* Run-time switchable: /sys/module/blkback/parameters/ */ -static unsigned int log_stats; -module_param(log_stats, int, 0644); +static int xen_blkif_max_buffer_pages = 1024; +module_param_named(max_buffer_pages, xen_blkif_max_buffer_pages, int, 0644); +MODULE_PARM_DESC(max_buffer_pages, +"Maximum number of free pages to keep in each block backend buffer"); /* - * Each outstanding request that we've passed to the lower device layers has a - * 'pending_req' allocated to it. Each buffer_head that completes decrements - * the pendcnt towards zero. 
When it hits zero, the specified domain has a - * response queued for it, with the saved 'id' passed back. + * Maximum number of grants to map persistently in blkback. For maximum + * performance this should be the total numbers of grants that can be used + * to fill the ring, but since this might become too high, specially with + * the use of indirect descriptors, we set it to a value that provides good + * performance without using too much memory. + * + * When the list of persistent grants is full we clean it up using a LRU + * algorithm. */ -struct pending_req { - struct xen_blkif *blkif; - u64 id; - int nr_pages; - atomic_t pendcnt; - unsigned short operation; - int status; - struct list_head free_list; - DECLARE_BITMAP(unmap_seg, BLKIF_MAX_SEGMENTS_PER_REQUEST); -}; -#define BLKBACK_INVALID_HANDLE (~0) +static int xen_blkif_max_pgrants = 1056; +module_param_named(max_persistent_grants, xen_blkif_max_pgrants, int, 0644); +MODULE_PARM_DESC(max_persistent_grants, + "Maximum number of grants to map persistently"); -struct xen_blkbk { - struct pending_req *pending_reqs; - /* List of all 'pending_req' available */ - struct list_head pending_free; - /* And its spinlock. */ - spinlock_t pending_free_lock; - wait_queue_head_t pending_free_wq; - /* The list of all pages that are available. */ - struct page **pending_pages; - /* And the grant handles that are available. */ - grant_handle_t *pending_grant_handles; -}; - -static struct xen_blkbk *blkbk; +/* + * The LRU mechanism to clean the lists of persistent grants needs to + * be executed periodically. The time interval between consecutive executions + * of the purge mechanism is set in ms. + */ +#define LRU_INTERVAL 100 /* - * Maximum number of grant pages that can be mapped in blkback. - * BLKIF_MAX_SEGMENTS_PER_REQUEST * RING_SIZE is the maximum number of - * pages that blkback will persistently map. 
- * Currently, this is: - * RING_SIZE = 32 (for all known ring types) - * BLKIF_MAX_SEGMENTS_PER_REQUEST = 11 - * sizeof(struct persistent_gnt) = 48 - * So the maximum memory used to store the grants is: - * 32 * 11 * 48 = 16896 bytes + * When the persistent grants list is full we will remove unused grants + * from the list. The percent number of grants to be removed at each LRU + * execution. */ -static inline unsigned int max_mapped_grant_pages(enum blkif_protocol protocol) +#define LRU_PERCENT_CLEAN 5 + +/* Run-time switchable: /sys/module/blkback/parameters/ */ +static unsigned int log_stats; +module_param(log_stats, int, 0644); + +#define BLKBACK_INVALID_HANDLE (~0) + +/* Number of free pages to remove on each call to free_xenballooned_pages */ +#define NUM_BATCH_FREE_PAGES 10 + +static inline int get_free_page(struct xen_blkif *blkif, struct page **page) { - switch (protocol) { - case BLKIF_PROTOCOL_NATIVE: - return __CONST_RING_SIZE(blkif, PAGE_SIZE) * - BLKIF_MAX_SEGMENTS_PER_REQUEST; - case BLKIF_PROTOCOL_X86_32: - return __CONST_RING_SIZE(blkif_x86_32, PAGE_SIZE) * - BLKIF_MAX_SEGMENTS_PER_REQUEST; - case BLKIF_PROTOCOL_X86_64: - return __CONST_RING_SIZE(blkif_x86_64, PAGE_SIZE) * - BLKIF_MAX_SEGMENTS_PER_REQUEST; - default: - BUG(); + unsigned long flags; + + spin_lock_irqsave(&blkif->free_pages_lock, flags); + if (list_empty(&blkif->free_pages)) { + BUG_ON(blkif->free_pages_num != 0); + spin_unlock_irqrestore(&blkif->free_pages_lock, flags); + return alloc_xenballooned_pages(1, page, false); } + BUG_ON(blkif->free_pages_num == 0); + page[0] = list_first_entry(&blkif->free_pages, struct page, lru); + list_del(&page[0]->lru); + blkif->free_pages_num--; + spin_unlock_irqrestore(&blkif->free_pages_lock, flags); + return 0; } - -/* - * Little helpful macro to figure out the index and virtual address of the - * pending_pages[..]. For each 'pending_req' we have have up to - * BLKIF_MAX_SEGMENTS_PER_REQUEST (11) pages. 
The seg would be from 0 through - * 10 and would index in the pending_pages[..]. - */ -static inline int vaddr_pagenr(struct pending_req *req, int seg) +static inline void put_free_pages(struct xen_blkif *blkif, struct page **page, + int num) { - return (req - blkbk->pending_reqs) * - BLKIF_MAX_SEGMENTS_PER_REQUEST + seg; -} + unsigned long flags; + int i; -#define pending_page(req, seg) pending_pages[vaddr_pagenr(req, seg)] + spin_lock_irqsave(&blkif->free_pages_lock, flags); + for (i = 0; i < num; i++) + list_add(&page[i]->lru, &blkif->free_pages); + blkif->free_pages_num += num; + spin_unlock_irqrestore(&blkif->free_pages_lock, flags); +} -static inline unsigned long vaddr(struct pending_req *req, int seg) +static inline void shrink_free_pagepool(struct xen_blkif *blkif, int num) { - unsigned long pfn = page_to_pfn(blkbk->pending_page(req, seg)); - return (unsigned long)pfn_to_kaddr(pfn); -} + /* Remove requested pages in batches of NUM_BATCH_FREE_PAGES */ + struct page *page[NUM_BATCH_FREE_PAGES]; + unsigned int num_pages = 0; + unsigned long flags; -#define pending_handle(_req, _seg) \ - (blkbk->pending_grant_handles[vaddr_pagenr(_req, _seg)]) + spin_lock_irqsave(&blkif->free_pages_lock, flags); + while (blkif->free_pages_num > num) { + BUG_ON(list_empty(&blkif->free_pages)); + page[num_pages] = list_first_entry(&blkif->free_pages, + struct page, lru); + list_del(&page[num_pages]->lru); + blkif->free_pages_num--; + if (++num_pages == NUM_BATCH_FREE_PAGES) { + spin_unlock_irqrestore(&blkif->free_pages_lock, flags); + free_xenballooned_pages(num_pages, page); + spin_lock_irqsave(&blkif->free_pages_lock, flags); + num_pages = 0; + } + } + spin_unlock_irqrestore(&blkif->free_pages_lock, flags); + if (num_pages != 0) + free_xenballooned_pages(num_pages, page); +} +#define vaddr(page) ((unsigned long)pfn_to_kaddr(page_to_pfn(page))) static int do_block_io_op(struct xen_blkif *blkif); static int dispatch_rw_block_io(struct xen_blkif *blkif, @@ -170,13 +178,29 @@ 
static void make_response(struct xen_blkif *blkif, u64 id, (n) = (&(pos)->node != NULL) ? rb_next(&(pos)->node) : NULL) -static void add_persistent_gnt(struct rb_root *root, +/* + * We don't need locking around the persistent grant helpers + * because blkback uses a single-thread for each backed, so we + * can be sure that this functions will never be called recursively. + * + * The only exception to that is put_persistent_grant, that can be called + * from interrupt context (by xen_blkbk_unmap), so we have to use atomic + * bit operations to modify the flags of a persistent grant and to count + * the number of used grants. + */ +static int add_persistent_gnt(struct xen_blkif *blkif, struct persistent_gnt *persistent_gnt) { - struct rb_node **new = &(root->rb_node), *parent = NULL; + struct rb_node **new = NULL, *parent = NULL; struct persistent_gnt *this; + if (blkif->persistent_gnt_c >= xen_blkif_max_pgrants) { + if (!blkif->vbd.overflow_max_grants) + blkif->vbd.overflow_max_grants = 1; + return -EBUSY; + } /* Figure out where to put new node */ + new = &blkif->persistent_gnts.rb_node; while (*new) { this = container_of(*new, struct persistent_gnt, node); @@ -186,22 +210,28 @@ static void add_persistent_gnt(struct rb_root *root, else if (persistent_gnt->gnt > this->gnt) new = &((*new)->rb_right); else { - pr_alert(DRV_PFX " trying to add a gref that's already in the tree\n"); - BUG(); + pr_alert_ratelimited(DRV_PFX " trying to add a gref that's already in the tree\n"); + return -EINVAL; } } + bitmap_zero(persistent_gnt->flags, PERSISTENT_GNT_FLAGS_SIZE); + set_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags); /* Add new node and rebalance tree. 
*/ rb_link_node(&(persistent_gnt->node), parent, new); - rb_insert_color(&(persistent_gnt->node), root); + rb_insert_color(&(persistent_gnt->node), &blkif->persistent_gnts); + blkif->persistent_gnt_c++; + atomic_inc(&blkif->persistent_gnt_in_use); + return 0; } -static struct persistent_gnt *get_persistent_gnt(struct rb_root *root, +static struct persistent_gnt *get_persistent_gnt(struct xen_blkif *blkif, grant_ref_t gref) { struct persistent_gnt *data; - struct rb_node *node = root->rb_node; + struct rb_node *node = NULL; + node = blkif->persistent_gnts.rb_node; while (node) { data = container_of(node, struct persistent_gnt, node); @@ -209,13 +239,31 @@ static struct persistent_gnt *get_persistent_gnt(struct rb_root *root, node = node->rb_left; else if (gref > data->gnt) node = node->rb_right; - else + else { + if(test_bit(PERSISTENT_GNT_ACTIVE, data->flags)) { + pr_alert_ratelimited(DRV_PFX " requesting a grant already in use\n"); + return NULL; + } + set_bit(PERSISTENT_GNT_ACTIVE, data->flags); + atomic_inc(&blkif->persistent_gnt_in_use); return data; + } } return NULL; } -static void free_persistent_gnts(struct rb_root *root, unsigned int num) +static void put_persistent_gnt(struct xen_blkif *blkif, + struct persistent_gnt *persistent_gnt) +{ + if(!test_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags)) + pr_alert_ratelimited(DRV_PFX " freeing a grant already unused"); + set_bit(PERSISTENT_GNT_WAS_ACTIVE, persistent_gnt->flags); + clear_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags); + atomic_dec(&blkif->persistent_gnt_in_use); +} + +static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root, + unsigned int num) { struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST]; @@ -240,7 +288,7 @@ static void free_persistent_gnts(struct rb_root *root, unsigned int num) ret = gnttab_unmap_refs(unmap, NULL, pages, segs_to_unmap); BUG_ON(ret); - free_xenballooned_pages(segs_to_unmap, 
pages); + put_free_pages(blkif, pages, segs_to_unmap); segs_to_unmap = 0; } @@ -251,21 +299,148 @@ static void free_persistent_gnts(struct rb_root *root, unsigned int num) BUG_ON(num != 0); } +static void unmap_purged_grants(struct work_struct *work) +{ + struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + struct persistent_gnt *persistent_gnt; + int ret, segs_to_unmap = 0; + struct xen_blkif *blkif = container_of(work, typeof(*blkif), persistent_purge_work); + + while(!list_empty(&blkif->persistent_purge_list)) { + persistent_gnt = list_first_entry(&blkif->persistent_purge_list, + struct persistent_gnt, + remove_node); + list_del(&persistent_gnt->remove_node); + + gnttab_set_unmap_op(&unmap[segs_to_unmap], + vaddr(persistent_gnt->page), + GNTMAP_host_map, + persistent_gnt->handle); + + pages[segs_to_unmap] = persistent_gnt->page; + + if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST) { + ret = gnttab_unmap_refs(unmap, NULL, pages, + segs_to_unmap); + BUG_ON(ret); + put_free_pages(blkif, pages, segs_to_unmap); + segs_to_unmap = 0; + } + kfree(persistent_gnt); + } + if (segs_to_unmap > 0) { + ret = gnttab_unmap_refs(unmap, NULL, pages, segs_to_unmap); + BUG_ON(ret); + put_free_pages(blkif, pages, segs_to_unmap); + } +} + +static void purge_persistent_gnt(struct xen_blkif *blkif) +{ + struct persistent_gnt *persistent_gnt; + struct rb_node *n; + unsigned int num_clean, total; + bool scan_used = false, clean_used = false; + struct rb_root *root; + + if (blkif->persistent_gnt_c < xen_blkif_max_pgrants || + (blkif->persistent_gnt_c == xen_blkif_max_pgrants && + !blkif->vbd.overflow_max_grants)) { + return; + } + + if (work_pending(&blkif->persistent_purge_work)) { + pr_alert_ratelimited(DRV_PFX "Scheduled work from previous purge is still pending, cannot purge list\n"); + return; + } + + num_clean = (xen_blkif_max_pgrants / 100) * LRU_PERCENT_CLEAN; + num_clean = blkif->persistent_gnt_c - 
xen_blkif_max_pgrants + num_clean; + num_clean = min(blkif->persistent_gnt_c, num_clean); + if ((num_clean == 0) || + (num_clean > (blkif->persistent_gnt_c - atomic_read(&blkif->persistent_gnt_in_use)))) + return; + + /* + * At this point, we can assure that there will be no calls + * to get_persistent_grant (because we are executing this code from + * xen_blkif_schedule), there can only be calls to put_persistent_gnt, + * which means that the number of currently used grants will go down, + * but never up, so we will always be able to remove the requested + * number of grants. + */ + + total = num_clean; + + pr_debug(DRV_PFX "Going to purge %u persistent grants\n", num_clean); + + INIT_LIST_HEAD(&blkif->persistent_purge_list); + root = &blkif->persistent_gnts; +purge_list: + foreach_grant_safe(persistent_gnt, n, root, node) { + BUG_ON(persistent_gnt->handle == + BLKBACK_INVALID_HANDLE); + + if (clean_used) { + clear_bit(PERSISTENT_GNT_WAS_ACTIVE, persistent_gnt->flags); + continue; + } + + if (test_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags)) + continue; + if (!scan_used && + (test_bit(PERSISTENT_GNT_WAS_ACTIVE, persistent_gnt->flags))) + continue; + + rb_erase(&persistent_gnt->node, root); + list_add(&persistent_gnt->remove_node, + &blkif->persistent_purge_list); + if (--num_clean == 0) + goto finished; + } + /* + * If we get here it means we also need to start cleaning + * grants that were used since last purge in order to cope + * with the requested num + */ + if (!scan_used && !clean_used) { + pr_debug(DRV_PFX "Still missing %u purged frames\n", num_clean); + scan_used = true; + goto purge_list; + } +finished: + if (!clean_used) { + pr_debug(DRV_PFX "Finished scanning for grants to clean, removing used flag\n"); + clean_used = true; + goto purge_list; + } + + blkif->persistent_gnt_c -= (total - num_clean); + blkif->vbd.overflow_max_grants = 0; + + /* We can defer this work */ + INIT_WORK(&blkif->persistent_purge_work, unmap_purged_grants); + 
schedule_work(&blkif->persistent_purge_work); + pr_debug(DRV_PFX "Purged %u/%u\n", (total - num_clean), total); + return; +} + /* * Retrieve from the 'pending_reqs' a free pending_req structure to be used. */ -static struct pending_req *alloc_req(void) +static struct pending_req *alloc_req(struct xen_blkif *blkif) { struct pending_req *req = NULL; unsigned long flags; - spin_lock_irqsave(&blkbk->pending_free_lock, flags); - if (!list_empty(&blkbk->pending_free)) { - req = list_entry(blkbk->pending_free.next, struct pending_req, + spin_lock_irqsave(&blkif->pending_free_lock, flags); + if (!list_empty(&blkif->pending_free)) { + req = list_entry(blkif->pending_free.next, struct pending_req, free_list); list_del(&req->free_list); } - spin_unlock_irqrestore(&blkbk->pending_free_lock, flags); + spin_unlock_irqrestore(&blkif->pending_free_lock, flags); return req; } @@ -273,17 +448,17 @@ static struct pending_req *alloc_req(void) * Return the 'pending_req' structure back to the freepool. We also * wake up the thread if it was waiting for a free page. 
*/ -static void free_req(struct pending_req *req) +static void free_req(struct xen_blkif *blkif, struct pending_req *req) { unsigned long flags; int was_empty; - spin_lock_irqsave(&blkbk->pending_free_lock, flags); - was_empty = list_empty(&blkbk->pending_free); - list_add(&req->free_list, &blkbk->pending_free); - spin_unlock_irqrestore(&blkbk->pending_free_lock, flags); + spin_lock_irqsave(&blkif->pending_free_lock, flags); + was_empty = list_empty(&blkif->pending_free); + list_add(&req->free_list, &blkif->pending_free); + spin_unlock_irqrestore(&blkif->pending_free_lock, flags); if (was_empty) - wake_up(&blkbk->pending_free_wq); + wake_up(&blkif->pending_free_wq); } /* @@ -382,10 +557,12 @@ irqreturn_t xen_blkif_be_int(int irq, void *dev_id) static void print_stats(struct xen_blkif *blkif) { pr_info("xen-blkback (%s): oo %3llu | rd %4llu | wr %4llu | f %4llu" - " | ds %4llu\n", + " | ds %4llu | pg: %4u/%4d\n", current->comm, blkif->st_oo_req, blkif->st_rd_req, blkif->st_wr_req, - blkif->st_f_req, blkif->st_ds_req); + blkif->st_f_req, blkif->st_ds_req, + blkif->persistent_gnt_c, + xen_blkif_max_pgrants); blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000); blkif->st_rd_req = 0; blkif->st_wr_req = 0; @@ -397,6 +574,8 @@ int xen_blkif_schedule(void *arg) { struct xen_blkif *blkif = arg; struct xen_vbd *vbd = &blkif->vbd; + unsigned long timeout; + int ret; xen_blkif_get(blkif); @@ -406,27 +585,52 @@ int xen_blkif_schedule(void *arg) if (unlikely(vbd->size != vbd_sz(vbd))) xen_vbd_resize(blkif); - wait_event_interruptible( + timeout = msecs_to_jiffies(LRU_INTERVAL); + + timeout = wait_event_interruptible_timeout( blkif->wq, - blkif->waiting_reqs || kthread_should_stop()); - wait_event_interruptible( - blkbk->pending_free_wq, - !list_empty(&blkbk->pending_free) || - kthread_should_stop()); + blkif->waiting_reqs || kthread_should_stop(), + timeout); + if (timeout == 0) + goto purge_gnt_list; + timeout = wait_event_interruptible_timeout( + blkif->pending_free_wq, + 
!list_empty(&blkif->pending_free) || + kthread_should_stop(), + timeout); + if (timeout == 0) + goto purge_gnt_list; blkif->waiting_reqs = 0; smp_mb(); /* clear flag *before* checking for work */ - if (do_block_io_op(blkif)) + ret = do_block_io_op(blkif); + if (ret > 0) blkif->waiting_reqs = 1; + if (ret == -EACCES) + wait_event_interruptible(blkif->shutdown_wq, + kthread_should_stop()); + +purge_gnt_list: + if (blkif->vbd.feature_gnt_persistent && + time_after(jiffies, blkif->next_lru)) { + purge_persistent_gnt(blkif); + blkif->next_lru = jiffies + msecs_to_jiffies(LRU_INTERVAL); + } + + /* Shrink if we have more than xen_blkif_max_buffer_pages */ + shrink_free_pagepool(blkif, xen_blkif_max_buffer_pages); if (log_stats && time_after(jiffies, blkif->st_print)) print_stats(blkif); } + /* Since we are shutting down remove all pages from the buffer */ + shrink_free_pagepool(blkif, 0 /* All */); + /* Free all persistent grant pages */ if (!RB_EMPTY_ROOT(&blkif->persistent_gnts)) - free_persistent_gnts(&blkif->persistent_gnts, + free_persistent_gnts(blkif, &blkif->persistent_gnts, blkif->persistent_gnt_c); BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts)); @@ -441,148 +645,98 @@ int xen_blkif_schedule(void *arg) return 0; } -struct seg_buf { - unsigned int offset; - unsigned int nsec; -}; /* * Unmap the grant references, and also remove the M2P over-rides * used in the 'pending_req'. 
*/ -static void xen_blkbk_unmap(struct pending_req *req) +static void xen_blkbk_unmap(struct xen_blkif *blkif, + struct grant_page *pages[], + int num) { struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; - struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + struct page *unmap_pages[BLKIF_MAX_SEGMENTS_PER_REQUEST]; unsigned int i, invcount = 0; - grant_handle_t handle; int ret; - for (i = 0; i < req->nr_pages; i++) { - if (!test_bit(i, req->unmap_seg)) + for (i = 0; i < num; i++) { + if (pages[i]->persistent_gnt != NULL) { + put_persistent_gnt(blkif, pages[i]->persistent_gnt); continue; - handle = pending_handle(req, i); - if (handle == BLKBACK_INVALID_HANDLE) + } + if (pages[i]->handle == BLKBACK_INVALID_HANDLE) continue; - gnttab_set_unmap_op(&unmap[invcount], vaddr(req, i), - GNTMAP_host_map, handle); - pending_handle(req, i) = BLKBACK_INVALID_HANDLE; - pages[invcount] = virt_to_page(vaddr(req, i)); - invcount++; + unmap_pages[invcount] = pages[i]->page; + gnttab_set_unmap_op(&unmap[invcount], vaddr(pages[i]->page), + GNTMAP_host_map, pages[i]->handle); + pages[i]->handle = BLKBACK_INVALID_HANDLE; + if (++invcount == BLKIF_MAX_SEGMENTS_PER_REQUEST) { + ret = gnttab_unmap_refs(unmap, NULL, unmap_pages, + invcount); + BUG_ON(ret); + put_free_pages(blkif, unmap_pages, invcount); + invcount = 0; + } + } + if (invcount) { + ret = gnttab_unmap_refs(unmap, NULL, unmap_pages, invcount); + BUG_ON(ret); + put_free_pages(blkif, unmap_pages, invcount); } - - ret = gnttab_unmap_refs(unmap, NULL, pages, invcount); - BUG_ON(ret); } -static int xen_blkbk_map(struct blkif_request *req, - struct pending_req *pending_req, - struct seg_buf seg[], - struct page *pages[]) +static int xen_blkbk_map(struct xen_blkif *blkif, + struct grant_page *pages[], + int num, bool ro) { struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST]; - struct persistent_gnt *persistent_gnts[BLKIF_MAX_SEGMENTS_PER_REQUEST]; struct page 
*pages_to_gnt[BLKIF_MAX_SEGMENTS_PER_REQUEST]; struct persistent_gnt *persistent_gnt = NULL; - struct xen_blkif *blkif = pending_req->blkif; phys_addr_t addr = 0; - int i, j; - bool new_map; - int nseg = req->u.rw.nr_segments; + int i, seg_idx, new_map_idx; int segs_to_map = 0; int ret = 0; + int last_map = 0, map_until = 0; int use_persistent_gnts; use_persistent_gnts = (blkif->vbd.feature_gnt_persistent); - BUG_ON(blkif->persistent_gnt_c > - max_mapped_grant_pages(pending_req->blkif->blk_protocol)); - /* * Fill out preq.nr_sects with proper amount of sectors, and setup * assign map[..] with the PFN of the page in our domain with the * corresponding grant reference for each page. */ - for (i = 0; i < nseg; i++) { +again: + for (i = map_until; i < num; i++) { uint32_t flags; if (use_persistent_gnts) persistent_gnt = get_persistent_gnt( - &blkif->persistent_gnts, - req->u.rw.seg[i].gref); + blkif, + pages[i]->gref); if (persistent_gnt) { /* * We are using persistent grants and * the grant is already mapped */ - new_map = false; - } else if (use_persistent_gnts && - blkif->persistent_gnt_c < - max_mapped_grant_pages(blkif->blk_protocol)) { - /* - * We are using persistent grants, the grant is - * not mapped but we have room for it - */ - new_map = true; - persistent_gnt = kmalloc( - sizeof(struct persistent_gnt), - GFP_KERNEL); - if (!persistent_gnt) - return -ENOMEM; - if (alloc_xenballooned_pages(1, &persistent_gnt->page, - false)) { - kfree(persistent_gnt); - return -ENOMEM; - } - persistent_gnt->gnt = req->u.rw.seg[i].gref; - persistent_gnt->handle = BLKBACK_INVALID_HANDLE; - - pages_to_gnt[segs_to_map] = - persistent_gnt->page; - addr = (unsigned long) pfn_to_kaddr( - page_to_pfn(persistent_gnt->page)); - - add_persistent_gnt(&blkif->persistent_gnts, - persistent_gnt); - blkif->persistent_gnt_c++; - pr_debug(DRV_PFX " grant %u added to the tree of persistent grants, using %u/%u\n", - persistent_gnt->gnt, blkif->persistent_gnt_c, - 
max_mapped_grant_pages(blkif->blk_protocol)); + pages[i]->page = persistent_gnt->page; + pages[i]->persistent_gnt = persistent_gnt; } else { - /* - * We are either using persistent grants and - * hit the maximum limit of grants mapped, - * or we are not using persistent grants. - */ - if (use_persistent_gnts && - !blkif->vbd.overflow_max_grants) { - blkif->vbd.overflow_max_grants = 1; - pr_alert(DRV_PFX " domain %u, device %#x is using maximum number of persistent grants\n", - blkif->domid, blkif->vbd.handle); - } - new_map = true; - pages[i] = blkbk->pending_page(pending_req, i); - addr = vaddr(pending_req, i); - pages_to_gnt[segs_to_map] = - blkbk->pending_page(pending_req, i); - } - - if (persistent_gnt) { - pages[i] = persistent_gnt->page; - persistent_gnts[i] = persistent_gnt; - } else { - persistent_gnts[i] = NULL; - } - - if (new_map) { + if (get_free_page(blkif, &pages[i]->page)) + goto out_of_memory; + addr = vaddr(pages[i]->page); + pages_to_gnt[segs_to_map] = pages[i]->page; + pages[i]->persistent_gnt = NULL; flags = GNTMAP_host_map; - if (!persistent_gnt && - (pending_req->operation != BLKIF_OP_READ)) + if (!use_persistent_gnts && ro) flags |= GNTMAP_readonly; gnttab_set_map_op(&map[segs_to_map++], addr, - flags, req->u.rw.seg[i].gref, + flags, pages[i]->gref, blkif->domid); } + map_until = i + 1; + if (segs_to_map == BLKIF_MAX_SEGMENTS_PER_REQUEST) + break; } if (segs_to_map) { @@ -595,49 +749,133 @@ static int xen_blkbk_map(struct blkif_request *req, * so that when we access vaddr(pending_req,i) it has the contents of * the page from the other domain. 
*/ - bitmap_zero(pending_req->unmap_seg, BLKIF_MAX_SEGMENTS_PER_REQUEST); - for (i = 0, j = 0; i < nseg; i++) { - if (!persistent_gnts[i] || - persistent_gnts[i]->handle == BLKBACK_INVALID_HANDLE) { + for (seg_idx = last_map, new_map_idx = 0; seg_idx < map_until; seg_idx++) { + if (!pages[seg_idx]->persistent_gnt) { /* This is a newly mapped grant */ - BUG_ON(j >= segs_to_map); - if (unlikely(map[j].status != 0)) { + BUG_ON(new_map_idx >= segs_to_map); + if (unlikely(map[new_map_idx].status != 0)) { pr_debug(DRV_PFX "invalid buffer -- could not remap it\n"); - map[j].handle = BLKBACK_INVALID_HANDLE; + pages[seg_idx]->handle = BLKBACK_INVALID_HANDLE; ret |= 1; - if (persistent_gnts[i]) { - rb_erase(&persistent_gnts[i]->node, - &blkif->persistent_gnts); - blkif->persistent_gnt_c--; - kfree(persistent_gnts[i]); - persistent_gnts[i] = NULL; - } + goto next; } + pages[seg_idx]->handle = map[new_map_idx].handle; + } else { + continue; } - if (persistent_gnts[i]) { - if (persistent_gnts[i]->handle == - BLKBACK_INVALID_HANDLE) { + if (use_persistent_gnts && + blkif->persistent_gnt_c < xen_blkif_max_pgrants) { + /* + * We are using persistent grants, the grant is + * not mapped but we might have room for it. 
+ */ + persistent_gnt = kmalloc(sizeof(struct persistent_gnt), + GFP_KERNEL); + if (!persistent_gnt) { /* - * If this is a new persistent grant - * save the handler + * If we don't have enough memory to + * allocate the persistent_gnt struct + * map this grant non-persistenly */ - persistent_gnts[i]->handle = map[j++].handle; + goto next; } - pending_handle(pending_req, i) = - persistent_gnts[i]->handle; + persistent_gnt->gnt = map[new_map_idx].ref; + persistent_gnt->handle = map[new_map_idx].handle; + persistent_gnt->page = pages[seg_idx]->page; + if (add_persistent_gnt(blkif, + persistent_gnt)) { + kfree(persistent_gnt); + persistent_gnt = NULL; + goto next; + } + pages[seg_idx]->persistent_gnt = persistent_gnt; + pr_debug(DRV_PFX " grant %u added to the tree of persistent grants, using %u/%u\n", + persistent_gnt->gnt, blkif->persistent_gnt_c, + xen_blkif_max_pgrants); + goto next; + } + if (use_persistent_gnts && !blkif->vbd.overflow_max_grants) { + blkif->vbd.overflow_max_grants = 1; + pr_debug(DRV_PFX " domain %u, device %#x is using maximum number of persistent grants\n", + blkif->domid, blkif->vbd.handle); + } + /* + * We could not map this grant persistently, so use it as + * a non-persistent grant. 
+ */ +next: + new_map_idx++; + } + segs_to_map = 0; + last_map = map_until; + if (map_until != num) + goto again; - if (ret) - continue; - } else { - pending_handle(pending_req, i) = map[j++].handle; - bitmap_set(pending_req->unmap_seg, i, 1); + return ret; + +out_of_memory: + pr_alert(DRV_PFX "%s: out of memory\n", __func__); + put_free_pages(blkif, pages_to_gnt, segs_to_map); + return -ENOMEM; +} + +static int xen_blkbk_map_seg(struct pending_req *pending_req) +{ + int rc; + + rc = xen_blkbk_map(pending_req->blkif, pending_req->segments, + pending_req->nr_pages, + (pending_req->operation != BLKIF_OP_READ)); + + return rc; +} - if (ret) - continue; +static int xen_blkbk_parse_indirect(struct blkif_request *req, + struct pending_req *pending_req, + struct seg_buf seg[], + struct phys_req *preq) +{ + struct grant_page **pages = pending_req->indirect_pages; + struct xen_blkif *blkif = pending_req->blkif; + int indirect_grefs, rc, n, nseg, i; + struct blkif_request_segment_aligned *segments = NULL; + + nseg = pending_req->nr_pages; + indirect_grefs = INDIRECT_PAGES(nseg); + BUG_ON(indirect_grefs > BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST); + + for (i = 0; i < indirect_grefs; i++) + pages[i]->gref = req->u.indirect.indirect_grefs[i]; + + rc = xen_blkbk_map(blkif, pages, indirect_grefs, true); + if (rc) + goto unmap; + + for (n = 0, i = 0; n < nseg; n++) { + if ((n % SEGS_PER_INDIRECT_FRAME) == 0) { + /* Map indirect segments */ + if (segments) + kunmap_atomic(segments); + segments = kmap_atomic(pages[n/SEGS_PER_INDIRECT_FRAME]->page); + } + i = n % SEGS_PER_INDIRECT_FRAME; + pending_req->segments[n]->gref = segments[i].gref; + seg[n].nsec = segments[i].last_sect - + segments[i].first_sect + 1; + seg[n].offset = (segments[i].first_sect << 9); + if ((segments[i].last_sect >= (PAGE_SIZE >> 9)) || + (segments[i].last_sect < segments[i].first_sect)) { + rc = -EINVAL; + goto unmap; } - seg[i].offset = (req->u.rw.seg[i].first_sect << 9); + preq->nr_sects += seg[n].nsec; } - 
return ret; + +unmap: + if (segments) + kunmap_atomic(segments); + xen_blkbk_unmap(blkif, pages, indirect_grefs); + return rc; } static int dispatch_discard_io(struct xen_blkif *blkif, @@ -647,7 +885,18 @@ static int dispatch_discard_io(struct xen_blkif *blkif, int status = BLKIF_RSP_OKAY; struct block_device *bdev = blkif->vbd.bdev; unsigned long secure; + struct phys_req preq; + + preq.sector_number = req->u.discard.sector_number; + preq.nr_sects = req->u.discard.nr_sectors; + err = xen_vbd_translate(&preq, blkif, WRITE); + if (err) { + pr_warn(DRV_PFX "access denied: DISCARD [%llu->%llu] on dev=%04x\n", + preq.sector_number, + preq.sector_number + preq.nr_sects, blkif->vbd.pdevice); + goto fail_response; + } blkif->st_ds_req++; xen_blkif_get(blkif); @@ -658,7 +907,7 @@ static int dispatch_discard_io(struct xen_blkif *blkif, err = blkdev_issue_discard(bdev, req->u.discard.sector_number, req->u.discard.nr_sectors, GFP_KERNEL, secure); - +fail_response: if (err == -EOPNOTSUPP) { pr_debug(DRV_PFX "discard op failed, not supported\n"); status = BLKIF_RSP_EOPNOTSUPP; @@ -674,7 +923,7 @@ static int dispatch_other_io(struct xen_blkif *blkif, struct blkif_request *req, struct pending_req *pending_req) { - free_req(pending_req); + free_req(blkif, pending_req); make_response(blkif, req->u.other.id, req->operation, BLKIF_RSP_EOPNOTSUPP); return -EIO; @@ -726,7 +975,9 @@ static void __end_block_io_op(struct pending_req *pending_req, int error) * the proper response on the ring. 
*/ if (atomic_dec_and_test(&pending_req->pendcnt)) { - xen_blkbk_unmap(pending_req); + xen_blkbk_unmap(pending_req->blkif, + pending_req->segments, + pending_req->nr_pages); make_response(pending_req->blkif, pending_req->id, pending_req->operation, pending_req->status); xen_blkif_put(pending_req->blkif); @@ -734,7 +985,7 @@ static void __end_block_io_op(struct pending_req *pending_req, int error) if (atomic_read(&pending_req->blkif->drain)) complete(&pending_req->blkif->drain_complete); } - free_req(pending_req); + free_req(pending_req->blkif, pending_req); } } @@ -767,6 +1018,12 @@ __do_block_io_op(struct xen_blkif *blkif) rp = blk_rings->common.sring->req_prod; rmb(); /* Ensure we see queued requests up to 'rp'. */ + if (RING_REQUEST_PROD_OVERFLOW(&blk_rings->common, rp)) { + rc = blk_rings->common.rsp_prod_pvt; + pr_warn(DRV_PFX "Frontend provided bogus ring requests (%d - %d = %d). Halting ring processing on dev=%04x\n", + rp, rc, rp - rc, blkif->vbd.pdevice); + return -EACCES; + } while (rc != rp) { if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc)) @@ -777,7 +1034,7 @@ __do_block_io_op(struct xen_blkif *blkif) break; } - pending_req = alloc_req(); + pending_req = alloc_req(blkif); if (NULL == pending_req) { blkif->st_oo_req++; more_to_do = 1; @@ -807,11 +1064,12 @@ __do_block_io_op(struct xen_blkif *blkif) case BLKIF_OP_WRITE: case BLKIF_OP_WRITE_BARRIER: case BLKIF_OP_FLUSH_DISKCACHE: + case BLKIF_OP_INDIRECT: if (dispatch_rw_block_io(blkif, &req, pending_req)) goto done; break; case BLKIF_OP_DISCARD: - free_req(pending_req); + free_req(blkif, pending_req); if (dispatch_discard_io(blkif, &req)) goto done; break; @@ -853,17 +1111,28 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, struct pending_req *pending_req) { struct phys_req preq; - struct seg_buf seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + struct seg_buf *seg = pending_req->seg; unsigned int nseg; struct bio *bio = NULL; - struct bio *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + struct bio 
**biolist = pending_req->biolist; int i, nbio = 0; int operation; struct blk_plug plug; bool drain = false; - struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + struct grant_page **pages = pending_req->segments; + unsigned short req_operation; + + req_operation = req->operation == BLKIF_OP_INDIRECT ? + req->u.indirect.indirect_op : req->operation; + if ((req->operation == BLKIF_OP_INDIRECT) && + (req_operation != BLKIF_OP_READ) && + (req_operation != BLKIF_OP_WRITE)) { + pr_debug(DRV_PFX "Invalid indirect operation (%u)\n", + req_operation); + goto fail_response; + } - switch (req->operation) { + switch (req_operation) { case BLKIF_OP_READ: blkif->st_rd_req++; operation = READ; @@ -885,33 +1154,47 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, } /* Check that the number of segments is sane. */ - nseg = req->u.rw.nr_segments; + nseg = req->operation == BLKIF_OP_INDIRECT ? + req->u.indirect.nr_segments : req->u.rw.nr_segments; if (unlikely(nseg == 0 && operation != WRITE_FLUSH) || - unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) { + unlikely((req->operation != BLKIF_OP_INDIRECT) && + (nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) || + unlikely((req->operation == BLKIF_OP_INDIRECT) && + (nseg > MAX_INDIRECT_SEGMENTS))) { pr_debug(DRV_PFX "Bad number of segments in request (%d)\n", nseg); /* Haven't submitted any bio's yet. 
*/ goto fail_response; } - preq.sector_number = req->u.rw.sector_number; preq.nr_sects = 0; pending_req->blkif = blkif; pending_req->id = req->u.rw.id; - pending_req->operation = req->operation; + pending_req->operation = req_operation; pending_req->status = BLKIF_RSP_OKAY; pending_req->nr_pages = nseg; - for (i = 0; i < nseg; i++) { - seg[i].nsec = req->u.rw.seg[i].last_sect - - req->u.rw.seg[i].first_sect + 1; - if ((req->u.rw.seg[i].last_sect >= (PAGE_SIZE >> 9)) || - (req->u.rw.seg[i].last_sect < req->u.rw.seg[i].first_sect)) + if (req->operation != BLKIF_OP_INDIRECT) { + preq.dev = req->u.rw.handle; + preq.sector_number = req->u.rw.sector_number; + for (i = 0; i < nseg; i++) { + pages[i]->gref = req->u.rw.seg[i].gref; + seg[i].nsec = req->u.rw.seg[i].last_sect - + req->u.rw.seg[i].first_sect + 1; + seg[i].offset = (req->u.rw.seg[i].first_sect << 9); + if ((req->u.rw.seg[i].last_sect >= (PAGE_SIZE >> 9)) || + (req->u.rw.seg[i].last_sect < + req->u.rw.seg[i].first_sect)) + goto fail_response; + preq.nr_sects += seg[i].nsec; + } + } else { + preq.dev = req->u.indirect.handle; + preq.sector_number = req->u.indirect.sector_number; + if (xen_blkbk_parse_indirect(req, pending_req, seg, &preq)) goto fail_response; - preq.nr_sects += seg[i].nsec; - } if (xen_vbd_translate(&preq, blkif, operation) != 0) { @@ -948,7 +1231,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, * the hypercall to unmap the grants - that is all done in * xen_blkbk_unmap. 
*/ - if (xen_blkbk_map(req, pending_req, seg, pages)) + if (xen_blkbk_map_seg(pending_req)) goto fail_flush; /* @@ -960,11 +1243,12 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, for (i = 0; i < nseg; i++) { while ((bio == NULL) || (bio_add_page(bio, - pages[i], + pages[i]->page, seg[i].nsec << 9, seg[i].offset) == 0)) { - bio = bio_alloc(GFP_KERNEL, nseg-i); + int nr_iovecs = min_t(int, (nseg-i), BIO_MAX_PAGES); + bio = bio_alloc(GFP_KERNEL, nr_iovecs); if (unlikely(bio == NULL)) goto fail_put_bio; @@ -1009,11 +1293,12 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, return 0; fail_flush: - xen_blkbk_unmap(pending_req); + xen_blkbk_unmap(blkif, pending_req->segments, + pending_req->nr_pages); fail_response: /* Haven't submitted any bio's yet. */ - make_response(blkif, req->u.rw.id, req->operation, BLKIF_RSP_ERROR); - free_req(pending_req); + make_response(blkif, req->u.rw.id, req_operation, BLKIF_RSP_ERROR); + free_req(blkif, pending_req); msleep(1); /* back off a bit */ return -EIO; @@ -1070,73 +1355,20 @@ static void make_response(struct xen_blkif *blkif, u64 id, static int __init xen_blkif_init(void) { - int i, mmap_pages; int rc = 0; if (!xen_domain()) return -ENODEV; - blkbk = kzalloc(sizeof(struct xen_blkbk), GFP_KERNEL); - if (!blkbk) { - pr_alert(DRV_PFX "%s: out of memory!\n", __func__); - return -ENOMEM; - } - - mmap_pages = xen_blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST; - - blkbk->pending_reqs = kzalloc(sizeof(blkbk->pending_reqs[0]) * - xen_blkif_reqs, GFP_KERNEL); - blkbk->pending_grant_handles = kmalloc(sizeof(blkbk->pending_grant_handles[0]) * - mmap_pages, GFP_KERNEL); - blkbk->pending_pages = kzalloc(sizeof(blkbk->pending_pages[0]) * - mmap_pages, GFP_KERNEL); - - if (!blkbk->pending_reqs || !blkbk->pending_grant_handles || - !blkbk->pending_pages) { - rc = -ENOMEM; - goto out_of_memory; - } - - for (i = 0; i < mmap_pages; i++) { - blkbk->pending_grant_handles[i] = BLKBACK_INVALID_HANDLE; - blkbk->pending_pages[i] = 
alloc_page(GFP_KERNEL); - if (blkbk->pending_pages[i] == NULL) { - rc = -ENOMEM; - goto out_of_memory; - } - } rc = xen_blkif_interface_init(); if (rc) goto failed_init; - INIT_LIST_HEAD(&blkbk->pending_free); - spin_lock_init(&blkbk->pending_free_lock); - init_waitqueue_head(&blkbk->pending_free_wq); - - for (i = 0; i < xen_blkif_reqs; i++) - list_add_tail(&blkbk->pending_reqs[i].free_list, - &blkbk->pending_free); - rc = xen_blkif_xenbus_init(); if (rc) goto failed_init; - return 0; - - out_of_memory: - pr_alert(DRV_PFX "%s: out of memory\n", __func__); failed_init: - kfree(blkbk->pending_reqs); - kfree(blkbk->pending_grant_handles); - if (blkbk->pending_pages) { - for (i = 0; i < mmap_pages; i++) { - if (blkbk->pending_pages[i]) - __free_page(blkbk->pending_pages[i]); - } - kfree(blkbk->pending_pages); - } - kfree(blkbk); - blkbk = NULL; return rc; } diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index 60103e2517ba..8d8807563d99 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -50,6 +50,19 @@ __func__, __LINE__, ##args) +/* + * This is the maximum number of segments that would be allowed in indirect + * requests. This value will also be passed to the frontend. + */ +#define MAX_INDIRECT_SEGMENTS 256 + +#define SEGS_PER_INDIRECT_FRAME \ + (PAGE_SIZE/sizeof(struct blkif_request_segment_aligned)) +#define MAX_INDIRECT_PAGES \ + ((MAX_INDIRECT_SEGMENTS + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME) +#define INDIRECT_PAGES(_segs) \ + ((_segs + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME) + /* Not a real protocol. Used to generate ring structs which contain * the elements common to all protocols only. 
This way we get a * compiler-checkable way to use common struct elements, so we can @@ -83,12 +96,31 @@ struct blkif_x86_32_request_other { uint64_t id; /* private guest value, echoed in resp */ } __attribute__((__packed__)); +struct blkif_x86_32_request_indirect { + uint8_t indirect_op; + uint16_t nr_segments; + uint64_t id; + blkif_sector_t sector_number; + blkif_vdev_t handle; + uint16_t _pad1; + grant_ref_t indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST]; + /* + * The maximum number of indirect segments (and pages) that will + * be used is determined by MAX_INDIRECT_SEGMENTS, this value + * is also exported to the guest (via xenstore + * feature-max-indirect-segments entry), so the frontend knows how + * many indirect segments the backend supports. + */ + uint64_t _pad2; /* make it 64 byte aligned */ +} __attribute__((__packed__)); + struct blkif_x86_32_request { uint8_t operation; /* BLKIF_OP_??? */ union { struct blkif_x86_32_request_rw rw; struct blkif_x86_32_request_discard discard; struct blkif_x86_32_request_other other; + struct blkif_x86_32_request_indirect indirect; } u; } __attribute__((__packed__)); @@ -127,12 +159,32 @@ struct blkif_x86_64_request_other { uint64_t id; /* private guest value, echoed in resp */ } __attribute__((__packed__)); +struct blkif_x86_64_request_indirect { + uint8_t indirect_op; + uint16_t nr_segments; + uint32_t _pad1; /* offsetof(blkif_..,u.indirect.id)==8 */ + uint64_t id; + blkif_sector_t sector_number; + blkif_vdev_t handle; + uint16_t _pad2; + grant_ref_t indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST]; + /* + * The maximum number of indirect segments (and pages) that will + * be used is determined by MAX_INDIRECT_SEGMENTS, this value + * is also exported to the guest (via xenstore + * feature-max-indirect-segments entry), so the frontend knows how + * many indirect segments the backend supports. 
+ */ + uint32_t _pad3; /* make it 64 byte aligned */ +} __attribute__((__packed__)); + struct blkif_x86_64_request { uint8_t operation; /* BLKIF_OP_??? */ union { struct blkif_x86_64_request_rw rw; struct blkif_x86_64_request_discard discard; struct blkif_x86_64_request_other other; + struct blkif_x86_64_request_indirect indirect; } u; } __attribute__((__packed__)); @@ -182,12 +234,26 @@ struct xen_vbd { struct backend_info; +/* Number of available flags */ +#define PERSISTENT_GNT_FLAGS_SIZE 2 +/* This persistent grant is currently in use */ +#define PERSISTENT_GNT_ACTIVE 0 +/* + * This persistent grant has been used, this flag is set when we remove the + * PERSISTENT_GNT_ACTIVE, to know that this grant has been used recently. + */ +#define PERSISTENT_GNT_WAS_ACTIVE 1 + +/* Number of requests that we can fit in a ring */ +#define XEN_BLKIF_REQS 32 struct persistent_gnt { struct page *page; grant_ref_t gnt; grant_handle_t handle; + DECLARE_BITMAP(flags, PERSISTENT_GNT_FLAGS_SIZE); struct rb_node node; + struct list_head remove_node; }; struct xen_blkif { @@ -219,6 +285,23 @@ struct xen_blkif { /* tree to store persistent grants */ struct rb_root persistent_gnts; unsigned int persistent_gnt_c; + atomic_t persistent_gnt_in_use; + unsigned long next_lru; + + /* used by the kworker that offload work from the persistent purge */ + struct list_head persistent_purge_list; + struct work_struct persistent_purge_work; + + /* buffer of free pages to map grant refs */ + spinlock_t free_pages_lock; + int free_pages_num; + struct list_head free_pages; + + /* List of all 'pending_req' available */ + struct list_head pending_free; + /* And its spinlock. */ + spinlock_t pending_free_lock; + wait_queue_head_t pending_free_wq; /* statistics */ unsigned long st_print; @@ -231,6 +314,41 @@ struct xen_blkif { unsigned long long st_wr_sect; wait_queue_head_t waiting_to_free; + /* Thread shutdown wait queue. 
*/ + wait_queue_head_t shutdown_wq; +}; + +struct seg_buf { + unsigned long offset; + unsigned int nsec; +}; + +struct grant_page { + struct page *page; + struct persistent_gnt *persistent_gnt; + grant_handle_t handle; + grant_ref_t gref; +}; + +/* + * Each outstanding request that we've passed to the lower device layers has a + * 'pending_req' allocated to it. Each buffer_head that completes decrements + * the pendcnt towards zero. When it hits zero, the specified domain has a + * response queued for it, with the saved 'id' passed back. + */ +struct pending_req { + struct xen_blkif *blkif; + u64 id; + int nr_pages; + atomic_t pendcnt; + unsigned short operation; + int status; + struct list_head free_list; + struct grant_page *segments[MAX_INDIRECT_SEGMENTS]; + /* Indirect descriptors */ + struct grant_page *indirect_pages[MAX_INDIRECT_PAGES]; + struct seg_buf seg[MAX_INDIRECT_SEGMENTS]; + struct bio *biolist[MAX_INDIRECT_SEGMENTS]; }; @@ -257,6 +375,7 @@ int xen_blkif_xenbus_init(void); irqreturn_t xen_blkif_be_int(int irq, void *dev_id); int xen_blkif_schedule(void *arg); +int xen_blkif_purge_persistent(void *arg); int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt, struct backend_info *be, int state); @@ -268,7 +387,7 @@ struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be); static inline void blkif_get_x86_32_req(struct blkif_request *dst, struct blkif_x86_32_request *src) { - int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST; + int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST, j; dst->operation = src->operation; switch (src->operation) { case BLKIF_OP_READ: @@ -291,6 +410,18 @@ static inline void blkif_get_x86_32_req(struct blkif_request *dst, dst->u.discard.sector_number = src->u.discard.sector_number; dst->u.discard.nr_sectors = src->u.discard.nr_sectors; break; + case BLKIF_OP_INDIRECT: + dst->u.indirect.indirect_op = src->u.indirect.indirect_op; + dst->u.indirect.nr_segments = src->u.indirect.nr_segments; + dst->u.indirect.handle = 
src->u.indirect.handle; + dst->u.indirect.id = src->u.indirect.id; + dst->u.indirect.sector_number = src->u.indirect.sector_number; + barrier(); + j = min(MAX_INDIRECT_PAGES, INDIRECT_PAGES(dst->u.indirect.nr_segments)); + for (i = 0; i < j; i++) + dst->u.indirect.indirect_grefs[i] = + src->u.indirect.indirect_grefs[i]; + break; default: /* * Don't know how to translate this op. Only get the @@ -304,7 +435,7 @@ static inline void blkif_get_x86_32_req(struct blkif_request *dst, static inline void blkif_get_x86_64_req(struct blkif_request *dst, struct blkif_x86_64_request *src) { - int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST; + int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST, j; dst->operation = src->operation; switch (src->operation) { case BLKIF_OP_READ: @@ -327,6 +458,18 @@ static inline void blkif_get_x86_64_req(struct blkif_request *dst, dst->u.discard.sector_number = src->u.discard.sector_number; dst->u.discard.nr_sectors = src->u.discard.nr_sectors; break; + case BLKIF_OP_INDIRECT: + dst->u.indirect.indirect_op = src->u.indirect.indirect_op; + dst->u.indirect.nr_segments = src->u.indirect.nr_segments; + dst->u.indirect.handle = src->u.indirect.handle; + dst->u.indirect.id = src->u.indirect.id; + dst->u.indirect.sector_number = src->u.indirect.sector_number; + barrier(); + j = min(MAX_INDIRECT_PAGES, INDIRECT_PAGES(dst->u.indirect.nr_segments)); + for (i = 0; i < j; i++) + dst->u.indirect.indirect_grefs[i] = + src->u.indirect.indirect_grefs[i]; + break; default: /* * Don't know how to translate this op. 
Only get the diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 04608a6502d7..fe5c3cd10c34 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -98,12 +98,17 @@ static void xen_update_blkif_status(struct xen_blkif *blkif) err = PTR_ERR(blkif->xenblkd); blkif->xenblkd = NULL; xenbus_dev_error(blkif->be->dev, err, "start xenblkd"); + return; } } static struct xen_blkif *xen_blkif_alloc(domid_t domid) { struct xen_blkif *blkif; + struct pending_req *req, *n; + int i, j; + + BUILD_BUG_ON(MAX_INDIRECT_PAGES > BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST); blkif = kmem_cache_zalloc(xen_blkif_cachep, GFP_KERNEL); if (!blkif) @@ -118,8 +123,57 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid) blkif->st_print = jiffies; init_waitqueue_head(&blkif->waiting_to_free); blkif->persistent_gnts.rb_node = NULL; + spin_lock_init(&blkif->free_pages_lock); + INIT_LIST_HEAD(&blkif->free_pages); + blkif->free_pages_num = 0; + atomic_set(&blkif->persistent_gnt_in_use, 0); + + INIT_LIST_HEAD(&blkif->pending_free); + + for (i = 0; i < XEN_BLKIF_REQS; i++) { + req = kzalloc(sizeof(*req), GFP_KERNEL); + if (!req) + goto fail; + list_add_tail(&req->free_list, + &blkif->pending_free); + for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) { + req->segments[j] = kzalloc(sizeof(*req->segments[0]), + GFP_KERNEL); + if (!req->segments[j]) + goto fail; + } + for (j = 0; j < MAX_INDIRECT_PAGES; j++) { + req->indirect_pages[j] = kzalloc(sizeof(*req->indirect_pages[0]), + GFP_KERNEL); + if (!req->indirect_pages[j]) + goto fail; + } + } + spin_lock_init(&blkif->pending_free_lock); + init_waitqueue_head(&blkif->pending_free_wq); + init_waitqueue_head(&blkif->shutdown_wq); return blkif; + +fail: + list_for_each_entry_safe(req, n, &blkif->pending_free, free_list) { + list_del(&req->free_list); + for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) { + if (!req->segments[j]) + break; + kfree(req->segments[j]); + } + for (j = 0; j < 
MAX_INDIRECT_PAGES; j++) { + if (!req->indirect_pages[j]) + break; + kfree(req->indirect_pages[j]); + } + kfree(req); + } + + kmem_cache_free(xen_blkif_cachep, blkif); + + return ERR_PTR(-ENOMEM); } static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page, @@ -178,6 +232,7 @@ static void xen_blkif_disconnect(struct xen_blkif *blkif) { if (blkif->xenblkd) { kthread_stop(blkif->xenblkd); + wake_up(&blkif->shutdown_wq); blkif->xenblkd = NULL; } @@ -198,8 +253,28 @@ static void xen_blkif_disconnect(struct xen_blkif *blkif) static void xen_blkif_free(struct xen_blkif *blkif) { + struct pending_req *req, *n; + int i = 0, j; + if (!atomic_dec_and_test(&blkif->refcnt)) BUG(); + + /* Check that there is no request in use */ + list_for_each_entry_safe(req, n, &blkif->pending_free, free_list) { + list_del(&req->free_list); + + for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) + kfree(req->segments[j]); + + for (j = 0; j < MAX_INDIRECT_PAGES; j++) + kfree(req->indirect_pages[j]); + + kfree(req); + i++; + } + + WARN_ON(i != XEN_BLKIF_REQS); + kmem_cache_free(xen_blkif_cachep, blkif); } @@ -678,6 +753,11 @@ again: dev->nodename); goto abort; } + err = xenbus_printf(xbt, dev->nodename, "feature-max-indirect-segments", "%u", + MAX_INDIRECT_SEGMENTS); + if (err) + dev_warn(&dev->dev, "writing %s/feature-max-indirect-segments (%d)", + dev->nodename, err); err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu", (unsigned long long)vbd_sz(&be->blkif->vbd)); @@ -704,6 +784,11 @@ again: dev->nodename); goto abort; } + err = xenbus_printf(xbt, dev->nodename, "physical-sector-size", "%u", + bdev_physical_block_size(be->blkif->vbd.bdev)); + if (err) + xenbus_dev_error(dev, err, "writing %s/physical-sector-size", + dev->nodename); err = xenbus_transaction_end(xbt, 0); if (err == -EAGAIN) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index d89ef86220f4..a4660bbee8a6 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ 
-74,12 +74,30 @@ struct grant { struct blk_shadow { struct blkif_request req; struct request *request; - struct grant *grants_used[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + struct grant **grants_used; + struct grant **indirect_grants; + struct scatterlist *sg; +}; + +struct split_bio { + struct bio *bio; + atomic_t pending; + int err; }; static DEFINE_MUTEX(blkfront_mutex); static const struct block_device_operations xlvbd_block_fops; +/* + * Maximum number of segments in indirect requests, the actual value used by + * the frontend driver is the minimum of this value and the value provided + * by the backend driver. + */ + +static unsigned int xen_blkif_max_segments = 32; +module_param_named(max, xen_blkif_max_segments, int, S_IRUGO); +MODULE_PARM_DESC(max, "Maximum amount of segments in indirect requests (default is 32)"); + #define BLK_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE) /* @@ -98,7 +116,6 @@ struct blkfront_info enum blkif_state connected; int ring_ref; struct blkif_front_ring ring; - struct scatterlist sg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; unsigned int evtchn, irq; struct request_queue *rq; struct work_struct work; @@ -114,6 +131,7 @@ struct blkfront_info unsigned int discard_granularity; unsigned int discard_alignment; unsigned int feature_persistent:1; + unsigned int max_indirect_segments; int is_ready; }; @@ -142,6 +160,13 @@ static DEFINE_SPINLOCK(minor_lock); #define DEV_NAME "xvd" /* name in /dev */ +#define SEGS_PER_INDIRECT_FRAME \ + (PAGE_SIZE/sizeof(struct blkif_request_segment_aligned)) +#define INDIRECT_GREFS(_segs) \ + ((_segs + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME) + +static int blkfront_setup_indirect(struct blkfront_info *info); + static int get_id_from_freelist(struct blkfront_info *info) { unsigned long free = info->shadow_free; @@ -358,7 +383,8 @@ static int blkif_queue_request(struct request *req) struct blkif_request *ring_req; unsigned long id; unsigned int fsect, lsect; - int i, ref; + int i, ref, n; + struct 
blkif_request_segment_aligned *segments = NULL; /* * Used to store if we are able to queue the request by just using @@ -369,21 +395,27 @@ static int blkif_queue_request(struct request *req) grant_ref_t gref_head; struct grant *gnt_list_entry = NULL; struct scatterlist *sg; + int nseg, max_grefs; if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) return 1; - /* Check if we have enought grants to allocate a requests */ - if (info->persistent_gnts_c < BLKIF_MAX_SEGMENTS_PER_REQUEST) { + max_grefs = info->max_indirect_segments ? + info->max_indirect_segments + + INDIRECT_GREFS(info->max_indirect_segments) : + BLKIF_MAX_SEGMENTS_PER_REQUEST; + + /* Check if we have enough grants to allocate a requests */ + if (info->persistent_gnts_c < max_grefs) { new_persistent_gnts = 1; if (gnttab_alloc_grant_references( - BLKIF_MAX_SEGMENTS_PER_REQUEST - info->persistent_gnts_c, + max_grefs - info->persistent_gnts_c, &gref_head) < 0) { gnttab_request_free_callback( &info->callback, blkif_restart_queue_callback, info, - BLKIF_MAX_SEGMENTS_PER_REQUEST); + max_grefs); return 1; } } else @@ -394,42 +426,67 @@ static int blkif_queue_request(struct request *req) id = get_id_from_freelist(info); info->shadow[id].request = req; - ring_req->u.rw.id = id; - ring_req->u.rw.sector_number = (blkif_sector_t)blk_rq_pos(req); - ring_req->u.rw.handle = info->handle; - - ring_req->operation = rq_data_dir(req) ? - BLKIF_OP_WRITE : BLKIF_OP_READ; - - if (req->cmd_flags & (REQ_FLUSH | REQ_FUA)) { - /* - * Ideally we can do an unordered flush-to-disk. In case the - * backend onlysupports barriers, use that. A barrier request - * a superset of FUA, so we can implement it the same - * way. (It's also a FLUSH+FUA, since it is - * guaranteed ordered WRT previous writes.) - */ - ring_req->operation = info->flush_op; - } - if (unlikely(req->cmd_flags & (REQ_DISCARD | REQ_SECURE))) { - /* id, sector_number and handle are set above. 
*/ ring_req->operation = BLKIF_OP_DISCARD; ring_req->u.discard.nr_sectors = blk_rq_sectors(req); + ring_req->u.discard.id = id; + ring_req->u.discard.sector_number = (blkif_sector_t)blk_rq_pos(req); if ((req->cmd_flags & REQ_SECURE) && info->feature_secdiscard) ring_req->u.discard.flag = BLKIF_DISCARD_SECURE; else ring_req->u.discard.flag = 0; } else { - ring_req->u.rw.nr_segments = blk_rq_map_sg(req->q, req, - info->sg); - BUG_ON(ring_req->u.rw.nr_segments > - BLKIF_MAX_SEGMENTS_PER_REQUEST); - - for_each_sg(info->sg, sg, ring_req->u.rw.nr_segments, i) { + BUG_ON(info->max_indirect_segments == 0 && + req->nr_phys_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST); + BUG_ON(info->max_indirect_segments && + req->nr_phys_segments > info->max_indirect_segments); + nseg = blk_rq_map_sg(req->q, req, info->shadow[id].sg); + ring_req->u.rw.id = id; + if (nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST) { + /* + * The indirect operation can only be a BLKIF_OP_READ or + * BLKIF_OP_WRITE + */ + BUG_ON(req->cmd_flags & (REQ_FLUSH | REQ_FUA)); + ring_req->operation = BLKIF_OP_INDIRECT; + ring_req->u.indirect.indirect_op = rq_data_dir(req) ? + BLKIF_OP_WRITE : BLKIF_OP_READ; + ring_req->u.indirect.sector_number = (blkif_sector_t)blk_rq_pos(req); + ring_req->u.indirect.handle = info->handle; + ring_req->u.indirect.nr_segments = nseg; + } else { + ring_req->u.rw.sector_number = (blkif_sector_t)blk_rq_pos(req); + ring_req->u.rw.handle = info->handle; + ring_req->operation = rq_data_dir(req) ? + BLKIF_OP_WRITE : BLKIF_OP_READ; + if (req->cmd_flags & (REQ_FLUSH | REQ_FUA)) { + /* + * Ideally we can do an unordered flush-to-disk. In case the + * backend onlysupports barriers, use that. A barrier request + * a superset of FUA, so we can implement it the same + * way. (It's also a FLUSH+FUA, since it is + * guaranteed ordered WRT previous writes.) 
+ */ + ring_req->operation = info->flush_op; + } + ring_req->u.rw.nr_segments = nseg; + } + for_each_sg(info->shadow[id].sg, sg, nseg, i) { fsect = sg->offset >> 9; lsect = fsect + (sg->length >> 9) - 1; + if ((ring_req->operation == BLKIF_OP_INDIRECT) && + (i % SEGS_PER_INDIRECT_FRAME == 0)) { + if (segments) + kunmap_atomic(segments); + + n = i / SEGS_PER_INDIRECT_FRAME; + gnt_list_entry = get_grant(&gref_head, info); + info->shadow[id].indirect_grants[n] = gnt_list_entry; + segments = kmap_atomic(pfn_to_page(gnt_list_entry->pfn)); + ring_req->u.indirect.indirect_grefs[n] = gnt_list_entry->gref; + } + gnt_list_entry = get_grant(&gref_head, info); ref = gnt_list_entry->gref; @@ -441,8 +498,7 @@ static int blkif_queue_request(struct request *req) BUG_ON(sg->offset + sg->length > PAGE_SIZE); - shared_data = kmap_atomic( - pfn_to_page(gnt_list_entry->pfn)); + shared_data = kmap_atomic(pfn_to_page(gnt_list_entry->pfn)); bvec_data = kmap_atomic(sg_page(sg)); /* @@ -461,13 +517,23 @@ static int blkif_queue_request(struct request *req) kunmap_atomic(bvec_data); kunmap_atomic(shared_data); } - - ring_req->u.rw.seg[i] = - (struct blkif_request_segment) { - .gref = ref, - .first_sect = fsect, - .last_sect = lsect }; + if (ring_req->operation != BLKIF_OP_INDIRECT) { + ring_req->u.rw.seg[i] = + (struct blkif_request_segment) { + .gref = ref, + .first_sect = fsect, + .last_sect = lsect }; + } else { + n = i % SEGS_PER_INDIRECT_FRAME; + segments[n] = + (struct blkif_request_segment_aligned) { + .gref = ref, + .first_sect = fsect, + .last_sect = lsect }; + } } + if (segments) + kunmap_atomic(segments); } info->ring.req_prod_pvt++; @@ -542,7 +608,9 @@ wait: flush_requests(info); } -static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) +static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size, + unsigned int physical_sector_size, + unsigned int segments) { struct request_queue *rq; struct blkfront_info *info = gd->private_data; @@ -564,14 +632,15 @@ 
static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) /* Hard sector size and max sectors impersonate the equiv. hardware. */ blk_queue_logical_block_size(rq, sector_size); - blk_queue_max_hw_sectors(rq, 512); + blk_queue_physical_block_size(rq, physical_sector_size); + blk_queue_max_hw_sectors(rq, (segments * PAGE_SIZE) / 512); /* Each segment in a request is up to an aligned page in size. */ blk_queue_segment_boundary(rq, PAGE_SIZE - 1); blk_queue_max_segment_size(rq, PAGE_SIZE); /* Ensure a merged request will fit in a single I/O ring slot. */ - blk_queue_max_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST); + blk_queue_max_segments(rq, segments); /* Make sure buffer addresses are sector-aligned. */ blk_queue_dma_alignment(rq, 511); @@ -588,13 +657,16 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) static void xlvbd_flush(struct blkfront_info *info) { blk_queue_flush(info->rq, info->feature_flush); - printk(KERN_INFO "blkfront: %s: %s: %s %s\n", + printk(KERN_INFO "blkfront: %s: %s: %s %s %s %s %s\n", info->gd->disk_name, info->flush_op == BLKIF_OP_WRITE_BARRIER ? "barrier" : (info->flush_op == BLKIF_OP_FLUSH_DISKCACHE ? "flush diskcache" : "barrier or flush"), - info->feature_flush ? "enabled" : "disabled", - info->feature_persistent ? "using persistent grants" : ""); + info->feature_flush ? "enabled;" : "disabled;", + "persistent grants:", + info->feature_persistent ? "enabled;" : "disabled;", + "indirect descriptors:", + info->max_indirect_segments ? 
"enabled;" : "disabled;"); } static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset) @@ -667,7 +739,8 @@ static char *encode_disk_name(char *ptr, unsigned int n) static int xlvbd_alloc_gendisk(blkif_sector_t capacity, struct blkfront_info *info, - u16 vdisk_info, u16 sector_size) + u16 vdisk_info, u16 sector_size, + unsigned int physical_sector_size) { struct gendisk *gd; int nr_minors = 1; @@ -734,7 +807,9 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity, gd->driverfs_dev = &(info->xbdev->dev); set_capacity(gd, capacity); - if (xlvbd_init_blk_queue(gd, sector_size)) { + if (xlvbd_init_blk_queue(gd, sector_size, physical_sector_size, + info->max_indirect_segments ? : + BLKIF_MAX_SEGMENTS_PER_REQUEST)) { del_gendisk(gd); goto release; } @@ -818,6 +893,7 @@ static void blkif_free(struct blkfront_info *info, int suspend) { struct grant *persistent_gnt; struct grant *n; + int i, j, segs; /* Prevent new requests being issued until we fix things up. */ spin_lock_irq(&info->io_lock); @@ -843,6 +919,47 @@ static void blkif_free(struct blkfront_info *info, int suspend) } BUG_ON(info->persistent_gnts_c != 0); + for (i = 0; i < BLK_RING_SIZE; i++) { + /* + * Clear persistent grants present in requests already + * on the shared ring + */ + if (!info->shadow[i].request) + goto free_shadow; + + segs = info->shadow[i].req.operation == BLKIF_OP_INDIRECT ? 
+ info->shadow[i].req.u.indirect.nr_segments : + info->shadow[i].req.u.rw.nr_segments; + for (j = 0; j < segs; j++) { + persistent_gnt = info->shadow[i].grants_used[j]; + gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL); + __free_page(pfn_to_page(persistent_gnt->pfn)); + kfree(persistent_gnt); + } + + if (info->shadow[i].req.operation != BLKIF_OP_INDIRECT) + /* + * If this is not an indirect operation don't try to + * free indirect segments + */ + goto free_shadow; + + for (j = 0; j < INDIRECT_GREFS(segs); j++) { + persistent_gnt = info->shadow[i].indirect_grants[j]; + gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL); + __free_page(pfn_to_page(persistent_gnt->pfn)); + kfree(persistent_gnt); + } + +free_shadow: + kfree(info->shadow[i].grants_used); + info->shadow[i].grants_used = NULL; + kfree(info->shadow[i].indirect_grants); + info->shadow[i].indirect_grants = NULL; + kfree(info->shadow[i].sg); + info->shadow[i].sg = NULL; + } + /* No more gnttab callback work. */ gnttab_cancel_free_callback(&info->callback); spin_unlock_irq(&info->io_lock); @@ -867,12 +984,13 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info, struct blkif_response *bret) { int i = 0; - struct bio_vec *bvec; - struct req_iterator iter; - unsigned long flags; + struct scatterlist *sg; char *bvec_data; void *shared_data; - unsigned int offset = 0; + int nseg; + + nseg = s->req.operation == BLKIF_OP_INDIRECT ? + s->req.u.indirect.nr_segments : s->req.u.rw.nr_segments; if (bret->operation == BLKIF_OP_READ) { /* @@ -881,26 +999,29 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info, * than PAGE_SIZE, we have to keep track of the current offset, * to be sure we are copying the data from the right shared page. 
*/ - rq_for_each_segment(bvec, s->request, iter) { - BUG_ON((bvec->bv_offset + bvec->bv_len) > PAGE_SIZE); - if (bvec->bv_offset < offset) - i++; - BUG_ON(i >= s->req.u.rw.nr_segments); + for_each_sg(s->sg, sg, nseg, i) { + BUG_ON(sg->offset + sg->length > PAGE_SIZE); shared_data = kmap_atomic( pfn_to_page(s->grants_used[i]->pfn)); - bvec_data = bvec_kmap_irq(bvec, &flags); - memcpy(bvec_data, shared_data + bvec->bv_offset, - bvec->bv_len); - bvec_kunmap_irq(bvec_data, &flags); + bvec_data = kmap_atomic(sg_page(sg)); + memcpy(bvec_data + sg->offset, + shared_data + sg->offset, + sg->length); + kunmap_atomic(bvec_data); kunmap_atomic(shared_data); - offset = bvec->bv_offset + bvec->bv_len; } } /* Add the persistent grant into the list of free grants */ - for (i = 0; i < s->req.u.rw.nr_segments; i++) { + for (i = 0; i < nseg; i++) { list_add(&s->grants_used[i]->node, &info->persistent_gnts); info->persistent_gnts_c++; } + if (s->req.operation == BLKIF_OP_INDIRECT) { + for (i = 0; i < INDIRECT_GREFS(nseg); i++) { + list_add(&s->indirect_grants[i]->node, &info->persistent_gnts); + info->persistent_gnts_c++; + } + } } static irqreturn_t blkif_interrupt(int irq, void *dev_id) @@ -1034,14 +1155,6 @@ static int setup_blkring(struct xenbus_device *dev, SHARED_RING_INIT(sring); FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE); - sg_init_table(info->sg, BLKIF_MAX_SEGMENTS_PER_REQUEST); - - /* Allocate memory for grants */ - err = fill_grant_buffer(info, BLK_RING_SIZE * - BLKIF_MAX_SEGMENTS_PER_REQUEST); - if (err) - goto fail; - err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring)); if (err < 0) { free_page((unsigned long)sring); @@ -1223,13 +1336,84 @@ static int blkfront_probe(struct xenbus_device *dev, return 0; } +/* + * This is a clone of md_trim_bio, used to split a bio into smaller ones + */ +static void trim_bio(struct bio *bio, int offset, int size) +{ + /* 'bio' is a cloned bio which we need to trim to match + * the given offset and size. 
+ * This requires adjusting bi_sector, bi_size, and bi_io_vec + */ + int i; + struct bio_vec *bvec; + int sofar = 0; + + size <<= 9; + if (offset == 0 && size == bio->bi_size) + return; + + bio->bi_sector += offset; + bio->bi_size = size; + offset <<= 9; + clear_bit(BIO_SEG_VALID, &bio->bi_flags); + + while (bio->bi_idx < bio->bi_vcnt && + bio->bi_io_vec[bio->bi_idx].bv_len <= offset) { + /* remove this whole bio_vec */ + offset -= bio->bi_io_vec[bio->bi_idx].bv_len; + bio->bi_idx++; + } + if (bio->bi_idx < bio->bi_vcnt) { + bio->bi_io_vec[bio->bi_idx].bv_offset += offset; + bio->bi_io_vec[bio->bi_idx].bv_len -= offset; + } + /* avoid any complications with bi_idx being non-zero*/ + if (bio->bi_idx) { + memmove(bio->bi_io_vec, bio->bi_io_vec+bio->bi_idx, + (bio->bi_vcnt - bio->bi_idx) * sizeof(struct bio_vec)); + bio->bi_vcnt -= bio->bi_idx; + bio->bi_idx = 0; + } + /* Make sure vcnt and last bv are not too big */ + bio_for_each_segment(bvec, bio, i) { + if (sofar + bvec->bv_len > size) + bvec->bv_len = size - sofar; + if (bvec->bv_len == 0) { + bio->bi_vcnt = i; + break; + } + sofar += bvec->bv_len; + } +} + +static void split_bio_end(struct bio *bio, int error) +{ + struct split_bio *split_bio = bio->bi_private; + + if (error) + split_bio->err = error; + + if (atomic_dec_and_test(&split_bio->pending)) { + split_bio->bio->bi_phys_segments = 0; + bio_endio(split_bio->bio, split_bio->err); + kfree(split_bio); + } + bio_put(bio); +} static int blkif_recover(struct blkfront_info *info) { int i; - struct blkif_request *req; + struct request *req, *n; struct blk_shadow *copy; - int j; + int rc; + struct bio *bio, *cloned_bio; + struct bio_list bio_list, merge_bio; + unsigned int segs, offset; + int pending, size; + struct split_bio *split_bio; + struct list_head requests; /* Stage 1: Make a safe copy of the shadow state. 
*/ copy = kmemdup(info->shadow, sizeof(info->shadow), @@ -1244,36 +1428,64 @@ static int blkif_recover(struct blkfront_info *info) info->shadow_free = info->ring.req_prod_pvt; info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff; - /* Stage 3: Find pending requests and requeue them. */ + rc = blkfront_setup_indirect(info); + if (rc) { + kfree(copy); + return rc; + } + + segs = info->max_indirect_segments ? : BLKIF_MAX_SEGMENTS_PER_REQUEST; + blk_queue_max_segments(info->rq, segs); + bio_list_init(&bio_list); + INIT_LIST_HEAD(&requests); for (i = 0; i < BLK_RING_SIZE; i++) { /* Not in use? */ if (!copy[i].request) continue; - /* Grab a request slot and copy shadow state into it. */ - req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt); - *req = copy[i].req; - - /* We get a new request id, and must reset the shadow state. */ - req->u.rw.id = get_id_from_freelist(info); - memcpy(&info->shadow[req->u.rw.id], &copy[i], sizeof(copy[i])); - - if (req->operation != BLKIF_OP_DISCARD) { - /* Rewrite any grant references invalidated by susp/resume. */ - for (j = 0; j < req->u.rw.nr_segments; j++) - gnttab_grant_foreign_access_ref( - req->u.rw.seg[j].gref, - info->xbdev->otherend_id, - pfn_to_mfn(copy[i].grants_used[j]->pfn), - 0); + /* + * Get the bios in the request so we can re-queue them. + */ + if (copy[i].request->cmd_flags & + (REQ_FLUSH | REQ_FUA | REQ_DISCARD | REQ_SECURE)) { + /* + * Flush operations don't contain bios, so + * we need to requeue the whole request + */ + list_add(&copy[i].request->queuelist, &requests); + continue; } - info->shadow[req->u.rw.id].req = *req; - - info->ring.req_prod_pvt++; + merge_bio.head = copy[i].request->bio; + merge_bio.tail = copy[i].request->biotail; + bio_list_merge(&bio_list, &merge_bio); + copy[i].request->bio = NULL; + blk_put_request(copy[i].request); } kfree(copy); + /* + * Empty the queue, this is important because we might have + * requests in the queue with more segments than what we + * can handle now.
+ */ + spin_lock_irq(&info->io_lock); + while ((req = blk_fetch_request(info->rq)) != NULL) { + if (req->cmd_flags & + (REQ_FLUSH | REQ_FUA | REQ_DISCARD | REQ_SECURE)) { + list_add(&req->queuelist, &requests); + continue; + } + merge_bio.head = req->bio; + merge_bio.tail = req->biotail; + bio_list_merge(&bio_list, &merge_bio); + req->bio = NULL; + if (req->cmd_flags & (REQ_FLUSH | REQ_FUA)) + pr_alert("diskcache flush request found!\n"); + __blk_put_request(info->rq, req); + } + spin_unlock_irq(&info->io_lock); + xenbus_switch_state(info->xbdev, XenbusStateConnected); spin_lock_irq(&info->io_lock); @@ -1281,14 +1493,50 @@ static int blkif_recover(struct blkfront_info *info) /* Now safe for us to use the shared ring */ info->connected = BLKIF_STATE_CONNECTED; - /* Send off requeued requests */ - flush_requests(info); - /* Kick any other new requests queued since we resumed */ kick_pending_request_queues(info); + list_for_each_entry_safe(req, n, &requests, queuelist) { + /* Requeue pending requests (flush or discard) */ + list_del_init(&req->queuelist); + BUG_ON(req->nr_phys_segments > segs); + blk_requeue_request(info->rq, req); + } spin_unlock_irq(&info->io_lock); + while ((bio = bio_list_pop(&bio_list)) != NULL) { + /* Traverse the list of pending bios and re-queue them */ + if (bio_segments(bio) > segs) { + /* + * This bio has more segments than what we can + * handle, we have to split it. 
+ */ + pending = (bio_segments(bio) + segs - 1) / segs; + split_bio = kzalloc(sizeof(*split_bio), GFP_NOIO); + BUG_ON(split_bio == NULL); + atomic_set(&split_bio->pending, pending); + split_bio->bio = bio; + for (i = 0; i < pending; i++) { + offset = (i * segs * PAGE_SIZE) >> 9; + size = min((unsigned int)(segs * PAGE_SIZE) >> 9, + (unsigned int)(bio->bi_size >> 9) - offset); + cloned_bio = bio_clone(bio, GFP_NOIO); + BUG_ON(cloned_bio == NULL); + trim_bio(cloned_bio, offset, size); + cloned_bio->bi_private = split_bio; + cloned_bio->bi_end_io = split_bio_end; + submit_bio(cloned_bio->bi_rw, cloned_bio); + } + /* + * Now we have to wait for all those smaller bios to + * end, so we can also end the "parent" bio. + */ + continue; + } + /* We don't need to split this bio */ + submit_bio(bio->bi_rw, bio); + } + return 0; } @@ -1308,8 +1556,12 @@ static int blkfront_resume(struct xenbus_device *dev) blkif_free(info, info->connected == BLKIF_STATE_CONNECTED); err = talk_to_blkback(dev, info); - if (info->connected == BLKIF_STATE_SUSPENDED && !err) - err = blkif_recover(info); + + /* + * We have to wait for the backend to switch to + * connected state, since we want to read which + * features it supports. 
+ */ return err; } @@ -1387,6 +1639,60 @@ static void blkfront_setup_discard(struct blkfront_info *info) kfree(type); } +static int blkfront_setup_indirect(struct blkfront_info *info) +{ + unsigned int indirect_segments, segs; + int err, i; + + err = xenbus_gather(XBT_NIL, info->xbdev->otherend, + "feature-max-indirect-segments", "%u", &indirect_segments, + NULL); + if (err) { + info->max_indirect_segments = 0; + segs = BLKIF_MAX_SEGMENTS_PER_REQUEST; + } else { + info->max_indirect_segments = min(indirect_segments, + xen_blkif_max_segments); + segs = info->max_indirect_segments; + } + + err = fill_grant_buffer(info, (segs + INDIRECT_GREFS(segs)) * BLK_RING_SIZE); + if (err) + goto out_of_memory; + + for (i = 0; i < BLK_RING_SIZE; i++) { + info->shadow[i].grants_used = kzalloc( + sizeof(info->shadow[i].grants_used[0]) * segs, + GFP_NOIO); + info->shadow[i].sg = kzalloc(sizeof(info->shadow[i].sg[0]) * segs, GFP_NOIO); + if (info->max_indirect_segments) + info->shadow[i].indirect_grants = kzalloc( + sizeof(info->shadow[i].indirect_grants[0]) * + INDIRECT_GREFS(segs), + GFP_NOIO); + if ((info->shadow[i].grants_used == NULL) || + (info->shadow[i].sg == NULL) || + (info->max_indirect_segments && + (info->shadow[i].indirect_grants == NULL))) + goto out_of_memory; + sg_init_table(info->shadow[i].sg, segs); + } + + + return 0; + +out_of_memory: + for (i = 0; i < BLK_RING_SIZE; i++) { + kfree(info->shadow[i].grants_used); + info->shadow[i].grants_used = NULL; + kfree(info->shadow[i].sg); + info->shadow[i].sg = NULL; + kfree(info->shadow[i].indirect_grants); + info->shadow[i].indirect_grants = NULL; + } + return -ENOMEM; +} + /* * Invoked when the backend is finally 'ready' (and has told produced * the details about the physical device - #sectors, size, etc). 
@@ -1395,6 +1701,7 @@ static void blkfront_connect(struct blkfront_info *info) { unsigned long long sectors; unsigned long sector_size; + unsigned int physical_sector_size; unsigned int binfo; int err; int barrier, flush, discard, persistent; @@ -1414,8 +1721,15 @@ static void blkfront_connect(struct blkfront_info *info) set_capacity(info->gd, sectors); revalidate_disk(info->gd); - /* fall through */ + return; case BLKIF_STATE_SUSPENDED: + /* + * If we are recovering from suspension, we need to wait + * for the backend to announce its features before + * reconnecting, at least we need to know if the backend + * supports indirect descriptors, and how many. + */ + blkif_recover(info); return; default: @@ -1437,6 +1751,16 @@ static void blkfront_connect(struct blkfront_info *info) return; } + /* + * physical-sector-size is a newer field, so old backends may not + * provide this. Assume physical sector size to be the same as + * sector_size in that case. + */ + err = xenbus_scanf(XBT_NIL, info->xbdev->otherend, + "physical-sector-size", "%u", &physical_sector_size); + if (err != 1) + physical_sector_size = sector_size; + info->feature_flush = 0; info->flush_op = 0; @@ -1483,7 +1807,15 @@ static void blkfront_connect(struct blkfront_info *info) else info->feature_persistent = persistent; - err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size); + err = blkfront_setup_indirect(info); + if (err) { + xenbus_dev_fatal(info->xbdev, err, "setup_indirect at %s", + info->xbdev->otherend); + return; + } + + err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size, + physical_sector_size); if (err) { xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s", info->xbdev->otherend); diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index b012d7600e1a..7cde885011ed 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -103,10 +103,10 @@ struct pstate_adjust_policy { static struct pstate_adjust_policy default_policy = { 
.sample_rate_ms = 10, .deadband = 0, - .setpoint = 109, - .p_gain_pct = 17, + .setpoint = 97, + .p_gain_pct = 20, .d_gain_pct = 0, - .i_gain_pct = 4, + .i_gain_pct = 0, }; struct perf_limits { @@ -468,12 +468,12 @@ static inline void intel_pstate_set_sample_time(struct cpudata *cpu) static inline int intel_pstate_get_scaled_busy(struct cpudata *cpu) { int32_t busy_scaled; - int32_t core_busy, turbo_pstate, current_pstate; + int32_t core_busy, max_pstate, current_pstate; core_busy = int_tofp(cpu->samples[cpu->sample_ptr].core_pct_busy); - turbo_pstate = int_tofp(cpu->pstate.turbo_pstate); + max_pstate = int_tofp(cpu->pstate.max_pstate); current_pstate = int_tofp(cpu->pstate.current_pstate); - busy_scaled = mul_fp(core_busy, div_fp(turbo_pstate, current_pstate)); + busy_scaled = mul_fp(core_busy, div_fp(max_pstate, current_pstate)); return fp_toint(busy_scaled); } diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c index 5996521a1caf..84573b4d6f92 100644 --- a/drivers/crypto/caam/caamhash.c +++ b/drivers/crypto/caam/caamhash.c @@ -429,7 +429,7 @@ static int hash_digest_key(struct caam_hash_ctx *ctx, const u8 *key_in, dma_addr_t src_dma, dst_dma; int ret = 0; - desc = kmalloc(CAAM_CMD_SZ * 6 + CAAM_PTR_SZ * 2, GFP_KERNEL | GFP_DMA); + desc = kmalloc(CAAM_CMD_SZ * 8 + CAAM_PTR_SZ * 2, GFP_KERNEL | GFP_DMA); if (!desc) { dev_err(jrdev, "unable to allocate key input memory\n"); return -ENOMEM; diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index 27e86d938262..89e109022d78 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -48,6 +48,8 @@ static LIST_HEAD(mc_devices); */ static void const *edac_mc_owner; +static struct bus_type mc_bus[EDAC_MAX_MCS]; + unsigned edac_dimm_info_location(struct dimm_info *dimm, char *buf, unsigned len) { @@ -723,6 +725,11 @@ int edac_mc_add_mc(struct mem_ctl_info *mci) int ret = -EINVAL; edac_dbg(0, "\n"); + if (mci->mc_idx >= EDAC_MAX_MCS) { + pr_warn_once("Too many memory controllers: 
%d\n", mci->mc_idx); + return -ENODEV; + } + #ifdef CONFIG_EDAC_DEBUG if (edac_debug_level >= 3) edac_mc_dump_mci(mci); @@ -762,6 +769,8 @@ int edac_mc_add_mc(struct mem_ctl_info *mci) /* set load time so that error rate can be tracked */ mci->start_time = jiffies; + mci->bus = &mc_bus[mci->mc_idx]; + if (edac_create_sysfs_mci_device(mci)) { edac_mc_printk(mci, KERN_WARNING, "failed to create sysfs device\n"); diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c index ef15a7e613bc..e7c32c4f7837 100644 --- a/drivers/edac/edac_mc_sysfs.c +++ b/drivers/edac/edac_mc_sysfs.c @@ -370,7 +370,7 @@ static int edac_create_csrow_object(struct mem_ctl_info *mci, return -ENODEV; csrow->dev.type = &csrow_attr_type; - csrow->dev.bus = &mci->bus; + csrow->dev.bus = mci->bus; device_initialize(&csrow->dev); csrow->dev.parent = &mci->dev; csrow->mci = mci; @@ -605,7 +605,7 @@ static int edac_create_dimm_object(struct mem_ctl_info *mci, dimm->mci = mci; dimm->dev.type = &dimm_attr_type; - dimm->dev.bus = &mci->bus; + dimm->dev.bus = mci->bus; device_initialize(&dimm->dev); dimm->dev.parent = &mci->dev; @@ -975,11 +975,13 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci) * The memory controller needs its own bus, in order to avoid * namespace conflicts at /sys/bus/edac. 
*/ - mci->bus.name = kasprintf(GFP_KERNEL, "mc%d", mci->mc_idx); - if (!mci->bus.name) + mci->bus->name = kasprintf(GFP_KERNEL, "mc%d", mci->mc_idx); + if (!mci->bus->name) return -ENOMEM; - edac_dbg(0, "creating bus %s\n", mci->bus.name); - err = bus_register(&mci->bus); + + edac_dbg(0, "creating bus %s\n", mci->bus->name); + + err = bus_register(mci->bus); if (err < 0) return err; @@ -988,7 +990,7 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci) device_initialize(&mci->dev); mci->dev.parent = mci_pdev; - mci->dev.bus = &mci->bus; + mci->dev.bus = mci->bus; dev_set_name(&mci->dev, "mc%d", mci->mc_idx); dev_set_drvdata(&mci->dev, mci); pm_runtime_forbid(&mci->dev); @@ -997,8 +999,8 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci) err = device_add(&mci->dev); if (err < 0) { edac_dbg(1, "failure: create device %s\n", dev_name(&mci->dev)); - bus_unregister(&mci->bus); - kfree(mci->bus.name); + bus_unregister(mci->bus); + kfree(mci->bus->name); return err; } @@ -1064,8 +1066,8 @@ fail: } fail2: device_unregister(&mci->dev); - bus_unregister(&mci->bus); - kfree(mci->bus.name); + bus_unregister(mci->bus); + kfree(mci->bus->name); return err; } @@ -1098,8 +1100,8 @@ void edac_unregister_sysfs(struct mem_ctl_info *mci) { edac_dbg(1, "Unregistering device %s\n", dev_name(&mci->dev)); device_unregister(&mci->dev); - bus_unregister(&mci->bus); - kfree(mci->bus.name); + bus_unregister(mci->bus); + kfree(mci->bus->name); } static void mc_attr_release(struct device *dev) diff --git a/drivers/edac/i5100_edac.c b/drivers/edac/i5100_edac.c index 1b635178cc44..157b934e8ce3 100644 --- a/drivers/edac/i5100_edac.c +++ b/drivers/edac/i5100_edac.c @@ -974,7 +974,7 @@ static int i5100_setup_debugfs(struct mem_ctl_info *mci) if (!i5100_debugfs) return -ENODEV; - priv->debugfs = debugfs_create_dir(mci->bus.name, i5100_debugfs); + priv->debugfs = debugfs_create_dir(mci->bus->name, i5100_debugfs); if (!priv->debugfs) return -ENOMEM; diff --git 
a/drivers/gpio/gpio-msm-v2.c b/drivers/gpio/gpio-msm-v2.c index f4491a497cc8..c2fa77086eb5 100644 --- a/drivers/gpio/gpio-msm-v2.c +++ b/drivers/gpio/gpio-msm-v2.c @@ -378,7 +378,7 @@ static int msm_gpio_probe(struct platform_device *pdev) int ret, ngpio; struct resource *res; - if (!of_property_read_u32(pdev->dev.of_node, "ngpio", &ngpio)) { + if (of_property_read_u32(pdev->dev.of_node, "ngpio", &ngpio)) { dev_err(&pdev->dev, "%s: ngpio property missing\n", __func__); return -EINVAL; } diff --git a/drivers/gpio/gpio-omap.c b/drivers/gpio/gpio-omap.c index dfeb3a3a8f20..c57244ef428b 100644 --- a/drivers/gpio/gpio-omap.c +++ b/drivers/gpio/gpio-omap.c @@ -1037,6 +1037,18 @@ omap_mpuio_alloc_gc(struct gpio_bank *bank, unsigned int irq_start, IRQ_NOREQUEST | IRQ_NOPROBE, 0); } +#if defined(CONFIG_OF_GPIO) +static inline bool omap_gpio_chip_boot_dt(struct gpio_chip *chip) +{ + return chip->of_node != NULL; +} +#else +static inline bool omap_gpio_chip_boot_dt(struct gpio_chip *chip) +{ + return false; +} +#endif + static void omap_gpio_chip_init(struct gpio_bank *bank) { int j; @@ -1068,24 +1080,68 @@ static void omap_gpio_chip_init(struct gpio_bank *bank) gpiochip_add(&bank->chip); - for (j = 0; j < bank->width; j++) { - int irq = irq_create_mapping(bank->domain, j); - irq_set_lockdep_class(irq, &gpio_lock_class); - irq_set_chip_data(irq, bank); - if (bank->is_mpuio) { - omap_mpuio_alloc_gc(bank, irq, bank->width); - } else { - irq_set_chip_and_handler(irq, &gpio_irq_chip, - handle_simple_irq); - set_irq_flags(irq, IRQF_VALID); - } - } + /* + * REVISIT these explicit calls to irq_create_mapping() + * to do the GPIO to IRQ domain mapping for each GPIO in + * the bank can be removed once all OMAP platforms have + * been migrated to Device Tree boot only. + * Since in DT boot irq_create_mapping() is called from + * irq_create_of_mapping() only for the GPIO lines that + * are used as interrupts. 
+ */ + if (!omap_gpio_chip_boot_dt(&bank->chip)) + for (j = 0; j < bank->width; j++) + irq_create_mapping(bank->domain, j); irq_set_chained_handler(bank->irq, gpio_irq_handler); irq_set_handler_data(bank->irq, bank); } static const struct of_device_id omap_gpio_match[]; +static int omap_gpio_irq_map(struct irq_domain *d, unsigned int virq, + irq_hw_number_t hwirq) +{ + struct gpio_bank *bank = d->host_data; + int gpio; + int ret; + + if (!bank) + return -EINVAL; + + irq_set_lockdep_class(virq, &gpio_lock_class); + irq_set_chip_data(virq, bank); + if (bank->is_mpuio) { + omap_mpuio_alloc_gc(bank, virq, bank->width); + } else { + irq_set_chip_and_handler(virq, &gpio_irq_chip, + handle_simple_irq); + set_irq_flags(virq, IRQF_VALID); + } + + /* + * REVISIT most GPIO IRQ chip drivers need to call + * gpio_request() before a GPIO line can be used as an + * IRQ. Ideally this should be handled by the IRQ core + * but until then this has to be done on a per driver + * basis. Remove this once this is managed by the core. 
+ */ + if (omap_gpio_chip_boot_dt(&bank->chip)) { + gpio = irq_to_gpio(bank, hwirq); + ret = gpio_request_one(gpio, GPIOF_IN, NULL); + if (ret) { + dev_err(bank->dev, "Could not request GPIO%d\n", gpio); + return ret; + } + } + + return 0; +} + +static struct irq_domain_ops omap_gpio_irq_ops = { + .xlate = irq_domain_xlate_onetwocell, + .map = omap_gpio_irq_map, +}; + static int omap_gpio_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; @@ -1151,10 +1207,10 @@ static int omap_gpio_probe(struct platform_device *pdev) } bank->domain = irq_domain_add_legacy(node, bank->width, irq_base, - 0, &irq_domain_simple_ops, NULL); + 0, &omap_gpio_irq_ops, bank); #else bank->domain = irq_domain_add_linear(node, bank->width, - &irq_domain_simple_ops, NULL); + &omap_gpio_irq_ops, bank); #endif if (!bank->domain) { dev_err(dev, "Couldn't register an IRQ domain\n"); diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c index 738a4294d820..6a647493ca7f 100644 --- a/drivers/gpu/drm/drm_crtc_helper.c +++ b/drivers/gpu/drm/drm_crtc_helper.c @@ -677,6 +677,11 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set) /* don't break so fail path works correct */ fail = 1; break; + + if (connector->dpms != DRM_MODE_DPMS_ON) { + DRM_DEBUG_KMS("connector dpms not on, full mode switch\n"); + mode_changed = true; + } } } @@ -754,6 +759,12 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set) ret = -EINVAL; goto fail; } + DRM_DEBUG_KMS("Setting connector DPMS state to on\n"); + for (i = 0; i < set->num_connectors; i++) { + DRM_DEBUG_KMS("\t[CONNECTOR:%d:%s] set DPMS on\n", set->connectors[i]->base.id, + drm_get_connector_name(set->connectors[i])); + set->connectors[i]->funcs->dpms(set->connectors[i], DRM_MODE_DPMS_ON); + } } drm_helper_disable_unused_functions(dev); } else if (fb_changed) { @@ -771,22 +782,6 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set) } } - /* - * crtc set_config helpers implicit set the crtc and all 
connected - * encoders to DPMS on for a full mode set. But for just an fb update it - * doesn't do that. To not confuse userspace, do an explicit DPMS_ON - * unconditionally. This will also ensure driver internal dpms state is - * consistent again. - */ - if (set->crtc->enabled) { - DRM_DEBUG_KMS("Setting connector DPMS state to on\n"); - for (i = 0; i < set->num_connectors; i++) { - DRM_DEBUG_KMS("\t[CONNECTOR:%d:%s] set DPMS on\n", set->connectors[i]->base.id, - drm_get_connector_name(set->connectors[i])); - set->connectors[i]->funcs->dpms(set->connectors[i], DRM_MODE_DPMS_ON); - } - } - kfree(save_connectors); kfree(save_encoders); kfree(save_crtcs); diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index cf188ab7051a..66c63808fa35 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1495,6 +1495,15 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) dev_priv->dev = dev; dev_priv->info = info; + spin_lock_init(&dev_priv->irq_lock); + spin_lock_init(&dev_priv->gpu_error.lock); + spin_lock_init(&dev_priv->rps.lock); + spin_lock_init(&dev_priv->gt_lock); + spin_lock_init(&dev_priv->backlight.lock); + mutex_init(&dev_priv->dpio_lock); + mutex_init(&dev_priv->rps.hw_lock); + mutex_init(&dev_priv->modeset_restore_lock); + i915_dump_device_info(dev_priv); if (i915_get_bridge_dev(dev)) { @@ -1585,6 +1594,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) intel_detect_pch(dev); intel_irq_init(dev); + intel_gt_sanitize(dev); intel_gt_init(dev); /* Try to make sure MCHBAR is enabled before poking at it */ @@ -1610,15 +1620,6 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) if (!IS_I945G(dev) && !IS_I945GM(dev)) pci_enable_msi(dev->pdev); - spin_lock_init(&dev_priv->irq_lock); - spin_lock_init(&dev_priv->gpu_error.lock); - spin_lock_init(&dev_priv->rps.lock); - spin_lock_init(&dev_priv->backlight.lock); - mutex_init(&dev_priv->dpio_lock); - - 
mutex_init(&dev_priv->rps.hw_lock); - mutex_init(&dev_priv->modeset_restore_lock); - dev_priv->num_plane = 1; if (IS_VALLEYVIEW(dev)) dev_priv->num_plane = 2; @@ -1648,7 +1649,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) if (INTEL_INFO(dev)->num_pipes) { /* Must be done after probing outputs */ intel_opregion_init(dev); - acpi_video_register_with_quirks(); + acpi_video_register(); } if (IS_GEN5(dev)) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index f4af1ca0fb62..45b3c030f483 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -706,7 +706,7 @@ static int i915_drm_thaw(struct drm_device *dev) { int error = 0; - intel_gt_reset(dev); + intel_gt_sanitize(dev); if (drm_core_check_feature(dev, DRIVER_MODESET)) { mutex_lock(&dev->struct_mutex); @@ -732,7 +732,7 @@ int i915_resume(struct drm_device *dev) pci_set_master(dev->pdev); - intel_gt_reset(dev); + intel_gt_sanitize(dev); /* * Platforms with opregion should have sane BIOS, older ones (gen3 and @@ -1253,21 +1253,21 @@ hsw_unclaimed_reg_check(struct drm_i915_private *dev_priv, u32 reg) #define __i915_read(x, y) \ u##x i915_read##x(struct drm_i915_private *dev_priv, u32 reg) { \ + unsigned long irqflags; \ u##x val = 0; \ + spin_lock_irqsave(&dev_priv->gt_lock, irqflags); \ if (IS_GEN5(dev_priv->dev)) \ ilk_dummy_write(dev_priv); \ if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \ - unsigned long irqflags; \ - spin_lock_irqsave(&dev_priv->gt_lock, irqflags); \ if (dev_priv->forcewake_count == 0) \ dev_priv->gt.force_wake_get(dev_priv); \ val = read##y(dev_priv->regs + reg); \ if (dev_priv->forcewake_count == 0) \ dev_priv->gt.force_wake_put(dev_priv); \ - spin_unlock_irqrestore(&dev_priv->gt_lock, irqflags); \ } else { \ val = read##y(dev_priv->regs + reg); \ } \ + spin_unlock_irqrestore(&dev_priv->gt_lock, irqflags); \ trace_i915_reg_rw(false, reg, val, sizeof(val)); \ return val; \ } @@ -1280,8 +1280,10 @@ __i915_read(64, q) 
#define __i915_write(x, y) \ void i915_write##x(struct drm_i915_private *dev_priv, u32 reg, u##x val) { \ + unsigned long irqflags; \ u32 __fifo_ret = 0; \ trace_i915_reg_rw(true, reg, val, sizeof(val)); \ + spin_lock_irqsave(&dev_priv->gt_lock, irqflags); \ if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \ __fifo_ret = __gen6_gt_wait_for_fifo(dev_priv); \ } \ @@ -1293,6 +1295,7 @@ void i915_write##x(struct drm_i915_private *dev_priv, u32 reg, u##x val) { \ gen6_gt_check_fifodbg(dev_priv); \ } \ hsw_unclaimed_reg_check(dev_priv, reg); \ + spin_unlock_irqrestore(&dev_priv->gt_lock, irqflags); \ } __i915_write(8, b) __i915_write(16, w) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index a416645bcd23..d2ee3343c943 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -555,6 +555,7 @@ enum intel_sbi_destination { #define QUIRK_PIPEA_FORCE (1<<0) #define QUIRK_LVDS_SSC_DISABLE (1<<1) #define QUIRK_INVERT_BRIGHTNESS (1<<2) +#define QUIRK_NO_PCH_PWM_ENABLE (1<<3) struct intel_fbdev; struct intel_fbc_work; @@ -1583,7 +1584,7 @@ void i915_handle_error(struct drm_device *dev, bool wedged); extern void intel_irq_init(struct drm_device *dev); extern void intel_hpd_init(struct drm_device *dev); extern void intel_gt_init(struct drm_device *dev); -extern void intel_gt_reset(struct drm_device *dev); +extern void intel_gt_sanitize(struct drm_device *dev); void i915_error_state_free(struct kref *error_ref); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 97afd2639fb6..d9e2208cfe98 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2258,7 +2258,17 @@ void i915_gem_restore_fences(struct drm_device *dev) for (i = 0; i < dev_priv->num_fence_regs; i++) { struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i]; - i915_gem_write_fence(dev, i, reg->obj); + + /* + * Commit delayed tiling changes if we have an object still + * attached to the fence, otherwise 
just clear the fence. + */ + if (reg->obj) { + i915_gem_object_update_fence(reg->obj, reg, + reg->obj->tiling_mode); + } else { + i915_gem_write_fence(dev, i, NULL); + } } } @@ -2795,6 +2805,10 @@ static void i915_gem_write_fence(struct drm_device *dev, int reg, if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj)) mb(); + WARN(obj && (!obj->stride || !obj->tiling_mode), + "bogus fence setup with stride: 0x%x, tiling mode: %i\n", + obj->stride, obj->tiling_mode); + switch (INTEL_INFO(dev)->gen) { case 7: case 6: @@ -2836,6 +2850,7 @@ static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj, fence->obj = NULL; list_del_init(&fence->lru_list); } + obj->fence_dirty = false; } static int @@ -2965,7 +2980,6 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj) return 0; i915_gem_object_update_fence(obj, reg, enable); - obj->fence_dirty = false; return 0; } diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 324211ac9c55..b042ee5c4070 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -301,7 +301,7 @@ static void intel_ddi_mode_set(struct drm_encoder *encoder, struct intel_digital_port *intel_dig_port = enc_to_dig_port(encoder); - intel_dp->DP = intel_dig_port->port_reversal | + intel_dp->DP = intel_dig_port->saved_port_bits | DDI_BUF_CTL_ENABLE | DDI_BUF_EMP_400MV_0DB_HSW; intel_dp->DP |= DDI_PORT_WIDTH(intel_dp->lane_count); @@ -1109,7 +1109,8 @@ static void intel_enable_ddi(struct intel_encoder *intel_encoder) * enabling the port. 
*/ I915_WRITE(DDI_BUF_CTL(port), - intel_dig_port->port_reversal | DDI_BUF_CTL_ENABLE); + intel_dig_port->saved_port_bits | + DDI_BUF_CTL_ENABLE); } else if (type == INTEL_OUTPUT_EDP) { struct intel_dp *intel_dp = enc_to_intel_dp(encoder); @@ -1347,8 +1348,9 @@ void intel_ddi_init(struct drm_device *dev, enum port port) intel_encoder->get_config = intel_ddi_get_config; intel_dig_port->port = port; - intel_dig_port->port_reversal = I915_READ(DDI_BUF_CTL(port)) & - DDI_BUF_PORT_REVERSAL; + intel_dig_port->saved_port_bits = I915_READ(DDI_BUF_CTL(port)) & + (DDI_BUF_PORT_REVERSAL | + DDI_A_4_LANES); intel_dig_port->dp.output_reg = DDI_BUF_CTL(port); intel_encoder->type = INTEL_OUTPUT_UNKNOWN; diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 85f3eb74d2b7..5fb305840db8 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -4913,22 +4913,19 @@ static void i9xx_get_pfit_config(struct intel_crtc *crtc, uint32_t tmp; tmp = I915_READ(PFIT_CONTROL); + if (!(tmp & PFIT_ENABLE)) + return; + /* Check whether the pfit is attached to our pipe. 
*/ if (INTEL_INFO(dev)->gen < 4) { if (crtc->pipe != PIPE_B) return; - - /* gen2/3 store dither state in pfit control, needs to match */ - pipe_config->gmch_pfit.control = tmp & PANEL_8TO6_DITHER_ENABLE; } else { if ((tmp & PFIT_PIPE_MASK) != (crtc->pipe << PFIT_PIPE_SHIFT)) return; } - if (!(tmp & PFIT_ENABLE)) - return; - - pipe_config->gmch_pfit.control = I915_READ(PFIT_CONTROL); + pipe_config->gmch_pfit.control = tmp; pipe_config->gmch_pfit.pgm_ratios = I915_READ(PFIT_PGM_RATIOS); if (INTEL_INFO(dev)->gen < 5) pipe_config->gmch_pfit.lvds_border_bits = @@ -8317,6 +8314,8 @@ check_shared_dpll_state(struct drm_device *dev) pll->active, pll->refcount); WARN(pll->active && !pll->on, "pll in active use but not on in sw tracking\n"); + WARN(pll->on && !pll->active, + "pll in on but not on in use in sw tracking\n"); WARN(pll->on != active, "pll on state mismatch (expected %i, found %i)\n", pll->on, active); @@ -8541,15 +8540,20 @@ static void intel_set_config_restore_state(struct drm_device *dev, } static bool -is_crtc_connector_off(struct drm_crtc *crtc, struct drm_connector *connectors, - int num_connectors) +is_crtc_connector_off(struct drm_mode_set *set) { int i; - for (i = 0; i < num_connectors; i++) - if (connectors[i].encoder && - connectors[i].encoder->crtc == crtc && - connectors[i].dpms != DRM_MODE_DPMS_ON) + if (set->num_connectors == 0) + return false; + + if (WARN_ON(set->connectors == NULL)) + return false; + + for (i = 0; i < set->num_connectors; i++) + if (set->connectors[i]->encoder && + set->connectors[i]->encoder->crtc == set->crtc && + set->connectors[i]->dpms != DRM_MODE_DPMS_ON) return true; return false; @@ -8562,10 +8566,8 @@ intel_set_config_compute_mode_changes(struct drm_mode_set *set, /* We should be able to check here if the fb has the same properties * and then just flip_or_move it */ - if (set->connectors != NULL && - is_crtc_connector_off(set->crtc, *set->connectors, - set->num_connectors)) { - config->mode_changed = true; + if 
(is_crtc_connector_off(set)) { + config->mode_changed = true; } else if (set->crtc->fb != set->fb) { /* If we have no fb then treat it as a full mode set */ if (set->crtc->fb == NULL) { @@ -9398,6 +9400,17 @@ static void quirk_invert_brightness(struct drm_device *dev) DRM_INFO("applying inverted panel brightness quirk\n"); } +/* + * Some machines (Dell XPS13) suffer broken backlight controls if + * BLM_PCH_PWM_ENABLE is set. + */ +static void quirk_no_pcm_pwm_enable(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + dev_priv->quirks |= QUIRK_NO_PCH_PWM_ENABLE; + DRM_INFO("applying no-PCH_PWM_ENABLE quirk\n"); +} + struct intel_quirk { int device; int subsystem_vendor; @@ -9467,6 +9480,11 @@ static struct intel_quirk intel_quirks[] = { /* Acer Aspire 4736Z */ { 0x2a42, 0x1025, 0x0260, quirk_invert_brightness }, + + /* Dell XPS13 HD Sandy Bridge */ + { 0x0116, 0x1028, 0x052e, quirk_no_pcm_pwm_enable }, + /* Dell XPS13 HD and XPS13 FHD Ivy Bridge */ + { 0x0166, 0x1028, 0x058b, quirk_no_pcm_pwm_enable }, }; static void intel_init_quirks(struct drm_device *dev) @@ -9817,8 +9835,8 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) } pll->refcount = pll->active; - DRM_DEBUG_KMS("%s hw state readout: refcount %i\n", - pll->name, pll->refcount); + DRM_DEBUG_KMS("%s hw state readout: refcount %i, on %i\n", + pll->name, pll->refcount, pll->on); } list_for_each_entry(encoder, &dev->mode_config.encoder_list, @@ -9869,6 +9887,7 @@ void intel_modeset_setup_hw_state(struct drm_device *dev, struct drm_plane *plane; struct intel_crtc *crtc; struct intel_encoder *encoder; + int i; intel_modeset_readout_hw_state(dev); @@ -9884,6 +9903,18 @@ void intel_modeset_setup_hw_state(struct drm_device *dev, intel_dump_pipe_config(crtc, &crtc->config, "[setup_hw_state]"); } + for (i = 0; i < dev_priv->num_shared_dpll; i++) { + struct intel_shared_dpll *pll = &dev_priv->shared_dplls[i]; + + if (!pll->on || pll->active) + continue; + + 
DRM_DEBUG_KMS("%s enabled but not in use, disabling\n", pll->name); + + pll->disable(dev_priv, pll); + pll->on = false; + } + if (force_restore) { /* * We need to use raw interfaces for restoring state to avoid diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index c8c9b6f48230..b7d6e09456ce 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -504,7 +504,7 @@ struct intel_dp { struct intel_digital_port { struct intel_encoder base; enum port port; - u32 port_reversal; + u32 saved_port_bits; struct intel_dp dp; struct intel_hdmi hdmi; }; diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index 98df2a0c85bd..2fd3fd5b943e 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -785,10 +785,22 @@ static void intel_disable_hdmi(struct intel_encoder *encoder) } } +static int hdmi_portclock_limit(struct intel_hdmi *hdmi) +{ + struct drm_device *dev = intel_hdmi_to_dev(hdmi); + + if (IS_G4X(dev)) + return 165000; + else if (IS_HASWELL(dev)) + return 300000; + else + return 225000; +} + static int intel_hdmi_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { - if (mode->clock > 165000) + if (mode->clock > hdmi_portclock_limit(intel_attached_hdmi(connector))) return MODE_CLOCK_HIGH; if (mode->clock < 20000) return MODE_CLOCK_LOW; @@ -806,6 +818,7 @@ bool intel_hdmi_compute_config(struct intel_encoder *encoder, struct drm_device *dev = encoder->base.dev; struct drm_display_mode *adjusted_mode = &pipe_config->adjusted_mode; int clock_12bpc = pipe_config->requested_mode.clock * 3 / 2; + int portclock_limit = hdmi_portclock_limit(intel_hdmi); int desired_bpp; if (intel_hdmi->color_range_auto) { @@ -829,7 +842,7 @@ bool intel_hdmi_compute_config(struct intel_encoder *encoder, * outputs. We also need to check that the higher clock still fits * within limits. 
*/ - if (pipe_config->pipe_bpp > 8*3 && clock_12bpc <= 225000 + if (pipe_config->pipe_bpp > 8*3 && clock_12bpc <= portclock_limit && HAS_PCH_SPLIT(dev)) { DRM_DEBUG_KMS("picking bpc to 12 for HDMI output\n"); desired_bpp = 12*3; @@ -846,7 +859,7 @@ bool intel_hdmi_compute_config(struct intel_encoder *encoder, pipe_config->pipe_bpp = desired_bpp; } - if (adjusted_mode->clock > 225000) { + if (adjusted_mode->clock > portclock_limit) { DRM_DEBUG_KMS("too high HDMI clock, rejecting mode\n"); return false; } diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index 021e8daa022d..61348eae2f04 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -109,6 +109,13 @@ static void intel_lvds_get_config(struct intel_encoder *encoder, flags |= DRM_MODE_FLAG_PVSYNC; pipe_config->adjusted_mode.flags |= flags; + + /* gen2/3 store dither state in pfit control, needs to match */ + if (INTEL_INFO(dev)->gen < 4) { + tmp = I915_READ(PFIT_CONTROL); + + pipe_config->gmch_pfit.control |= tmp & PANEL_8TO6_DITHER_ENABLE; + } } /* The LVDS pin pair needs to be on before the DPLLs are enabled. 
@@ -290,14 +297,11 @@ static bool intel_lvds_compute_config(struct intel_encoder *intel_encoder, intel_pch_panel_fitting(intel_crtc, pipe_config, intel_connector->panel.fitting_mode); - return true; } else { intel_gmch_panel_fitting(intel_crtc, pipe_config, intel_connector->panel.fitting_mode); - } - drm_mode_set_crtcinfo(adjusted_mode, 0); - pipe_config->timings_set = true; + } /* * XXX: It would be nice to support lower refresh rates on the diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c index 80bea1d3209f..67e2c1f1c9a8 100644 --- a/drivers/gpu/drm/i915/intel_panel.c +++ b/drivers/gpu/drm/i915/intel_panel.c @@ -194,6 +194,9 @@ void intel_gmch_panel_fitting(struct intel_crtc *intel_crtc, adjusted_mode->vdisplay == mode->vdisplay) goto out; + drm_mode_set_crtcinfo(adjusted_mode, 0); + pipe_config->timings_set = true; + switch (fitting_mode) { case DRM_MODE_SCALE_CENTER: /* @@ -580,7 +583,8 @@ void intel_panel_enable_backlight(struct drm_device *dev, POSTING_READ(reg); I915_WRITE(reg, tmp | BLM_PWM_ENABLE); - if (HAS_PCH_SPLIT(dev)) { + if (HAS_PCH_SPLIT(dev) && + !(dev_priv->quirks & QUIRK_NO_PCH_PWM_ENABLE)) { tmp = I915_READ(BLC_PWM_PCH_CTL1); tmp |= BLM_PCH_PWM_ENABLE; tmp &= ~BLM_PCH_OVERRIDE_ENABLE; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index d10e6735771f..51a2a60f5bfc 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -5476,7 +5476,7 @@ static void vlv_force_wake_put(struct drm_i915_private *dev_priv) gen6_gt_check_fifodbg(dev_priv); } -void intel_gt_reset(struct drm_device *dev) +void intel_gt_sanitize(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; @@ -5487,16 +5487,16 @@ void intel_gt_reset(struct drm_device *dev) if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev)) __gen6_gt_force_wake_mt_reset(dev_priv); } + + /* BIOS often leaves RC6 enabled, but disable it for hw init */ + if (INTEL_INFO(dev)->gen >= 6) + 
intel_disable_gt_powersave(dev); } void intel_gt_init(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; - spin_lock_init(&dev_priv->gt_lock); - - intel_gt_reset(dev); - if (IS_VALLEYVIEW(dev)) { dev_priv->gt.force_wake_get = vlv_force_wake_get; dev_priv->gt.force_wake_put = vlv_force_wake_put; diff --git a/drivers/gpu/drm/nouveau/core/engine/bsp/nvc0.c b/drivers/gpu/drm/nouveau/core/engine/bsp/nvc0.c index 262c9f5f5f60..ce860de43e61 100644 --- a/drivers/gpu/drm/nouveau/core/engine/bsp/nvc0.c +++ b/drivers/gpu/drm/nouveau/core/engine/bsp/nvc0.c @@ -90,6 +90,7 @@ nvc0_bsp_ctor(struct nouveau_object *parent, struct nouveau_object *engine, return ret; nv_subdev(priv)->unit = 0x00008000; + nv_subdev(priv)->intr = nouveau_falcon_intr; nv_engine(priv)->cclass = &nvc0_bsp_cclass; nv_engine(priv)->sclass = nvc0_bsp_sclass; return 0; diff --git a/drivers/gpu/drm/nouveau/core/engine/bsp/nve0.c b/drivers/gpu/drm/nouveau/core/engine/bsp/nve0.c index c46882c83982..ba6aeca0285e 100644 --- a/drivers/gpu/drm/nouveau/core/engine/bsp/nve0.c +++ b/drivers/gpu/drm/nouveau/core/engine/bsp/nve0.c @@ -90,6 +90,7 @@ nve0_bsp_ctor(struct nouveau_object *parent, struct nouveau_object *engine, return ret; nv_subdev(priv)->unit = 0x00008000; + nv_subdev(priv)->intr = nouveau_falcon_intr; nv_engine(priv)->cclass = &nve0_bsp_cclass; nv_engine(priv)->sclass = nve0_bsp_sclass; return 0; diff --git a/drivers/gpu/drm/nouveau/core/engine/falcon.c b/drivers/gpu/drm/nouveau/core/engine/falcon.c index 3c7a31f7590e..e03fc8e4dc1d 100644 --- a/drivers/gpu/drm/nouveau/core/engine/falcon.c +++ b/drivers/gpu/drm/nouveau/core/engine/falcon.c @@ -23,6 +23,25 @@ #include <engine/falcon.h> #include <subdev/timer.h> +void +nouveau_falcon_intr(struct nouveau_subdev *subdev) +{ + struct nouveau_falcon *falcon = (void *)subdev; + u32 dispatch = nv_ro32(falcon, 0x01c); + u32 intr = nv_ro32(falcon, 0x008) & dispatch & ~(dispatch >> 16); + + if (intr & 0x00000010) { + nv_debug(falcon, 
"ucode halted\n"); + nv_wo32(falcon, 0x004, 0x00000010); + intr &= ~0x00000010; + } + + if (intr) { + nv_error(falcon, "unhandled intr 0x%08x\n", intr); + nv_wo32(falcon, 0x004, intr); + } +} + u32 _nouveau_falcon_rd32(struct nouveau_object *object, u64 addr) { diff --git a/drivers/gpu/drm/nouveau/core/engine/ppp/nvc0.c b/drivers/gpu/drm/nouveau/core/engine/ppp/nvc0.c index 98072c1ff360..73719aaa62d6 100644 --- a/drivers/gpu/drm/nouveau/core/engine/ppp/nvc0.c +++ b/drivers/gpu/drm/nouveau/core/engine/ppp/nvc0.c @@ -90,6 +90,7 @@ nvc0_ppp_ctor(struct nouveau_object *parent, struct nouveau_object *engine, return ret; nv_subdev(priv)->unit = 0x00000002; + nv_subdev(priv)->intr = nouveau_falcon_intr; nv_engine(priv)->cclass = &nvc0_ppp_cclass; nv_engine(priv)->sclass = nvc0_ppp_sclass; return 0; diff --git a/drivers/gpu/drm/nouveau/core/engine/vp/nvc0.c b/drivers/gpu/drm/nouveau/core/engine/vp/nvc0.c index 1879229b60eb..ac1f62aace72 100644 --- a/drivers/gpu/drm/nouveau/core/engine/vp/nvc0.c +++ b/drivers/gpu/drm/nouveau/core/engine/vp/nvc0.c @@ -90,6 +90,7 @@ nvc0_vp_ctor(struct nouveau_object *parent, struct nouveau_object *engine, return ret; nv_subdev(priv)->unit = 0x00020000; + nv_subdev(priv)->intr = nouveau_falcon_intr; nv_engine(priv)->cclass = &nvc0_vp_cclass; nv_engine(priv)->sclass = nvc0_vp_sclass; return 0; diff --git a/drivers/gpu/drm/nouveau/core/engine/vp/nve0.c b/drivers/gpu/drm/nouveau/core/engine/vp/nve0.c index d28ecbf7bc49..d4c3108479c9 100644 --- a/drivers/gpu/drm/nouveau/core/engine/vp/nve0.c +++ b/drivers/gpu/drm/nouveau/core/engine/vp/nve0.c @@ -90,6 +90,7 @@ nve0_vp_ctor(struct nouveau_object *parent, struct nouveau_object *engine, return ret; nv_subdev(priv)->unit = 0x00020000; + nv_subdev(priv)->intr = nouveau_falcon_intr; nv_engine(priv)->cclass = &nve0_vp_cclass; nv_engine(priv)->sclass = nve0_vp_sclass; return 0; diff --git a/drivers/gpu/drm/nouveau/core/include/engine/falcon.h b/drivers/gpu/drm/nouveau/core/include/engine/falcon.h index 
1edec386ab36..181aa7da524d 100644 --- a/drivers/gpu/drm/nouveau/core/include/engine/falcon.h +++ b/drivers/gpu/drm/nouveau/core/include/engine/falcon.h @@ -72,6 +72,8 @@ int nouveau_falcon_create_(struct nouveau_object *, struct nouveau_object *, struct nouveau_oclass *, u32, bool, const char *, const char *, int, void **); +void nouveau_falcon_intr(struct nouveau_subdev *subdev); + #define _nouveau_falcon_dtor _nouveau_engine_dtor int _nouveau_falcon_init(struct nouveau_object *); int _nouveau_falcon_fini(struct nouveau_object *, bool); diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 4b1afb131380..4e7ee5f4155c 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -148,6 +148,7 @@ nouveau_bo_del_ttm(struct ttm_buffer_object *bo) if (unlikely(nvbo->gem)) DRM_ERROR("bo %p still attached to GEM object\n", bo); + WARN_ON(nvbo->pin_refcnt > 0); nv10_bo_put_tile_region(dev, nvbo->tile, NULL); kfree(nvbo); } @@ -197,6 +198,12 @@ nouveau_bo_new(struct drm_device *dev, int size, int align, size_t acc_size; int ret; int type = ttm_bo_type_device; + int max_size = INT_MAX & ~((1 << drm->client.base.vm->vmm->lpg_shift) - 1); + + if (size <= 0 || size > max_size) { + nv_warn(drm, "skipped size %x\n", (u32)size); + return -EINVAL; + } if (sg) type = ttm_bo_type_sg; @@ -340,13 +347,15 @@ nouveau_bo_unpin(struct nouveau_bo *nvbo) { struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev); struct ttm_buffer_object *bo = &nvbo->bo; - int ret; + int ret, ref; ret = ttm_bo_reserve(bo, false, false, false, 0); if (ret) return ret; - if (--nvbo->pin_refcnt) + ref = --nvbo->pin_refcnt; + WARN_ON_ONCE(ref < 0); + if (ref) goto out; nouveau_bo_placement_set(nvbo, bo->mem.placement, 0); @@ -578,7 +587,7 @@ nve0_bo_move_init(struct nouveau_channel *chan, u32 handle) int ret = RING_SPACE(chan, 2); if (ret == 0) { BEGIN_NVC0(chan, NvSubCopy, 0x0000, 1); - OUT_RING (chan, handle); + OUT_RING (chan, handle & 
0x0000ffff); FIRE_RING (chan); } return ret; @@ -973,7 +982,7 @@ nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, bool intr, struct ttm_mem_reg *old_mem = &bo->mem; int ret; - mutex_lock(&chan->cli->mutex); + mutex_lock_nested(&chan->cli->mutex, SINGLE_DEPTH_NESTING); /* create temporary vmas for the transfer and attach them to the * old nouveau_mem node, these will get cleaned up after ttm has @@ -1014,7 +1023,7 @@ nouveau_bo_move_init(struct nouveau_drm *drm) struct ttm_mem_reg *, struct ttm_mem_reg *); int (*init)(struct nouveau_channel *, u32 handle); } _methods[] = { - { "COPY", 0, 0xa0b5, nve0_bo_move_copy, nve0_bo_move_init }, + { "COPY", 4, 0xa0b5, nve0_bo_move_copy, nve0_bo_move_init }, { "GRCE", 0, 0xa0b5, nve0_bo_move_copy, nvc0_bo_move_init }, { "COPY1", 5, 0x90b8, nvc0_bo_move_copy, nvc0_bo_move_init }, { "COPY0", 4, 0x90b5, nvc0_bo_move_copy, nvc0_bo_move_init }, @@ -1034,7 +1043,7 @@ nouveau_bo_move_init(struct nouveau_drm *drm) struct nouveau_channel *chan; u32 handle = (mthd->engine << 16) | mthd->oclass; - if (mthd->init == nve0_bo_move_init) + if (mthd->engine) chan = drm->cechan; else chan = drm->channel; diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c index 708b2d1c0037..907d20ef6d4d 100644 --- a/drivers/gpu/drm/nouveau/nouveau_display.c +++ b/drivers/gpu/drm/nouveau/nouveau_display.c @@ -138,7 +138,7 @@ nouveau_user_framebuffer_create(struct drm_device *dev, { struct nouveau_framebuffer *nouveau_fb; struct drm_gem_object *gem; - int ret; + int ret = -ENOMEM; gem = drm_gem_object_lookup(dev, file_priv, mode_cmd->handles[0]); if (!gem) @@ -146,15 +146,19 @@ nouveau_user_framebuffer_create(struct drm_device *dev, nouveau_fb = kzalloc(sizeof(struct nouveau_framebuffer), GFP_KERNEL); if (!nouveau_fb) - return ERR_PTR(-ENOMEM); + goto err_unref; ret = nouveau_framebuffer_init(dev, nouveau_fb, mode_cmd, nouveau_gem_object(gem)); - if (ret) { - drm_gem_object_unreference(gem); - return 
ERR_PTR(ret); - } + if (ret) + goto err; return &nouveau_fb->base; + +err: + kfree(nouveau_fb); +err_unref: + drm_gem_object_unreference(gem); + return ERR_PTR(ret); } static const struct drm_mode_config_funcs nouveau_mode_config_funcs = { @@ -524,9 +528,12 @@ nouveau_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb, struct nouveau_page_flip_state *s; struct nouveau_channel *chan = NULL; struct nouveau_fence *fence; - struct list_head res; - struct ttm_validate_buffer res_val[2]; + struct ttm_validate_buffer resv[2] = { + { .bo = &old_bo->bo }, + { .bo = &new_bo->bo }, + }; struct ww_acquire_ctx ticket; + LIST_HEAD(res); int ret; if (!drm->channel) @@ -545,27 +552,19 @@ nouveau_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb, chan = drm->channel; spin_unlock(&old_bo->bo.bdev->fence_lock); - mutex_lock(&chan->cli->mutex); - if (new_bo != old_bo) { ret = nouveau_bo_pin(new_bo, TTM_PL_FLAG_VRAM); - if (likely(!ret)) { - res_val[0].bo = &old_bo->bo; - res_val[1].bo = &new_bo->bo; - INIT_LIST_HEAD(&res); - list_add_tail(&res_val[0].head, &res); - list_add_tail(&res_val[1].head, &res); - ret = ttm_eu_reserve_buffers(&ticket, &res); - if (ret) - nouveau_bo_unpin(new_bo); - } - } else - ret = ttm_bo_reserve(&new_bo->bo, false, false, false, 0); + if (ret) + goto fail_free; - if (ret) { - mutex_unlock(&chan->cli->mutex); - goto fail_free; + list_add(&resv[1].head, &res); } + list_add(&resv[0].head, &res); + + mutex_lock(&chan->cli->mutex); + ret = ttm_eu_reserve_buffers(&ticket, &res); + if (ret) + goto fail_unpin; /* Initialize a page flip struct */ *s = (struct nouveau_page_flip_state) @@ -576,10 +575,8 @@ nouveau_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb, /* Emit a page flip */ if (nv_device(drm->device)->card_type >= NV_50) { ret = nv50_display_flip_next(crtc, fb, chan, 0); - if (ret) { - mutex_unlock(&chan->cli->mutex); + if (ret) goto fail_unreserve; - } } ret = nouveau_page_flip_emit(chan, old_bo, new_bo, s, 
&fence); @@ -590,22 +587,18 @@ nouveau_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb, /* Update the crtc struct and cleanup */ crtc->fb = fb; - if (old_bo != new_bo) { - ttm_eu_fence_buffer_objects(&ticket, &res, fence); + ttm_eu_fence_buffer_objects(&ticket, &res, fence); + if (old_bo != new_bo) nouveau_bo_unpin(old_bo); - } else { - nouveau_bo_fence(new_bo, fence); - ttm_bo_unreserve(&new_bo->bo); - } nouveau_fence_unref(&fence); return 0; fail_unreserve: - if (old_bo != new_bo) { - ttm_eu_backoff_reservation(&ticket, &res); + ttm_eu_backoff_reservation(&ticket, &res); +fail_unpin: + mutex_unlock(&chan->cli->mutex); + if (old_bo != new_bo) nouveau_bo_unpin(new_bo); - } else - ttm_bo_unreserve(&new_bo->bo); fail_free: kfree(s); return ret; diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index 218a4b522fe5..61972668fd05 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -192,6 +192,18 @@ nouveau_accel_init(struct nouveau_drm *drm) arg0 = NVE0_CHANNEL_IND_ENGINE_GR; arg1 = 1; + } else + if (device->chipset >= 0xa3 && + device->chipset != 0xaa && + device->chipset != 0xac) { + ret = nouveau_channel_new(drm, &drm->client, NVDRM_DEVICE, + NVDRM_CHAN + 1, NvDmaFB, NvDmaTT, + &drm->cechan); + if (ret) + NV_ERROR(drm, "failed to create ce channel, %d\n", ret); + + arg0 = NvDmaFB; + arg1 = NvDmaTT; } else { arg0 = NvDmaFB; arg1 = NvDmaTT; @@ -284,8 +296,6 @@ static int nouveau_drm_probe(struct pci_dev *pdev, return 0; } -static struct lock_class_key drm_client_lock_class_key; - static int nouveau_drm_load(struct drm_device *dev, unsigned long flags) { @@ -297,7 +307,6 @@ nouveau_drm_load(struct drm_device *dev, unsigned long flags) ret = nouveau_cli_create(pdev, "DRM", sizeof(*drm), (void**)&drm); if (ret) return ret; - lockdep_set_class(&drm->client.mutex, &drm_client_lock_class_key); dev->dev_private = drm; drm->dev = dev; diff --git 
a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c index 9352010030e9..4c1bc061fae2 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c +++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c @@ -385,6 +385,7 @@ out_unlock: mutex_unlock(&dev->struct_mutex); if (chan) nouveau_bo_vma_del(nvbo, &fbcon->nouveau_fb.vma); + nouveau_bo_unmap(nvbo); out_unpin: nouveau_bo_unpin(nvbo); out_unref: diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index 1680d9187bab..be3149932c2d 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -143,7 +143,7 @@ nouveau_fence_emit(struct nouveau_fence *fence, struct nouveau_channel *chan) int ret; fence->channel = chan; - fence->timeout = jiffies + (3 * DRM_HZ); + fence->timeout = jiffies + (15 * DRM_HZ); fence->sequence = ++fctx->sequence; ret = fctx->emit(fence); diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index e72d09c068a8..830cb7bad922 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -50,12 +50,6 @@ nouveau_gem_object_del(struct drm_gem_object *gem) return; nvbo->gem = NULL; - /* Lockdep hates you for doing reserve with gem object lock held */ - if (WARN_ON_ONCE(nvbo->pin_refcnt)) { - nvbo->pin_refcnt = 1; - nouveau_bo_unpin(nvbo); - } - if (gem->import_attach) drm_prime_gem_destroy(gem, nvbo->bo.sg); diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c index 54dc6355b0c2..8b40a36c1b57 100644 --- a/drivers/gpu/drm/nouveau/nv50_display.c +++ b/drivers/gpu/drm/nouveau/nv50_display.c @@ -355,6 +355,7 @@ struct nv50_oimm { struct nv50_head { struct nouveau_crtc base; + struct nouveau_bo *image; struct nv50_curs curs; struct nv50_sync sync; struct nv50_ovly ovly; @@ -517,9 +518,10 @@ nv50_display_flip_next(struct drm_crtc *crtc, struct drm_framebuffer *fb, { struct nouveau_framebuffer *nv_fb 
= nouveau_framebuffer(fb); struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc); + struct nv50_head *head = nv50_head(crtc); struct nv50_sync *sync = nv50_sync(crtc); - int head = nv_crtc->index, ret; u32 *push; + int ret; swap_interval <<= 4; if (swap_interval == 0) @@ -537,7 +539,7 @@ nv50_display_flip_next(struct drm_crtc *crtc, struct drm_framebuffer *fb, return ret; BEGIN_NV04(chan, 0, NV11_SUBCHAN_DMA_SEMAPHORE, 2); - OUT_RING (chan, NvEvoSema0 + head); + OUT_RING (chan, NvEvoSema0 + nv_crtc->index); OUT_RING (chan, sync->addr ^ 0x10); BEGIN_NV04(chan, 0, NV11_SUBCHAN_SEMAPHORE_RELEASE, 1); OUT_RING (chan, sync->data + 1); @@ -546,7 +548,7 @@ nv50_display_flip_next(struct drm_crtc *crtc, struct drm_framebuffer *fb, OUT_RING (chan, sync->data); } else if (chan && nv_mclass(chan->object) < NVC0_CHANNEL_IND_CLASS) { - u64 addr = nv84_fence_crtc(chan, head) + sync->addr; + u64 addr = nv84_fence_crtc(chan, nv_crtc->index) + sync->addr; ret = RING_SPACE(chan, 12); if (ret) return ret; @@ -565,7 +567,7 @@ nv50_display_flip_next(struct drm_crtc *crtc, struct drm_framebuffer *fb, OUT_RING (chan, NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL); } else if (chan) { - u64 addr = nv84_fence_crtc(chan, head) + sync->addr; + u64 addr = nv84_fence_crtc(chan, nv_crtc->index) + sync->addr; ret = RING_SPACE(chan, 10); if (ret) return ret; @@ -630,6 +632,8 @@ nv50_display_flip_next(struct drm_crtc *crtc, struct drm_framebuffer *fb, evo_mthd(push, 0x0080, 1); evo_data(push, 0x00000000); evo_kick(push, sync); + + nouveau_bo_ref(nv_fb->nvbo, &head->image); return 0; } @@ -1038,18 +1042,17 @@ static int nv50_crtc_swap_fbs(struct drm_crtc *crtc, struct drm_framebuffer *old_fb) { struct nouveau_framebuffer *nvfb = nouveau_framebuffer(crtc->fb); + struct nv50_head *head = nv50_head(crtc); int ret; ret = nouveau_bo_pin(nvfb->nvbo, TTM_PL_FLAG_VRAM); - if (ret) - return ret; - - if (old_fb) { - nvfb = nouveau_framebuffer(old_fb); - nouveau_bo_unpin(nvfb->nvbo); + if (ret == 0) { + if 
(head->image) + nouveau_bo_unpin(head->image); + nouveau_bo_ref(nvfb->nvbo, &head->image); } - return 0; + return ret; } static int @@ -1198,6 +1201,15 @@ nv50_crtc_lut_load(struct drm_crtc *crtc) } } +static void +nv50_crtc_disable(struct drm_crtc *crtc) +{ + struct nv50_head *head = nv50_head(crtc); + if (head->image) + nouveau_bo_unpin(head->image); + nouveau_bo_ref(NULL, &head->image); +} + static int nv50_crtc_cursor_set(struct drm_crtc *crtc, struct drm_file *file_priv, uint32_t handle, uint32_t width, uint32_t height) @@ -1271,18 +1283,29 @@ nv50_crtc_destroy(struct drm_crtc *crtc) struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc); struct nv50_disp *disp = nv50_disp(crtc->dev); struct nv50_head *head = nv50_head(crtc); + nv50_dmac_destroy(disp->core, &head->ovly.base); nv50_pioc_destroy(disp->core, &head->oimm.base); nv50_dmac_destroy(disp->core, &head->sync.base); nv50_pioc_destroy(disp->core, &head->curs.base); + + /*XXX: this shouldn't be necessary, but the core doesn't call + * disconnect() during the cleanup paths + */ + if (head->image) + nouveau_bo_unpin(head->image); + nouveau_bo_ref(NULL, &head->image); + nouveau_bo_unmap(nv_crtc->cursor.nvbo); if (nv_crtc->cursor.nvbo) nouveau_bo_unpin(nv_crtc->cursor.nvbo); nouveau_bo_ref(NULL, &nv_crtc->cursor.nvbo); + nouveau_bo_unmap(nv_crtc->lut.nvbo); if (nv_crtc->lut.nvbo) nouveau_bo_unpin(nv_crtc->lut.nvbo); nouveau_bo_ref(NULL, &nv_crtc->lut.nvbo); + drm_crtc_cleanup(crtc); kfree(crtc); } @@ -1296,6 +1319,7 @@ static const struct drm_crtc_helper_funcs nv50_crtc_hfunc = { .mode_set_base = nv50_crtc_mode_set_base, .mode_set_base_atomic = nv50_crtc_mode_set_base_atomic, .load_lut = nv50_crtc_lut_load, + .disable = nv50_crtc_disable, }; static const struct drm_crtc_funcs nv50_crtc_func = { diff --git a/drivers/gpu/drm/qxl/qxl_cmd.c b/drivers/gpu/drm/qxl/qxl_cmd.c index 93c2f2cceb51..eb89653a7a17 100644 --- a/drivers/gpu/drm/qxl/qxl_cmd.c +++ b/drivers/gpu/drm/qxl/qxl_cmd.c @@ -179,9 +179,10 @@ 
qxl_push_command_ring_release(struct qxl_device *qdev, struct qxl_release *relea uint32_t type, bool interruptible) { struct qxl_command cmd; + struct qxl_bo_list *entry = list_first_entry(&release->bos, struct qxl_bo_list, tv.head); cmd.type = type; - cmd.data = qxl_bo_physical_address(qdev, release->bos[0], release->release_offset); + cmd.data = qxl_bo_physical_address(qdev, to_qxl_bo(entry->tv.bo), release->release_offset); return qxl_ring_push(qdev->command_ring, &cmd, interruptible); } @@ -191,9 +192,10 @@ qxl_push_cursor_ring_release(struct qxl_device *qdev, struct qxl_release *releas uint32_t type, bool interruptible) { struct qxl_command cmd; + struct qxl_bo_list *entry = list_first_entry(&release->bos, struct qxl_bo_list, tv.head); cmd.type = type; - cmd.data = qxl_bo_physical_address(qdev, release->bos[0], release->release_offset); + cmd.data = qxl_bo_physical_address(qdev, to_qxl_bo(entry->tv.bo), release->release_offset); return qxl_ring_push(qdev->cursor_ring, &cmd, interruptible); } @@ -214,7 +216,6 @@ int qxl_garbage_collect(struct qxl_device *qdev) struct qxl_release *release; uint64_t id, next_id; int i = 0; - int ret; union qxl_release_info *info; while (qxl_ring_pop(qdev->release_ring, &id)) { @@ -224,17 +225,10 @@ int qxl_garbage_collect(struct qxl_device *qdev) if (release == NULL) break; - ret = qxl_release_reserve(qdev, release, false); - if (ret) { - qxl_io_log(qdev, "failed to reserve release on garbage collect %lld\n", id); - DRM_ERROR("failed to reserve release %lld\n", id); - } - info = qxl_release_map(qdev, release); next_id = info->next; qxl_release_unmap(qdev, release, info); - qxl_release_unreserve(qdev, release); QXL_INFO(qdev, "popped %lld, next %lld\n", id, next_id); @@ -259,27 +253,29 @@ int qxl_garbage_collect(struct qxl_device *qdev) return i; } -int qxl_alloc_bo_reserved(struct qxl_device *qdev, unsigned long size, +int qxl_alloc_bo_reserved(struct qxl_device *qdev, + struct qxl_release *release, + unsigned long size, struct 
qxl_bo **_bo) { struct qxl_bo *bo; int ret; ret = qxl_bo_create(qdev, size, false /* not kernel - device */, - QXL_GEM_DOMAIN_VRAM, NULL, &bo); + false, QXL_GEM_DOMAIN_VRAM, NULL, &bo); if (ret) { DRM_ERROR("failed to allocate VRAM BO\n"); return ret; } - ret = qxl_bo_reserve(bo, false); - if (unlikely(ret != 0)) + ret = qxl_release_list_add(release, bo); + if (ret) goto out_unref; *_bo = bo; return 0; out_unref: qxl_bo_unref(&bo); - return 0; + return ret; } static int wait_for_io_cmd_user(struct qxl_device *qdev, uint8_t val, long port, bool intr) @@ -503,6 +499,10 @@ int qxl_hw_surface_alloc(struct qxl_device *qdev, if (ret) return ret; + ret = qxl_release_reserve_list(release, true); + if (ret) + return ret; + cmd = (struct qxl_surface_cmd *)qxl_release_map(qdev, release); cmd->type = QXL_SURFACE_CMD_CREATE; cmd->u.surface_create.format = surf->surf.format; @@ -524,14 +524,11 @@ int qxl_hw_surface_alloc(struct qxl_device *qdev, surf->surf_create = release; - /* no need to add a release to the fence for this bo, + /* no need to add a release to the fence for this surface bo, since it is only released when we ask to destroy the surface and it would never signal otherwise */ - qxl_fence_releaseable(qdev, release); - qxl_push_command_ring_release(qdev, release, QXL_CMD_SURFACE, false); - - qxl_release_unreserve(qdev, release); + qxl_release_fence_buffer_objects(release); surf->hw_surf_alloc = true; spin_lock(&qdev->surf_id_idr_lock); @@ -573,12 +570,9 @@ int qxl_hw_surface_dealloc(struct qxl_device *qdev, cmd->surface_id = id; qxl_release_unmap(qdev, release, &cmd->release_info); - qxl_fence_releaseable(qdev, release); - qxl_push_command_ring_release(qdev, release, QXL_CMD_SURFACE, false); - qxl_release_unreserve(qdev, release); - + qxl_release_fence_buffer_objects(release); return 0; } diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c index f76f5dd7bfc4..835caba026d3 100644 --- a/drivers/gpu/drm/qxl/qxl_display.c +++ 
b/drivers/gpu/drm/qxl/qxl_display.c @@ -179,7 +179,7 @@ static void qxl_crtc_destroy(struct drm_crtc *crtc) kfree(qxl_crtc); } -static void +static int qxl_hide_cursor(struct qxl_device *qdev) { struct qxl_release *release; @@ -188,14 +188,22 @@ qxl_hide_cursor(struct qxl_device *qdev) ret = qxl_alloc_release_reserved(qdev, sizeof(*cmd), QXL_RELEASE_CURSOR_CMD, &release, NULL); + if (ret) + return ret; + + ret = qxl_release_reserve_list(release, true); + if (ret) { + qxl_release_free(qdev, release); + return ret; + } cmd = (struct qxl_cursor_cmd *)qxl_release_map(qdev, release); cmd->type = QXL_CURSOR_HIDE; qxl_release_unmap(qdev, release, &cmd->release_info); - qxl_fence_releaseable(qdev, release); qxl_push_cursor_ring_release(qdev, release, QXL_CMD_CURSOR, false); - qxl_release_unreserve(qdev, release); + qxl_release_fence_buffer_objects(release); + return 0; } static int qxl_crtc_cursor_set2(struct drm_crtc *crtc, @@ -216,10 +224,8 @@ static int qxl_crtc_cursor_set2(struct drm_crtc *crtc, int size = 64*64*4; int ret = 0; - if (!handle) { - qxl_hide_cursor(qdev); - return 0; - } + if (!handle) + return qxl_hide_cursor(qdev); obj = drm_gem_object_lookup(crtc->dev, file_priv, handle); if (!obj) { @@ -234,8 +240,9 @@ static int qxl_crtc_cursor_set2(struct drm_crtc *crtc, goto out_unref; ret = qxl_bo_pin(user_bo, QXL_GEM_DOMAIN_CPU, NULL); + qxl_bo_unreserve(user_bo); if (ret) - goto out_unreserve; + goto out_unref; ret = qxl_bo_kmap(user_bo, &user_ptr); if (ret) @@ -246,14 +253,20 @@ static int qxl_crtc_cursor_set2(struct drm_crtc *crtc, &release, NULL); if (ret) goto out_kunmap; - ret = qxl_alloc_bo_reserved(qdev, sizeof(struct qxl_cursor) + size, - &cursor_bo); + + ret = qxl_alloc_bo_reserved(qdev, release, sizeof(struct qxl_cursor) + size, + &cursor_bo); if (ret) goto out_free_release; - ret = qxl_bo_kmap(cursor_bo, (void **)&cursor); + + ret = qxl_release_reserve_list(release, false); if (ret) goto out_free_bo; + ret = qxl_bo_kmap(cursor_bo, (void **)&cursor); + 
if (ret) + goto out_backoff; + cursor->header.unique = 0; cursor->header.type = SPICE_CURSOR_TYPE_ALPHA; cursor->header.width = 64; @@ -269,11 +282,7 @@ static int qxl_crtc_cursor_set2(struct drm_crtc *crtc, qxl_bo_kunmap(cursor_bo); - /* finish with the userspace bo */ qxl_bo_kunmap(user_bo); - qxl_bo_unpin(user_bo); - qxl_bo_unreserve(user_bo); - drm_gem_object_unreference_unlocked(obj); cmd = (struct qxl_cursor_cmd *)qxl_release_map(qdev, release); cmd->type = QXL_CURSOR_SET; @@ -281,30 +290,35 @@ static int qxl_crtc_cursor_set2(struct drm_crtc *crtc, cmd->u.set.position.y = qcrtc->cur_y; cmd->u.set.shape = qxl_bo_physical_address(qdev, cursor_bo, 0); - qxl_release_add_res(qdev, release, cursor_bo); cmd->u.set.visible = 1; qxl_release_unmap(qdev, release, &cmd->release_info); - qxl_fence_releaseable(qdev, release); qxl_push_cursor_ring_release(qdev, release, QXL_CMD_CURSOR, false); - qxl_release_unreserve(qdev, release); + qxl_release_fence_buffer_objects(release); + + /* finish with the userspace bo */ + ret = qxl_bo_reserve(user_bo, false); + if (!ret) { + qxl_bo_unpin(user_bo); + qxl_bo_unreserve(user_bo); + } + drm_gem_object_unreference_unlocked(obj); - qxl_bo_unreserve(cursor_bo); qxl_bo_unref(&cursor_bo); return ret; + +out_backoff: + qxl_release_backoff_reserve_list(release); out_free_bo: qxl_bo_unref(&cursor_bo); out_free_release: - qxl_release_unreserve(qdev, release); qxl_release_free(qdev, release); out_kunmap: qxl_bo_kunmap(user_bo); out_unpin: qxl_bo_unpin(user_bo); -out_unreserve: - qxl_bo_unreserve(user_bo); out_unref: drm_gem_object_unreference_unlocked(obj); return ret; @@ -322,6 +336,14 @@ static int qxl_crtc_cursor_move(struct drm_crtc *crtc, ret = qxl_alloc_release_reserved(qdev, sizeof(*cmd), QXL_RELEASE_CURSOR_CMD, &release, NULL); + if (ret) + return ret; + + ret = qxl_release_reserve_list(release, true); + if (ret) { + qxl_release_free(qdev, release); + return ret; + } qcrtc->cur_x = x; qcrtc->cur_y = y; @@ -332,9 +354,9 @@ static int 
qxl_crtc_cursor_move(struct drm_crtc *crtc, cmd->u.position.y = qcrtc->cur_y; qxl_release_unmap(qdev, release, &cmd->release_info); - qxl_fence_releaseable(qdev, release); qxl_push_cursor_ring_release(qdev, release, QXL_CMD_CURSOR, false); - qxl_release_unreserve(qdev, release); + qxl_release_fence_buffer_objects(release); + return 0; } diff --git a/drivers/gpu/drm/qxl/qxl_draw.c b/drivers/gpu/drm/qxl/qxl_draw.c index 3c8c3dbf9378..56e1d633875e 100644 --- a/drivers/gpu/drm/qxl/qxl_draw.c +++ b/drivers/gpu/drm/qxl/qxl_draw.c @@ -23,25 +23,29 @@ #include "qxl_drv.h" #include "qxl_object.h" +static int alloc_clips(struct qxl_device *qdev, + struct qxl_release *release, + unsigned num_clips, + struct qxl_bo **clips_bo) +{ + int size = sizeof(struct qxl_clip_rects) + sizeof(struct qxl_rect) * num_clips; + + return qxl_alloc_bo_reserved(qdev, release, size, clips_bo); +} + /* returns a pointer to the already allocated qxl_rect array inside * the qxl_clip_rects. This is *not* the same as the memory allocated * on the device, it is offset to qxl_clip_rects.chunk.data */ static struct qxl_rect *drawable_set_clipping(struct qxl_device *qdev, struct qxl_drawable *drawable, unsigned num_clips, - struct qxl_bo **clips_bo, - struct qxl_release *release) + struct qxl_bo *clips_bo) { struct qxl_clip_rects *dev_clips; int ret; - int size = sizeof(*dev_clips) + sizeof(struct qxl_rect) * num_clips; - ret = qxl_alloc_bo_reserved(qdev, size, clips_bo); - if (ret) - return NULL; - ret = qxl_bo_kmap(*clips_bo, (void **)&dev_clips); + ret = qxl_bo_kmap(clips_bo, (void **)&dev_clips); if (ret) { - qxl_bo_unref(clips_bo); return NULL; } dev_clips->num_rects = num_clips; @@ -52,20 +56,34 @@ static struct qxl_rect *drawable_set_clipping(struct qxl_device *qdev, } static int +alloc_drawable(struct qxl_device *qdev, struct qxl_release **release) +{ + int ret; + ret = qxl_alloc_release_reserved(qdev, sizeof(struct qxl_drawable), + QXL_RELEASE_DRAWABLE, release, + NULL); + return ret; +} + 
+static void +free_drawable(struct qxl_device *qdev, struct qxl_release *release) +{ + qxl_release_free(qdev, release); +} + +/* release needs to be reserved at this point */ +static int make_drawable(struct qxl_device *qdev, int surface, uint8_t type, const struct qxl_rect *rect, - struct qxl_release **release) + struct qxl_release *release) { struct qxl_drawable *drawable; - int i, ret; + int i; - ret = qxl_alloc_release_reserved(qdev, sizeof(*drawable), - QXL_RELEASE_DRAWABLE, release, - NULL); - if (ret) - return ret; + drawable = (struct qxl_drawable *)qxl_release_map(qdev, release); + if (!drawable) + return -ENOMEM; - drawable = (struct qxl_drawable *)qxl_release_map(qdev, *release); drawable->type = type; drawable->surface_id = surface; /* Only primary for now */ @@ -91,14 +109,23 @@ make_drawable(struct qxl_device *qdev, int surface, uint8_t type, drawable->bbox = *rect; drawable->mm_time = qdev->rom->mm_clock; - qxl_release_unmap(qdev, *release, &drawable->release_info); + qxl_release_unmap(qdev, release, &drawable->release_info); return 0; } -static int qxl_palette_create_1bit(struct qxl_bo **palette_bo, +static int alloc_palette_object(struct qxl_device *qdev, + struct qxl_release *release, + struct qxl_bo **palette_bo) +{ + return qxl_alloc_bo_reserved(qdev, release, + sizeof(struct qxl_palette) + sizeof(uint32_t) * 2, + palette_bo); +} + +static int qxl_palette_create_1bit(struct qxl_bo *palette_bo, + struct qxl_release *release, const struct qxl_fb_image *qxl_fb_image) { - struct qxl_device *qdev = qxl_fb_image->qdev; const struct fb_image *fb_image = &qxl_fb_image->fb_image; uint32_t visual = qxl_fb_image->visual; const uint32_t *pseudo_palette = qxl_fb_image->pseudo_palette; @@ -108,12 +135,7 @@ static int qxl_palette_create_1bit(struct qxl_bo **palette_bo, static uint64_t unique; /* we make no attempt to actually set this * correctly globaly, since that would require * tracking all of our palettes. 
*/ - - ret = qxl_alloc_bo_reserved(qdev, - sizeof(struct qxl_palette) + sizeof(uint32_t) * 2, - palette_bo); - - ret = qxl_bo_kmap(*palette_bo, (void **)&pal); + ret = qxl_bo_kmap(palette_bo, (void **)&pal); pal->num_ents = 2; pal->unique = unique++; if (visual == FB_VISUAL_TRUECOLOR || visual == FB_VISUAL_DIRECTCOLOR) { @@ -126,7 +148,7 @@ static int qxl_palette_create_1bit(struct qxl_bo **palette_bo, } pal->ents[0] = bgcolor; pal->ents[1] = fgcolor; - qxl_bo_kunmap(*palette_bo); + qxl_bo_kunmap(palette_bo); return 0; } @@ -144,44 +166,63 @@ void qxl_draw_opaque_fb(const struct qxl_fb_image *qxl_fb_image, const char *src = fb_image->data; int depth = fb_image->depth; struct qxl_release *release; - struct qxl_bo *image_bo; struct qxl_image *image; int ret; - + struct qxl_drm_image *dimage; + struct qxl_bo *palette_bo = NULL; if (stride == 0) stride = depth * width / 8; + ret = alloc_drawable(qdev, &release); + if (ret) + return; + + ret = qxl_image_alloc_objects(qdev, release, + &dimage, + height, stride); + if (ret) + goto out_free_drawable; + + if (depth == 1) { + ret = alloc_palette_object(qdev, release, &palette_bo); + if (ret) + goto out_free_image; + } + + /* do a reservation run over all the objects we just allocated */ + ret = qxl_release_reserve_list(release, true); + if (ret) + goto out_free_palette; + rect.left = x; rect.right = x + width; rect.top = y; rect.bottom = y + height; - ret = make_drawable(qdev, 0, QXL_DRAW_COPY, &rect, &release); - if (ret) - return; + ret = make_drawable(qdev, 0, QXL_DRAW_COPY, &rect, release); + if (ret) { + qxl_release_backoff_reserve_list(release); + goto out_free_palette; + } - ret = qxl_image_create(qdev, release, &image_bo, - (const uint8_t *)src, 0, 0, - width, height, depth, stride); + ret = qxl_image_init(qdev, release, dimage, + (const uint8_t *)src, 0, 0, + width, height, depth, stride); if (ret) { - qxl_release_unreserve(qdev, release); + qxl_release_backoff_reserve_list(release); qxl_release_free(qdev, release); 
return; } if (depth == 1) { - struct qxl_bo *palette_bo; void *ptr; - ret = qxl_palette_create_1bit(&palette_bo, qxl_fb_image); - qxl_release_add_res(qdev, release, palette_bo); + ret = qxl_palette_create_1bit(palette_bo, release, qxl_fb_image); - ptr = qxl_bo_kmap_atomic_page(qdev, image_bo, 0); + ptr = qxl_bo_kmap_atomic_page(qdev, dimage->bo, 0); image = ptr; image->u.bitmap.palette = qxl_bo_physical_address(qdev, palette_bo, 0); - qxl_bo_kunmap_atomic_page(qdev, image_bo, ptr); - qxl_bo_unreserve(palette_bo); - qxl_bo_unref(&palette_bo); + qxl_bo_kunmap_atomic_page(qdev, dimage->bo, ptr); } drawable = (struct qxl_drawable *)qxl_release_map(qdev, release); @@ -199,16 +240,20 @@ void qxl_draw_opaque_fb(const struct qxl_fb_image *qxl_fb_image, drawable->u.copy.mask.bitmap = 0; drawable->u.copy.src_bitmap = - qxl_bo_physical_address(qdev, image_bo, 0); + qxl_bo_physical_address(qdev, dimage->bo, 0); qxl_release_unmap(qdev, release, &drawable->release_info); - qxl_release_add_res(qdev, release, image_bo); - qxl_bo_unreserve(image_bo); - qxl_bo_unref(&image_bo); - - qxl_fence_releaseable(qdev, release); qxl_push_command_ring_release(qdev, release, QXL_CMD_DRAW, false); - qxl_release_unreserve(qdev, release); + qxl_release_fence_buffer_objects(release); + +out_free_palette: + if (palette_bo) + qxl_bo_unref(&palette_bo); +out_free_image: + qxl_image_free_objects(qdev, dimage); +out_free_drawable: + if (ret) + free_drawable(qdev, release); } /* push a draw command using the given clipping rectangles as @@ -243,10 +288,14 @@ void qxl_draw_dirty_fb(struct qxl_device *qdev, int depth = qxl_fb->base.bits_per_pixel; uint8_t *surface_base; struct qxl_release *release; - struct qxl_bo *image_bo; struct qxl_bo *clips_bo; + struct qxl_drm_image *dimage; int ret; + ret = alloc_drawable(qdev, &release); + if (ret) + return; + left = clips->x1; right = clips->x2; top = clips->y1; @@ -263,36 +312,52 @@ void qxl_draw_dirty_fb(struct qxl_device *qdev, width = right - left; height = 
bottom - top; + + ret = alloc_clips(qdev, release, num_clips, &clips_bo); + if (ret) + goto out_free_drawable; + + ret = qxl_image_alloc_objects(qdev, release, + &dimage, + height, stride); + if (ret) + goto out_free_clips; + + /* do a reservation run over all the objects we just allocated */ + ret = qxl_release_reserve_list(release, true); + if (ret) + goto out_free_image; + drawable_rect.left = left; drawable_rect.right = right; drawable_rect.top = top; drawable_rect.bottom = bottom; + ret = make_drawable(qdev, 0, QXL_DRAW_COPY, &drawable_rect, - &release); + release); if (ret) - return; + goto out_release_backoff; ret = qxl_bo_kmap(bo, (void **)&surface_base); if (ret) - goto out_unref; + goto out_release_backoff; - ret = qxl_image_create(qdev, release, &image_bo, surface_base, - left, top, width, height, depth, stride); + + ret = qxl_image_init(qdev, release, dimage, surface_base, + left, top, width, height, depth, stride); qxl_bo_kunmap(bo); if (ret) - goto out_unref; + goto out_release_backoff; + + rects = drawable_set_clipping(qdev, drawable, num_clips, clips_bo); + if (!rects) + goto out_release_backoff; - rects = drawable_set_clipping(qdev, drawable, num_clips, &clips_bo, release); - if (!rects) { - qxl_bo_unref(&image_bo); - goto out_unref; - } drawable = (struct qxl_drawable *)qxl_release_map(qdev, release); drawable->clip.type = SPICE_CLIP_TYPE_RECTS; drawable->clip.data = qxl_bo_physical_address(qdev, clips_bo, 0); - qxl_release_add_res(qdev, release, clips_bo); drawable->u.copy.src_area.top = 0; drawable->u.copy.src_area.bottom = height; @@ -306,11 +371,9 @@ void qxl_draw_dirty_fb(struct qxl_device *qdev, drawable->u.copy.mask.pos.y = 0; drawable->u.copy.mask.bitmap = 0; - drawable->u.copy.src_bitmap = qxl_bo_physical_address(qdev, image_bo, 0); + drawable->u.copy.src_bitmap = qxl_bo_physical_address(qdev, dimage->bo, 0); qxl_release_unmap(qdev, release, &drawable->release_info); - qxl_release_add_res(qdev, release, image_bo); - 
qxl_bo_unreserve(image_bo); - qxl_bo_unref(&image_bo); + clips_ptr = clips; for (i = 0; i < num_clips; i++, clips_ptr += inc) { rects[i].left = clips_ptr->x1; @@ -319,17 +382,22 @@ void qxl_draw_dirty_fb(struct qxl_device *qdev, rects[i].bottom = clips_ptr->y2; } qxl_bo_kunmap(clips_bo); - qxl_bo_unreserve(clips_bo); - qxl_bo_unref(&clips_bo); - qxl_fence_releaseable(qdev, release); qxl_push_command_ring_release(qdev, release, QXL_CMD_DRAW, false); - qxl_release_unreserve(qdev, release); - return; + qxl_release_fence_buffer_objects(release); + +out_release_backoff: + if (ret) + qxl_release_backoff_reserve_list(release); +out_free_image: + qxl_image_free_objects(qdev, dimage); +out_free_clips: + qxl_bo_unref(&clips_bo); +out_free_drawable: + /* only free drawable on error */ + if (ret) + free_drawable(qdev, release); -out_unref: - qxl_release_unreserve(qdev, release); - qxl_release_free(qdev, release); } void qxl_draw_copyarea(struct qxl_device *qdev, @@ -342,22 +410,36 @@ void qxl_draw_copyarea(struct qxl_device *qdev, struct qxl_release *release; int ret; + ret = alloc_drawable(qdev, &release); + if (ret) + return; + + /* do a reservation run over all the objects we just allocated */ + ret = qxl_release_reserve_list(release, true); + if (ret) + goto out_free_release; + rect.left = dx; rect.top = dy; rect.right = dx + width; rect.bottom = dy + height; - ret = make_drawable(qdev, 0, QXL_COPY_BITS, &rect, &release); - if (ret) - return; + ret = make_drawable(qdev, 0, QXL_COPY_BITS, &rect, release); + if (ret) { + qxl_release_backoff_reserve_list(release); + goto out_free_release; + } drawable = (struct qxl_drawable *)qxl_release_map(qdev, release); drawable->u.copy_bits.src_pos.x = sx; drawable->u.copy_bits.src_pos.y = sy; - qxl_release_unmap(qdev, release, &drawable->release_info); - qxl_fence_releaseable(qdev, release); + qxl_push_command_ring_release(qdev, release, QXL_CMD_DRAW, false); - qxl_release_unreserve(qdev, release); + 
qxl_release_fence_buffer_objects(release); + +out_free_release: + if (ret) + free_drawable(qdev, release); } void qxl_draw_fill(struct qxl_draw_fill *qxl_draw_fill_rec) @@ -370,10 +452,21 @@ void qxl_draw_fill(struct qxl_draw_fill *qxl_draw_fill_rec) struct qxl_release *release; int ret; - ret = make_drawable(qdev, 0, QXL_DRAW_FILL, &rect, &release); + ret = alloc_drawable(qdev, &release); if (ret) return; + /* do a reservation run over all the objects we just allocated */ + ret = qxl_release_reserve_list(release, true); + if (ret) + goto out_free_release; + + ret = make_drawable(qdev, 0, QXL_DRAW_FILL, &rect, release); + if (ret) { + qxl_release_backoff_reserve_list(release); + goto out_free_release; + } + drawable = (struct qxl_drawable *)qxl_release_map(qdev, release); drawable->u.fill.brush.type = SPICE_BRUSH_TYPE_SOLID; drawable->u.fill.brush.u.color = color; @@ -384,7 +477,11 @@ void qxl_draw_fill(struct qxl_draw_fill *qxl_draw_fill_rec) drawable->u.fill.mask.bitmap = 0; qxl_release_unmap(qdev, release, &drawable->release_info); - qxl_fence_releaseable(qdev, release); + qxl_push_command_ring_release(qdev, release, QXL_CMD_DRAW, false); - qxl_release_unreserve(qdev, release); + qxl_release_fence_buffer_objects(release); + +out_free_release: + if (ret) + free_drawable(qdev, release); } diff --git a/drivers/gpu/drm/qxl/qxl_drv.h b/drivers/gpu/drm/qxl/qxl_drv.h index aacb791464a3..7e96f4f11738 100644 --- a/drivers/gpu/drm/qxl/qxl_drv.h +++ b/drivers/gpu/drm/qxl/qxl_drv.h @@ -42,6 +42,9 @@ #include <ttm/ttm_placement.h> #include <ttm/ttm_module.h> +/* just for ttm_validate_buffer */ +#include <ttm/ttm_execbuf_util.h> + #include <drm/qxl_drm.h> #include "qxl_dev.h" @@ -118,9 +121,9 @@ struct qxl_bo { uint32_t surface_id; struct qxl_fence fence; /* per bo fence - list of releases */ struct qxl_release *surf_create; - atomic_t reserve_count; }; #define gem_to_qxl_bo(gobj) container_of((gobj), struct qxl_bo, gem_base) +#define to_qxl_bo(tobj) container_of((tobj), 
struct qxl_bo, tbo) struct qxl_gem { struct mutex mutex; @@ -128,12 +131,7 @@ struct qxl_gem { }; struct qxl_bo_list { - struct list_head lhead; - struct qxl_bo *bo; -}; - -struct qxl_reloc_list { - struct list_head bos; + struct ttm_validate_buffer tv; }; struct qxl_crtc { @@ -195,10 +193,20 @@ enum { struct qxl_release { int id; int type; - int bo_count; uint32_t release_offset; uint32_t surface_release_id; - struct qxl_bo *bos[QXL_MAX_RES]; + struct ww_acquire_ctx ticket; + struct list_head bos; +}; + +struct qxl_drm_chunk { + struct list_head head; + struct qxl_bo *bo; +}; + +struct qxl_drm_image { + struct qxl_bo *bo; + struct list_head chunk_list; }; struct qxl_fb_image { @@ -314,6 +322,7 @@ struct qxl_device { struct workqueue_struct *gc_queue; struct work_struct gc_work; + struct work_struct fb_work; }; /* forward declaration for QXL_INFO_IO */ @@ -433,12 +442,19 @@ int qxl_mmap(struct file *filp, struct vm_area_struct *vma); /* qxl image */ -int qxl_image_create(struct qxl_device *qdev, - struct qxl_release *release, - struct qxl_bo **image_bo, - const uint8_t *data, - int x, int y, int width, int height, - int depth, int stride); +int qxl_image_init(struct qxl_device *qdev, + struct qxl_release *release, + struct qxl_drm_image *dimage, + const uint8_t *data, + int x, int y, int width, int height, + int depth, int stride); +int +qxl_image_alloc_objects(struct qxl_device *qdev, + struct qxl_release *release, + struct qxl_drm_image **image_ptr, + int height, int stride); +void qxl_image_free_objects(struct qxl_device *qdev, struct qxl_drm_image *dimage); + void qxl_update_screen(struct qxl_device *qxl); /* qxl io operations (qxl_cmd.c) */ @@ -459,20 +475,15 @@ int qxl_ring_push(struct qxl_ring *ring, const void *new_elt, bool interruptible void qxl_io_flush_release(struct qxl_device *qdev); void qxl_io_flush_surfaces(struct qxl_device *qdev); -int qxl_release_reserve(struct qxl_device *qdev, - struct qxl_release *release, bool no_wait); -void 
qxl_release_unreserve(struct qxl_device *qdev, - struct qxl_release *release); union qxl_release_info *qxl_release_map(struct qxl_device *qdev, struct qxl_release *release); void qxl_release_unmap(struct qxl_device *qdev, struct qxl_release *release, union qxl_release_info *info); -/* - * qxl_bo_add_resource. - * - */ -void qxl_bo_add_resource(struct qxl_bo *main_bo, struct qxl_bo *resource); +int qxl_release_list_add(struct qxl_release *release, struct qxl_bo *bo); +int qxl_release_reserve_list(struct qxl_release *release, bool no_intr); +void qxl_release_backoff_reserve_list(struct qxl_release *release); +void qxl_release_fence_buffer_objects(struct qxl_release *release); int qxl_alloc_surface_release_reserved(struct qxl_device *qdev, enum qxl_surface_cmd_type surface_cmd_type, @@ -481,15 +492,16 @@ int qxl_alloc_surface_release_reserved(struct qxl_device *qdev, int qxl_alloc_release_reserved(struct qxl_device *qdev, unsigned long size, int type, struct qxl_release **release, struct qxl_bo **rbo); -int qxl_fence_releaseable(struct qxl_device *qdev, - struct qxl_release *release); + int qxl_push_command_ring_release(struct qxl_device *qdev, struct qxl_release *release, uint32_t type, bool interruptible); int qxl_push_cursor_ring_release(struct qxl_device *qdev, struct qxl_release *release, uint32_t type, bool interruptible); -int qxl_alloc_bo_reserved(struct qxl_device *qdev, unsigned long size, +int qxl_alloc_bo_reserved(struct qxl_device *qdev, + struct qxl_release *release, + unsigned long size, struct qxl_bo **_bo); /* qxl drawing commands */ @@ -510,15 +522,9 @@ void qxl_draw_copyarea(struct qxl_device *qdev, u32 sx, u32 sy, u32 dx, u32 dy); -uint64_t -qxl_release_alloc(struct qxl_device *qdev, int type, - struct qxl_release **ret); - void qxl_release_free(struct qxl_device *qdev, struct qxl_release *release); -void qxl_release_add_res(struct qxl_device *qdev, - struct qxl_release *release, - struct qxl_bo *bo); + /* used by qxl_debugfs_release */ struct 
qxl_release *qxl_release_from_id_locked(struct qxl_device *qdev, uint64_t id); @@ -561,7 +567,7 @@ void qxl_surface_evict(struct qxl_device *qdev, struct qxl_bo *surf, bool freein int qxl_update_surface(struct qxl_device *qdev, struct qxl_bo *surf); /* qxl_fence.c */ -int qxl_fence_add_release(struct qxl_fence *qfence, uint32_t rel_id); +void qxl_fence_add_release_locked(struct qxl_fence *qfence, uint32_t rel_id); int qxl_fence_remove_release(struct qxl_fence *qfence, uint32_t rel_id); int qxl_fence_init(struct qxl_device *qdev, struct qxl_fence *qfence); void qxl_fence_fini(struct qxl_fence *qfence); diff --git a/drivers/gpu/drm/qxl/qxl_fb.c b/drivers/gpu/drm/qxl/qxl_fb.c index 76f39d88d684..88722f233430 100644 --- a/drivers/gpu/drm/qxl/qxl_fb.c +++ b/drivers/gpu/drm/qxl/qxl_fb.c @@ -37,12 +37,29 @@ #define QXL_DIRTY_DELAY (HZ / 30) +#define QXL_FB_OP_FILLRECT 1 +#define QXL_FB_OP_COPYAREA 2 +#define QXL_FB_OP_IMAGEBLIT 3 + +struct qxl_fb_op { + struct list_head head; + int op_type; + union { + struct fb_fillrect fr; + struct fb_copyarea ca; + struct fb_image ib; + } op; + void *img_data; +}; + struct qxl_fbdev { struct drm_fb_helper helper; struct qxl_framebuffer qfb; struct list_head fbdev_list; struct qxl_device *qdev; + spinlock_t delayed_ops_lock; + struct list_head delayed_ops; void *shadow; int size; @@ -164,8 +181,69 @@ static struct fb_deferred_io qxl_defio = { .deferred_io = qxl_deferred_io, }; -static void qxl_fb_fillrect(struct fb_info *info, - const struct fb_fillrect *fb_rect) +static void qxl_fb_delayed_fillrect(struct qxl_fbdev *qfbdev, + const struct fb_fillrect *fb_rect) +{ + struct qxl_fb_op *op; + unsigned long flags; + + op = kmalloc(sizeof(struct qxl_fb_op), GFP_ATOMIC | __GFP_NOWARN); + if (!op) + return; + + op->op.fr = *fb_rect; + op->img_data = NULL; + op->op_type = QXL_FB_OP_FILLRECT; + + spin_lock_irqsave(&qfbdev->delayed_ops_lock, flags); + list_add_tail(&op->head, &qfbdev->delayed_ops); + 
spin_unlock_irqrestore(&qfbdev->delayed_ops_lock, flags); +} + +static void qxl_fb_delayed_copyarea(struct qxl_fbdev *qfbdev, + const struct fb_copyarea *fb_copy) +{ + struct qxl_fb_op *op; + unsigned long flags; + + op = kmalloc(sizeof(struct qxl_fb_op), GFP_ATOMIC | __GFP_NOWARN); + if (!op) + return; + + op->op.ca = *fb_copy; + op->img_data = NULL; + op->op_type = QXL_FB_OP_COPYAREA; + + spin_lock_irqsave(&qfbdev->delayed_ops_lock, flags); + list_add_tail(&op->head, &qfbdev->delayed_ops); + spin_unlock_irqrestore(&qfbdev->delayed_ops_lock, flags); +} + +static void qxl_fb_delayed_imageblit(struct qxl_fbdev *qfbdev, + const struct fb_image *fb_image) +{ + struct qxl_fb_op *op; + unsigned long flags; + uint32_t size = fb_image->width * fb_image->height * (fb_image->depth >= 8 ? fb_image->depth / 8 : 1); + + op = kmalloc(sizeof(struct qxl_fb_op) + size, GFP_ATOMIC | __GFP_NOWARN); + if (!op) + return; + + op->op.ib = *fb_image; + op->img_data = (void *)(op + 1); + op->op_type = QXL_FB_OP_IMAGEBLIT; + + memcpy(op->img_data, fb_image->data, size); + + op->op.ib.data = op->img_data; + spin_lock_irqsave(&qfbdev->delayed_ops_lock, flags); + list_add_tail(&op->head, &qfbdev->delayed_ops); + spin_unlock_irqrestore(&qfbdev->delayed_ops_lock, flags); +} + +static void qxl_fb_fillrect_internal(struct fb_info *info, + const struct fb_fillrect *fb_rect) { struct qxl_fbdev *qfbdev = info->par; struct qxl_device *qdev = qfbdev->qdev; @@ -203,17 +281,28 @@ static void qxl_fb_fillrect(struct fb_info *info, qxl_draw_fill_rec.rect = rect; qxl_draw_fill_rec.color = color; qxl_draw_fill_rec.rop = rop; + + qxl_draw_fill(&qxl_draw_fill_rec); +} + +static void qxl_fb_fillrect(struct fb_info *info, + const struct fb_fillrect *fb_rect) +{ + struct qxl_fbdev *qfbdev = info->par; + struct qxl_device *qdev = qfbdev->qdev; + if (!drm_can_sleep()) { - qxl_io_log(qdev, - "%s: TODO use RCU, mysterious locks with spin_lock\n", - __func__); + qxl_fb_delayed_fillrect(qfbdev, fb_rect); + 
schedule_work(&qdev->fb_work); return; } - qxl_draw_fill(&qxl_draw_fill_rec); + /* make sure any previous work is done */ + flush_work(&qdev->fb_work); + qxl_fb_fillrect_internal(info, fb_rect); } -static void qxl_fb_copyarea(struct fb_info *info, - const struct fb_copyarea *region) +static void qxl_fb_copyarea_internal(struct fb_info *info, + const struct fb_copyarea *region) { struct qxl_fbdev *qfbdev = info->par; @@ -223,37 +312,89 @@ static void qxl_fb_copyarea(struct fb_info *info, region->dx, region->dy); } +static void qxl_fb_copyarea(struct fb_info *info, + const struct fb_copyarea *region) +{ + struct qxl_fbdev *qfbdev = info->par; + struct qxl_device *qdev = qfbdev->qdev; + + if (!drm_can_sleep()) { + qxl_fb_delayed_copyarea(qfbdev, region); + schedule_work(&qdev->fb_work); + return; + } + /* make sure any previous work is done */ + flush_work(&qdev->fb_work); + qxl_fb_copyarea_internal(info, region); +} + static void qxl_fb_imageblit_safe(struct qxl_fb_image *qxl_fb_image) { qxl_draw_opaque_fb(qxl_fb_image, 0); } +static void qxl_fb_imageblit_internal(struct fb_info *info, + const struct fb_image *image) +{ + struct qxl_fbdev *qfbdev = info->par; + struct qxl_fb_image qxl_fb_image; + + /* ensure proper order rendering operations - TODO: must do this + * for everything. 
*/ + qxl_fb_image_init(&qxl_fb_image, qfbdev->qdev, info, image); + qxl_fb_imageblit_safe(&qxl_fb_image); +} + static void qxl_fb_imageblit(struct fb_info *info, const struct fb_image *image) { struct qxl_fbdev *qfbdev = info->par; struct qxl_device *qdev = qfbdev->qdev; - struct qxl_fb_image qxl_fb_image; if (!drm_can_sleep()) { - /* we cannot do any ttm_bo allocation since that will fail on - * ioremap_wc..__get_vm_area_node, so queue the work item - * instead This can happen from printk inside an interrupt - * context, i.e.: smp_apic_timer_interrupt..check_cpu_stall */ - qxl_io_log(qdev, - "%s: TODO use RCU, mysterious locks with spin_lock\n", - __func__); + qxl_fb_delayed_imageblit(qfbdev, image); + schedule_work(&qdev->fb_work); return; } + /* make sure any previous work is done */ + flush_work(&qdev->fb_work); + qxl_fb_imageblit_internal(info, image); +} - /* ensure proper order of rendering operations - TODO: must do this - * for everything. */ - qxl_fb_image_init(&qxl_fb_image, qfbdev->qdev, info, image); - qxl_fb_imageblit_safe(&qxl_fb_image); +static void qxl_fb_work(struct work_struct *work) +{ + struct qxl_device *qdev = container_of(work, struct qxl_device, fb_work); + unsigned long flags; + struct qxl_fb_op *entry, *tmp; + struct qxl_fbdev *qfbdev = qdev->mode_info.qfbdev; + + /* since the irq context just adds entries to the end of the + list dropping the lock should be fine, as entry isn't modified + in the operation code */ + spin_lock_irqsave(&qfbdev->delayed_ops_lock, flags); + list_for_each_entry_safe(entry, tmp, &qfbdev->delayed_ops, head) { + spin_unlock_irqrestore(&qfbdev->delayed_ops_lock, flags); + switch (entry->op_type) { + case QXL_FB_OP_FILLRECT: + qxl_fb_fillrect_internal(qfbdev->helper.fbdev, &entry->op.fr); + break; + case QXL_FB_OP_COPYAREA: + qxl_fb_copyarea_internal(qfbdev->helper.fbdev, &entry->op.ca); + break; + case QXL_FB_OP_IMAGEBLIT: + qxl_fb_imageblit_internal(qfbdev->helper.fbdev, &entry->op.ib); + break; + } + 
spin_lock_irqsave(&qfbdev->delayed_ops_lock, flags); + list_del(&entry->head); + kfree(entry); + } + spin_unlock_irqrestore(&qfbdev->delayed_ops_lock, flags); } int qxl_fb_init(struct qxl_device *qdev) { + INIT_WORK(&qdev->fb_work, qxl_fb_work); return 0; } @@ -536,7 +677,8 @@ int qxl_fbdev_init(struct qxl_device *qdev) qfbdev->qdev = qdev; qdev->mode_info.qfbdev = qfbdev; qfbdev->helper.funcs = &qxl_fb_helper_funcs; - + spin_lock_init(&qfbdev->delayed_ops_lock); + INIT_LIST_HEAD(&qfbdev->delayed_ops); ret = drm_fb_helper_init(qdev->ddev, &qfbdev->helper, qxl_num_crtc /* num_crtc - QXL supports just 1 */, QXLFB_CONN_LIMIT); diff --git a/drivers/gpu/drm/qxl/qxl_fence.c b/drivers/gpu/drm/qxl/qxl_fence.c index 63c6715ad385..ae59e91cfb9a 100644 --- a/drivers/gpu/drm/qxl/qxl_fence.c +++ b/drivers/gpu/drm/qxl/qxl_fence.c @@ -49,17 +49,11 @@ For some reason every so often qxl hw fails to release, things go wrong. */ - - -int qxl_fence_add_release(struct qxl_fence *qfence, uint32_t rel_id) +/* must be called with the fence lock held */ +void qxl_fence_add_release_locked(struct qxl_fence *qfence, uint32_t rel_id) { - struct qxl_bo *bo = container_of(qfence, struct qxl_bo, fence); - - spin_lock(&bo->tbo.bdev->fence_lock); radix_tree_insert(&qfence->tree, rel_id, qfence); qfence->num_active_releases++; - spin_unlock(&bo->tbo.bdev->fence_lock); - return 0; } int qxl_fence_remove_release(struct qxl_fence *qfence, uint32_t rel_id) diff --git a/drivers/gpu/drm/qxl/qxl_gem.c b/drivers/gpu/drm/qxl/qxl_gem.c index a235693aabba..25e1777fb0a2 100644 --- a/drivers/gpu/drm/qxl/qxl_gem.c +++ b/drivers/gpu/drm/qxl/qxl_gem.c @@ -55,7 +55,7 @@ int qxl_gem_object_create(struct qxl_device *qdev, int size, /* At least align on page size */ if (alignment < PAGE_SIZE) alignment = PAGE_SIZE; - r = qxl_bo_create(qdev, size, kernel, initial_domain, surf, &qbo); + r = qxl_bo_create(qdev, size, kernel, false, initial_domain, surf, &qbo); if (r) { if (r != -ERESTARTSYS) DRM_ERROR( diff --git 
a/drivers/gpu/drm/qxl/qxl_image.c b/drivers/gpu/drm/qxl/qxl_image.c index cf856206996b..7fbcc35e8ad3 100644 --- a/drivers/gpu/drm/qxl/qxl_image.c +++ b/drivers/gpu/drm/qxl/qxl_image.c @@ -30,31 +30,100 @@ #include "qxl_object.h" static int -qxl_image_create_helper(struct qxl_device *qdev, +qxl_allocate_chunk(struct qxl_device *qdev, + struct qxl_release *release, + struct qxl_drm_image *image, + unsigned int chunk_size) +{ + struct qxl_drm_chunk *chunk; + int ret; + + chunk = kmalloc(sizeof(struct qxl_drm_chunk), GFP_KERNEL); + if (!chunk) + return -ENOMEM; + + ret = qxl_alloc_bo_reserved(qdev, release, chunk_size, &chunk->bo); + if (ret) { + kfree(chunk); + return ret; + } + + list_add_tail(&chunk->head, &image->chunk_list); + return 0; +} + +int +qxl_image_alloc_objects(struct qxl_device *qdev, struct qxl_release *release, - struct qxl_bo **image_bo, - const uint8_t *data, - int width, int height, - int depth, unsigned int hash, - int stride) + struct qxl_drm_image **image_ptr, + int height, int stride) +{ + struct qxl_drm_image *image; + int ret; + + image = kmalloc(sizeof(struct qxl_drm_image), GFP_KERNEL); + if (!image) + return -ENOMEM; + + INIT_LIST_HEAD(&image->chunk_list); + + ret = qxl_alloc_bo_reserved(qdev, release, sizeof(struct qxl_image), &image->bo); + if (ret) { + kfree(image); + return ret; + } + + ret = qxl_allocate_chunk(qdev, release, image, sizeof(struct qxl_data_chunk) + stride * height); + if (ret) { + qxl_bo_unref(&image->bo); + kfree(image); + return ret; + } + *image_ptr = image; + return 0; +} + +void qxl_image_free_objects(struct qxl_device *qdev, struct qxl_drm_image *dimage) { + struct qxl_drm_chunk *chunk, *tmp; + + list_for_each_entry_safe(chunk, tmp, &dimage->chunk_list, head) { + qxl_bo_unref(&chunk->bo); + kfree(chunk); + } + + qxl_bo_unref(&dimage->bo); + kfree(dimage); +} + +static int +qxl_image_init_helper(struct qxl_device *qdev, + struct qxl_release *release, + struct qxl_drm_image *dimage, + const uint8_t *data, + int 
width, int height, + int depth, unsigned int hash, + int stride) +{ + struct qxl_drm_chunk *drv_chunk; struct qxl_image *image; struct qxl_data_chunk *chunk; int i; int chunk_stride; int linesize = width * depth / 8; - struct qxl_bo *chunk_bo; - int ret; + struct qxl_bo *chunk_bo, *image_bo; void *ptr; /* Chunk */ /* FIXME: Check integer overflow */ /* TODO: variable number of chunks */ + + drv_chunk = list_first_entry(&dimage->chunk_list, struct qxl_drm_chunk, head); + + chunk_bo = drv_chunk->bo; chunk_stride = stride; /* TODO: should use linesize, but it renders wrong (check the bitmaps are sent correctly first) */ - ret = qxl_alloc_bo_reserved(qdev, sizeof(*chunk) + height * chunk_stride, - &chunk_bo); - + ptr = qxl_bo_kmap_atomic_page(qdev, chunk_bo, 0); chunk = ptr; chunk->data_size = height * chunk_stride; @@ -102,7 +171,6 @@ qxl_image_create_helper(struct qxl_device *qdev, while (remain > 0) { page_base = out_offset & PAGE_MASK; page_offset = offset_in_page(out_offset); - size = min((int)(PAGE_SIZE - page_offset), remain); ptr = qxl_bo_kmap_atomic_page(qdev, chunk_bo, page_base); @@ -116,14 +184,10 @@ qxl_image_create_helper(struct qxl_device *qdev, } } } - - qxl_bo_kunmap(chunk_bo); - /* Image */ - ret = qxl_alloc_bo_reserved(qdev, sizeof(*image), image_bo); - - ptr = qxl_bo_kmap_atomic_page(qdev, *image_bo, 0); + image_bo = dimage->bo; + ptr = qxl_bo_kmap_atomic_page(qdev, image_bo, 0); image = ptr; image->descriptor.id = 0; @@ -154,23 +218,20 @@ qxl_image_create_helper(struct qxl_device *qdev, image->u.bitmap.stride = chunk_stride; image->u.bitmap.palette = 0; image->u.bitmap.data = qxl_bo_physical_address(qdev, chunk_bo, 0); - qxl_release_add_res(qdev, release, chunk_bo); - qxl_bo_unreserve(chunk_bo); - qxl_bo_unref(&chunk_bo); - qxl_bo_kunmap_atomic_page(qdev, *image_bo, ptr); + qxl_bo_kunmap_atomic_page(qdev, image_bo, ptr); return 0; } -int qxl_image_create(struct qxl_device *qdev, +int qxl_image_init(struct qxl_device *qdev, struct qxl_release 
*release, - struct qxl_bo **image_bo, + struct qxl_drm_image *dimage, const uint8_t *data, int x, int y, int width, int height, int depth, int stride) { data += y * stride + x * (depth / 8); - return qxl_image_create_helper(qdev, release, image_bo, data, + return qxl_image_init_helper(qdev, release, dimage, data, width, height, depth, 0, stride); } diff --git a/drivers/gpu/drm/qxl/qxl_ioctl.c b/drivers/gpu/drm/qxl/qxl_ioctl.c index 27f45e49250d..6de33563d6f1 100644 --- a/drivers/gpu/drm/qxl/qxl_ioctl.c +++ b/drivers/gpu/drm/qxl/qxl_ioctl.c @@ -68,55 +68,60 @@ static int qxl_map_ioctl(struct drm_device *dev, void *data, &qxl_map->offset); } +struct qxl_reloc_info { + int type; + struct qxl_bo *dst_bo; + uint32_t dst_offset; + struct qxl_bo *src_bo; + int src_offset; +}; + /* * dst must be validated, i.e. whole bo on vram/surfacesram (right now all bo's * are on vram). * *(dst + dst_off) = qxl_bo_physical_address(src, src_off) */ static void -apply_reloc(struct qxl_device *qdev, struct qxl_bo *dst, uint64_t dst_off, - struct qxl_bo *src, uint64_t src_off) +apply_reloc(struct qxl_device *qdev, struct qxl_reloc_info *info) { void *reloc_page; - - reloc_page = qxl_bo_kmap_atomic_page(qdev, dst, dst_off & PAGE_MASK); - *(uint64_t *)(reloc_page + (dst_off & ~PAGE_MASK)) = qxl_bo_physical_address(qdev, - src, src_off); - qxl_bo_kunmap_atomic_page(qdev, dst, reloc_page); + reloc_page = qxl_bo_kmap_atomic_page(qdev, info->dst_bo, info->dst_offset & PAGE_MASK); + *(uint64_t *)(reloc_page + (info->dst_offset & ~PAGE_MASK)) = qxl_bo_physical_address(qdev, + info->src_bo, + info->src_offset); + qxl_bo_kunmap_atomic_page(qdev, info->dst_bo, reloc_page); } static void -apply_surf_reloc(struct qxl_device *qdev, struct qxl_bo *dst, uint64_t dst_off, - struct qxl_bo *src) +apply_surf_reloc(struct qxl_device *qdev, struct qxl_reloc_info *info) { uint32_t id = 0; void *reloc_page; - if (src && !src->is_primary) - id = src->surface_id; + if (info->src_bo && !info->src_bo->is_primary) + 
id = info->src_bo->surface_id; - reloc_page = qxl_bo_kmap_atomic_page(qdev, dst, dst_off & PAGE_MASK); - *(uint32_t *)(reloc_page + (dst_off & ~PAGE_MASK)) = id; - qxl_bo_kunmap_atomic_page(qdev, dst, reloc_page); + reloc_page = qxl_bo_kmap_atomic_page(qdev, info->dst_bo, info->dst_offset & PAGE_MASK); + *(uint32_t *)(reloc_page + (info->dst_offset & ~PAGE_MASK)) = id; + qxl_bo_kunmap_atomic_page(qdev, info->dst_bo, reloc_page); } /* return holding the reference to this object */ static struct qxl_bo *qxlhw_handle_to_bo(struct qxl_device *qdev, struct drm_file *file_priv, uint64_t handle, - struct qxl_reloc_list *reloc_list) + struct qxl_release *release) { struct drm_gem_object *gobj; struct qxl_bo *qobj; int ret; gobj = drm_gem_object_lookup(qdev->ddev, file_priv, handle); - if (!gobj) { - DRM_ERROR("bad bo handle %lld\n", handle); + if (!gobj) return NULL; - } + qobj = gem_to_qxl_bo(gobj); - ret = qxl_bo_list_add(reloc_list, qobj); + ret = qxl_release_list_add(release, qobj); if (ret) return NULL; @@ -129,151 +134,177 @@ static struct qxl_bo *qxlhw_handle_to_bo(struct qxl_device *qdev, * However, the command as passed from user space must *not* contain the initial * QXLReleaseInfo struct (first XXX bytes) */ -static int qxl_execbuffer_ioctl(struct drm_device *dev, void *data, - struct drm_file *file_priv) +static int qxl_process_single_command(struct qxl_device *qdev, + struct drm_qxl_command *cmd, + struct drm_file *file_priv) { - struct qxl_device *qdev = dev->dev_private; - struct drm_qxl_execbuffer *execbuffer = data; - struct drm_qxl_command user_cmd; - int cmd_num; - struct qxl_bo *reloc_src_bo; - struct qxl_bo *reloc_dst_bo; - struct drm_qxl_reloc reloc; + struct qxl_reloc_info *reloc_info; + int release_type; + struct qxl_release *release; + struct qxl_bo *cmd_bo; void *fb_cmd; - int i, ret; - struct qxl_reloc_list reloc_list; + int i, j, ret, num_relocs; int unwritten; - uint32_t reloc_dst_offset; - INIT_LIST_HEAD(&reloc_list.bos); - for (cmd_num = 0; 
cmd_num < execbuffer->commands_num; ++cmd_num) { - struct qxl_release *release; - struct qxl_bo *cmd_bo; - int release_type; - struct drm_qxl_command *commands = - (struct drm_qxl_command *)(uintptr_t)execbuffer->commands; + switch (cmd->type) { + case QXL_CMD_DRAW: + release_type = QXL_RELEASE_DRAWABLE; + break; + case QXL_CMD_SURFACE: + case QXL_CMD_CURSOR: + default: + DRM_DEBUG("Only draw commands in execbuffers\n"); + return -EINVAL; + break; + } - if (DRM_COPY_FROM_USER(&user_cmd, &commands[cmd_num], - sizeof(user_cmd))) - return -EFAULT; - switch (user_cmd.type) { - case QXL_CMD_DRAW: - release_type = QXL_RELEASE_DRAWABLE; - break; - case QXL_CMD_SURFACE: - case QXL_CMD_CURSOR: - default: - DRM_DEBUG("Only draw commands in execbuffers\n"); - return -EINVAL; - break; - } + if (cmd->command_size > PAGE_SIZE - sizeof(union qxl_release_info)) + return -EINVAL; - if (user_cmd.command_size > PAGE_SIZE - sizeof(union qxl_release_info)) - return -EINVAL; + if (!access_ok(VERIFY_READ, + (void *)(unsigned long)cmd->command, + cmd->command_size)) + return -EFAULT; - if (!access_ok(VERIFY_READ, - (void *)(unsigned long)user_cmd.command, - user_cmd.command_size)) - return -EFAULT; + reloc_info = kmalloc(sizeof(struct qxl_reloc_info) * cmd->relocs_num, GFP_KERNEL); + if (!reloc_info) + return -ENOMEM; - ret = qxl_alloc_release_reserved(qdev, - sizeof(union qxl_release_info) + - user_cmd.command_size, - release_type, - &release, - &cmd_bo); - if (ret) - return ret; + ret = qxl_alloc_release_reserved(qdev, + sizeof(union qxl_release_info) + + cmd->command_size, + release_type, + &release, + &cmd_bo); + if (ret) + goto out_free_reloc; - /* TODO copy slow path code from i915 */ - fb_cmd = qxl_bo_kmap_atomic_page(qdev, cmd_bo, (release->release_offset & PAGE_SIZE)); - unwritten = __copy_from_user_inatomic_nocache(fb_cmd + sizeof(union qxl_release_info) + (release->release_offset & ~PAGE_SIZE), (void *)(unsigned long)user_cmd.command, user_cmd.command_size); + /* TODO copy slow 
path code from i915 */ + fb_cmd = qxl_bo_kmap_atomic_page(qdev, cmd_bo, (release->release_offset & PAGE_SIZE)); + unwritten = __copy_from_user_inatomic_nocache(fb_cmd + sizeof(union qxl_release_info) + (release->release_offset & ~PAGE_SIZE), (void *)(unsigned long)cmd->command, cmd->command_size); - { - struct qxl_drawable *draw = fb_cmd; + { + struct qxl_drawable *draw = fb_cmd; + draw->mm_time = qdev->rom->mm_clock; + } - draw->mm_time = qdev->rom->mm_clock; - } - qxl_bo_kunmap_atomic_page(qdev, cmd_bo, fb_cmd); - if (unwritten) { - DRM_ERROR("got unwritten %d\n", unwritten); - qxl_release_unreserve(qdev, release); - qxl_release_free(qdev, release); - return -EFAULT; + qxl_bo_kunmap_atomic_page(qdev, cmd_bo, fb_cmd); + if (unwritten) { + DRM_ERROR("got unwritten %d\n", unwritten); + ret = -EFAULT; + goto out_free_release; + } + + /* fill out reloc info structs */ + num_relocs = 0; + for (i = 0; i < cmd->relocs_num; ++i) { + struct drm_qxl_reloc reloc; + + if (DRM_COPY_FROM_USER(&reloc, + &((struct drm_qxl_reloc *)(uintptr_t)cmd->relocs)[i], + sizeof(reloc))) { + ret = -EFAULT; + goto out_free_bos; } - for (i = 0 ; i < user_cmd.relocs_num; ++i) { - if (DRM_COPY_FROM_USER(&reloc, - &((struct drm_qxl_reloc *)(uintptr_t)user_cmd.relocs)[i], - sizeof(reloc))) { - qxl_bo_list_unreserve(&reloc_list, true); - qxl_release_unreserve(qdev, release); - qxl_release_free(qdev, release); - return -EFAULT; - } + /* add the bos to the list of bos to validate - + need to validate first then process relocs? */ + if (reloc.reloc_type != QXL_RELOC_TYPE_BO && reloc.reloc_type != QXL_RELOC_TYPE_SURF) { + DRM_DEBUG("unknown reloc type %d\n", reloc_info[i].type); - /* add the bos to the list of bos to validate - - need to validate first then process relocs? 
*/ - if (reloc.dst_handle) { - reloc_dst_bo = qxlhw_handle_to_bo(qdev, file_priv, - reloc.dst_handle, &reloc_list); - if (!reloc_dst_bo) { - qxl_bo_list_unreserve(&reloc_list, true); - qxl_release_unreserve(qdev, release); - qxl_release_free(qdev, release); - return -EINVAL; - } - reloc_dst_offset = 0; - } else { - reloc_dst_bo = cmd_bo; - reloc_dst_offset = release->release_offset; + ret = -EINVAL; + goto out_free_bos; + } + reloc_info[i].type = reloc.reloc_type; + + if (reloc.dst_handle) { + reloc_info[i].dst_bo = qxlhw_handle_to_bo(qdev, file_priv, + reloc.dst_handle, release); + if (!reloc_info[i].dst_bo) { + ret = -EINVAL; + reloc_info[i].src_bo = NULL; + goto out_free_bos; } - - /* reserve and validate the reloc dst bo */ - if (reloc.reloc_type == QXL_RELOC_TYPE_BO || reloc.src_handle > 0) { - reloc_src_bo = - qxlhw_handle_to_bo(qdev, file_priv, - reloc.src_handle, &reloc_list); - if (!reloc_src_bo) { - if (reloc_dst_bo != cmd_bo) - drm_gem_object_unreference_unlocked(&reloc_dst_bo->gem_base); - qxl_bo_list_unreserve(&reloc_list, true); - qxl_release_unreserve(qdev, release); - qxl_release_free(qdev, release); - return -EINVAL; - } - } else - reloc_src_bo = NULL; - if (reloc.reloc_type == QXL_RELOC_TYPE_BO) { - apply_reloc(qdev, reloc_dst_bo, reloc_dst_offset + reloc.dst_offset, - reloc_src_bo, reloc.src_offset); - } else if (reloc.reloc_type == QXL_RELOC_TYPE_SURF) { - apply_surf_reloc(qdev, reloc_dst_bo, reloc_dst_offset + reloc.dst_offset, reloc_src_bo); - } else { - DRM_ERROR("unknown reloc type %d\n", reloc.reloc_type); - return -EINVAL; + reloc_info[i].dst_offset = reloc.dst_offset; + } else { + reloc_info[i].dst_bo = cmd_bo; + reloc_info[i].dst_offset = reloc.dst_offset + release->release_offset; + } + num_relocs++; + + /* reserve and validate the reloc dst bo */ + if (reloc.reloc_type == QXL_RELOC_TYPE_BO || reloc.src_handle > 0) { + reloc_info[i].src_bo = + qxlhw_handle_to_bo(qdev, file_priv, + reloc.src_handle, release); + if (!reloc_info[i].src_bo) 
{ + if (reloc_info[i].dst_bo != cmd_bo) + drm_gem_object_unreference_unlocked(&reloc_info[i].dst_bo->gem_base); + ret = -EINVAL; + goto out_free_bos; } + reloc_info[i].src_offset = reloc.src_offset; + } else { + reloc_info[i].src_bo = NULL; + reloc_info[i].src_offset = 0; + } + } - if (reloc_src_bo && reloc_src_bo != cmd_bo) { - qxl_release_add_res(qdev, release, reloc_src_bo); - drm_gem_object_unreference_unlocked(&reloc_src_bo->gem_base); - } + /* validate all buffers */ + ret = qxl_release_reserve_list(release, false); + if (ret) + goto out_free_bos; - if (reloc_dst_bo != cmd_bo) - drm_gem_object_unreference_unlocked(&reloc_dst_bo->gem_base); - } - qxl_fence_releaseable(qdev, release); + for (i = 0; i < cmd->relocs_num; ++i) { + if (reloc_info[i].type == QXL_RELOC_TYPE_BO) + apply_reloc(qdev, &reloc_info[i]); + else if (reloc_info[i].type == QXL_RELOC_TYPE_SURF) + apply_surf_reloc(qdev, &reloc_info[i]); + } - ret = qxl_push_command_ring_release(qdev, release, user_cmd.type, true); - if (ret == -ERESTARTSYS) { - qxl_release_unreserve(qdev, release); - qxl_release_free(qdev, release); - qxl_bo_list_unreserve(&reloc_list, true); + ret = qxl_push_command_ring_release(qdev, release, cmd->type, true); + if (ret) + qxl_release_backoff_reserve_list(release); + else + qxl_release_fence_buffer_objects(release); + +out_free_bos: + for (j = 0; j < num_relocs; j++) { + if (reloc_info[j].dst_bo != cmd_bo) + drm_gem_object_unreference_unlocked(&reloc_info[j].dst_bo->gem_base); + if (reloc_info[j].src_bo && reloc_info[j].src_bo != cmd_bo) + drm_gem_object_unreference_unlocked(&reloc_info[j].src_bo->gem_base); + } +out_free_release: + if (ret) + qxl_release_free(qdev, release); +out_free_reloc: + kfree(reloc_info); + return ret; +} + +static int qxl_execbuffer_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct qxl_device *qdev = dev->dev_private; + struct drm_qxl_execbuffer *execbuffer = data; + struct drm_qxl_command user_cmd; + int cmd_num; + 
int ret; + + for (cmd_num = 0; cmd_num < execbuffer->commands_num; ++cmd_num) { + + struct drm_qxl_command *commands = + (struct drm_qxl_command *)(uintptr_t)execbuffer->commands; + + if (DRM_COPY_FROM_USER(&user_cmd, &commands[cmd_num], + sizeof(user_cmd))) + return -EFAULT; + + ret = qxl_process_single_command(qdev, &user_cmd, file_priv); + if (ret) return ret; - } - qxl_release_unreserve(qdev, release); } - qxl_bo_list_unreserve(&reloc_list, 0); return 0; } @@ -305,7 +336,7 @@ static int qxl_update_area_ioctl(struct drm_device *dev, void *data, goto out; if (!qobj->pin_count) { - qxl_ttm_placement_from_domain(qobj, qobj->type); + qxl_ttm_placement_from_domain(qobj, qobj->type, false); ret = ttm_bo_validate(&qobj->tbo, &qobj->placement, true, false); if (unlikely(ret)) diff --git a/drivers/gpu/drm/qxl/qxl_object.c b/drivers/gpu/drm/qxl/qxl_object.c index 1191fe7788c9..aa161cddd87e 100644 --- a/drivers/gpu/drm/qxl/qxl_object.c +++ b/drivers/gpu/drm/qxl/qxl_object.c @@ -51,20 +51,21 @@ bool qxl_ttm_bo_is_qxl_bo(struct ttm_buffer_object *bo) return false; } -void qxl_ttm_placement_from_domain(struct qxl_bo *qbo, u32 domain) +void qxl_ttm_placement_from_domain(struct qxl_bo *qbo, u32 domain, bool pinned) { u32 c = 0; + u32 pflag = pinned ? 
TTM_PL_FLAG_NO_EVICT : 0; qbo->placement.fpfn = 0; qbo->placement.lpfn = 0; qbo->placement.placement = qbo->placements; qbo->placement.busy_placement = qbo->placements; if (domain == QXL_GEM_DOMAIN_VRAM) - qbo->placements[c++] = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_VRAM; + qbo->placements[c++] = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_VRAM | pflag; if (domain == QXL_GEM_DOMAIN_SURFACE) - qbo->placements[c++] = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_PRIV0; + qbo->placements[c++] = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_PRIV0 | pflag; if (domain == QXL_GEM_DOMAIN_CPU) - qbo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM; + qbo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM | pflag; if (!c) qbo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM; qbo->placement.num_placement = c; @@ -73,7 +74,7 @@ void qxl_ttm_placement_from_domain(struct qxl_bo *qbo, u32 domain) int qxl_bo_create(struct qxl_device *qdev, - unsigned long size, bool kernel, u32 domain, + unsigned long size, bool kernel, bool pinned, u32 domain, struct qxl_surface *surf, struct qxl_bo **bo_ptr) { @@ -99,15 +100,15 @@ int qxl_bo_create(struct qxl_device *qdev, } bo->gem_base.driver_private = NULL; bo->type = domain; - bo->pin_count = 0; + bo->pin_count = pinned ? 
1 : 0; bo->surface_id = 0; qxl_fence_init(qdev, &bo->fence); INIT_LIST_HEAD(&bo->list); - atomic_set(&bo->reserve_count, 0); + if (surf) bo->surf = *surf; - qxl_ttm_placement_from_domain(bo, domain); + qxl_ttm_placement_from_domain(bo, domain, pinned); r = ttm_bo_init(&qdev->mman.bdev, &bo->tbo, size, type, &bo->placement, 0, !kernel, NULL, size, @@ -228,7 +229,7 @@ struct qxl_bo *qxl_bo_ref(struct qxl_bo *bo) int qxl_bo_pin(struct qxl_bo *bo, u32 domain, u64 *gpu_addr) { struct qxl_device *qdev = (struct qxl_device *)bo->gem_base.dev->dev_private; - int r, i; + int r; if (bo->pin_count) { bo->pin_count++; @@ -236,9 +237,7 @@ int qxl_bo_pin(struct qxl_bo *bo, u32 domain, u64 *gpu_addr) *gpu_addr = qxl_bo_gpu_offset(bo); return 0; } - qxl_ttm_placement_from_domain(bo, domain); - for (i = 0; i < bo->placement.num_placement; i++) - bo->placements[i] |= TTM_PL_FLAG_NO_EVICT; + qxl_ttm_placement_from_domain(bo, domain, true); r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false); if (likely(r == 0)) { bo->pin_count = 1; @@ -317,53 +316,6 @@ int qxl_bo_check_id(struct qxl_device *qdev, struct qxl_bo *bo) return 0; } -void qxl_bo_list_unreserve(struct qxl_reloc_list *reloc_list, bool failed) -{ - struct qxl_bo_list *entry, *sf; - - list_for_each_entry_safe(entry, sf, &reloc_list->bos, lhead) { - qxl_bo_unreserve(entry->bo); - list_del(&entry->lhead); - kfree(entry); - } -} - -int qxl_bo_list_add(struct qxl_reloc_list *reloc_list, struct qxl_bo *bo) -{ - struct qxl_bo_list *entry; - int ret; - - list_for_each_entry(entry, &reloc_list->bos, lhead) { - if (entry->bo == bo) - return 0; - } - - entry = kmalloc(sizeof(struct qxl_bo_list), GFP_KERNEL); - if (!entry) - return -ENOMEM; - - entry->bo = bo; - list_add(&entry->lhead, &reloc_list->bos); - - ret = qxl_bo_reserve(bo, false); - if (ret) - return ret; - - if (!bo->pin_count) { - qxl_ttm_placement_from_domain(bo, bo->type); - ret = ttm_bo_validate(&bo->tbo, &bo->placement, - true, false); - if (ret) - return ret; - } 
- - /* allocate a surface for reserved + validated buffers */ - ret = qxl_bo_check_id(bo->gem_base.dev->dev_private, bo); - if (ret) - return ret; - return 0; -} - int qxl_surf_evict(struct qxl_device *qdev) { return ttm_bo_evict_mm(&qdev->mman.bdev, TTM_PL_PRIV0); diff --git a/drivers/gpu/drm/qxl/qxl_object.h b/drivers/gpu/drm/qxl/qxl_object.h index ee7ad79ce781..8cb6167038e5 100644 --- a/drivers/gpu/drm/qxl/qxl_object.h +++ b/drivers/gpu/drm/qxl/qxl_object.h @@ -88,7 +88,7 @@ static inline int qxl_bo_wait(struct qxl_bo *bo, u32 *mem_type, extern int qxl_bo_create(struct qxl_device *qdev, unsigned long size, - bool kernel, u32 domain, + bool kernel, bool pinned, u32 domain, struct qxl_surface *surf, struct qxl_bo **bo_ptr); extern int qxl_bo_kmap(struct qxl_bo *bo, void **ptr); @@ -99,9 +99,7 @@ extern struct qxl_bo *qxl_bo_ref(struct qxl_bo *bo); extern void qxl_bo_unref(struct qxl_bo **bo); extern int qxl_bo_pin(struct qxl_bo *bo, u32 domain, u64 *gpu_addr); extern int qxl_bo_unpin(struct qxl_bo *bo); -extern void qxl_ttm_placement_from_domain(struct qxl_bo *qbo, u32 domain); +extern void qxl_ttm_placement_from_domain(struct qxl_bo *qbo, u32 domain, bool pinned); extern bool qxl_ttm_bo_is_qxl_bo(struct ttm_buffer_object *bo); -extern int qxl_bo_list_add(struct qxl_reloc_list *reloc_list, struct qxl_bo *bo); -extern void qxl_bo_list_unreserve(struct qxl_reloc_list *reloc_list, bool failed); #endif diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c index b443d6751d5f..b61449e52cd5 100644 --- a/drivers/gpu/drm/qxl/qxl_release.c +++ b/drivers/gpu/drm/qxl/qxl_release.c @@ -38,7 +38,8 @@ static const int release_size_per_bo[] = { RELEASE_SIZE, SURFACE_RELEASE_SIZE, RELEASE_SIZE }; static const int releases_per_bo[] = { RELEASES_PER_BO, SURFACE_RELEASES_PER_BO, RELEASES_PER_BO }; -uint64_t + +static uint64_t qxl_release_alloc(struct qxl_device *qdev, int type, struct qxl_release **ret) { @@ -53,9 +54,9 @@ qxl_release_alloc(struct 
qxl_device *qdev, int type, return 0; } release->type = type; - release->bo_count = 0; release->release_offset = 0; release->surface_release_id = 0; + INIT_LIST_HEAD(&release->bos); idr_preload(GFP_KERNEL); spin_lock(&qdev->release_idr_lock); @@ -77,20 +78,20 @@ void qxl_release_free(struct qxl_device *qdev, struct qxl_release *release) { - int i; - - QXL_INFO(qdev, "release %d, type %d, %d bos\n", release->id, - release->type, release->bo_count); + struct qxl_bo_list *entry, *tmp; + QXL_INFO(qdev, "release %d, type %d\n", release->id, + release->type); if (release->surface_release_id) qxl_surface_id_dealloc(qdev, release->surface_release_id); - for (i = 0 ; i < release->bo_count; ++i) { + list_for_each_entry_safe(entry, tmp, &release->bos, tv.head) { + struct qxl_bo *bo = to_qxl_bo(entry->tv.bo); QXL_INFO(qdev, "release %llx\n", - release->bos[i]->tbo.addr_space_offset + entry->tv.bo->addr_space_offset - DRM_FILE_OFFSET); - qxl_fence_remove_release(&release->bos[i]->fence, release->id); - qxl_bo_unref(&release->bos[i]); + qxl_fence_remove_release(&bo->fence, release->id); + qxl_bo_unref(&bo); } spin_lock(&qdev->release_idr_lock); idr_remove(&qdev->release_idr, release->id); @@ -98,83 +99,117 @@ qxl_release_free(struct qxl_device *qdev, kfree(release); } -void -qxl_release_add_res(struct qxl_device *qdev, struct qxl_release *release, - struct qxl_bo *bo) -{ - int i; - for (i = 0; i < release->bo_count; i++) - if (release->bos[i] == bo) - return; - - if (release->bo_count >= QXL_MAX_RES) { - DRM_ERROR("exceeded max resource on a qxl_release item\n"); - return; - } - release->bos[release->bo_count++] = qxl_bo_ref(bo); -} - static int qxl_release_bo_alloc(struct qxl_device *qdev, struct qxl_bo **bo) { int ret; - ret = qxl_bo_create(qdev, PAGE_SIZE, false, QXL_GEM_DOMAIN_VRAM, NULL, + /* pin releases bo's they are too messy to evict */ + ret = qxl_bo_create(qdev, PAGE_SIZE, false, true, + QXL_GEM_DOMAIN_VRAM, NULL, bo); return ret; } -int qxl_release_reserve(struct 
qxl_device *qdev, - struct qxl_release *release, bool no_wait) +int qxl_release_list_add(struct qxl_release *release, struct qxl_bo *bo) +{ + struct qxl_bo_list *entry; + + list_for_each_entry(entry, &release->bos, tv.head) { + if (entry->tv.bo == &bo->tbo) + return 0; + } + + entry = kmalloc(sizeof(struct qxl_bo_list), GFP_KERNEL); + if (!entry) + return -ENOMEM; + + qxl_bo_ref(bo); + entry->tv.bo = &bo->tbo; + list_add_tail(&entry->tv.head, &release->bos); + return 0; +} + +static int qxl_release_validate_bo(struct qxl_bo *bo) { int ret; - if (atomic_inc_return(&release->bos[0]->reserve_count) == 1) { - ret = qxl_bo_reserve(release->bos[0], no_wait); + + if (!bo->pin_count) { + qxl_ttm_placement_from_domain(bo, bo->type, false); + ret = ttm_bo_validate(&bo->tbo, &bo->placement, + true, false); if (ret) return ret; } + + /* allocate a surface for reserved + validated buffers */ + ret = qxl_bo_check_id(bo->gem_base.dev->dev_private, bo); + if (ret) + return ret; + return 0; +} + +int qxl_release_reserve_list(struct qxl_release *release, bool no_intr) +{ + int ret; + struct qxl_bo_list *entry; + + /* if only one object on the release its the release itself + since these objects are pinned no need to reserve */ + if (list_is_singular(&release->bos)) + return 0; + + ret = ttm_eu_reserve_buffers(&release->ticket, &release->bos); + if (ret) + return ret; + + list_for_each_entry(entry, &release->bos, tv.head) { + struct qxl_bo *bo = to_qxl_bo(entry->tv.bo); + + ret = qxl_release_validate_bo(bo); + if (ret) { + ttm_eu_backoff_reservation(&release->ticket, &release->bos); + return ret; + } + } return 0; } -void qxl_release_unreserve(struct qxl_device *qdev, - struct qxl_release *release) +void qxl_release_backoff_reserve_list(struct qxl_release *release) { - if (atomic_dec_and_test(&release->bos[0]->reserve_count)) - qxl_bo_unreserve(release->bos[0]); + /* if only one object on the release its the release itself + since these objects are pinned no need to reserve */ + if 
(list_is_singular(&release->bos)) + return; + + ttm_eu_backoff_reservation(&release->ticket, &release->bos); } + int qxl_alloc_surface_release_reserved(struct qxl_device *qdev, enum qxl_surface_cmd_type surface_cmd_type, struct qxl_release *create_rel, struct qxl_release **release) { - int ret; - if (surface_cmd_type == QXL_SURFACE_CMD_DESTROY && create_rel) { int idr_ret; + struct qxl_bo_list *entry = list_first_entry(&create_rel->bos, struct qxl_bo_list, tv.head); struct qxl_bo *bo; union qxl_release_info *info; /* stash the release after the create command */ idr_ret = qxl_release_alloc(qdev, QXL_RELEASE_SURFACE_CMD, release); - bo = qxl_bo_ref(create_rel->bos[0]); + bo = qxl_bo_ref(to_qxl_bo(entry->tv.bo)); (*release)->release_offset = create_rel->release_offset + 64; - qxl_release_add_res(qdev, *release, bo); + qxl_release_list_add(*release, bo); - ret = qxl_release_reserve(qdev, *release, false); - if (ret) { - DRM_ERROR("release reserve failed\n"); - goto out_unref; - } info = qxl_release_map(qdev, *release); info->id = idr_ret; qxl_release_unmap(qdev, *release, info); - -out_unref: qxl_bo_unref(&bo); - return ret; + return 0; } return qxl_alloc_release_reserved(qdev, sizeof(struct qxl_surface_cmd), @@ -187,7 +222,7 @@ int qxl_alloc_release_reserved(struct qxl_device *qdev, unsigned long size, { struct qxl_bo *bo; int idr_ret; - int ret; + int ret = 0; union qxl_release_info *info; int cur_idx; @@ -216,11 +251,6 @@ int qxl_alloc_release_reserved(struct qxl_device *qdev, unsigned long size, mutex_unlock(&qdev->release_mutex); return ret; } - - /* pin releases bo's they are too messy to evict */ - ret = qxl_bo_reserve(qdev->current_release_bo[cur_idx], false); - qxl_bo_pin(qdev->current_release_bo[cur_idx], QXL_GEM_DOMAIN_VRAM, NULL); - qxl_bo_unreserve(qdev->current_release_bo[cur_idx]); } bo = qxl_bo_ref(qdev->current_release_bo[cur_idx]); @@ -231,36 +261,18 @@ int qxl_alloc_release_reserved(struct qxl_device *qdev, unsigned long size, if (rbo) *rbo = bo; - 
qxl_release_add_res(qdev, *release, bo); - - ret = qxl_release_reserve(qdev, *release, false); mutex_unlock(&qdev->release_mutex); - if (ret) - goto out_unref; + + qxl_release_list_add(*release, bo); info = qxl_release_map(qdev, *release); info->id = idr_ret; qxl_release_unmap(qdev, *release, info); -out_unref: qxl_bo_unref(&bo); return ret; } -int qxl_fence_releaseable(struct qxl_device *qdev, - struct qxl_release *release) -{ - int i, ret; - for (i = 0; i < release->bo_count; i++) { - if (!release->bos[i]->tbo.sync_obj) - release->bos[i]->tbo.sync_obj = &release->bos[i]->fence; - ret = qxl_fence_add_release(&release->bos[i]->fence, release->id); - if (ret) - return ret; - } - return 0; -} - struct qxl_release *qxl_release_from_id_locked(struct qxl_device *qdev, uint64_t id) { @@ -273,10 +285,7 @@ struct qxl_release *qxl_release_from_id_locked(struct qxl_device *qdev, DRM_ERROR("failed to find id in release_idr\n"); return NULL; } - if (release->bo_count < 1) { - DRM_ERROR("read a released resource with 0 bos\n"); - return NULL; - } + return release; } @@ -285,9 +294,12 @@ union qxl_release_info *qxl_release_map(struct qxl_device *qdev, { void *ptr; union qxl_release_info *info; - struct qxl_bo *bo = release->bos[0]; + struct qxl_bo_list *entry = list_first_entry(&release->bos, struct qxl_bo_list, tv.head); + struct qxl_bo *bo = to_qxl_bo(entry->tv.bo); ptr = qxl_bo_kmap_atomic_page(qdev, bo, release->release_offset & PAGE_SIZE); + if (!ptr) + return NULL; info = ptr + (release->release_offset & ~PAGE_SIZE); return info; } @@ -296,9 +308,51 @@ void qxl_release_unmap(struct qxl_device *qdev, struct qxl_release *release, union qxl_release_info *info) { - struct qxl_bo *bo = release->bos[0]; + struct qxl_bo_list *entry = list_first_entry(&release->bos, struct qxl_bo_list, tv.head); + struct qxl_bo *bo = to_qxl_bo(entry->tv.bo); void *ptr; ptr = ((void *)info) - (release->release_offset & ~PAGE_SIZE); qxl_bo_kunmap_atomic_page(qdev, bo, ptr); } + +void 
qxl_release_fence_buffer_objects(struct qxl_release *release) +{ + struct ttm_validate_buffer *entry; + struct ttm_buffer_object *bo; + struct ttm_bo_global *glob; + struct ttm_bo_device *bdev; + struct ttm_bo_driver *driver; + struct qxl_bo *qbo; + + /* if only one object on the release its the release itself + since these objects are pinned no need to reserve */ + if (list_is_singular(&release->bos)) + return; + + bo = list_first_entry(&release->bos, struct ttm_validate_buffer, head)->bo; + bdev = bo->bdev; + driver = bdev->driver; + glob = bo->glob; + + spin_lock(&glob->lru_lock); + spin_lock(&bdev->fence_lock); + + list_for_each_entry(entry, &release->bos, head) { + bo = entry->bo; + qbo = to_qxl_bo(bo); + + if (!entry->bo->sync_obj) + entry->bo->sync_obj = &qbo->fence; + + qxl_fence_add_release_locked(&qbo->fence, release->id); + + ttm_bo_add_to_lru(bo); + ww_mutex_unlock(&bo->resv->lock); + entry->reserved = false; + } + spin_unlock(&bdev->fence_lock); + spin_unlock(&glob->lru_lock); + ww_acquire_fini(&release->ticket); +} + diff --git a/drivers/gpu/drm/qxl/qxl_ttm.c b/drivers/gpu/drm/qxl/qxl_ttm.c index 489cb8cece4d..1dfd84cda2a1 100644 --- a/drivers/gpu/drm/qxl/qxl_ttm.c +++ b/drivers/gpu/drm/qxl/qxl_ttm.c @@ -206,7 +206,7 @@ static void qxl_evict_flags(struct ttm_buffer_object *bo, return; } qbo = container_of(bo, struct qxl_bo, tbo); - qxl_ttm_placement_from_domain(qbo, QXL_GEM_DOMAIN_CPU); + qxl_ttm_placement_from_domain(qbo, QXL_GEM_DOMAIN_CPU, false); *placement = qbo->placement; } diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c index 064023bed480..32501f6ec991 100644 --- a/drivers/gpu/drm/radeon/atombios_dp.c +++ b/drivers/gpu/drm/radeon/atombios_dp.c @@ -44,6 +44,41 @@ static char *pre_emph_names[] = { }; /***** radeon AUX functions *****/ + +/* Atom needs data in little endian format + * so swap as appropriate when copying data to + * or from atom. Note that atom operates on + * dw units. 
+ */ +static void radeon_copy_swap(u8 *dst, u8 *src, u8 num_bytes, bool to_le) +{ +#ifdef __BIG_ENDIAN + u8 src_tmp[20], dst_tmp[20]; /* used for byteswapping */ + u32 *dst32, *src32; + int i; + + memcpy(src_tmp, src, num_bytes); + src32 = (u32 *)src_tmp; + dst32 = (u32 *)dst_tmp; + if (to_le) { + for (i = 0; i < ((num_bytes + 3) / 4); i++) + dst32[i] = cpu_to_le32(src32[i]); + memcpy(dst, dst_tmp, num_bytes); + } else { + u8 dws = num_bytes & ~3; + for (i = 0; i < ((num_bytes + 3) / 4); i++) + dst32[i] = le32_to_cpu(src32[i]); + memcpy(dst, dst_tmp, dws); + if (num_bytes % 4) { + for (i = 0; i < (num_bytes % 4); i++) + dst[dws+i] = dst_tmp[dws+i]; + } + } +#else + memcpy(dst, src, num_bytes); +#endif +} + union aux_channel_transaction { PROCESS_AUX_CHANNEL_TRANSACTION_PS_ALLOCATION v1; PROCESS_AUX_CHANNEL_TRANSACTION_PARAMETERS_V2 v2; @@ -65,10 +100,10 @@ static int radeon_process_aux_ch(struct radeon_i2c_chan *chan, base = (unsigned char *)(rdev->mode_info.atom_context->scratch + 1); - memcpy(base, send, send_bytes); + radeon_copy_swap(base, send, send_bytes, true); - args.v1.lpAuxRequest = 0 + 4; - args.v1.lpDataOut = 16 + 4; + args.v1.lpAuxRequest = cpu_to_le16((u16)(0 + 4)); + args.v1.lpDataOut = cpu_to_le16((u16)(16 + 4)); args.v1.ucDataOutLen = 0; args.v1.ucChannelID = chan->rec.i2c_id; args.v1.ucDelay = delay / 10; @@ -102,7 +137,7 @@ static int radeon_process_aux_ch(struct radeon_i2c_chan *chan, recv_bytes = recv_size; if (recv && recv_size) - memcpy(recv, base + 16, recv_bytes); + radeon_copy_swap(recv, base + 16, recv_bytes, false); return recv_bytes; } diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index 393880a09412..10f712e37003 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -3166,7 +3166,7 @@ int r600_copy_cpdma(struct radeon_device *rdev, size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT); num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff); - r = radeon_ring_lock(rdev, ring, 
num_loops * 6 + 21); + r = radeon_ring_lock(rdev, ring, num_loops * 6 + 24); if (r) { DRM_ERROR("radeon: moving bo (%d).\n", r); radeon_semaphore_free(rdev, &sem, NULL); @@ -3181,6 +3181,9 @@ int r600_copy_cpdma(struct radeon_device *rdev, radeon_semaphore_free(rdev, &sem, NULL); } + radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); + radeon_ring_write(ring, (WAIT_UNTIL - PACKET3_SET_CONFIG_REG_OFFSET) >> 2); + radeon_ring_write(ring, WAIT_3D_IDLE_bit); for (i = 0; i < num_loops; i++) { cur_size_in_bytes = size_in_bytes; if (cur_size_in_bytes > 0x1fffff) diff --git a/drivers/gpu/drm/radeon/r600_dpm.c b/drivers/gpu/drm/radeon/r600_dpm.c index b88f54b134ab..e5c860f4ccbe 100644 --- a/drivers/gpu/drm/radeon/r600_dpm.c +++ b/drivers/gpu/drm/radeon/r600_dpm.c @@ -278,9 +278,9 @@ bool r600_dynamicpm_enabled(struct radeon_device *rdev) void r600_enable_sclk_control(struct radeon_device *rdev, bool enable) { if (enable) - WREG32_P(GENERAL_PWRMGT, 0, ~SCLK_PWRMGT_OFF); + WREG32_P(SCLK_PWRMGT_CNTL, 0, ~SCLK_PWRMGT_OFF); else - WREG32_P(GENERAL_PWRMGT, SCLK_PWRMGT_OFF, ~SCLK_PWRMGT_OFF); + WREG32_P(SCLK_PWRMGT_CNTL, SCLK_PWRMGT_OFF, ~SCLK_PWRMGT_OFF); } void r600_enable_mclk_control(struct radeon_device *rdev, bool enable) diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index 78bec1a58ed1..f8f8b3113ddd 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -1161,6 +1161,7 @@ static struct radeon_asic rv6xx_asic = { .get_mclk = &rv6xx_dpm_get_mclk, .print_power_state = &rv6xx_dpm_print_power_state, .debugfs_print_current_performance_level = &rv6xx_dpm_debugfs_print_current_performance_level, + .force_performance_level = &rv6xx_dpm_force_performance_level, }, .pflip = { .pre_page_flip = &rs600_pre_page_flip, diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index ca1895709908..902479fa737f 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ 
b/drivers/gpu/drm/radeon/radeon_asic.h @@ -421,6 +421,8 @@ void rv6xx_dpm_print_power_state(struct radeon_device *rdev, struct radeon_ps *ps); void rv6xx_dpm_debugfs_print_current_performance_level(struct radeon_device *rdev, struct seq_file *m); +int rv6xx_dpm_force_performance_level(struct radeon_device *rdev, + enum radeon_dpm_forced_level level); /* rs780 dpm */ int rs780_dpm_init(struct radeon_device *rdev); int rs780_dpm_enable(struct radeon_device *rdev); diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c index 78edadc9e86b..68ce36056019 100644 --- a/drivers/gpu/drm/radeon/radeon_combios.c +++ b/drivers/gpu/drm/radeon/radeon_combios.c @@ -147,7 +147,7 @@ static uint16_t combios_get_table_offset(struct drm_device *dev, enum radeon_combios_table_offset table) { struct radeon_device *rdev = dev->dev_private; - int rev; + int rev, size; uint16_t offset = 0, check_offset; if (!rdev->bios) @@ -156,174 +156,106 @@ static uint16_t combios_get_table_offset(struct drm_device *dev, switch (table) { /* absolute offset tables */ case COMBIOS_ASIC_INIT_1_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0xc); - if (check_offset) - offset = check_offset; + check_offset = 0xc; break; case COMBIOS_BIOS_SUPPORT_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x14); - if (check_offset) - offset = check_offset; + check_offset = 0x14; break; case COMBIOS_DAC_PROGRAMMING_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x2a); - if (check_offset) - offset = check_offset; + check_offset = 0x2a; break; case COMBIOS_MAX_COLOR_DEPTH_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x2c); - if (check_offset) - offset = check_offset; + check_offset = 0x2c; break; case COMBIOS_CRTC_INFO_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x2e); - if (check_offset) - offset = check_offset; + check_offset = 0x2e; break; case COMBIOS_PLL_INFO_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x30); 
- if (check_offset) - offset = check_offset; + check_offset = 0x30; break; case COMBIOS_TV_INFO_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x32); - if (check_offset) - offset = check_offset; + check_offset = 0x32; break; case COMBIOS_DFP_INFO_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x34); - if (check_offset) - offset = check_offset; + check_offset = 0x34; break; case COMBIOS_HW_CONFIG_INFO_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x36); - if (check_offset) - offset = check_offset; + check_offset = 0x36; break; case COMBIOS_MULTIMEDIA_INFO_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x38); - if (check_offset) - offset = check_offset; + check_offset = 0x38; break; case COMBIOS_TV_STD_PATCH_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x3e); - if (check_offset) - offset = check_offset; + check_offset = 0x3e; break; case COMBIOS_LCD_INFO_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x40); - if (check_offset) - offset = check_offset; + check_offset = 0x40; break; case COMBIOS_MOBILE_INFO_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x42); - if (check_offset) - offset = check_offset; + check_offset = 0x42; break; case COMBIOS_PLL_INIT_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x46); - if (check_offset) - offset = check_offset; + check_offset = 0x46; break; case COMBIOS_MEM_CONFIG_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x48); - if (check_offset) - offset = check_offset; + check_offset = 0x48; break; case COMBIOS_SAVE_MASK_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x4a); - if (check_offset) - offset = check_offset; + check_offset = 0x4a; break; case COMBIOS_HARDCODED_EDID_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x4c); - if (check_offset) - offset = check_offset; + check_offset = 0x4c; break; case COMBIOS_ASIC_INIT_2_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x4e); - if (check_offset) - 
offset = check_offset; + check_offset = 0x4e; break; case COMBIOS_CONNECTOR_INFO_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x50); - if (check_offset) - offset = check_offset; + check_offset = 0x50; break; case COMBIOS_DYN_CLK_1_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x52); - if (check_offset) - offset = check_offset; + check_offset = 0x52; break; case COMBIOS_RESERVED_MEM_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x54); - if (check_offset) - offset = check_offset; + check_offset = 0x54; break; case COMBIOS_EXT_TMDS_INFO_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x58); - if (check_offset) - offset = check_offset; + check_offset = 0x58; break; case COMBIOS_MEM_CLK_INFO_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x5a); - if (check_offset) - offset = check_offset; + check_offset = 0x5a; break; case COMBIOS_EXT_DAC_INFO_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x5c); - if (check_offset) - offset = check_offset; + check_offset = 0x5c; break; case COMBIOS_MISC_INFO_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x5e); - if (check_offset) - offset = check_offset; + check_offset = 0x5e; break; case COMBIOS_CRT_INFO_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x60); - if (check_offset) - offset = check_offset; + check_offset = 0x60; break; case COMBIOS_INTEGRATED_SYSTEM_INFO_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x62); - if (check_offset) - offset = check_offset; + check_offset = 0x62; break; case COMBIOS_COMPONENT_VIDEO_INFO_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x64); - if (check_offset) - offset = check_offset; + check_offset = 0x64; break; case COMBIOS_FAN_SPEED_INFO_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x66); - if (check_offset) - offset = check_offset; + check_offset = 0x66; break; case COMBIOS_OVERDRIVE_INFO_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x68); - if 
(check_offset) - offset = check_offset; + check_offset = 0x68; break; case COMBIOS_OEM_INFO_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x6a); - if (check_offset) - offset = check_offset; + check_offset = 0x6a; break; case COMBIOS_DYN_CLK_2_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x6c); - if (check_offset) - offset = check_offset; + check_offset = 0x6c; break; case COMBIOS_POWER_CONNECTOR_INFO_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x6e); - if (check_offset) - offset = check_offset; + check_offset = 0x6e; break; case COMBIOS_I2C_INFO_TABLE: - check_offset = RBIOS16(rdev->bios_header_start + 0x70); - if (check_offset) - offset = check_offset; + check_offset = 0x70; break; /* relative offset tables */ case COMBIOS_ASIC_INIT_3_TABLE: /* offset from misc info */ @@ -439,11 +371,16 @@ static uint16_t combios_get_table_offset(struct drm_device *dev, } break; default: + check_offset = 0; break; } - return offset; + size = RBIOS8(rdev->bios_header_start + 0x6); + /* check absolute offset tables */ + if (table < COMBIOS_ASIC_INIT_3_TABLE && check_offset && check_offset < size) + offset = RBIOS16(rdev->bios_header_start + check_offset); + return offset; } bool radeon_combios_check_hardcoded_edid(struct radeon_device *rdev) @@ -965,16 +902,22 @@ struct radeon_encoder_primary_dac *radeon_combios_get_primary_dac_info(struct dac = RBIOS8(dac_info + 0x3) & 0xf; p_dac->ps2_pdac_adj = (bg << 8) | (dac); } - /* if the values are all zeros, use the table */ - if (p_dac->ps2_pdac_adj) + /* if the values are zeros, use the table */ + if ((dac == 0) || (bg == 0)) + found = 0; + else found = 1; } /* quirks */ + /* Radeon 7000 (RV100) */ + if (((dev->pdev->device == 0x5159) && + (dev->pdev->subsystem_vendor == 0x174B) && + (dev->pdev->subsystem_device == 0x7c28)) || /* Radeon 9100 (R200) */ - if ((dev->pdev->device == 0x514D) && + ((dev->pdev->device == 0x514D) && (dev->pdev->subsystem_vendor == 0x174B) && - (dev->pdev->subsystem_device 
== 0x7149)) { + (dev->pdev->subsystem_device == 0x7149))) { /* vbios value is bad, use the default */ found = 0; } diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c index d9d31a383276..6a51d943ccf4 100644 --- a/drivers/gpu/drm/radeon/radeon_gart.c +++ b/drivers/gpu/drm/radeon/radeon_gart.c @@ -466,7 +466,7 @@ int radeon_vm_manager_init(struct radeon_device *rdev) size += rdev->vm_manager.max_pfn * 8; size *= 2; r = radeon_sa_bo_manager_init(rdev, &rdev->vm_manager.sa_manager, - RADEON_VM_PTB_ALIGN(size), + RADEON_GPU_PAGE_ALIGN(size), RADEON_VM_PTB_ALIGN_SIZE, RADEON_GEM_DOMAIN_VRAM); if (r) { @@ -621,7 +621,7 @@ int radeon_vm_alloc_pt(struct radeon_device *rdev, struct radeon_vm *vm) } retry: - pd_size = RADEON_VM_PTB_ALIGN(radeon_vm_directory_size(rdev)); + pd_size = radeon_vm_directory_size(rdev); r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager, &vm->page_directory, pd_size, RADEON_VM_PTB_ALIGN_SIZE, false); @@ -953,8 +953,8 @@ static int radeon_vm_update_pdes(struct radeon_device *rdev, retry: r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager, &vm->page_tables[pt_idx], - RADEON_VM_PTB_ALIGN(RADEON_VM_PTE_COUNT * 8), - RADEON_VM_PTB_ALIGN_SIZE, false); + RADEON_VM_PTE_COUNT * 8, + RADEON_GPU_PAGE_SIZE, false); if (r == -ENOMEM) { r = radeon_vm_evict(rdev, vm); diff --git a/drivers/gpu/drm/radeon/rv6xx_dpm.c b/drivers/gpu/drm/radeon/rv6xx_dpm.c index 65e33f387341..363018c60412 100644 --- a/drivers/gpu/drm/radeon/rv6xx_dpm.c +++ b/drivers/gpu/drm/radeon/rv6xx_dpm.c @@ -819,7 +819,7 @@ static void rv6xx_program_memory_timing_parameters(struct radeon_device *rdev) POWERMODE1(calculate_memory_refresh_rate(rdev, pi->hw.sclks[R600_POWER_LEVEL_MEDIUM])) | POWERMODE2(calculate_memory_refresh_rate(rdev, - pi->hw.sclks[R600_POWER_LEVEL_MEDIUM])) | + pi->hw.sclks[R600_POWER_LEVEL_HIGH])) | POWERMODE3(calculate_memory_refresh_rate(rdev, pi->hw.sclks[R600_POWER_LEVEL_HIGH]))); WREG32(ARB_RFSH_RATE, arb_refresh_rate); @@ 
-1182,10 +1182,10 @@ static void rv6xx_program_display_gap(struct radeon_device *rdev) u32 tmp = RREG32(CG_DISPLAY_GAP_CNTL); tmp &= ~(DISP1_GAP_MCHG_MASK | DISP2_GAP_MCHG_MASK); - if (RREG32(AVIVO_D1CRTC_CONTROL) & AVIVO_CRTC_EN) { + if (rdev->pm.dpm.new_active_crtcs & 1) { tmp |= DISP1_GAP_MCHG(R600_PM_DISPLAY_GAP_VBLANK); tmp |= DISP2_GAP_MCHG(R600_PM_DISPLAY_GAP_IGNORE); - } else if (RREG32(AVIVO_D2CRTC_CONTROL) & AVIVO_CRTC_EN) { + } else if (rdev->pm.dpm.new_active_crtcs & 2) { tmp |= DISP1_GAP_MCHG(R600_PM_DISPLAY_GAP_IGNORE); tmp |= DISP2_GAP_MCHG(R600_PM_DISPLAY_GAP_VBLANK); } else { @@ -1670,6 +1670,8 @@ int rv6xx_dpm_set_power_state(struct radeon_device *rdev) struct radeon_ps *old_ps = rdev->pm.dpm.current_ps; int ret; + pi->restricted_levels = 0; + rv6xx_set_uvd_clock_before_set_eng_clock(rdev, new_ps, old_ps); rv6xx_clear_vc(rdev); @@ -1756,6 +1758,8 @@ int rv6xx_dpm_set_power_state(struct radeon_device *rdev) rv6xx_set_uvd_clock_after_set_eng_clock(rdev, new_ps, old_ps); + rdev->pm.dpm.forced_level = RADEON_DPM_FORCED_LEVEL_AUTO; + return 0; } @@ -2085,3 +2089,34 @@ u32 rv6xx_dpm_get_mclk(struct radeon_device *rdev, bool low) else return requested_state->high.mclk; } + +int rv6xx_dpm_force_performance_level(struct radeon_device *rdev, + enum radeon_dpm_forced_level level) +{ + struct rv6xx_power_info *pi = rv6xx_get_pi(rdev); + + if (level == RADEON_DPM_FORCED_LEVEL_HIGH) { + pi->restricted_levels = 3; + } else if (level == RADEON_DPM_FORCED_LEVEL_LOW) { + pi->restricted_levels = 2; + } else { + pi->restricted_levels = 0; + } + + rv6xx_clear_vc(rdev); + r600_power_level_enable(rdev, R600_POWER_LEVEL_LOW, true); + r600_set_at(rdev, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF); + r600_wait_for_power_level(rdev, R600_POWER_LEVEL_LOW); + r600_power_level_enable(rdev, R600_POWER_LEVEL_HIGH, false); + r600_power_level_enable(rdev, R600_POWER_LEVEL_MEDIUM, false); + rv6xx_enable_medium(rdev); + rv6xx_enable_high(rdev); + if (pi->restricted_levels == 3) + 
r600_power_level_enable(rdev, R600_POWER_LEVEL_LOW, false); + rv6xx_program_vc(rdev); + rv6xx_program_at(rdev); + + rdev->pm.dpm.forced_level = level; + + return 0; +} diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c index 4c605c70ebf9..deb5c25305af 100644 --- a/drivers/hv/hv_balloon.c +++ b/drivers/hv/hv_balloon.c @@ -562,7 +562,7 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size, struct hv_hotadd_state *has) { int ret = 0; - int i, nid, t; + int i, nid; unsigned long start_pfn; unsigned long processed_pfn; unsigned long total_pfn = pfn_count; @@ -607,14 +607,11 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size, /* * Wait for the memory block to be onlined. + * Since the hot add has succeeded, it is ok to + * proceed even if the pages in the hot added region + * have not been "onlined" within the allowed time. */ - t = wait_for_completion_timeout(&dm_device.ol_waitevent, 5*HZ); - if (t == 0) { - pr_info("hot_add memory timedout\n"); - has->ha_end_pfn -= HA_CHUNK; - has->covered_end_pfn -= processed_pfn; - break; - } + wait_for_completion_timeout(&dm_device.ol_waitevent, 5*HZ); } @@ -978,6 +975,14 @@ static void post_status(struct hv_dynmem_device *dm) dm->num_pages_ballooned + compute_balloon_floor(); + /* + * If our transaction ID is no longer current, just don't + * send the status. This can happen if we were interrupted + * after we picked our transaction ID. 
+ */ + if (status.hdr.trans_id != atomic_read(&trans_id)) + return; + vmbus_sendpacket(dm->dev->channel, &status, sizeof(struct dm_status), (unsigned long)NULL, diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index a2464bf07c49..e8e071fc1d6d 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -690,7 +690,7 @@ int vmbus_device_register(struct hv_device *child_device_obj) if (ret) pr_err("Unable to register child device\n"); else - pr_info("child device %s registered\n", + pr_debug("child device %s registered\n", dev_name(&child_device_obj->device)); return ret; @@ -702,14 +702,14 @@ int vmbus_device_register(struct hv_device *child_device_obj) */ void vmbus_device_unregister(struct hv_device *device_obj) { + pr_debug("child device %s unregistered\n", + dev_name(&device_obj->device)); + /* * Kick off the process of unregistering the device. * This will call vmbus_remove() and eventually vmbus_device_release() */ device_unregister(&device_obj->device); - - pr_info("child device %s unregistered\n", - dev_name(&device_obj->device)); } diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c index 048f2947e08b..e45f5575fd4d 100644 --- a/drivers/md/bcache/alloc.c +++ b/drivers/md/bcache/alloc.c @@ -63,7 +63,10 @@ #include "bcache.h" #include "btree.h" +#include <linux/freezer.h> +#include <linux/kthread.h> #include <linux/random.h> +#include <trace/events/bcache.h> #define MAX_IN_FLIGHT_DISCARDS 8U @@ -151,7 +154,7 @@ static void discard_finish(struct work_struct *w) mutex_unlock(&ca->set->bucket_lock); closure_wake_up(&ca->set->bucket_wait); - wake_up(&ca->set->alloc_wait); + wake_up_process(ca->alloc_thread); closure_put(&ca->set->cl); } @@ -350,38 +353,30 @@ static void invalidate_buckets(struct cache *ca) break; } - pr_debug("free %zu/%zu free_inc %zu/%zu unused %zu/%zu", - fifo_used(&ca->free), ca->free.size, - fifo_used(&ca->free_inc), ca->free_inc.size, - fifo_used(&ca->unused), ca->unused.size); + 
trace_bcache_alloc_invalidate(ca); } #define allocator_wait(ca, cond) \ do { \ - DEFINE_WAIT(__wait); \ - \ while (1) { \ - prepare_to_wait(&ca->set->alloc_wait, \ - &__wait, TASK_INTERRUPTIBLE); \ + set_current_state(TASK_INTERRUPTIBLE); \ if (cond) \ break; \ \ mutex_unlock(&(ca)->set->bucket_lock); \ - if (test_bit(CACHE_SET_STOPPING_2, &ca->set->flags)) { \ - finish_wait(&ca->set->alloc_wait, &__wait); \ - closure_return(cl); \ - } \ + if (kthread_should_stop()) \ + return 0; \ \ + try_to_freeze(); \ schedule(); \ mutex_lock(&(ca)->set->bucket_lock); \ } \ - \ - finish_wait(&ca->set->alloc_wait, &__wait); \ + __set_current_state(TASK_RUNNING); \ } while (0) -void bch_allocator_thread(struct closure *cl) +static int bch_allocator_thread(void *arg) { - struct cache *ca = container_of(cl, struct cache, alloc); + struct cache *ca = arg; mutex_lock(&ca->set->bucket_lock); @@ -442,7 +437,7 @@ long bch_bucket_alloc(struct cache *ca, unsigned watermark, struct closure *cl) { long r = -1; again: - wake_up(&ca->set->alloc_wait); + wake_up_process(ca->alloc_thread); if (fifo_used(&ca->free) > ca->watermark[watermark] && fifo_pop(&ca->free, r)) { @@ -476,9 +471,7 @@ again: return r; } - pr_debug("alloc failure: blocked %i free %zu free_inc %zu unused %zu", - atomic_read(&ca->set->prio_blocked), fifo_used(&ca->free), - fifo_used(&ca->free_inc), fifo_used(&ca->unused)); + trace_bcache_alloc_fail(ca); if (cl) { closure_wait(&ca->set->bucket_wait, cl); @@ -552,6 +545,17 @@ int bch_bucket_alloc_set(struct cache_set *c, unsigned watermark, /* Init */ +int bch_cache_allocator_start(struct cache *ca) +{ + struct task_struct *k = kthread_run(bch_allocator_thread, + ca, "bcache_allocator"); + if (IS_ERR(k)) + return PTR_ERR(k); + + ca->alloc_thread = k; + return 0; +} + void bch_cache_allocator_exit(struct cache *ca) { struct discard *d; diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index d3e15b42a4ab..b39f6f0b45f2 100644 --- a/drivers/md/bcache/bcache.h +++ 
b/drivers/md/bcache/bcache.h @@ -178,7 +178,6 @@ #define pr_fmt(fmt) "bcache: %s() " fmt "\n", __func__ #include <linux/bio.h> -#include <linux/blktrace_api.h> #include <linux/kobject.h> #include <linux/list.h> #include <linux/mutex.h> @@ -388,8 +387,6 @@ struct keybuf_key { typedef bool (keybuf_pred_fn)(struct keybuf *, struct bkey *); struct keybuf { - keybuf_pred_fn *key_predicate; - struct bkey last_scanned; spinlock_t lock; @@ -437,9 +434,12 @@ struct bcache_device { /* If nonzero, we're detaching/unregistering from cache set */ atomic_t detaching; + int flush_done; + + uint64_t nr_stripes; + unsigned stripe_size_bits; + atomic_t *stripe_sectors_dirty; - atomic_long_t sectors_dirty; - unsigned long sectors_dirty_gc; unsigned long sectors_dirty_last; long sectors_dirty_derivative; @@ -531,6 +531,7 @@ struct cached_dev { unsigned sequential_merge:1; unsigned verify:1; + unsigned partial_stripes_expensive:1; unsigned writeback_metadata:1; unsigned writeback_running:1; unsigned char writeback_percent; @@ -565,8 +566,7 @@ struct cache { unsigned watermark[WATERMARK_MAX]; - struct closure alloc; - struct workqueue_struct *alloc_workqueue; + struct task_struct *alloc_thread; struct closure prio; struct prio_set *disk_buckets; @@ -664,13 +664,9 @@ struct gc_stat { * CACHE_SET_STOPPING always gets set first when we're closing down a cache set; * we'll continue to run normally for awhile with CACHE_SET_STOPPING set (i.e. * flushing dirty data). - * - * CACHE_SET_STOPPING_2 gets set at the last phase, when it's time to shut down - * the allocation thread. 
*/ #define CACHE_SET_UNREGISTERING 0 #define CACHE_SET_STOPPING 1 -#define CACHE_SET_STOPPING_2 2 struct cache_set { struct closure cl; @@ -703,9 +699,6 @@ struct cache_set { /* For the btree cache */ struct shrinker shrink; - /* For the allocator itself */ - wait_queue_head_t alloc_wait; - /* For the btree cache and anything allocation related */ struct mutex bucket_lock; @@ -823,10 +816,9 @@ struct cache_set { /* * A btree node on disk could have too many bsets for an iterator to fit - * on the stack - this is a single element mempool for btree_read_work() + * on the stack - have to dynamically allocate them */ - struct mutex fill_lock; - struct btree_iter *fill_iter; + mempool_t *fill_iter; /* * btree_sort() is a merge sort and requires temporary space - single @@ -834,6 +826,7 @@ struct cache_set { */ struct mutex sort_lock; struct bset *sort; + unsigned sort_crit_factor; /* List of buckets we're currently writing data to */ struct list_head data_buckets; @@ -906,8 +899,6 @@ static inline unsigned local_clock_us(void) return local_clock() >> 10; } -#define MAX_BSETS 4U - #define BTREE_PRIO USHRT_MAX #define INITIAL_PRIO 32768 @@ -1112,23 +1103,6 @@ static inline void __bkey_put(struct cache_set *c, struct bkey *k) atomic_dec_bug(&PTR_BUCKET(c, k, i)->pin); } -/* Blktrace macros */ - -#define blktrace_msg(c, fmt, ...) \ -do { \ - struct request_queue *q = bdev_get_queue(c->bdev); \ - if (q) \ - blk_add_trace_msg(q, fmt, ##__VA_ARGS__); \ -} while (0) - -#define blktrace_msg_all(s, fmt, ...) 
\ -do { \ - struct cache *_c; \ - unsigned i; \ - for_each_cache(_c, (s), i) \ - blktrace_msg(_c, fmt, ##__VA_ARGS__); \ -} while (0) - static inline void cached_dev_put(struct cached_dev *dc) { if (atomic_dec_and_test(&dc->count)) @@ -1173,10 +1147,16 @@ static inline uint8_t bucket_disk_gen(struct bucket *b) static struct kobj_attribute ksysfs_##n = \ __ATTR(n, S_IWUSR|S_IRUSR, show, store) -/* Forward declarations */ +static inline void wake_up_allocators(struct cache_set *c) +{ + struct cache *ca; + unsigned i; + + for_each_cache(ca, c, i) + wake_up_process(ca->alloc_thread); +} -void bch_writeback_queue(struct cached_dev *); -void bch_writeback_add(struct cached_dev *, unsigned); +/* Forward declarations */ void bch_count_io_errors(struct cache *, int, const char *); void bch_bbio_count_io_errors(struct cache_set *, struct bio *, @@ -1193,7 +1173,6 @@ void bch_submit_bbio(struct bio *, struct cache_set *, struct bkey *, unsigned); uint8_t bch_inc_gen(struct cache *, struct bucket *); void bch_rescale_priorities(struct cache_set *, int); bool bch_bucket_add_unused(struct cache *, struct bucket *); -void bch_allocator_thread(struct closure *); long bch_bucket_alloc(struct cache *, unsigned, struct closure *); void bch_bucket_free(struct cache_set *, struct bkey *); @@ -1241,9 +1220,9 @@ void bch_cache_set_stop(struct cache_set *); struct cache_set *bch_cache_set_alloc(struct cache_sb *); void bch_btree_cache_free(struct cache_set *); int bch_btree_cache_alloc(struct cache_set *); -void bch_cached_dev_writeback_init(struct cached_dev *); void bch_moving_init_cache_set(struct cache_set *); +int bch_cache_allocator_start(struct cache *ca); void bch_cache_allocator_exit(struct cache *ca); int bch_cache_allocator_init(struct cache *ca); diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c index 1d27d3af3251..8010eed06a51 100644 --- a/drivers/md/bcache/bset.c +++ b/drivers/md/bcache/bset.c @@ -78,6 +78,7 @@ struct bkey *bch_keylist_pop(struct keylist *l) 
bool __bch_ptr_invalid(struct cache_set *c, int level, const struct bkey *k) { unsigned i; + char buf[80]; if (level && (!KEY_PTRS(k) || !KEY_SIZE(k) || KEY_DIRTY(k))) goto bad; @@ -102,7 +103,8 @@ bool __bch_ptr_invalid(struct cache_set *c, int level, const struct bkey *k) return false; bad: - cache_bug(c, "spotted bad key %s: %s", pkey(k), bch_ptr_status(c, k)); + bch_bkey_to_text(buf, sizeof(buf), k); + cache_bug(c, "spotted bad key %s: %s", buf, bch_ptr_status(c, k)); return true; } @@ -162,10 +164,16 @@ bool bch_ptr_bad(struct btree *b, const struct bkey *k) #ifdef CONFIG_BCACHE_EDEBUG bug: mutex_unlock(&b->c->bucket_lock); - btree_bug(b, + + { + char buf[80]; + + bch_bkey_to_text(buf, sizeof(buf), k); + btree_bug(b, "inconsistent pointer %s: bucket %zu pin %i prio %i gen %i last_gc %i mark %llu gc_gen %i", - pkey(k), PTR_BUCKET_NR(b->c, k, i), atomic_read(&g->pin), - g->prio, g->gen, g->last_gc, GC_MARK(g), g->gc_gen); + buf, PTR_BUCKET_NR(b->c, k, i), atomic_read(&g->pin), + g->prio, g->gen, g->last_gc, GC_MARK(g), g->gc_gen); + } return true; #endif } @@ -1084,33 +1092,39 @@ void bch_btree_sort_into(struct btree *b, struct btree *new) new->sets->size = 0; } +#define SORT_CRIT (4096 / sizeof(uint64_t)) + void bch_btree_sort_lazy(struct btree *b) { - if (b->nsets) { - unsigned i, j, keys = 0, total; + unsigned crit = SORT_CRIT; + int i; - for (i = 0; i <= b->nsets; i++) - keys += b->sets[i].data->keys; - - total = keys; + /* Don't sort if nothing to do */ + if (!b->nsets) + goto out; - for (j = 0; j < b->nsets; j++) { - if (keys * 2 < total || - keys < 1000) { - bch_btree_sort_partial(b, j); - return; - } + /* If not a leaf node, always sort */ + if (b->level) { + bch_btree_sort(b); + return; + } - keys -= b->sets[j].data->keys; - } + for (i = b->nsets - 1; i >= 0; --i) { + crit *= b->c->sort_crit_factor; - /* Must sort if b->nsets == 3 or we'll overflow */ - if (b->nsets >= (MAX_BSETS - 1) - b->level) { - bch_btree_sort(b); + if (b->sets[i].data->keys < 
crit) { + bch_btree_sort_partial(b, i); return; } } + /* Sort if we'd overflow */ + if (b->nsets + 1 == MAX_BSETS) { + bch_btree_sort(b); + return; + } + +out: bset_build_written_tree(b); } diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h index 57a9cff41546..ae115a253d73 100644 --- a/drivers/md/bcache/bset.h +++ b/drivers/md/bcache/bset.h @@ -1,6 +1,8 @@ #ifndef _BCACHE_BSET_H #define _BCACHE_BSET_H +#include <linux/slab.h> + /* * BKEYS: * @@ -142,6 +144,8 @@ /* Btree key comparison/iteration */ +#define MAX_BSETS 4U + struct btree_iter { size_t size, used; struct btree_iter_set { diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 7a5658f04e62..ee372884c405 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -24,6 +24,7 @@ #include "btree.h" #include "debug.h" #include "request.h" +#include "writeback.h" #include <linux/slab.h> #include <linux/bitops.h> @@ -134,44 +135,17 @@ static uint64_t btree_csum_set(struct btree *b, struct bset *i) return crc ^ 0xffffffffffffffffULL; } -static void btree_bio_endio(struct bio *bio, int error) +static void bch_btree_node_read_done(struct btree *b) { - struct closure *cl = bio->bi_private; - struct btree *b = container_of(cl, struct btree, io.cl); - - if (error) - set_btree_node_io_error(b); - - bch_bbio_count_io_errors(b->c, bio, error, (bio->bi_rw & WRITE) - ? 
"writing btree" : "reading btree"); - closure_put(cl); -} - -static void btree_bio_init(struct btree *b) -{ - BUG_ON(b->bio); - b->bio = bch_bbio_alloc(b->c); - - b->bio->bi_end_io = btree_bio_endio; - b->bio->bi_private = &b->io.cl; -} - -void bch_btree_read_done(struct closure *cl) -{ - struct btree *b = container_of(cl, struct btree, io.cl); - struct bset *i = b->sets[0].data; - struct btree_iter *iter = b->c->fill_iter; const char *err = "bad btree header"; - BUG_ON(b->nsets || b->written); - - bch_bbio_free(b->bio, b->c); - b->bio = NULL; + struct bset *i = b->sets[0].data; + struct btree_iter *iter; - mutex_lock(&b->c->fill_lock); + iter = mempool_alloc(b->c->fill_iter, GFP_NOWAIT); + iter->size = b->c->sb.bucket_size / b->c->sb.block_size; iter->used = 0; - if (btree_node_io_error(b) || - !i->seq) + if (!i->seq) goto err; for (; @@ -228,17 +202,8 @@ void bch_btree_read_done(struct closure *cl) if (b->written < btree_blocks(b)) bch_bset_init_next(b); out: - - mutex_unlock(&b->c->fill_lock); - - spin_lock(&b->c->btree_read_time_lock); - bch_time_stats_update(&b->c->btree_read_time, b->io_start_time); - spin_unlock(&b->c->btree_read_time_lock); - - smp_wmb(); /* read_done is our write lock */ - set_btree_node_read_done(b); - - closure_return(cl); + mempool_free(iter, b->c->fill_iter); + return; err: set_btree_node_io_error(b); bch_cache_set_error(b->c, "%s at bucket %zu, block %zu, %u keys", @@ -247,48 +212,69 @@ err: goto out; } -void bch_btree_read(struct btree *b) +static void btree_node_read_endio(struct bio *bio, int error) +{ + struct closure *cl = bio->bi_private; + closure_put(cl); +} + +void bch_btree_node_read(struct btree *b) { - BUG_ON(b->nsets || b->written); + uint64_t start_time = local_clock(); + struct closure cl; + struct bio *bio; + + trace_bcache_btree_read(b); + + closure_init_stack(&cl); + + bio = bch_bbio_alloc(b->c); + bio->bi_rw = REQ_META|READ_SYNC; + bio->bi_size = KEY_SIZE(&b->key) << 9; + bio->bi_end_io = btree_node_read_endio; + 
bio->bi_private = &cl; + + bch_bio_map(bio, b->sets[0].data); + + bch_submit_bbio(bio, b->c, &b->key, 0); + closure_sync(&cl); - if (!closure_trylock(&b->io.cl, &b->c->cl)) - BUG(); + if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) + set_btree_node_io_error(b); - b->io_start_time = local_clock(); + bch_bbio_free(bio, b->c); - btree_bio_init(b); - b->bio->bi_rw = REQ_META|READ_SYNC; - b->bio->bi_size = KEY_SIZE(&b->key) << 9; + if (btree_node_io_error(b)) + goto err; - bch_bio_map(b->bio, b->sets[0].data); + bch_btree_node_read_done(b); - pr_debug("%s", pbtree(b)); - trace_bcache_btree_read(b->bio); - bch_submit_bbio(b->bio, b->c, &b->key, 0); + spin_lock(&b->c->btree_read_time_lock); + bch_time_stats_update(&b->c->btree_read_time, start_time); + spin_unlock(&b->c->btree_read_time_lock); - continue_at(&b->io.cl, bch_btree_read_done, system_wq); + return; +err: + bch_cache_set_error(b->c, "io error reading bucket %lu", + PTR_BUCKET_NR(b->c, &b->key, 0)); } static void btree_complete_write(struct btree *b, struct btree_write *w) { if (w->prio_blocked && !atomic_sub_return(w->prio_blocked, &b->c->prio_blocked)) - wake_up(&b->c->alloc_wait); + wake_up_allocators(b->c); if (w->journal) { atomic_dec_bug(w->journal); __closure_wake_up(&b->c->journal.wait); } - if (w->owner) - closure_put(w->owner); - w->prio_blocked = 0; w->journal = NULL; - w->owner = NULL; } -static void __btree_write_done(struct closure *cl) +static void __btree_node_write_done(struct closure *cl) { struct btree *b = container_of(cl, struct btree, io.cl); struct btree_write *w = btree_prev_write(b); @@ -304,7 +290,7 @@ static void __btree_write_done(struct closure *cl) closure_return(cl); } -static void btree_write_done(struct closure *cl) +static void btree_node_write_done(struct closure *cl) { struct btree *b = container_of(cl, struct btree, io.cl); struct bio_vec *bv; @@ -313,10 +299,22 @@ static void btree_write_done(struct closure *cl) __bio_for_each_segment(bv, b->bio, n, 0) __free_page(bv->bv_page); 
- __btree_write_done(cl); + __btree_node_write_done(cl); } -static void do_btree_write(struct btree *b) +static void btree_node_write_endio(struct bio *bio, int error) +{ + struct closure *cl = bio->bi_private; + struct btree *b = container_of(cl, struct btree, io.cl); + + if (error) + set_btree_node_io_error(b); + + bch_bbio_count_io_errors(b->c, bio, error, "writing btree"); + closure_put(cl); +} + +static void do_btree_node_write(struct btree *b) { struct closure *cl = &b->io.cl; struct bset *i = b->sets[b->nsets].data; @@ -325,15 +323,34 @@ static void do_btree_write(struct btree *b) i->version = BCACHE_BSET_VERSION; i->csum = btree_csum_set(b, i); - btree_bio_init(b); - b->bio->bi_rw = REQ_META|WRITE_SYNC; - b->bio->bi_size = set_blocks(i, b->c) * block_bytes(b->c); + BUG_ON(b->bio); + b->bio = bch_bbio_alloc(b->c); + + b->bio->bi_end_io = btree_node_write_endio; + b->bio->bi_private = &b->io.cl; + b->bio->bi_rw = REQ_META|WRITE_SYNC|REQ_FUA; + b->bio->bi_size = set_blocks(i, b->c) * block_bytes(b->c); bch_bio_map(b->bio, i); + /* + * If we're appending to a leaf node, we don't technically need FUA - + * this write just needs to be persisted before the next journal write, + * which will be marked FLUSH|FUA. + * + * Similarly if we're writing a new btree root - the pointer is going to + * be in the next journal entry. + * + * But if we're writing a new btree node (that isn't a root) or + * appending to a non leaf btree node, we need either FUA or a flush + * when we write the parent with the new pointer. FUA is cheaper than a + * flush, and writes appending to leaf nodes aren't blocking anything so + * just make all btree node writes FUA to keep things sane. 
+ */ + bkey_copy(&k.key, &b->key); SET_PTR_OFFSET(&k.key, 0, PTR_OFFSET(&k.key, 0) + bset_offset(b, i)); - if (!bch_bio_alloc_pages(b->bio, GFP_NOIO)) { + if (!bio_alloc_pages(b->bio, GFP_NOIO)) { int j; struct bio_vec *bv; void *base = (void *) ((unsigned long) i & ~(PAGE_SIZE - 1)); @@ -342,40 +359,41 @@ static void do_btree_write(struct btree *b) memcpy(page_address(bv->bv_page), base + j * PAGE_SIZE, PAGE_SIZE); - trace_bcache_btree_write(b->bio); bch_submit_bbio(b->bio, b->c, &k.key, 0); - continue_at(cl, btree_write_done, NULL); + continue_at(cl, btree_node_write_done, NULL); } else { b->bio->bi_vcnt = 0; bch_bio_map(b->bio, i); - trace_bcache_btree_write(b->bio); bch_submit_bbio(b->bio, b->c, &k.key, 0); closure_sync(cl); - __btree_write_done(cl); + __btree_node_write_done(cl); } } -static void __btree_write(struct btree *b) +void bch_btree_node_write(struct btree *b, struct closure *parent) { struct bset *i = b->sets[b->nsets].data; + trace_bcache_btree_write(b); + BUG_ON(current->bio_list); + BUG_ON(b->written >= btree_blocks(b)); + BUG_ON(b->written && !i->keys); + BUG_ON(b->sets->data->seq != i->seq); + bch_check_key_order(b, i); - closure_lock(&b->io, &b->c->cl); cancel_delayed_work(&b->work); + /* If caller isn't waiting for write, parent refcount is cache set */ + closure_lock(&b->io, parent ?: &b->c->cl); + clear_bit(BTREE_NODE_dirty, &b->flags); change_bit(BTREE_NODE_write_idx, &b->flags); - bch_check_key_order(b, i); - BUG_ON(b->written && !i->keys); - - do_btree_write(b); - - pr_debug("%s block %i keys %i", pbtree(b), b->written, i->keys); + do_btree_node_write(b); b->written += set_blocks(i, b->c); atomic_long_add(set_blocks(i, b->c) * b->c->sb.block_size, @@ -387,37 +405,31 @@ static void __btree_write(struct btree *b) bch_bset_init_next(b); } -static void btree_write_work(struct work_struct *w) +static void btree_node_write_work(struct work_struct *w) { struct btree *b = container_of(to_delayed_work(w), struct btree, work); - 
down_write(&b->lock); + rw_lock(true, b, b->level); if (btree_node_dirty(b)) - __btree_write(b); - up_write(&b->lock); + bch_btree_node_write(b, NULL); + rw_unlock(true, b); } -void bch_btree_write(struct btree *b, bool now, struct btree_op *op) +static void bch_btree_leaf_dirty(struct btree *b, struct btree_op *op) { struct bset *i = b->sets[b->nsets].data; struct btree_write *w = btree_current_write(b); - BUG_ON(b->written && - (b->written >= btree_blocks(b) || - i->seq != b->sets[0].data->seq || - !i->keys)); + BUG_ON(!b->written); + BUG_ON(!i->keys); - if (!btree_node_dirty(b)) { - set_btree_node_dirty(b); - queue_delayed_work(btree_io_wq, &b->work, - msecs_to_jiffies(30000)); - } + if (!btree_node_dirty(b)) + queue_delayed_work(btree_io_wq, &b->work, 30 * HZ); - w->prio_blocked += b->prio_blocked; - b->prio_blocked = 0; + set_btree_node_dirty(b); - if (op && op->journal && !b->level) { + if (op && op->journal) { if (w->journal && journal_pin_cmp(b->c, w, op)) { atomic_dec_bug(w->journal); @@ -430,23 +442,10 @@ void bch_btree_write(struct btree *b, bool now, struct btree_op *op) } } - if (current->bio_list) - return; - /* Force write if set is too big */ - if (now || - b->level || - set_bytes(i) > PAGE_SIZE - 48) { - if (op && now) { - /* Must wait on multiple writes */ - BUG_ON(w->owner); - w->owner = &op->cl; - closure_get(&op->cl); - } - - __btree_write(b); - } - BUG_ON(!b->written); + if (set_bytes(i) > PAGE_SIZE - 48 && + !current->bio_list) + bch_btree_node_write(b, NULL); } /* @@ -559,7 +558,7 @@ static struct btree *mca_bucket_alloc(struct cache_set *c, init_rwsem(&b->lock); lockdep_set_novalidate_class(&b->lock); INIT_LIST_HEAD(&b->list); - INIT_DELAYED_WORK(&b->work, btree_write_work); + INIT_DELAYED_WORK(&b->work, btree_node_write_work); b->c = c; closure_init_unlocked(&b->io); @@ -582,7 +581,7 @@ static int mca_reap(struct btree *b, struct closure *cl, unsigned min_order) BUG_ON(btree_node_dirty(b) && !b->sets[0].data); if (cl && 
btree_node_dirty(b)) - bch_btree_write(b, true, NULL); + bch_btree_node_write(b, NULL); if (cl) closure_wait_event_async(&b->io.wait, cl, @@ -623,6 +622,13 @@ static int bch_mca_shrink(struct shrinker *shrink, struct shrink_control *sc) else if (!mutex_trylock(&c->bucket_lock)) return -1; + /* + * It's _really_ critical that we don't free too many btree nodes - we + * have to always leave ourselves a reserve. The reserve is how we + * guarantee that allocating memory for a new btree node can always + * succeed, so that inserting keys into the btree can always succeed and + * IO can always make forward progress: + */ nr /= c->btree_pages; nr = min_t(unsigned long, nr, mca_can_free(c)); @@ -766,6 +772,8 @@ static struct btree *mca_cannibalize(struct cache_set *c, struct bkey *k, int ret = -ENOMEM; struct btree *i; + trace_bcache_btree_cache_cannibalize(c); + if (!cl) return ERR_PTR(-ENOMEM); @@ -784,7 +792,6 @@ static struct btree *mca_cannibalize(struct cache_set *c, struct bkey *k, return ERR_PTR(-EAGAIN); } - /* XXX: tracepoint */ c->try_harder = cl; c->try_harder_start = local_clock(); retry: @@ -905,6 +912,9 @@ retry: b = mca_find(c, k); if (!b) { + if (current->bio_list) + return ERR_PTR(-EAGAIN); + mutex_lock(&c->bucket_lock); b = mca_alloc(c, k, level, &op->cl); mutex_unlock(&c->bucket_lock); @@ -914,7 +924,7 @@ retry: if (IS_ERR(b)) return b; - bch_btree_read(b); + bch_btree_node_read(b); if (!write) downgrade_write(&b->lock); @@ -937,15 +947,12 @@ retry: for (; i <= b->nsets; i++) prefetch(b->sets[i].data); - if (!closure_wait_event(&b->io.wait, &op->cl, - btree_node_read_done(b))) { - rw_unlock(write, b); - b = ERR_PTR(-EAGAIN); - } else if (btree_node_io_error(b)) { + if (btree_node_io_error(b)) { rw_unlock(write, b); - b = ERR_PTR(-EIO); - } else - BUG_ON(!b->written); + return ERR_PTR(-EIO); + } + + BUG_ON(!b->written); return b; } @@ -959,7 +966,7 @@ static void btree_node_prefetch(struct cache_set *c, struct bkey *k, int level) 
mutex_unlock(&c->bucket_lock); if (!IS_ERR_OR_NULL(b)) { - bch_btree_read(b); + bch_btree_node_read(b); rw_unlock(true, b); } } @@ -970,24 +977,19 @@ static void btree_node_free(struct btree *b, struct btree_op *op) { unsigned i; + trace_bcache_btree_node_free(b); + /* * The BUG_ON() in btree_node_get() implies that we must have a write * lock on parent to free or even invalidate a node */ BUG_ON(op->lock <= b->level); BUG_ON(b == b->c->root); - pr_debug("bucket %s", pbtree(b)); if (btree_node_dirty(b)) btree_complete_write(b, btree_current_write(b)); clear_bit(BTREE_NODE_dirty, &b->flags); - if (b->prio_blocked && - !atomic_sub_return(b->prio_blocked, &b->c->prio_blocked)) - wake_up(&b->c->alloc_wait); - - b->prio_blocked = 0; - cancel_delayed_work(&b->work); mutex_lock(&b->c->bucket_lock); @@ -1028,17 +1030,20 @@ retry: goto retry; } - set_btree_node_read_done(b); b->accessed = 1; bch_bset_init_next(b); mutex_unlock(&c->bucket_lock); + + trace_bcache_btree_node_alloc(b); return b; err_free: bch_bucket_free(c, &k.key); __bkey_put(c, &k.key); err: mutex_unlock(&c->bucket_lock); + + trace_bcache_btree_node_alloc_fail(b); return b; } @@ -1137,11 +1142,8 @@ static int btree_gc_mark_node(struct btree *b, unsigned *keys, gc->nkeys++; gc->data += KEY_SIZE(k); - if (KEY_DIRTY(k)) { + if (KEY_DIRTY(k)) gc->dirty += KEY_SIZE(k); - if (d) - d->sectors_dirty_gc += KEY_SIZE(k); - } } for (t = b->sets; t <= &b->sets[b->nsets]; t++) @@ -1166,14 +1168,11 @@ static struct btree *btree_gc_alloc(struct btree *b, struct bkey *k, if (!IS_ERR_OR_NULL(n)) { swap(b, n); + __bkey_put(b->c, &b->key); memcpy(k->ptr, b->key.ptr, sizeof(uint64_t) * KEY_PTRS(&b->key)); - __bkey_put(b->c, &b->key); - atomic_inc(&b->c->prio_blocked); - b->prio_blocked++; - btree_node_free(n, op); up_write(&n->lock); } @@ -1278,7 +1277,7 @@ static void btree_gc_coalesce(struct btree *b, struct btree_op *op, btree_node_free(r->b, op); up_write(&r->b->lock); - pr_debug("coalesced %u nodes", nodes); + 
trace_bcache_btree_gc_coalesce(nodes); gc->nodes--; nodes--; @@ -1293,14 +1292,9 @@ static int btree_gc_recurse(struct btree *b, struct btree_op *op, void write(struct btree *r) { if (!r->written) - bch_btree_write(r, true, op); - else if (btree_node_dirty(r)) { - BUG_ON(btree_current_write(r)->owner); - btree_current_write(r)->owner = writes; - closure_get(writes); - - bch_btree_write(r, true, NULL); - } + bch_btree_node_write(r, &op->cl); + else if (btree_node_dirty(r)) + bch_btree_node_write(r, writes); up_write(&r->lock); } @@ -1386,9 +1380,7 @@ static int bch_btree_gc_root(struct btree *b, struct btree_op *op, ret = btree_gc_recurse(b, op, writes, gc); if (!b->written || btree_node_dirty(b)) { - atomic_inc(&b->c->prio_blocked); - b->prio_blocked++; - bch_btree_write(b, true, n ? op : NULL); + bch_btree_node_write(b, n ? &op->cl : NULL); } if (!IS_ERR_OR_NULL(n)) { @@ -1405,7 +1397,6 @@ static void btree_gc_start(struct cache_set *c) { struct cache *ca; struct bucket *b; - struct bcache_device **d; unsigned i; if (!c->gc_mark_valid) @@ -1419,16 +1410,12 @@ static void btree_gc_start(struct cache_set *c) for_each_cache(ca, c, i) for_each_bucket(b, ca) { b->gc_gen = b->gen; - if (!atomic_read(&b->pin)) + if (!atomic_read(&b->pin)) { SET_GC_MARK(b, GC_MARK_RECLAIMABLE); + SET_GC_SECTORS_USED(b, 0); + } } - for (d = c->devices; - d < c->devices + c->nr_uuids; - d++) - if (*d) - (*d)->sectors_dirty_gc = 0; - mutex_unlock(&c->bucket_lock); } @@ -1437,7 +1424,6 @@ size_t bch_btree_gc_finish(struct cache_set *c) size_t available = 0; struct bucket *b; struct cache *ca; - struct bcache_device **d; unsigned i; mutex_lock(&c->bucket_lock); @@ -1480,22 +1466,6 @@ size_t bch_btree_gc_finish(struct cache_set *c) } } - for (d = c->devices; - d < c->devices + c->nr_uuids; - d++) - if (*d) { - unsigned long last = - atomic_long_read(&((*d)->sectors_dirty)); - long difference = (*d)->sectors_dirty_gc - last; - - pr_debug("sectors dirty off by %li", difference); - - 
(*d)->sectors_dirty_last += difference; - - atomic_long_set(&((*d)->sectors_dirty), - (*d)->sectors_dirty_gc); - } - mutex_unlock(&c->bucket_lock); return available; } @@ -1508,10 +1478,9 @@ static void bch_btree_gc(struct closure *cl) struct gc_stat stats; struct closure writes; struct btree_op op; - uint64_t start_time = local_clock(); - trace_bcache_gc_start(c->sb.set_uuid); - blktrace_msg_all(c, "Starting gc"); + + trace_bcache_gc_start(c); memset(&stats, 0, sizeof(struct gc_stat)); closure_init_stack(&writes); @@ -1520,14 +1489,14 @@ static void bch_btree_gc(struct closure *cl) btree_gc_start(c); + atomic_inc(&c->prio_blocked); + ret = btree_root(gc_root, c, &op, &writes, &stats); closure_sync(&op.cl); closure_sync(&writes); if (ret) { - blktrace_msg_all(c, "Stopped gc"); pr_warn("gc failed!"); - continue_at(cl, bch_btree_gc, bch_gc_wq); } @@ -1537,6 +1506,9 @@ static void bch_btree_gc(struct closure *cl) available = bch_btree_gc_finish(c); + atomic_dec(&c->prio_blocked); + wake_up_allocators(c); + bch_time_stats_update(&c->btree_gc_time, start_time); stats.key_bytes *= sizeof(uint64_t); @@ -1544,10 +1516,8 @@ static void bch_btree_gc(struct closure *cl) stats.data <<= 9; stats.in_use = (c->nbuckets - available) * 100 / c->nbuckets; memcpy(&c->gc_stats, &stats, sizeof(struct gc_stat)); - blktrace_msg_all(c, "Finished gc"); - trace_bcache_gc_end(c->sb.set_uuid); - wake_up(&c->alloc_wait); + trace_bcache_gc_end(c); continue_at(cl, bch_moving_gc, bch_gc_wq); } @@ -1654,14 +1624,14 @@ static bool fix_overlapping_extents(struct btree *b, struct btree_iter *iter, struct btree_op *op) { - void subtract_dirty(struct bkey *k, int sectors) + void subtract_dirty(struct bkey *k, uint64_t offset, int sectors) { - struct bcache_device *d = b->c->devices[KEY_INODE(k)]; - - if (KEY_DIRTY(k) && d) - atomic_long_sub(sectors, &d->sectors_dirty); + if (KEY_DIRTY(k)) + bcache_dev_sectors_dirty_add(b->c, KEY_INODE(k), + offset, -sectors); } + uint64_t old_offset; unsigned old_size, 
sectors_found = 0; while (1) { @@ -1673,6 +1643,7 @@ static bool fix_overlapping_extents(struct btree *b, if (bkey_cmp(k, &START_KEY(insert)) <= 0) continue; + old_offset = KEY_START(k); old_size = KEY_SIZE(k); /* @@ -1728,7 +1699,7 @@ static bool fix_overlapping_extents(struct btree *b, struct bkey *top; - subtract_dirty(k, KEY_SIZE(insert)); + subtract_dirty(k, KEY_START(insert), KEY_SIZE(insert)); if (bkey_written(b, k)) { /* @@ -1775,7 +1746,7 @@ static bool fix_overlapping_extents(struct btree *b, } } - subtract_dirty(k, old_size - KEY_SIZE(k)); + subtract_dirty(k, old_offset, old_size - KEY_SIZE(k)); } check_failed: @@ -1798,7 +1769,7 @@ static bool btree_insert_key(struct btree *b, struct btree_op *op, { struct bset *i = b->sets[b->nsets].data; struct bkey *m, *prev; - const char *status = "insert"; + unsigned status = BTREE_INSERT_STATUS_INSERT; BUG_ON(bkey_cmp(k, &b->key) > 0); BUG_ON(b->level && !KEY_PTRS(k)); @@ -1831,17 +1802,17 @@ static bool btree_insert_key(struct btree *b, struct btree_op *op, goto insert; /* prev is in the tree, if we merge we're done */ - status = "back merging"; + status = BTREE_INSERT_STATUS_BACK_MERGE; if (prev && bch_bkey_try_merge(b, prev, k)) goto merged; - status = "overwrote front"; + status = BTREE_INSERT_STATUS_OVERWROTE; if (m != end(i) && KEY_PTRS(m) == KEY_PTRS(k) && !KEY_SIZE(m)) goto copy; - status = "front merge"; + status = BTREE_INSERT_STATUS_FRONT_MERGE; if (m != end(i) && bch_bkey_try_merge(b, k, m)) goto copy; @@ -1851,21 +1822,21 @@ static bool btree_insert_key(struct btree *b, struct btree_op *op, insert: shift_keys(b, m, k); copy: bkey_copy(m, k); merged: - bch_check_keys(b, "%s for %s at %s: %s", status, - op_type(op), pbtree(b), pkey(k)); - bch_check_key_order_msg(b, i, "%s for %s at %s: %s", status, - op_type(op), pbtree(b), pkey(k)); + if (KEY_DIRTY(k)) + bcache_dev_sectors_dirty_add(b->c, KEY_INODE(k), + KEY_START(k), KEY_SIZE(k)); + + bch_check_keys(b, "%u for %s", status, op_type(op)); if (b->level 
&& !KEY_OFFSET(k)) - b->prio_blocked++; + btree_current_write(b)->prio_blocked++; - pr_debug("%s for %s at %s: %s", status, - op_type(op), pbtree(b), pkey(k)); + trace_bcache_btree_insert_key(b, k, op->type, status); return true; } -bool bch_btree_insert_keys(struct btree *b, struct btree_op *op) +static bool bch_btree_insert_keys(struct btree *b, struct btree_op *op) { bool ret = false; struct bkey *k; @@ -1896,7 +1867,7 @@ bool bch_btree_insert_check_key(struct btree *b, struct btree_op *op, should_split(b)) goto out; - op->replace = KEY(op->inode, bio_end(bio), bio_sectors(bio)); + op->replace = KEY(op->inode, bio_end_sector(bio), bio_sectors(bio)); SET_KEY_PTRS(&op->replace, 1); get_random_bytes(&op->replace.ptr[0], sizeof(uint64_t)); @@ -1907,7 +1878,6 @@ bool bch_btree_insert_check_key(struct btree *b, struct btree_op *op, BUG_ON(op->type != BTREE_INSERT); BUG_ON(!btree_insert_key(b, op, &tmp.k)); - bch_btree_write(b, false, NULL); ret = true; out: downgrade_write(&b->lock); @@ -1929,12 +1899,11 @@ static int btree_split(struct btree *b, struct btree_op *op) split = set_blocks(n1->sets[0].data, n1->c) > (btree_blocks(b) * 4) / 5; - pr_debug("%ssplitting at %s keys %i", split ? 
"" : "not ", - pbtree(b), n1->sets[0].data->keys); - if (split) { unsigned keys = 0; + trace_bcache_btree_node_split(b, n1->sets[0].data->keys); + n2 = bch_btree_node_alloc(b->c, b->level, &op->cl); if (IS_ERR(n2)) goto err_free1; @@ -1967,18 +1936,21 @@ static int btree_split(struct btree *b, struct btree_op *op) bkey_copy_key(&n2->key, &b->key); bch_keylist_add(&op->keys, &n2->key); - bch_btree_write(n2, true, op); + bch_btree_node_write(n2, &op->cl); rw_unlock(true, n2); - } else + } else { + trace_bcache_btree_node_compact(b, n1->sets[0].data->keys); + bch_btree_insert_keys(n1, op); + } bch_keylist_add(&op->keys, &n1->key); - bch_btree_write(n1, true, op); + bch_btree_node_write(n1, &op->cl); if (n3) { bkey_copy_key(&n3->key, &MAX_KEY); bch_btree_insert_keys(n3, op); - bch_btree_write(n3, true, op); + bch_btree_node_write(n3, &op->cl); closure_sync(&op->cl); bch_btree_set_root(n3); @@ -2082,8 +2054,12 @@ static int bch_btree_insert_recurse(struct btree *b, struct btree_op *op, BUG_ON(write_block(b) != b->sets[b->nsets].data); - if (bch_btree_insert_keys(b, op)) - bch_btree_write(b, false, op); + if (bch_btree_insert_keys(b, op)) { + if (!b->level) + bch_btree_leaf_dirty(b, op); + else + bch_btree_node_write(b, &op->cl); + } } return 0; @@ -2140,6 +2116,11 @@ int bch_btree_insert(struct btree_op *op, struct cache_set *c) void bch_btree_set_root(struct btree *b) { unsigned i; + struct closure cl; + + closure_init_stack(&cl); + + trace_bcache_btree_set_root(b); BUG_ON(!b->written); @@ -2153,8 +2134,8 @@ void bch_btree_set_root(struct btree *b) b->c->root = b; __bkey_put(b->c, &b->key); - bch_journal_meta(b->c, NULL); - pr_debug("%s for %pf", pbtree(b), __builtin_return_address(0)); + bch_journal_meta(b->c, &cl); + closure_sync(&cl); } /* Cache lookup */ @@ -2215,9 +2196,6 @@ static int submit_partial_cache_hit(struct btree *b, struct btree_op *op, KEY_OFFSET(k) - bio->bi_sector); n = bch_bio_split(bio, sectors, GFP_NOIO, s->d->bio_split); - if (!n) - return 
-EAGAIN; - if (n == bio) op->lookup_done = true; @@ -2240,7 +2218,6 @@ static int submit_partial_cache_hit(struct btree *b, struct btree_op *op, n->bi_end_io = bch_cache_read_endio; n->bi_private = &s->cl; - trace_bcache_cache_hit(n); __bch_submit_bbio(n, b->c); } @@ -2257,9 +2234,6 @@ int bch_btree_search_recurse(struct btree *b, struct btree_op *op) struct btree_iter iter; bch_btree_iter_init(b, &iter, &KEY(op->inode, bio->bi_sector, 0)); - pr_debug("at %s searching for %u:%llu", pbtree(b), op->inode, - (uint64_t) bio->bi_sector); - do { k = bch_btree_iter_next_filter(&iter, b, bch_ptr_bad); if (!k) { @@ -2303,7 +2277,8 @@ static inline int keybuf_nonoverlapping_cmp(struct keybuf_key *l, } static int bch_btree_refill_keybuf(struct btree *b, struct btree_op *op, - struct keybuf *buf, struct bkey *end) + struct keybuf *buf, struct bkey *end, + keybuf_pred_fn *pred) { struct btree_iter iter; bch_btree_iter_init(b, &iter, &buf->last_scanned); @@ -2322,11 +2297,9 @@ static int bch_btree_refill_keybuf(struct btree *b, struct btree_op *op, if (bkey_cmp(&buf->last_scanned, end) >= 0) break; - if (buf->key_predicate(buf, k)) { + if (pred(buf, k)) { struct keybuf_key *w; - pr_debug("%s", pkey(k)); - spin_lock(&buf->lock); w = array_alloc(&buf->freelist); @@ -2343,7 +2316,7 @@ static int bch_btree_refill_keybuf(struct btree *b, struct btree_op *op, if (!k) break; - btree(refill_keybuf, k, b, op, buf, end); + btree(refill_keybuf, k, b, op, buf, end, pred); /* * Might get an error here, but can't really do anything * and it'll get logged elsewhere. 
Just read what we @@ -2361,7 +2334,7 @@ static int bch_btree_refill_keybuf(struct btree *b, struct btree_op *op, } void bch_refill_keybuf(struct cache_set *c, struct keybuf *buf, - struct bkey *end) + struct bkey *end, keybuf_pred_fn *pred) { struct bkey start = buf->last_scanned; struct btree_op op; @@ -2369,7 +2342,7 @@ void bch_refill_keybuf(struct cache_set *c, struct keybuf *buf, cond_resched(); - btree_root(refill_keybuf, c, &op, buf, end); + btree_root(refill_keybuf, c, &op, buf, end, pred); closure_sync(&op.cl); pr_debug("found %s keys from %llu:%llu to %llu:%llu", @@ -2455,7 +2428,8 @@ struct keybuf_key *bch_keybuf_next(struct keybuf *buf) struct keybuf_key *bch_keybuf_next_rescan(struct cache_set *c, struct keybuf *buf, - struct bkey *end) + struct bkey *end, + keybuf_pred_fn *pred) { struct keybuf_key *ret; @@ -2469,15 +2443,14 @@ struct keybuf_key *bch_keybuf_next_rescan(struct cache_set *c, break; } - bch_refill_keybuf(c, buf, end); + bch_refill_keybuf(c, buf, end, pred); } return ret; } -void bch_keybuf_init(struct keybuf *buf, keybuf_pred_fn *fn) +void bch_keybuf_init(struct keybuf *buf) { - buf->key_predicate = fn; buf->last_scanned = MAX_KEY; buf->keys = RB_ROOT; diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h index af4a7092a28c..3333d3723633 100644 --- a/drivers/md/bcache/btree.h +++ b/drivers/md/bcache/btree.h @@ -102,7 +102,6 @@ #include "debug.h" struct btree_write { - struct closure *owner; atomic_t *journal; /* If btree_split() frees a btree node, it writes a new pointer to that @@ -142,16 +141,12 @@ struct btree { */ struct bset_tree sets[MAX_BSETS]; - /* Used to refcount bio splits, also protects b->bio */ + /* For outstanding btree writes, used as a lock - protects write_idx */ struct closure_with_waitlist io; - /* Gets transferred to w->prio_blocked - see the comment there */ - int prio_blocked; - struct list_head list; struct delayed_work work; - uint64_t io_start_time; struct btree_write writes[2]; struct bio *bio; }; 
@@ -164,13 +159,11 @@ static inline void set_btree_node_ ## flag(struct btree *b) \ { set_bit(BTREE_NODE_ ## flag, &b->flags); } \ enum btree_flags { - BTREE_NODE_read_done, BTREE_NODE_io_error, BTREE_NODE_dirty, BTREE_NODE_write_idx, }; -BTREE_FLAG(read_done); BTREE_FLAG(io_error); BTREE_FLAG(dirty); BTREE_FLAG(write_idx); @@ -278,6 +271,13 @@ struct btree_op { BKEY_PADDED(replace); }; +enum { + BTREE_INSERT_STATUS_INSERT, + BTREE_INSERT_STATUS_BACK_MERGE, + BTREE_INSERT_STATUS_OVERWROTE, + BTREE_INSERT_STATUS_FRONT_MERGE, +}; + void bch_btree_op_init_stack(struct btree_op *); static inline void rw_lock(bool w, struct btree *b, int level) @@ -293,9 +293,7 @@ static inline void rw_unlock(bool w, struct btree *b) #ifdef CONFIG_BCACHE_EDEBUG unsigned i; - if (w && - b->key.ptr[0] && - btree_node_read_done(b)) + if (w && b->key.ptr[0]) for (i = 0; i <= b->nsets; i++) bch_check_key_order(b, b->sets[i].data); #endif @@ -370,9 +368,8 @@ static inline bool should_split(struct btree *b) > btree_blocks(b)); } -void bch_btree_read_done(struct closure *); -void bch_btree_read(struct btree *); -void bch_btree_write(struct btree *b, bool now, struct btree_op *op); +void bch_btree_node_read(struct btree *); +void bch_btree_node_write(struct btree *, struct closure *); void bch_cannibalize_unlock(struct cache_set *, struct closure *); void bch_btree_set_root(struct btree *); @@ -380,7 +377,6 @@ struct btree *bch_btree_node_alloc(struct cache_set *, int, struct closure *); struct btree *bch_btree_node_get(struct cache_set *, struct bkey *, int, struct btree_op *); -bool bch_btree_insert_keys(struct btree *, struct btree_op *); bool bch_btree_insert_check_key(struct btree *, struct btree_op *, struct bio *); int bch_btree_insert(struct btree_op *, struct cache_set *); @@ -393,13 +389,14 @@ void bch_moving_gc(struct closure *); int bch_btree_check(struct cache_set *, struct btree_op *); uint8_t __bch_btree_mark_key(struct cache_set *, int, struct bkey *); -void 
bch_keybuf_init(struct keybuf *, keybuf_pred_fn *); -void bch_refill_keybuf(struct cache_set *, struct keybuf *, struct bkey *); +void bch_keybuf_init(struct keybuf *); +void bch_refill_keybuf(struct cache_set *, struct keybuf *, struct bkey *, + keybuf_pred_fn *); bool bch_keybuf_check_overlapping(struct keybuf *, struct bkey *, struct bkey *); void bch_keybuf_del(struct keybuf *, struct keybuf_key *); struct keybuf_key *bch_keybuf_next(struct keybuf *); -struct keybuf_key *bch_keybuf_next_rescan(struct cache_set *, - struct keybuf *, struct bkey *); +struct keybuf_key *bch_keybuf_next_rescan(struct cache_set *, struct keybuf *, + struct bkey *, keybuf_pred_fn *); #endif diff --git a/drivers/md/bcache/closure.c b/drivers/md/bcache/closure.c index bd05a9a8c7cf..9aba2017f0d1 100644 --- a/drivers/md/bcache/closure.c +++ b/drivers/md/bcache/closure.c @@ -66,16 +66,18 @@ static inline void closure_put_after_sub(struct closure *cl, int flags) } else { struct closure *parent = cl->parent; struct closure_waitlist *wait = closure_waitlist(cl); + closure_fn *destructor = cl->fn; closure_debug_destroy(cl); + smp_mb(); atomic_set(&cl->remaining, -1); if (wait) closure_wake_up(wait); - if (cl->fn) - cl->fn(cl); + if (destructor) + destructor(cl); if (parent) closure_put(parent); diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c index 89fd5204924e..88e6411eab4f 100644 --- a/drivers/md/bcache/debug.c +++ b/drivers/md/bcache/debug.c @@ -47,11 +47,10 @@ const char *bch_ptr_status(struct cache_set *c, const struct bkey *k) return ""; } -struct keyprint_hack bch_pkey(const struct bkey *k) +int bch_bkey_to_text(char *buf, size_t size, const struct bkey *k) { unsigned i = 0; - struct keyprint_hack r; - char *out = r.s, *end = r.s + KEYHACK_SIZE; + char *out = buf, *end = buf + size; #define p(...) 
(out += scnprintf(out, end - out, __VA_ARGS__)) @@ -75,16 +74,14 @@ struct keyprint_hack bch_pkey(const struct bkey *k) if (KEY_CSUM(k)) p(" cs%llu %llx", KEY_CSUM(k), k->ptr[1]); #undef p - return r; + return out - buf; } -struct keyprint_hack bch_pbtree(const struct btree *b) +int bch_btree_to_text(char *buf, size_t size, const struct btree *b) { - struct keyprint_hack r; - - snprintf(r.s, 40, "%zu level %i/%i", PTR_BUCKET_NR(b->c, &b->key, 0), - b->level, b->c->root ? b->c->root->level : -1); - return r; + return scnprintf(buf, size, "%zu level %i/%i", + PTR_BUCKET_NR(b->c, &b->key, 0), + b->level, b->c->root ? b->c->root->level : -1); } #if defined(CONFIG_BCACHE_DEBUG) || defined(CONFIG_BCACHE_EDEBUG) @@ -100,10 +97,12 @@ static void dump_bset(struct btree *b, struct bset *i) { struct bkey *k; unsigned j; + char buf[80]; for (k = i->start; k < end(i); k = bkey_next(k)) { + bch_bkey_to_text(buf, sizeof(buf), k); printk(KERN_ERR "block %zu key %zi/%u: %s", index(i, b), - (uint64_t *) k - i->d, i->keys, pkey(k)); + (uint64_t *) k - i->d, i->keys, buf); for (j = 0; j < KEY_PTRS(k); j++) { size_t n = PTR_BUCKET_NR(b->c, k, j); @@ -144,7 +143,7 @@ void bch_btree_verify(struct btree *b, struct bset *new) v->written = 0; v->level = b->level; - bch_btree_read(v); + bch_btree_node_read(v); closure_wait_event(&v->io.wait, &cl, atomic_read(&b->io.cl.remaining) == -1); @@ -200,7 +199,7 @@ void bch_data_verify(struct search *s) if (!check) return; - if (bch_bio_alloc_pages(check, GFP_NOIO)) + if (bio_alloc_pages(check, GFP_NOIO)) goto out_put; check->bi_rw = READ_SYNC; @@ -252,6 +251,7 @@ static void vdump_bucket_and_panic(struct btree *b, const char *fmt, va_list args) { unsigned i; + char buf[80]; console_lock(); @@ -262,7 +262,8 @@ static void vdump_bucket_and_panic(struct btree *b, const char *fmt, console_unlock(); - panic("at %s\n", pbtree(b)); + bch_btree_to_text(buf, sizeof(buf), b); + panic("at %s\n", buf); } void bch_check_key_order_msg(struct btree *b, struct bset 
*i, @@ -337,6 +338,7 @@ static ssize_t bch_dump_read(struct file *file, char __user *buf, { struct dump_iterator *i = file->private_data; ssize_t ret = 0; + char kbuf[80]; while (size) { struct keybuf_key *w; @@ -355,11 +357,12 @@ static ssize_t bch_dump_read(struct file *file, char __user *buf, if (i->bytes) break; - w = bch_keybuf_next_rescan(i->c, &i->keys, &MAX_KEY); + w = bch_keybuf_next_rescan(i->c, &i->keys, &MAX_KEY, dump_pred); if (!w) break; - i->bytes = snprintf(i->buf, PAGE_SIZE, "%s\n", pkey(&w->key)); + bch_bkey_to_text(kbuf, sizeof(kbuf), &w->key); + i->bytes = snprintf(i->buf, PAGE_SIZE, "%s\n", kbuf); bch_keybuf_del(&i->keys, w); } @@ -377,7 +380,7 @@ static int bch_dump_open(struct inode *inode, struct file *file) file->private_data = i; i->c = c; - bch_keybuf_init(&i->keys, dump_pred); + bch_keybuf_init(&i->keys); i->keys.last_scanned = KEY(0, 0, 0); return 0; @@ -409,142 +412,6 @@ void bch_debug_init_cache_set(struct cache_set *c) #endif -/* Fuzz tester has rotted: */ -#if 0 - -static ssize_t btree_fuzz(struct kobject *k, struct kobj_attribute *a, - const char *buffer, size_t size) -{ - void dump(struct btree *b) - { - struct bset *i; - - for (i = b->sets[0].data; - index(i, b) < btree_blocks(b) && - i->seq == b->sets[0].data->seq; - i = ((void *) i) + set_blocks(i, b->c) * block_bytes(b->c)) - dump_bset(b, i); - } - - struct cache_sb *sb; - struct cache_set *c; - struct btree *all[3], *b, *fill, *orig; - int j; - - struct btree_op op; - bch_btree_op_init_stack(&op); - - sb = kzalloc(sizeof(struct cache_sb), GFP_KERNEL); - if (!sb) - return -ENOMEM; - - sb->bucket_size = 128; - sb->block_size = 4; - - c = bch_cache_set_alloc(sb); - if (!c) - return -ENOMEM; - - for (j = 0; j < 3; j++) { - BUG_ON(list_empty(&c->btree_cache)); - all[j] = list_first_entry(&c->btree_cache, struct btree, list); - list_del_init(&all[j]->list); - - all[j]->key = KEY(0, 0, c->sb.bucket_size); - bkey_copy_key(&all[j]->key, &MAX_KEY); - } - - b = all[0]; - fill = all[1]; 
- orig = all[2]; - - while (1) { - for (j = 0; j < 3; j++) - all[j]->written = all[j]->nsets = 0; - - bch_bset_init_next(b); - - while (1) { - struct bset *i = write_block(b); - struct bkey *k = op.keys.top; - unsigned rand; - - bkey_init(k); - rand = get_random_int(); - - op.type = rand & 1 - ? BTREE_INSERT - : BTREE_REPLACE; - rand >>= 1; - - SET_KEY_SIZE(k, bucket_remainder(c, rand)); - rand >>= c->bucket_bits; - rand &= 1024 * 512 - 1; - rand += c->sb.bucket_size; - SET_KEY_OFFSET(k, rand); -#if 0 - SET_KEY_PTRS(k, 1); -#endif - bch_keylist_push(&op.keys); - bch_btree_insert_keys(b, &op); - - if (should_split(b) || - set_blocks(i, b->c) != - __set_blocks(i, i->keys + 15, b->c)) { - i->csum = csum_set(i); - - memcpy(write_block(fill), - i, set_bytes(i)); - - b->written += set_blocks(i, b->c); - fill->written = b->written; - if (b->written == btree_blocks(b)) - break; - - bch_btree_sort_lazy(b); - bch_bset_init_next(b); - } - } - - memcpy(orig->sets[0].data, - fill->sets[0].data, - btree_bytes(c)); - - bch_btree_sort(b); - fill->written = 0; - bch_btree_read_done(&fill->io.cl); - - if (b->sets[0].data->keys != fill->sets[0].data->keys || - memcmp(b->sets[0].data->start, - fill->sets[0].data->start, - b->sets[0].data->keys * sizeof(uint64_t))) { - struct bset *i = b->sets[0].data; - struct bkey *k, *l; - - for (k = i->start, - l = fill->sets[0].data->start; - k < end(i); - k = bkey_next(k), l = bkey_next(l)) - if (bkey_cmp(k, l) || - KEY_SIZE(k) != KEY_SIZE(l)) - pr_err("key %zi differs: %s != %s", - (uint64_t *) k - i->d, - pkey(k), pkey(l)); - - for (j = 0; j < 3; j++) { - pr_err("**** Set %i ****", j); - dump(all[j]); - } - panic("\n"); - } - - pr_info("fuzz complete: %i keys", b->sets[0].data->keys); - } -} - -kobj_attribute_write(fuzz, btree_fuzz); -#endif - void bch_debug_exit(void) { if (!IS_ERR_OR_NULL(debug)) @@ -554,11 +421,6 @@ void bch_debug_exit(void) int __init bch_debug_init(struct kobject *kobj) { int ret = 0; -#if 0 - ret = sysfs_create_file(kobj, 
&ksysfs_fuzz.attr); - if (ret) - return ret; -#endif debug = debugfs_create_dir("bcache", NULL); return ret; diff --git a/drivers/md/bcache/debug.h b/drivers/md/bcache/debug.h index f9378a218148..1c39b5a2489b 100644 --- a/drivers/md/bcache/debug.h +++ b/drivers/md/bcache/debug.h @@ -3,15 +3,8 @@ /* Btree/bkey debug printing */ -#define KEYHACK_SIZE 80 -struct keyprint_hack { - char s[KEYHACK_SIZE]; -}; - -struct keyprint_hack bch_pkey(const struct bkey *k); -struct keyprint_hack bch_pbtree(const struct btree *b); -#define pkey(k) (&bch_pkey(k).s[0]) -#define pbtree(b) (&bch_pbtree(b).s[0]) +int bch_bkey_to_text(char *buf, size_t size, const struct bkey *k); +int bch_btree_to_text(char *buf, size_t size, const struct btree *b); #ifdef CONFIG_BCACHE_EDEBUG diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c index 48efd4dea645..9056632995b1 100644 --- a/drivers/md/bcache/io.c +++ b/drivers/md/bcache/io.c @@ -9,6 +9,8 @@ #include "bset.h" #include "debug.h" +#include <linux/blkdev.h> + static void bch_bi_idx_hack_endio(struct bio *bio, int error) { struct bio *p = bio->bi_private; @@ -66,13 +68,6 @@ static void bch_generic_make_request_hack(struct bio *bio) * The newly allocated bio will point to @bio's bi_io_vec, if the split was on a * bvec boundry; it is the caller's responsibility to ensure that @bio is not * freed before the split. - * - * If bch_bio_split() is running under generic_make_request(), it's not safe to - * allocate more than one bio from the same bio set. Therefore, if it is running - * under generic_make_request() it masks out __GFP_WAIT when doing the - * allocation. The caller must check for failure if there's any possibility of - * it being called from under generic_make_request(); it is then the caller's - * responsibility to retry from a safe context (by e.g. punting to workqueue). 
*/ struct bio *bch_bio_split(struct bio *bio, int sectors, gfp_t gfp, struct bio_set *bs) @@ -83,20 +78,13 @@ struct bio *bch_bio_split(struct bio *bio, int sectors, BUG_ON(sectors <= 0); - /* - * If we're being called from underneath generic_make_request() and we - * already allocated any bios from this bio set, we risk deadlock if we - * use the mempool. So instead, we possibly fail and let the caller punt - * to workqueue or somesuch and retry in a safe context. - */ - if (current->bio_list) - gfp &= ~__GFP_WAIT; - if (sectors >= bio_sectors(bio)) return bio; if (bio->bi_rw & REQ_DISCARD) { ret = bio_alloc_bioset(gfp, 1, bs); + if (!ret) + return NULL; idx = 0; goto out; } @@ -160,17 +148,18 @@ static unsigned bch_bio_max_sectors(struct bio *bio) struct request_queue *q = bdev_get_queue(bio->bi_bdev); unsigned max_segments = min_t(unsigned, BIO_MAX_PAGES, queue_max_segments(q)); - struct bio_vec *bv, *end = bio_iovec(bio) + - min_t(int, bio_segments(bio), max_segments); if (bio->bi_rw & REQ_DISCARD) return min(ret, q->limits.max_discard_sectors); if (bio_segments(bio) > max_segments || q->merge_bvec_fn) { + struct bio_vec *bv; + int i, seg = 0; + ret = 0; - for (bv = bio_iovec(bio); bv < end; bv++) { + bio_for_each_segment(bv, bio, i) { struct bvec_merge_data bvm = { .bi_bdev = bio->bi_bdev, .bi_sector = bio->bi_sector, @@ -178,10 +167,14 @@ static unsigned bch_bio_max_sectors(struct bio *bio) .bi_rw = bio->bi_rw, }; + if (seg == max_segments) + break; + if (q->merge_bvec_fn && q->merge_bvec_fn(q, &bvm, bv) < (int) bv->bv_len) break; + seg++; ret += bv->bv_len >> 9; } } @@ -218,30 +211,10 @@ static void bch_bio_submit_split_endio(struct bio *bio, int error) closure_put(cl); } -static void __bch_bio_submit_split(struct closure *cl) -{ - struct bio_split_hook *s = container_of(cl, struct bio_split_hook, cl); - struct bio *bio = s->bio, *n; - - do { - n = bch_bio_split(bio, bch_bio_max_sectors(bio), - GFP_NOIO, s->p->bio_split); - if (!n) - continue_at(cl, 
__bch_bio_submit_split, system_wq); - - n->bi_end_io = bch_bio_submit_split_endio; - n->bi_private = cl; - - closure_get(cl); - bch_generic_make_request_hack(n); - } while (n != bio); - - continue_at(cl, bch_bio_submit_split_done, NULL); -} - void bch_generic_make_request(struct bio *bio, struct bio_split_pool *p) { struct bio_split_hook *s; + struct bio *n; if (!bio_has_data(bio) && !(bio->bi_rw & REQ_DISCARD)) goto submit; @@ -250,6 +223,7 @@ void bch_generic_make_request(struct bio *bio, struct bio_split_pool *p) goto submit; s = mempool_alloc(p->bio_split_hook, GFP_NOIO); + closure_init(&s->cl, NULL); s->bio = bio; s->p = p; @@ -257,8 +231,18 @@ void bch_generic_make_request(struct bio *bio, struct bio_split_pool *p) s->bi_private = bio->bi_private; bio_get(bio); - closure_call(&s->cl, __bch_bio_submit_split, NULL, NULL); - return; + do { + n = bch_bio_split(bio, bch_bio_max_sectors(bio), + GFP_NOIO, s->p->bio_split); + + n->bi_end_io = bch_bio_submit_split_endio; + n->bi_private = &s->cl; + + closure_get(&s->cl); + bch_generic_make_request_hack(n); + } while (n != bio); + + continue_at(&s->cl, bch_bio_submit_split_done, NULL); submit: bch_generic_make_request_hack(bio); } diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index 8c8dfdcd9d4c..ba95ab84b2be 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -9,6 +9,8 @@ #include "debug.h" #include "request.h" +#include <trace/events/bcache.h> + /* * Journal replay/recovery: * @@ -182,9 +184,14 @@ bsearch: pr_debug("starting binary search, l %u r %u", l, r); while (l + 1 < r) { + seq = list_entry(list->prev, struct journal_replay, + list)->j.seq; + m = (l + r) >> 1; + read_bucket(m); - if (read_bucket(m)) + if (seq != list_entry(list->prev, struct journal_replay, + list)->j.seq) l = m; else r = m; @@ -300,7 +307,8 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list, for (k = i->j.start; k < end(&i->j); k = bkey_next(k)) { - pr_debug("%s", pkey(k)); + 
trace_bcache_journal_replay_key(k); + bkey_copy(op->keys.top, k); bch_keylist_push(&op->keys); @@ -384,7 +392,7 @@ out: return; found: if (btree_node_dirty(best)) - bch_btree_write(best, true, NULL); + bch_btree_node_write(best, NULL); rw_unlock(true, best); } @@ -617,7 +625,7 @@ static void journal_write_unlocked(struct closure *cl) bio_reset(bio); bio->bi_sector = PTR_OFFSET(k, i); bio->bi_bdev = ca->bdev; - bio->bi_rw = REQ_WRITE|REQ_SYNC|REQ_META|REQ_FLUSH; + bio->bi_rw = REQ_WRITE|REQ_SYNC|REQ_META|REQ_FLUSH|REQ_FUA; bio->bi_size = sectors << 9; bio->bi_end_io = journal_write_endio; @@ -712,7 +720,8 @@ void bch_journal(struct closure *cl) spin_lock(&c->journal.lock); if (journal_full(&c->journal)) { - /* XXX: tracepoint */ + trace_bcache_journal_full(c); + closure_wait(&c->journal.wait, cl); journal_reclaim(c); @@ -728,13 +737,15 @@ void bch_journal(struct closure *cl) if (b * c->sb.block_size > PAGE_SECTORS << JSET_BITS || b > c->journal.blocks_free) { - /* XXX: If we were inserting so many keys that they won't fit in + trace_bcache_journal_entry_full(c); + + /* + * XXX: If we were inserting so many keys that they won't fit in * an _empty_ journal write, we'll deadlock. For now, handle * this in bch_keylist_realloc() - but something to think about. 
*/ BUG_ON(!w->data->keys); - /* XXX: tracepoint */ BUG_ON(!closure_wait(&w->wait, cl)); closure_flush(&c->journal.io); diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c index 8589512c972e..1a3b4f4786c3 100644 --- a/drivers/md/bcache/movinggc.c +++ b/drivers/md/bcache/movinggc.c @@ -9,6 +9,8 @@ #include "debug.h" #include "request.h" +#include <trace/events/bcache.h> + struct moving_io { struct keybuf_key *w; struct search s; @@ -44,14 +46,14 @@ static void write_moving_finish(struct closure *cl) { struct moving_io *io = container_of(cl, struct moving_io, s.cl); struct bio *bio = &io->bio.bio; - struct bio_vec *bv = bio_iovec_idx(bio, bio->bi_vcnt); + struct bio_vec *bv; + int i; - while (bv-- != bio->bi_io_vec) + bio_for_each_segment_all(bv, bio, i) __free_page(bv->bv_page); - pr_debug("%s %s", io->s.op.insert_collision - ? "collision moving" : "moved", - pkey(&io->w->key)); + if (io->s.op.insert_collision) + trace_bcache_gc_copy_collision(&io->w->key); bch_keybuf_del(&io->s.op.c->moving_gc_keys, io->w); @@ -94,8 +96,6 @@ static void write_moving(struct closure *cl) struct moving_io *io = container_of(s, struct moving_io, s); if (!s->error) { - trace_bcache_write_moving(&io->bio.bio); - moving_init(io); io->bio.bio.bi_sector = KEY_START(&io->w->key); @@ -122,7 +122,6 @@ static void read_moving_submit(struct closure *cl) struct moving_io *io = container_of(s, struct moving_io, s); struct bio *bio = &io->bio.bio; - trace_bcache_read_moving(bio); bch_submit_bbio(bio, s->op.c, &io->w->key, 0); continue_at(cl, write_moving, bch_gc_wq); @@ -138,7 +137,8 @@ static void read_moving(struct closure *cl) /* XXX: if we error, background writeback could stall indefinitely */ while (!test_bit(CACHE_SET_STOPPING, &c->flags)) { - w = bch_keybuf_next_rescan(c, &c->moving_gc_keys, &MAX_KEY); + w = bch_keybuf_next_rescan(c, &c->moving_gc_keys, + &MAX_KEY, moving_pred); if (!w) break; @@ -159,10 +159,10 @@ static void read_moving(struct closure *cl) bio->bi_rw = 
READ; bio->bi_end_io = read_moving_endio; - if (bch_bio_alloc_pages(bio, GFP_KERNEL)) + if (bio_alloc_pages(bio, GFP_KERNEL)) goto err; - pr_debug("%s", pkey(&w->key)); + trace_bcache_gc_copy(&w->key); closure_call(&io->s.cl, read_moving_submit, NULL, &c->gc.cl); @@ -250,5 +250,5 @@ void bch_moving_gc(struct closure *cl) void bch_moving_init_cache_set(struct cache_set *c) { - bch_keybuf_init(&c->moving_gc_keys, moving_pred); + bch_keybuf_init(&c->moving_gc_keys); } diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index e5ff12e52d5b..786a1a4f74d8 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -10,6 +10,7 @@ #include "btree.h" #include "debug.h" #include "request.h" +#include "writeback.h" #include <linux/cgroup.h> #include <linux/module.h> @@ -21,8 +22,6 @@ #define CUTOFF_CACHE_ADD 95 #define CUTOFF_CACHE_READA 90 -#define CUTOFF_WRITEBACK 50 -#define CUTOFF_WRITEBACK_SYNC 75 struct kmem_cache *bch_search_cache; @@ -489,6 +488,12 @@ static void bch_insert_data_loop(struct closure *cl) bch_queue_gc(op->c); } + /* + * Journal writes are marked REQ_FLUSH; if the original write was a + * flush, it'll wait on the journal write. 
+ */ + bio->bi_rw &= ~(REQ_FLUSH|REQ_FUA); + do { unsigned i; struct bkey *k; @@ -510,10 +515,6 @@ static void bch_insert_data_loop(struct closure *cl) goto err; n = bch_bio_split(bio, KEY_SIZE(k), GFP_NOIO, split); - if (!n) { - __bkey_put(op->c, k); - continue_at(cl, bch_insert_data_loop, bcache_wq); - } n->bi_end_io = bch_insert_data_endio; n->bi_private = cl; @@ -530,10 +531,9 @@ static void bch_insert_data_loop(struct closure *cl) if (KEY_CSUM(k)) bio_csum(n, k); - pr_debug("%s", pkey(k)); + trace_bcache_cache_insert(k); bch_keylist_push(&op->keys); - trace_bcache_cache_insert(n, n->bi_sector, n->bi_bdev); n->bi_rw |= REQ_WRITE; bch_submit_bbio(n, op->c, k, 0); } while (n != bio); @@ -716,7 +716,7 @@ static struct search *search_alloc(struct bio *bio, struct bcache_device *d) s->task = current; s->orig_bio = bio; s->write = (bio->bi_rw & REQ_WRITE) != 0; - s->op.flush_journal = (bio->bi_rw & REQ_FLUSH) != 0; + s->op.flush_journal = (bio->bi_rw & (REQ_FLUSH|REQ_FUA)) != 0; s->op.skip = (bio->bi_rw & REQ_DISCARD) != 0; s->recoverable = 1; s->start_time = jiffies; @@ -784,11 +784,8 @@ static void request_read_error(struct closure *cl) int i; if (s->recoverable) { - /* The cache read failed, but we can retry from the backing - * device. 
- */ - pr_debug("recovering at sector %llu", - (uint64_t) s->orig_bio->bi_sector); + /* Retry from the backing device: */ + trace_bcache_read_retry(s->orig_bio); s->error = 0; bv = s->bio.bio.bi_io_vec; @@ -806,7 +803,6 @@ static void request_read_error(struct closure *cl) /* XXX: invalidate cache */ - trace_bcache_read_retry(&s->bio.bio); closure_bio_submit(&s->bio.bio, &s->cl, s->d); } @@ -827,53 +823,13 @@ static void request_read_done(struct closure *cl) */ if (s->op.cache_bio) { - struct bio_vec *src, *dst; - unsigned src_offset, dst_offset, bytes; - void *dst_ptr; - bio_reset(s->op.cache_bio); s->op.cache_bio->bi_sector = s->cache_miss->bi_sector; s->op.cache_bio->bi_bdev = s->cache_miss->bi_bdev; s->op.cache_bio->bi_size = s->cache_bio_sectors << 9; bch_bio_map(s->op.cache_bio, NULL); - src = bio_iovec(s->op.cache_bio); - dst = bio_iovec(s->cache_miss); - src_offset = src->bv_offset; - dst_offset = dst->bv_offset; - dst_ptr = kmap(dst->bv_page); - - while (1) { - if (dst_offset == dst->bv_offset + dst->bv_len) { - kunmap(dst->bv_page); - dst++; - if (dst == bio_iovec_idx(s->cache_miss, - s->cache_miss->bi_vcnt)) - break; - - dst_offset = dst->bv_offset; - dst_ptr = kmap(dst->bv_page); - } - - if (src_offset == src->bv_offset + src->bv_len) { - src++; - if (src == bio_iovec_idx(s->op.cache_bio, - s->op.cache_bio->bi_vcnt)) - BUG(); - - src_offset = src->bv_offset; - } - - bytes = min(dst->bv_offset + dst->bv_len - dst_offset, - src->bv_offset + src->bv_len - src_offset); - - memcpy(dst_ptr + dst_offset, - page_address(src->bv_page) + src_offset, - bytes); - - src_offset += bytes; - dst_offset += bytes; - } + bio_copy_data(s->cache_miss, s->op.cache_bio); bio_put(s->cache_miss); s->cache_miss = NULL; @@ -899,6 +855,7 @@ static void request_read_done_bh(struct closure *cl) struct cached_dev *dc = container_of(s->d, struct cached_dev, disk); bch_mark_cache_accounting(s, !s->cache_miss, s->op.skip); + trace_bcache_read(s->orig_bio, !s->cache_miss, s->op.skip); if 
(s->error) continue_at_nobarrier(cl, request_read_error, bcache_wq); @@ -917,9 +874,6 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s, struct bio *miss; miss = bch_bio_split(bio, sectors, GFP_NOIO, s->d->bio_split); - if (!miss) - return -EAGAIN; - if (miss == bio) s->op.lookup_done = true; @@ -938,8 +892,9 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s, reada = min(dc->readahead >> 9, sectors - bio_sectors(miss)); - if (bio_end(miss) + reada > bdev_sectors(miss->bi_bdev)) - reada = bdev_sectors(miss->bi_bdev) - bio_end(miss); + if (bio_end_sector(miss) + reada > bdev_sectors(miss->bi_bdev)) + reada = bdev_sectors(miss->bi_bdev) - + bio_end_sector(miss); } s->cache_bio_sectors = bio_sectors(miss) + reada; @@ -963,13 +918,12 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s, goto out_put; bch_bio_map(s->op.cache_bio, NULL); - if (bch_bio_alloc_pages(s->op.cache_bio, __GFP_NOWARN|GFP_NOIO)) + if (bio_alloc_pages(s->op.cache_bio, __GFP_NOWARN|GFP_NOIO)) goto out_put; s->cache_miss = miss; bio_get(s->op.cache_bio); - trace_bcache_cache_miss(s->orig_bio); closure_bio_submit(s->op.cache_bio, &s->cl, s->d); return ret; @@ -1002,24 +956,13 @@ static void cached_dev_write_complete(struct closure *cl) cached_dev_bio_complete(cl); } -static bool should_writeback(struct cached_dev *dc, struct bio *bio) -{ - unsigned threshold = (bio->bi_rw & REQ_SYNC) - ? 
CUTOFF_WRITEBACK_SYNC - : CUTOFF_WRITEBACK; - - return !atomic_read(&dc->disk.detaching) && - cache_mode(dc, bio) == CACHE_MODE_WRITEBACK && - dc->disk.c->gc_stats.in_use < threshold; -} - static void request_write(struct cached_dev *dc, struct search *s) { struct closure *cl = &s->cl; struct bio *bio = &s->bio.bio; struct bkey start, end; start = KEY(dc->disk.id, bio->bi_sector, 0); - end = KEY(dc->disk.id, bio_end(bio), 0); + end = KEY(dc->disk.id, bio_end_sector(bio), 0); bch_keybuf_check_overlapping(&s->op.c->moving_gc_keys, &start, &end); @@ -1034,22 +977,37 @@ static void request_write(struct cached_dev *dc, struct search *s) if (bio->bi_rw & REQ_DISCARD) goto skip; + if (should_writeback(dc, s->orig_bio, + cache_mode(dc, bio), + s->op.skip)) { + s->op.skip = false; + s->writeback = true; + } + if (s->op.skip) goto skip; - if (should_writeback(dc, s->orig_bio)) - s->writeback = true; + trace_bcache_write(s->orig_bio, s->writeback, s->op.skip); if (!s->writeback) { s->op.cache_bio = bio_clone_bioset(bio, GFP_NOIO, dc->disk.bio_split); - trace_bcache_writethrough(s->orig_bio); closure_bio_submit(bio, cl, s->d); } else { - s->op.cache_bio = bio; - trace_bcache_writeback(s->orig_bio); - bch_writeback_add(dc, bio_sectors(bio)); + bch_writeback_add(dc); + + if (s->op.flush_journal) { + /* Also need to send a flush to the backing device */ + s->op.cache_bio = bio_clone_bioset(bio, GFP_NOIO, + dc->disk.bio_split); + + bio->bi_size = 0; + bio->bi_vcnt = 0; + closure_bio_submit(bio, cl, s->d); + } else { + s->op.cache_bio = bio; + } } out: closure_call(&s->op.cl, bch_insert_data, NULL, cl); @@ -1058,7 +1016,6 @@ skip: s->op.skip = true; s->op.cache_bio = s->orig_bio; bio_get(s->op.cache_bio); - trace_bcache_write_skip(s->orig_bio); if ((bio->bi_rw & REQ_DISCARD) && !blk_queue_discard(bdev_get_queue(dc->bdev))) @@ -1088,9 +1045,10 @@ static void request_nodata(struct cached_dev *dc, struct search *s) /* Cached devices - read & write stuff */ -int 
bch_get_congested(struct cache_set *c) +unsigned bch_get_congested(struct cache_set *c) { int i; + long rand; if (!c->congested_read_threshold_us && !c->congested_write_threshold_us) @@ -1106,7 +1064,13 @@ int bch_get_congested(struct cache_set *c) i += CONGESTED_MAX; - return i <= 0 ? 1 : fract_exp_two(i, 6); + if (i > 0) + i = fract_exp_two(i, 6); + + rand = get_random_int(); + i -= bitmap_weight(&rand, BITS_PER_LONG); + + return i > 0 ? i : 1; } static void add_sequential(struct task_struct *t) @@ -1126,10 +1090,8 @@ static void check_should_skip(struct cached_dev *dc, struct search *s) { struct cache_set *c = s->op.c; struct bio *bio = &s->bio.bio; - - long rand; - int cutoff = bch_get_congested(c); unsigned mode = cache_mode(dc, bio); + unsigned sectors, congested = bch_get_congested(c); if (atomic_read(&dc->disk.detaching) || c->gc_stats.in_use > CUTOFF_CACHE_ADD || @@ -1147,17 +1109,14 @@ static void check_should_skip(struct cached_dev *dc, struct search *s) goto skip; } - if (!cutoff) { - cutoff = dc->sequential_cutoff >> 9; + if (!congested && !dc->sequential_cutoff) + goto rescale; - if (!cutoff) - goto rescale; - - if (mode == CACHE_MODE_WRITEBACK && - (bio->bi_rw & REQ_WRITE) && - (bio->bi_rw & REQ_SYNC)) - goto rescale; - } + if (!congested && + mode == CACHE_MODE_WRITEBACK && + (bio->bi_rw & REQ_WRITE) && + (bio->bi_rw & REQ_SYNC)) + goto rescale; if (dc->sequential_merge) { struct io *i; @@ -1177,7 +1136,7 @@ found: if (i->sequential + bio->bi_size > i->sequential) i->sequential += bio->bi_size; - i->last = bio_end(bio); + i->last = bio_end_sector(bio); i->jiffies = jiffies + msecs_to_jiffies(5000); s->task->sequential_io = i->sequential; @@ -1192,12 +1151,19 @@ found: add_sequential(s->task); } - rand = get_random_int(); - cutoff -= bitmap_weight(&rand, BITS_PER_LONG); + sectors = max(s->task->sequential_io, + s->task->sequential_io_avg) >> 9; - if (cutoff <= (int) (max(s->task->sequential_io, - s->task->sequential_io_avg) >> 9)) + if 
(dc->sequential_cutoff && + sectors >= dc->sequential_cutoff >> 9) { + trace_bcache_bypass_sequential(s->orig_bio); goto skip; + } + + if (congested && sectors >= congested) { + trace_bcache_bypass_congested(s->orig_bio); + goto skip; + } rescale: bch_rescale_priorities(c, bio_sectors(bio)); @@ -1288,30 +1254,25 @@ void bch_cached_dev_request_init(struct cached_dev *dc) static int flash_dev_cache_miss(struct btree *b, struct search *s, struct bio *bio, unsigned sectors) { + struct bio_vec *bv; + int i; + /* Zero fill bio */ - while (bio->bi_idx != bio->bi_vcnt) { - struct bio_vec *bv = bio_iovec(bio); + bio_for_each_segment(bv, bio, i) { unsigned j = min(bv->bv_len >> 9, sectors); void *p = kmap(bv->bv_page); memset(p + bv->bv_offset, 0, j << 9); kunmap(bv->bv_page); - bv->bv_len -= j << 9; - bv->bv_offset += j << 9; - - if (bv->bv_len) - return 0; - - bio->bi_sector += j; - bio->bi_size -= j << 9; - - bio->bi_idx++; - sectors -= j; + sectors -= j; } - s->op.lookup_done = true; + bio_advance(bio, min(sectors << 9, bio->bi_size)); + + if (!bio->bi_size) + s->op.lookup_done = true; return 0; } @@ -1338,8 +1299,8 @@ static void flash_dev_make_request(struct request_queue *q, struct bio *bio) closure_call(&s->op.cl, btree_read_async, NULL, cl); } else if (bio_has_data(bio) || s->op.skip) { bch_keybuf_check_overlapping(&s->op.c->moving_gc_keys, - &KEY(d->id, bio->bi_sector, 0), - &KEY(d->id, bio_end(bio), 0)); + &KEY(d->id, bio->bi_sector, 0), + &KEY(d->id, bio_end_sector(bio), 0)); s->writeback = true; s->op.cache_bio = bio; diff --git a/drivers/md/bcache/request.h b/drivers/md/bcache/request.h index 254d9ab5707c..57dc4784f4f4 100644 --- a/drivers/md/bcache/request.h +++ b/drivers/md/bcache/request.h @@ -30,7 +30,7 @@ struct search { }; void bch_cache_read_endio(struct bio *, int); -int bch_get_congested(struct cache_set *); +unsigned bch_get_congested(struct cache_set *); void bch_insert_data(struct closure *cl); void bch_btree_insert_async(struct closure *); void 
bch_cache_read_endio(struct bio *, int); diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index f88e2b653a3f..547c4c57b052 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -10,10 +10,13 @@ #include "btree.h" #include "debug.h" #include "request.h" +#include "writeback.h" +#include <linux/blkdev.h> #include <linux/buffer_head.h> #include <linux/debugfs.h> #include <linux/genhd.h> +#include <linux/kthread.h> #include <linux/module.h> #include <linux/random.h> #include <linux/reboot.h> @@ -342,6 +345,7 @@ static void uuid_io(struct cache_set *c, unsigned long rw, struct closure *cl = &c->uuid_write.cl; struct uuid_entry *u; unsigned i; + char buf[80]; BUG_ON(!parent); closure_lock(&c->uuid_write, parent); @@ -362,8 +366,8 @@ static void uuid_io(struct cache_set *c, unsigned long rw, break; } - pr_debug("%s UUIDs at %s", rw & REQ_WRITE ? "wrote" : "read", - pkey(&c->uuid_bucket)); + bch_bkey_to_text(buf, sizeof(buf), k); + pr_debug("%s UUIDs at %s", rw & REQ_WRITE ? 
"wrote" : "read", buf); for (u = c->uuids; u < c->uuids + c->nr_uuids; u++) if (!bch_is_zero(u->uuid, 16)) @@ -543,7 +547,6 @@ void bch_prio_write(struct cache *ca) pr_debug("free %zu, free_inc %zu, unused %zu", fifo_used(&ca->free), fifo_used(&ca->free_inc), fifo_used(&ca->unused)); - blktrace_msg(ca, "Starting priorities: " buckets_free(ca)); for (i = prio_buckets(ca) - 1; i >= 0; --i) { long bucket; @@ -704,7 +707,8 @@ static void bcache_device_detach(struct bcache_device *d) atomic_set(&d->detaching, 0); } - bcache_device_unlink(d); + if (!d->flush_done) + bcache_device_unlink(d); d->c->devices[d->id] = NULL; closure_put(&d->c->caching); @@ -743,13 +747,35 @@ static void bcache_device_free(struct bcache_device *d) mempool_destroy(d->unaligned_bvec); if (d->bio_split) bioset_free(d->bio_split); + if (is_vmalloc_addr(d->stripe_sectors_dirty)) + vfree(d->stripe_sectors_dirty); + else + kfree(d->stripe_sectors_dirty); closure_debug_destroy(&d->cl); } -static int bcache_device_init(struct bcache_device *d, unsigned block_size) +static int bcache_device_init(struct bcache_device *d, unsigned block_size, + sector_t sectors) { struct request_queue *q; + size_t n; + + if (!d->stripe_size_bits) + d->stripe_size_bits = 31; + + d->nr_stripes = round_up(sectors, 1 << d->stripe_size_bits) >> + d->stripe_size_bits; + + if (!d->nr_stripes || d->nr_stripes > SIZE_MAX / sizeof(atomic_t)) + return -ENOMEM; + + n = d->nr_stripes * sizeof(atomic_t); + d->stripe_sectors_dirty = n < PAGE_SIZE << 6 + ? 
kzalloc(n, GFP_KERNEL) + : vzalloc(n); + if (!d->stripe_sectors_dirty) + return -ENOMEM; if (!(d->bio_split = bioset_create(4, offsetof(struct bbio, bio))) || !(d->unaligned_bvec = mempool_create_kmalloc_pool(1, @@ -759,6 +785,7 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size) !(q = blk_alloc_queue(GFP_KERNEL))) return -ENOMEM; + set_capacity(d->disk, sectors); snprintf(d->disk->disk_name, DISK_NAME_LEN, "bcache%i", bcache_minor); d->disk->major = bcache_major; @@ -781,6 +808,8 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size) set_bit(QUEUE_FLAG_NONROT, &d->disk->queue->queue_flags); set_bit(QUEUE_FLAG_DISCARD, &d->disk->queue->queue_flags); + blk_queue_flush(q, REQ_FLUSH|REQ_FUA); + return 0; } @@ -800,6 +829,17 @@ static void calc_cached_dev_sectors(struct cache_set *c) void bch_cached_dev_run(struct cached_dev *dc) { struct bcache_device *d = &dc->disk; + char buf[SB_LABEL_SIZE + 1]; + char *env[] = { + "DRIVER=bcache", + kasprintf(GFP_KERNEL, "CACHED_UUID=%pU", dc->sb.uuid), + NULL, + NULL, + }; + + memcpy(buf, dc->sb.label, SB_LABEL_SIZE); + buf[SB_LABEL_SIZE] = '\0'; + env[2] = kasprintf(GFP_KERNEL, "CACHED_LABEL=%s", buf); if (atomic_xchg(&dc->running, 1)) return; @@ -816,10 +856,12 @@ void bch_cached_dev_run(struct cached_dev *dc) add_disk(d->disk); bd_link_disk_holder(dc->bdev, dc->disk.disk); -#if 0 - char *env[] = { "SYMLINK=label" , NULL }; + /* won't show up in the uevent file, use udevadm monitor -e instead + * only class / kset properties are persistent */ kobject_uevent_env(&disk_to_dev(d->disk)->kobj, KOBJ_CHANGE, env); -#endif + kfree(env[1]); + kfree(env[2]); + if (sysfs_create_link(&d->kobj, &disk_to_dev(d->disk)->kobj, "dev") || sysfs_create_link(&disk_to_dev(d->disk)->kobj, &d->kobj, "bcache")) pr_debug("error creating sysfs link"); @@ -960,6 +1002,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c) atomic_set(&dc->count, 1); if (BDEV_STATE(&dc->sb) == 
BDEV_STATE_DIRTY) { + bch_sectors_dirty_init(dc); atomic_set(&dc->has_dirty, 1); atomic_inc(&dc->count); bch_writeback_queue(dc); @@ -1014,6 +1057,14 @@ static void cached_dev_flush(struct closure *cl) struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl); struct bcache_device *d = &dc->disk; + mutex_lock(&bch_register_lock); + d->flush_done = 1; + + if (d->c) + bcache_device_unlink(d); + + mutex_unlock(&bch_register_lock); + bch_cache_accounting_destroy(&dc->accounting); kobject_del(&d->kobj); @@ -1045,7 +1096,8 @@ static int cached_dev_init(struct cached_dev *dc, unsigned block_size) hlist_add_head(&io->hash, dc->io_hash + RECENT_IO); } - ret = bcache_device_init(&dc->disk, block_size); + ret = bcache_device_init(&dc->disk, block_size, + dc->bdev->bd_part->nr_sects - dc->sb.data_offset); if (ret) return ret; @@ -1144,11 +1196,10 @@ static int flash_dev_run(struct cache_set *c, struct uuid_entry *u) kobject_init(&d->kobj, &bch_flash_dev_ktype); - if (bcache_device_init(d, block_bytes(c))) + if (bcache_device_init(d, block_bytes(c), u->sectors)) goto err; bcache_device_attach(d, c, u - c->uuids); - set_capacity(d->disk, u->sectors); bch_flash_dev_request_init(d); add_disk(d->disk); @@ -1255,9 +1306,10 @@ static void cache_set_free(struct closure *cl) free_pages((unsigned long) c->uuids, ilog2(bucket_pages(c))); free_pages((unsigned long) c->sort, ilog2(bucket_pages(c))); - kfree(c->fill_iter); if (c->bio_split) bioset_free(c->bio_split); + if (c->fill_iter) + mempool_destroy(c->fill_iter); if (c->bio_meta) mempool_destroy(c->bio_meta); if (c->search) @@ -1278,11 +1330,9 @@ static void cache_set_free(struct closure *cl) static void cache_set_flush(struct closure *cl) { struct cache_set *c = container_of(cl, struct cache_set, caching); + struct cache *ca; struct btree *b; - - /* Shut down allocator threads */ - set_bit(CACHE_SET_STOPPING_2, &c->flags); - wake_up(&c->alloc_wait); + unsigned i; bch_cache_accounting_destroy(&c->accounting); @@ -1295,7 
+1345,11 @@ static void cache_set_flush(struct closure *cl) /* Should skip this if we're unregistering because of an error */ list_for_each_entry(b, &c->btree_cache, list) if (btree_node_dirty(b)) - bch_btree_write(b, true, NULL); + bch_btree_node_write(b, NULL); + + for_each_cache(ca, c, i) + if (ca->alloc_thread) + kthread_stop(ca->alloc_thread); closure_return(cl); } @@ -1303,18 +1357,22 @@ static void cache_set_flush(struct closure *cl) static void __cache_set_unregister(struct closure *cl) { struct cache_set *c = container_of(cl, struct cache_set, caching); - struct cached_dev *dc, *t; + struct cached_dev *dc; size_t i; mutex_lock(&bch_register_lock); - if (test_bit(CACHE_SET_UNREGISTERING, &c->flags)) - list_for_each_entry_safe(dc, t, &c->cached_devs, list) - bch_cached_dev_detach(dc); - for (i = 0; i < c->nr_uuids; i++) - if (c->devices[i] && UUID_FLASH_ONLY(&c->uuids[i])) - bcache_device_stop(c->devices[i]); + if (c->devices[i]) { + if (!UUID_FLASH_ONLY(&c->uuids[i]) && + test_bit(CACHE_SET_UNREGISTERING, &c->flags)) { + dc = container_of(c->devices[i], + struct cached_dev, disk); + bch_cached_dev_detach(dc); + } else { + bcache_device_stop(c->devices[i]); + } + } mutex_unlock(&bch_register_lock); @@ -1373,9 +1431,9 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb) c->btree_pages = max_t(int, c->btree_pages / 4, BTREE_MAX_PAGES); - init_waitqueue_head(&c->alloc_wait); + c->sort_crit_factor = int_sqrt(c->btree_pages); + mutex_init(&c->bucket_lock); - mutex_init(&c->fill_lock); mutex_init(&c->sort_lock); spin_lock_init(&c->sort_time_lock); closure_init_unlocked(&c->sb_write); @@ -1401,8 +1459,8 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb) !(c->bio_meta = mempool_create_kmalloc_pool(2, sizeof(struct bbio) + sizeof(struct bio_vec) * bucket_pages(c))) || + !(c->fill_iter = mempool_create_kmalloc_pool(1, iter_size)) || !(c->bio_split = bioset_create(4, offsetof(struct bbio, bio))) || - !(c->fill_iter = kmalloc(iter_size, GFP_KERNEL)) 
|| !(c->sort = alloc_bucket_pages(GFP_KERNEL, c)) || !(c->uuids = alloc_bucket_pages(GFP_KERNEL, c)) || bch_journal_alloc(c) || @@ -1410,8 +1468,6 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb) bch_open_buckets_alloc(c)) goto err; - c->fill_iter->size = sb->bucket_size / sb->block_size; - c->congested_read_threshold_us = 2000; c->congested_write_threshold_us = 20000; c->error_limit = 8 << IO_ERROR_SHIFT; @@ -1496,9 +1552,10 @@ static void run_cache_set(struct cache_set *c) */ bch_journal_next(&c->journal); + err = "error starting allocator thread"; for_each_cache(ca, c, i) - closure_call(&ca->alloc, bch_allocator_thread, - system_wq, &c->cl); + if (bch_cache_allocator_start(ca)) + goto err; /* * First place it's safe to allocate: btree_check() and @@ -1531,17 +1588,16 @@ static void run_cache_set(struct cache_set *c) bch_btree_gc_finish(c); + err = "error starting allocator thread"; for_each_cache(ca, c, i) - closure_call(&ca->alloc, bch_allocator_thread, - ca->alloc_workqueue, &c->cl); + if (bch_cache_allocator_start(ca)) + goto err; mutex_lock(&c->bucket_lock); for_each_cache(ca, c, i) bch_prio_write(ca); mutex_unlock(&c->bucket_lock); - wake_up(&c->alloc_wait); - err = "cannot allocate new UUID bucket"; if (__uuid_write(c)) goto err_unlock_gc; @@ -1552,7 +1608,7 @@ static void run_cache_set(struct cache_set *c) goto err_unlock_gc; bkey_copy_key(&c->root->key, &MAX_KEY); - bch_btree_write(c->root, true, &op); + bch_btree_node_write(c->root, &op.cl); bch_btree_set_root(c->root); rw_unlock(true, c->root); @@ -1673,9 +1729,6 @@ void bch_cache_release(struct kobject *kobj) bio_split_pool_free(&ca->bio_split_hook); - if (ca->alloc_workqueue) - destroy_workqueue(ca->alloc_workqueue); - free_pages((unsigned long) ca->disk_buckets, ilog2(bucket_pages(ca))); kfree(ca->prio_buckets); vfree(ca->buckets); @@ -1723,7 +1776,6 @@ static int cache_alloc(struct cache_sb *sb, struct cache *ca) !(ca->prio_buckets = kzalloc(sizeof(uint64_t) * prio_buckets(ca) * 2, 
GFP_KERNEL)) || !(ca->disk_buckets = alloc_bucket_pages(GFP_KERNEL, ca)) || - !(ca->alloc_workqueue = alloc_workqueue("bch_allocator", 0, 1)) || bio_split_pool_init(&ca->bio_split_hook)) return -ENOMEM; @@ -1786,6 +1838,36 @@ static ssize_t register_bcache(struct kobject *, struct kobj_attribute *, kobj_attribute_write(register, register_bcache); kobj_attribute_write(register_quiet, register_bcache); +static bool bch_is_open_backing(struct block_device *bdev) { + struct cache_set *c, *tc; + struct cached_dev *dc, *t; + + list_for_each_entry_safe(c, tc, &bch_cache_sets, list) + list_for_each_entry_safe(dc, t, &c->cached_devs, list) + if (dc->bdev == bdev) + return true; + list_for_each_entry_safe(dc, t, &uncached_devices, list) + if (dc->bdev == bdev) + return true; + return false; +} + +static bool bch_is_open_cache(struct block_device *bdev) { + struct cache_set *c, *tc; + struct cache *ca; + unsigned i; + + list_for_each_entry_safe(c, tc, &bch_cache_sets, list) + for_each_cache(ca, c, i) + if (ca->bdev == bdev) + return true; + return false; +} + +static bool bch_is_open(struct block_device *bdev) { + return bch_is_open_cache(bdev) || bch_is_open_backing(bdev); +} + static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, const char *buffer, size_t size) { @@ -1810,8 +1892,13 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb); if (IS_ERR(bdev)) { - if (bdev == ERR_PTR(-EBUSY)) - err = "device busy"; + if (bdev == ERR_PTR(-EBUSY)) { + bdev = lookup_bdev(strim(path)); + if (!IS_ERR(bdev) && bch_is_open(bdev)) + err = "device already registered"; + else + err = "device busy"; + } goto err; } diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index 4d9cca47e4c6..12a2c2846f99 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -9,7 +9,9 @@ #include "sysfs.h" #include "btree.h" #include "request.h" +#include "writeback.h" +#include 
<linux/blkdev.h> #include <linux/sort.h> static const char * const cache_replacement_policies[] = { @@ -79,6 +81,9 @@ rw_attribute(writeback_rate_p_term_inverse); rw_attribute(writeback_rate_d_smooth); read_attribute(writeback_rate_debug); +read_attribute(stripe_size); +read_attribute(partial_stripes_expensive); + rw_attribute(synchronous); rw_attribute(journal_delay_ms); rw_attribute(discard); @@ -127,7 +132,7 @@ SHOW(__bch_cached_dev) char derivative[20]; char target[20]; bch_hprint(dirty, - atomic_long_read(&dc->disk.sectors_dirty) << 9); + bcache_dev_sectors_dirty(&dc->disk) << 9); bch_hprint(derivative, dc->writeback_rate_derivative << 9); bch_hprint(target, dc->writeback_rate_target << 9); @@ -143,7 +148,10 @@ SHOW(__bch_cached_dev) } sysfs_hprint(dirty_data, - atomic_long_read(&dc->disk.sectors_dirty) << 9); + bcache_dev_sectors_dirty(&dc->disk) << 9); + + sysfs_hprint(stripe_size, (1 << dc->disk.stripe_size_bits) << 9); + var_printf(partial_stripes_expensive, "%u"); var_printf(sequential_merge, "%i"); var_hprint(sequential_cutoff); @@ -170,6 +178,7 @@ STORE(__cached_dev) disk.kobj); unsigned v = size; struct cache_set *c; + struct kobj_uevent_env *env; #define d_strtoul(var) sysfs_strtoul(var, dc->var) #define d_strtoi_h(var) sysfs_hatoi(var, dc->var) @@ -214,6 +223,7 @@ STORE(__cached_dev) } if (attr == &sysfs_label) { + /* note: endlines are preserved */ memcpy(dc->sb.label, buf, SB_LABEL_SIZE); bch_write_bdev_super(dc, NULL); if (dc->disk.c) { @@ -221,6 +231,15 @@ STORE(__cached_dev) buf, SB_LABEL_SIZE); bch_uuid_write(dc->disk.c); } + env = kzalloc(sizeof(struct kobj_uevent_env), GFP_KERNEL); + if (!env) + return -ENOMEM; + add_uevent_var(env, "DRIVER=bcache"); + add_uevent_var(env, "CACHED_UUID=%pU", dc->sb.uuid), + add_uevent_var(env, "CACHED_LABEL=%s", buf); + kobject_uevent_env( + &disk_to_dev(dc->disk.disk)->kobj, KOBJ_CHANGE, env->envp); + kfree(env); } if (attr == &sysfs_attach) { @@ -284,6 +303,8 @@ static struct attribute 
*bch_cached_dev_files[] = { &sysfs_writeback_rate_d_smooth, &sysfs_writeback_rate_debug, &sysfs_dirty_data, + &sysfs_stripe_size, + &sysfs_partial_stripes_expensive, &sysfs_sequential_cutoff, &sysfs_sequential_merge, &sysfs_clear_stats, @@ -665,12 +686,10 @@ SHOW(__bch_cache) int cmp(const void *l, const void *r) { return *((uint16_t *) r) - *((uint16_t *) l); } - /* Number of quantiles we compute */ - const unsigned nq = 31; - size_t n = ca->sb.nbuckets, i, unused, btree; uint64_t sum = 0; - uint16_t q[nq], *p, *cached; + /* Compute 31 quantiles */ + uint16_t q[31], *p, *cached; ssize_t ret; cached = p = vmalloc(ca->sb.nbuckets * sizeof(uint16_t)); @@ -703,26 +722,29 @@ SHOW(__bch_cache) if (n) do_div(sum, n); - for (i = 0; i < nq; i++) - q[i] = INITIAL_PRIO - cached[n * (i + 1) / (nq + 1)]; + for (i = 0; i < ARRAY_SIZE(q); i++) + q[i] = INITIAL_PRIO - cached[n * (i + 1) / + (ARRAY_SIZE(q) + 1)]; vfree(p); - ret = snprintf(buf, PAGE_SIZE, - "Unused: %zu%%\n" - "Metadata: %zu%%\n" - "Average: %llu\n" - "Sectors per Q: %zu\n" - "Quantiles: [", - unused * 100 / (size_t) ca->sb.nbuckets, - btree * 100 / (size_t) ca->sb.nbuckets, sum, - n * ca->sb.bucket_size / (nq + 1)); - - for (i = 0; i < nq && ret < (ssize_t) PAGE_SIZE; i++) - ret += snprintf(buf + ret, PAGE_SIZE - ret, - i < nq - 1 ? 
"%u " : "%u]\n", q[i]); - - buf[PAGE_SIZE - 1] = '\0'; + ret = scnprintf(buf, PAGE_SIZE, + "Unused: %zu%%\n" + "Metadata: %zu%%\n" + "Average: %llu\n" + "Sectors per Q: %zu\n" + "Quantiles: [", + unused * 100 / (size_t) ca->sb.nbuckets, + btree * 100 / (size_t) ca->sb.nbuckets, sum, + n * ca->sb.bucket_size / (ARRAY_SIZE(q) + 1)); + + for (i = 0; i < ARRAY_SIZE(q); i++) + ret += scnprintf(buf + ret, PAGE_SIZE - ret, + "%u ", q[i]); + ret--; + + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "]\n"); + return ret; } diff --git a/drivers/md/bcache/trace.c b/drivers/md/bcache/trace.c index 983f9bb411bc..f7b6c197f90f 100644 --- a/drivers/md/bcache/trace.c +++ b/drivers/md/bcache/trace.c @@ -2,6 +2,7 @@ #include "btree.h" #include "request.h" +#include <linux/blktrace_api.h> #include <linux/module.h> #define CREATE_TRACE_POINTS @@ -9,18 +10,44 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_request_start); EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_request_end); -EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_passthrough); -EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_cache_hit); -EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_cache_miss); + +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_bypass_sequential); +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_bypass_congested); + +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_read); +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_write); EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_read_retry); -EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_writethrough); -EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_writeback); -EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_write_skip); + +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_cache_insert); + +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_journal_replay_key); +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_journal_write); +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_journal_full); +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_journal_entry_full); + +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_cache_cannibalize); + EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_read); EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_write); 
-EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_write_dirty); -EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_read_dirty); -EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_journal_write); -EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_cache_insert); + +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_alloc); +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_alloc_fail); +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_free); + +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_gc_coalesce); EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_gc_start); EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_gc_end); +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_gc_copy); +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_gc_copy_collision); + +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_insert_key); + +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_split); +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_compact); +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_set_root); + +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_alloc_invalidate); +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_alloc_fail); + +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_writeback); +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_writeback_collision); diff --git a/drivers/md/bcache/util.c b/drivers/md/bcache/util.c index da3a99e85b1e..98eb81159a22 100644 --- a/drivers/md/bcache/util.c +++ b/drivers/md/bcache/util.c @@ -228,23 +228,6 @@ start: bv->bv_len = min_t(size_t, PAGE_SIZE - bv->bv_offset, } } -int bch_bio_alloc_pages(struct bio *bio, gfp_t gfp) -{ - int i; - struct bio_vec *bv; - - bio_for_each_segment(bv, bio, i) { - bv->bv_page = alloc_page(gfp); - if (!bv->bv_page) { - while (bv-- != bio->bi_io_vec + bio->bi_idx) - __free_page(bv->bv_page); - return -ENOMEM; - } - } - - return 0; -} - /* * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group (Any * use permitted, subject to terms of PostgreSQL license; see.) 
diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h index 577393e38c3a..1ae2a73ad85f 100644 --- a/drivers/md/bcache/util.h +++ b/drivers/md/bcache/util.h @@ -15,8 +15,6 @@ struct closure; -#include <trace/events/bcache.h> - #ifdef CONFIG_BCACHE_EDEBUG #define atomic_dec_bug(v) BUG_ON(atomic_dec_return(v) < 0) @@ -566,12 +564,8 @@ static inline unsigned fract_exp_two(unsigned x, unsigned fract_bits) return x; } -#define bio_end(bio) ((bio)->bi_sector + bio_sectors(bio)) - void bch_bio_map(struct bio *bio, void *base); -int bch_bio_alloc_pages(struct bio *bio, gfp_t gfp); - static inline sector_t bdev_sectors(struct block_device *bdev) { return bdev->bd_inode->i_size >> 9; diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index 2714ed3991d1..22cbff551628 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -9,6 +9,9 @@ #include "bcache.h" #include "btree.h" #include "debug.h" +#include "writeback.h" + +#include <trace/events/bcache.h> static struct workqueue_struct *dirty_wq; @@ -36,7 +39,7 @@ static void __update_writeback_rate(struct cached_dev *dc) int change = 0; int64_t error; - int64_t dirty = atomic_long_read(&dc->disk.sectors_dirty); + int64_t dirty = bcache_dev_sectors_dirty(&dc->disk); int64_t derivative = dirty - dc->disk.sectors_dirty_last; dc->disk.sectors_dirty_last = dirty; @@ -105,6 +108,31 @@ static bool dirty_pred(struct keybuf *buf, struct bkey *k) return KEY_DIRTY(k); } +static bool dirty_full_stripe_pred(struct keybuf *buf, struct bkey *k) +{ + uint64_t stripe; + unsigned nr_sectors = KEY_SIZE(k); + struct cached_dev *dc = container_of(buf, struct cached_dev, + writeback_keys); + unsigned stripe_size = 1 << dc->disk.stripe_size_bits; + + if (!KEY_DIRTY(k)) + return false; + + stripe = KEY_START(k) >> dc->disk.stripe_size_bits; + while (1) { + if (atomic_read(dc->disk.stripe_sectors_dirty + stripe) != + stripe_size) + return false; + + if (nr_sectors <= stripe_size) + return true; 
+ + nr_sectors -= stripe_size; + stripe++; + } +} + static void dirty_init(struct keybuf_key *w) { struct dirty_io *io = w->private; @@ -149,7 +177,22 @@ static void refill_dirty(struct closure *cl) searched_from_start = true; } - bch_refill_keybuf(dc->disk.c, buf, &end); + if (dc->partial_stripes_expensive) { + uint64_t i; + + for (i = 0; i < dc->disk.nr_stripes; i++) + if (atomic_read(dc->disk.stripe_sectors_dirty + i) == + 1 << dc->disk.stripe_size_bits) + goto full_stripes; + + goto normal_refill; +full_stripes: + bch_refill_keybuf(dc->disk.c, buf, &end, + dirty_full_stripe_pred); + } else { +normal_refill: + bch_refill_keybuf(dc->disk.c, buf, &end, dirty_pred); + } if (bkey_cmp(&buf->last_scanned, &end) >= 0 && searched_from_start) { /* Searched the entire btree - delay awhile */ @@ -181,10 +224,8 @@ void bch_writeback_queue(struct cached_dev *dc) } } -void bch_writeback_add(struct cached_dev *dc, unsigned sectors) +void bch_writeback_add(struct cached_dev *dc) { - atomic_long_add(sectors, &dc->disk.sectors_dirty); - if (!atomic_read(&dc->has_dirty) && !atomic_xchg(&dc->has_dirty, 1)) { atomic_inc(&dc->count); @@ -203,6 +244,34 @@ void bch_writeback_add(struct cached_dev *dc, unsigned sectors) } } +void bcache_dev_sectors_dirty_add(struct cache_set *c, unsigned inode, + uint64_t offset, int nr_sectors) +{ + struct bcache_device *d = c->devices[inode]; + unsigned stripe_size, stripe_offset; + uint64_t stripe; + + if (!d) + return; + + stripe_size = 1 << d->stripe_size_bits; + stripe = offset >> d->stripe_size_bits; + stripe_offset = offset & (stripe_size - 1); + + while (nr_sectors) { + int s = min_t(unsigned, abs(nr_sectors), + stripe_size - stripe_offset); + + if (nr_sectors < 0) + s = -s; + + atomic_add(s, d->stripe_sectors_dirty + stripe); + nr_sectors -= s; + stripe_offset = 0; + stripe++; + } +} + /* Background writeback - IO loop */ static void dirty_io_destructor(struct closure *cl) @@ -216,9 +285,10 @@ static void write_dirty_finish(struct closure *cl) 
struct dirty_io *io = container_of(cl, struct dirty_io, cl); struct keybuf_key *w = io->bio.bi_private; struct cached_dev *dc = io->dc; - struct bio_vec *bv = bio_iovec_idx(&io->bio, io->bio.bi_vcnt); + struct bio_vec *bv; + int i; - while (bv-- != io->bio.bi_io_vec) + bio_for_each_segment_all(bv, &io->bio, i) __free_page(bv->bv_page); /* This is kind of a dumb way of signalling errors. */ @@ -236,10 +306,12 @@ static void write_dirty_finish(struct closure *cl) for (i = 0; i < KEY_PTRS(&w->key); i++) atomic_inc(&PTR_BUCKET(dc->disk.c, &w->key, i)->pin); - pr_debug("clearing %s", pkey(&w->key)); bch_btree_insert(&op, dc->disk.c); closure_sync(&op.cl); + if (op.insert_collision) + trace_bcache_writeback_collision(&w->key); + atomic_long_inc(op.insert_collision ? &dc->disk.c->writeback_keys_failed : &dc->disk.c->writeback_keys_done); @@ -275,7 +347,6 @@ static void write_dirty(struct closure *cl) io->bio.bi_bdev = io->dc->bdev; io->bio.bi_end_io = dirty_endio; - trace_bcache_write_dirty(&io->bio); closure_bio_submit(&io->bio, cl, &io->dc->disk); continue_at(cl, write_dirty_finish, dirty_wq); @@ -296,7 +367,6 @@ static void read_dirty_submit(struct closure *cl) { struct dirty_io *io = container_of(cl, struct dirty_io, cl); - trace_bcache_read_dirty(&io->bio); closure_bio_submit(&io->bio, cl, &io->dc->disk); continue_at(cl, write_dirty, dirty_wq); @@ -349,10 +419,10 @@ static void read_dirty(struct closure *cl) io->bio.bi_rw = READ; io->bio.bi_end_io = read_dirty_endio; - if (bch_bio_alloc_pages(&io->bio, GFP_KERNEL)) + if (bio_alloc_pages(&io->bio, GFP_KERNEL)) goto err_free; - pr_debug("%s", pkey(&w->key)); + trace_bcache_writeback(&w->key); closure_call(&io->cl, read_dirty_submit, NULL, &dc->disk.cl); @@ -375,12 +445,49 @@ err: refill_dirty(cl); } +/* Init */ + +static int bch_btree_sectors_dirty_init(struct btree *b, struct btree_op *op, + struct cached_dev *dc) +{ + struct bkey *k; + struct btree_iter iter; + + bch_btree_iter_init(b, &iter, &KEY(dc->disk.id, 0, 
0)); + while ((k = bch_btree_iter_next_filter(&iter, b, bch_ptr_bad))) + if (!b->level) { + if (KEY_INODE(k) > dc->disk.id) + break; + + if (KEY_DIRTY(k)) + bcache_dev_sectors_dirty_add(b->c, dc->disk.id, + KEY_START(k), + KEY_SIZE(k)); + } else { + btree(sectors_dirty_init, k, b, op, dc); + if (KEY_INODE(k) > dc->disk.id) + break; + + cond_resched(); + } + + return 0; +} + +void bch_sectors_dirty_init(struct cached_dev *dc) +{ + struct btree_op op; + + bch_btree_op_init_stack(&op); + btree_root(sectors_dirty_init, dc->disk.c, &op, dc); +} + void bch_cached_dev_writeback_init(struct cached_dev *dc) { closure_init_unlocked(&dc->writeback); init_rwsem(&dc->writeback_lock); - bch_keybuf_init(&dc->writeback_keys, dirty_pred); + bch_keybuf_init(&dc->writeback_keys); dc->writeback_metadata = true; dc->writeback_running = true; diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h new file mode 100644 index 000000000000..c91f61bb95b6 --- /dev/null +++ b/drivers/md/bcache/writeback.h @@ -0,0 +1,64 @@ +#ifndef _BCACHE_WRITEBACK_H +#define _BCACHE_WRITEBACK_H + +#define CUTOFF_WRITEBACK 40 +#define CUTOFF_WRITEBACK_SYNC 70 + +static inline uint64_t bcache_dev_sectors_dirty(struct bcache_device *d) +{ + uint64_t i, ret = 0; + + for (i = 0; i < d->nr_stripes; i++) + ret += atomic_read(d->stripe_sectors_dirty + i); + + return ret; +} + +static inline bool bcache_dev_stripe_dirty(struct bcache_device *d, + uint64_t offset, + unsigned nr_sectors) +{ + uint64_t stripe = offset >> d->stripe_size_bits; + + while (1) { + if (atomic_read(d->stripe_sectors_dirty + stripe)) + return true; + + if (nr_sectors <= 1 << d->stripe_size_bits) + return false; + + nr_sectors -= 1 << d->stripe_size_bits; + stripe++; + } +} + +static inline bool should_writeback(struct cached_dev *dc, struct bio *bio, + unsigned cache_mode, bool would_skip) +{ + unsigned in_use = dc->disk.c->gc_stats.in_use; + + if (cache_mode != CACHE_MODE_WRITEBACK || + atomic_read(&dc->disk.detaching) || + 
in_use > CUTOFF_WRITEBACK_SYNC) + return false; + + if (dc->partial_stripes_expensive && + bcache_dev_stripe_dirty(&dc->disk, bio->bi_sector, + bio_sectors(bio))) + return true; + + if (would_skip) + return false; + + return bio->bi_rw & REQ_SYNC || + in_use <= CUTOFF_WRITEBACK; +} + +void bcache_dev_sectors_dirty_add(struct cache_set *, unsigned, uint64_t, int); +void bch_writeback_queue(struct cached_dev *); +void bch_writeback_add(struct cached_dev *); + +void bch_sectors_dirty_init(struct cached_dev *dc); +void bch_cached_dev_writeback_init(struct cached_dev *); + +#endif diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 957a719e8c2f..df7b0a06b0ea 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -2290,12 +2290,18 @@ static void recovery_request_write(struct mddev *mddev, struct r10bio *r10_bio) d = r10_bio->devs[1].devnum; wbio = r10_bio->devs[1].bio; wbio2 = r10_bio->devs[1].repl_bio; + /* Need to test wbio2->bi_end_io before we call + * generic_make_request as if the former is NULL, + * the latter is free to free wbio2. 
+ */ + if (wbio2 && !wbio2->bi_end_io) + wbio2 = NULL; if (wbio->bi_end_io) { atomic_inc(&conf->mirrors[d].rdev->nr_pending); md_sync_acct(conf->mirrors[d].rdev->bdev, bio_sectors(wbio)); generic_make_request(wbio); } - if (wbio2 && wbio2->bi_end_io) { + if (wbio2) { atomic_inc(&conf->mirrors[d].replacement->nr_pending); md_sync_acct(conf->mirrors[d].replacement->bdev, bio_sectors(wbio2)); diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 2bf094a587cb..78ea44336e75 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -3462,6 +3462,7 @@ static void handle_stripe(struct stripe_head *sh) test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) { set_bit(STRIPE_SYNCING, &sh->state); clear_bit(STRIPE_INSYNC, &sh->state); + clear_bit(STRIPE_REPLACED, &sh->state); } spin_unlock(&sh->stripe_lock); } @@ -3607,19 +3608,23 @@ static void handle_stripe(struct stripe_head *sh) handle_parity_checks5(conf, sh, &s, disks); } - if (s.replacing && s.locked == 0 - && !test_bit(STRIPE_INSYNC, &sh->state)) { + if ((s.replacing || s.syncing) && s.locked == 0 + && !test_bit(STRIPE_COMPUTE_RUN, &sh->state) + && !test_bit(STRIPE_REPLACED, &sh->state)) { /* Write out to replacement devices where possible */ for (i = 0; i < conf->raid_disks; i++) - if (test_bit(R5_UPTODATE, &sh->dev[i].flags) && - test_bit(R5_NeedReplace, &sh->dev[i].flags)) { + if (test_bit(R5_NeedReplace, &sh->dev[i].flags)) { + WARN_ON(!test_bit(R5_UPTODATE, &sh->dev[i].flags)); set_bit(R5_WantReplace, &sh->dev[i].flags); set_bit(R5_LOCKED, &sh->dev[i].flags); s.locked++; } - set_bit(STRIPE_INSYNC, &sh->state); + if (s.replacing) + set_bit(STRIPE_INSYNC, &sh->state); + set_bit(STRIPE_REPLACED, &sh->state); } if ((s.syncing || s.replacing) && s.locked == 0 && + !test_bit(STRIPE_COMPUTE_RUN, &sh->state) && test_bit(STRIPE_INSYNC, &sh->state)) { md_done_sync(conf->mddev, STRIPE_SECTORS, 1); clear_bit(STRIPE_SYNCING, &sh->state); diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index 
b0b663b119a8..70c49329ca9a 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h @@ -306,6 +306,7 @@ enum { STRIPE_SYNC_REQUESTED, STRIPE_SYNCING, STRIPE_INSYNC, + STRIPE_REPLACED, STRIPE_PREREAD_ACTIVE, STRIPE_DELAYED, STRIPE_DEGRADED, diff --git a/drivers/misc/atmel-ssc.c b/drivers/misc/atmel-ssc.c index f7b90661e321..e068a76a5f6f 100644 --- a/drivers/misc/atmel-ssc.c +++ b/drivers/misc/atmel-ssc.c @@ -66,14 +66,19 @@ EXPORT_SYMBOL(ssc_request); void ssc_free(struct ssc_device *ssc) { + bool disable_clk = true; + spin_lock(&user_lock); - if (ssc->user) { + if (ssc->user) ssc->user--; - clk_disable_unprepare(ssc->clk); - } else { + else { + disable_clk = false; dev_dbg(&ssc->pdev->dev, "device already free\n"); } spin_unlock(&user_lock); + + if (disable_clk) + clk_disable_unprepare(ssc->clk); } EXPORT_SYMBOL(ssc_free); diff --git a/drivers/misc/mei/hbm.c b/drivers/misc/mei/hbm.c index f9296abcf02a..6127ab64bb39 100644 --- a/drivers/misc/mei/hbm.c +++ b/drivers/misc/mei/hbm.c @@ -167,7 +167,7 @@ int mei_hbm_start_req(struct mei_device *dev) dev->hbm_state = MEI_HBM_IDLE; if (mei_write_message(dev, mei_hdr, dev->wr_msg.data)) { - dev_err(&dev->pdev->dev, "version message writet failed\n"); + dev_err(&dev->pdev->dev, "version message write failed\n"); dev->dev_state = MEI_DEV_RESETTING; mei_reset(dev, 1); return -ENODEV; diff --git a/drivers/misc/mei/hw-me.c b/drivers/misc/mei/hw-me.c index e4f8dec4dc3c..b22c7e247225 100644 --- a/drivers/misc/mei/hw-me.c +++ b/drivers/misc/mei/hw-me.c @@ -239,14 +239,18 @@ static int mei_me_hw_ready_wait(struct mei_device *dev) if (mei_me_hw_is_ready(dev)) return 0; + dev->recvd_hw_ready = false; mutex_unlock(&dev->device_lock); err = wait_event_interruptible_timeout(dev->wait_hw_ready, - dev->recvd_hw_ready, MEI_INTEROP_TIMEOUT); + dev->recvd_hw_ready, + mei_secs_to_jiffies(MEI_INTEROP_TIMEOUT)); mutex_lock(&dev->device_lock); if (!err && !dev->recvd_hw_ready) { + if (!err) + err = -ETIMEDOUT; dev_err(&dev->pdev->dev, - "wait hw 
ready failed. status = 0x%x\n", err); - return -ETIMEDOUT; + "wait hw ready failed. status = %d\n", err); + return err; } dev->recvd_hw_ready = false; @@ -483,7 +487,9 @@ irqreturn_t mei_me_irq_thread_handler(int irq, void *dev_id) /* check if ME wants a reset */ if (!mei_hw_is_ready(dev) && dev->dev_state != MEI_DEV_RESETTING && - dev->dev_state != MEI_DEV_INITIALIZING) { + dev->dev_state != MEI_DEV_INITIALIZING && + dev->dev_state != MEI_DEV_POWER_DOWN && + dev->dev_state != MEI_DEV_POWER_UP) { dev_dbg(&dev->pdev->dev, "FW not ready.\n"); mei_reset(dev, 1); mutex_unlock(&dev->device_lock); diff --git a/drivers/misc/mei/init.c b/drivers/misc/mei/init.c index ed1d75203af6..e6f16f83ecde 100644 --- a/drivers/misc/mei/init.c +++ b/drivers/misc/mei/init.c @@ -148,7 +148,8 @@ void mei_reset(struct mei_device *dev, int interrupts_enabled) dev->hbm_state = MEI_HBM_IDLE; - if (dev->dev_state != MEI_DEV_INITIALIZING) { + if (dev->dev_state != MEI_DEV_INITIALIZING && + dev->dev_state != MEI_DEV_POWER_UP) { if (dev->dev_state != MEI_DEV_DISABLED && dev->dev_state != MEI_DEV_POWER_DOWN) dev->dev_state = MEI_DEV_RESETTING; diff --git a/drivers/mmc/host/pxamci.c b/drivers/mmc/host/pxamci.c index 847b1996ce8e..2c5a91bb8ec3 100644 --- a/drivers/mmc/host/pxamci.c +++ b/drivers/mmc/host/pxamci.c @@ -128,7 +128,7 @@ static inline int pxamci_set_power(struct pxamci_host *host, !!on ^ host->pdata->gpio_power_invert); } if (!host->vcc && host->pdata && host->pdata->setpower) - host->pdata->setpower(mmc_dev(host->mmc), vdd); + return host->pdata->setpower(mmc_dev(host->mmc), vdd); return 0; } diff --git a/drivers/of/irq.c b/drivers/of/irq.c index a3c1c5aae6a9..1264923ade0f 100644 --- a/drivers/of/irq.c +++ b/drivers/of/irq.c @@ -345,6 +345,7 @@ int of_irq_to_resource(struct device_node *dev, int index, struct resource *r) if (r && irq) { const char *name = NULL; + memset(r, 0, sizeof(*r)); /* * Get optional "interrupts-names" property to add a name * to the resource. 
@@ -482,8 +483,9 @@ void __init of_irq_init(const struct of_device_id *matches) } /* Get the next pending parent that might have children */ - desc = list_first_entry(&intc_parent_list, typeof(*desc), list); - if (list_empty(&intc_parent_list) || !desc) { + desc = list_first_entry_or_null(&intc_parent_list, + typeof(*desc), list); + if (!desc) { pr_err("of_irq_init: children remain, but no parents\n"); break; } diff --git a/drivers/pci/hotplug/rpadlpar_core.c b/drivers/pci/hotplug/rpadlpar_core.c index b29e20b7862f..bb7af78e4eed 100644 --- a/drivers/pci/hotplug/rpadlpar_core.c +++ b/drivers/pci/hotplug/rpadlpar_core.c @@ -388,7 +388,6 @@ int dlpar_remove_pci_slot(char *drc_name, struct device_node *dn) /* Remove the EADS bridge device itself */ BUG_ON(!bus->self); pr_debug("PCI: Now removing bridge device %s\n", pci_name(bus->self)); - eeh_remove_bus_device(bus->self, true); pci_stop_and_remove_bus_device(bus->self); return 0; diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c index 5b272bfd261d..2a00239661b3 100644 --- a/drivers/pinctrl/core.c +++ b/drivers/pinctrl/core.c @@ -1193,6 +1193,7 @@ void pinctrl_unregister_map(struct pinctrl_map const *map) list_for_each_entry(maps_node, &pinctrl_maps, node) { if (maps_node->maps == map) { list_del(&maps_node->node); + kfree(maps_node); mutex_unlock(&pinctrl_maps_mutex); return; } diff --git a/drivers/pinctrl/pinctrl-single.c b/drivers/pinctrl/pinctrl-single.c index 6866548fab31..7323cca440b5 100644 --- a/drivers/pinctrl/pinctrl-single.c +++ b/drivers/pinctrl/pinctrl-single.c @@ -1483,6 +1483,7 @@ static int pcs_add_gpio_func(struct device_node *node, struct pcs_device *pcs) return ret; } +#ifdef CONFIG_PM static int pinctrl_single_suspend(struct platform_device *pdev, pm_message_t state) { @@ -1505,6 +1506,7 @@ static int pinctrl_single_resume(struct platform_device *pdev) return pinctrl_force_default(pcs->pctl); } +#endif static int pcs_probe(struct platform_device *pdev) { diff --git 
a/drivers/pinctrl/sh-pfc/pfc-sh73a0.c b/drivers/pinctrl/sh-pfc/pfc-sh73a0.c index 7956df58d751..31f7d0e04aaa 100644 --- a/drivers/pinctrl/sh-pfc/pfc-sh73a0.c +++ b/drivers/pinctrl/sh-pfc/pfc-sh73a0.c @@ -3785,6 +3785,7 @@ static const struct regulator_desc sh73a0_vccq_mc0_desc = { static struct regulator_consumer_supply sh73a0_vccq_mc0_consumers[] = { REGULATOR_SUPPLY("vqmmc", "sh_mobile_sdhi.0"), + REGULATOR_SUPPLY("vqmmc", "ee100000.sdhi"), }; static const struct regulator_init_data sh73a0_vccq_mc0_init_data = { diff --git a/drivers/pinctrl/sirf/pinctrl-atlas6.c b/drivers/pinctrl/sirf/pinctrl-atlas6.c index 1fa39a444171..867c9681763c 100644 --- a/drivers/pinctrl/sirf/pinctrl-atlas6.c +++ b/drivers/pinctrl/sirf/pinctrl-atlas6.c @@ -496,7 +496,7 @@ static const unsigned sdmmc5_pins[] = { 24, 25, 26 }; static const struct sirfsoc_muxmask usp0_muxmask[] = { { .group = 1, - .mask = BIT(19) | BIT(20) | BIT(21) | BIT(22), + .mask = BIT(19) | BIT(20) | BIT(21) | BIT(22) | BIT(23), }, }; @@ -507,8 +507,21 @@ static const struct sirfsoc_padmux usp0_padmux = { .funcval = 0, }; -static const unsigned usp0_pins[] = { 51, 52, 53, 54 }; +static const unsigned usp0_pins[] = { 51, 52, 53, 54, 55 }; +static const struct sirfsoc_muxmask usp0_uart_nostreamctrl_muxmask[] = { + { + .group = 1, + .mask = BIT(20) | BIT(21), + }, +}; + +static const struct sirfsoc_padmux usp0_uart_nostreamctrl_padmux = { + .muxmask_counts = ARRAY_SIZE(usp0_uart_nostreamctrl_muxmask), + .muxmask = usp0_uart_nostreamctrl_muxmask, +}; + +static const unsigned usp0_uart_nostreamctrl_pins[] = { 52, 53 }; static const struct sirfsoc_muxmask usp1_muxmask[] = { { .group = 0, @@ -822,6 +835,8 @@ static const struct sirfsoc_pin_group sirfsoc_pin_groups[] = { SIRFSOC_PIN_GROUP("uart2grp", uart2_pins), SIRFSOC_PIN_GROUP("uart2_nostreamctrlgrp", uart2_nostreamctrl_pins), SIRFSOC_PIN_GROUP("usp0grp", usp0_pins), + SIRFSOC_PIN_GROUP("usp0_uart_nostreamctrl_grp", + usp0_uart_nostreamctrl_pins), 
SIRFSOC_PIN_GROUP("usp1grp", usp1_pins), SIRFSOC_PIN_GROUP("i2c0grp", i2c0_pins), SIRFSOC_PIN_GROUP("i2c1grp", i2c1_pins), @@ -862,6 +877,8 @@ static const char * const uart0grp[] = { "uart0grp" }; static const char * const uart1grp[] = { "uart1grp" }; static const char * const uart2grp[] = { "uart2grp" }; static const char * const uart2_nostreamctrlgrp[] = { "uart2_nostreamctrlgrp" }; +static const char * const usp0_uart_nostreamctrl_grp[] = { + "usp0_uart_nostreamctrl_grp" }; static const char * const usp0grp[] = { "usp0grp" }; static const char * const usp1grp[] = { "usp1grp" }; static const char * const i2c0grp[] = { "i2c0grp" }; @@ -904,6 +921,9 @@ static const struct sirfsoc_pmx_func sirfsoc_pmx_functions[] = { SIRFSOC_PMX_FUNCTION("uart2", uart2grp, uart2_padmux), SIRFSOC_PMX_FUNCTION("uart2_nostreamctrl", uart2_nostreamctrlgrp, uart2_nostreamctrl_padmux), SIRFSOC_PMX_FUNCTION("usp0", usp0grp, usp0_padmux), + SIRFSOC_PMX_FUNCTION("usp0_uart_nostreamctrl", + usp0_uart_nostreamctrl_grp, + usp0_uart_nostreamctrl_padmux), SIRFSOC_PMX_FUNCTION("usp1", usp1grp, usp1_padmux), SIRFSOC_PMX_FUNCTION("i2c0", i2c0grp, i2c0_padmux), SIRFSOC_PMX_FUNCTION("i2c1", i2c1grp, i2c1_padmux), diff --git a/drivers/scsi/isci/request.c b/drivers/scsi/isci/request.c index 7b082157eb79..99d2930b18c8 100644 --- a/drivers/scsi/isci/request.c +++ b/drivers/scsi/isci/request.c @@ -185,7 +185,7 @@ static void sci_io_request_build_ssp_command_iu(struct isci_request *ireq) cmd_iu->_r_c = 0; sci_swab32_cpy(&cmd_iu->cdb, task->ssp_task.cmd->cmnd, - task->ssp_task.cmd->cmd_len / sizeof(u32)); + (task->ssp_task.cmd->cmd_len+3) / sizeof(u32)); } static void sci_task_request_build_ssp_task_iu(struct isci_request *ireq) diff --git a/drivers/scsi/isci/task.c b/drivers/scsi/isci/task.c index 9bb020ac089c..0d30ca849e8f 100644 --- a/drivers/scsi/isci/task.c +++ b/drivers/scsi/isci/task.c @@ -491,6 +491,7 @@ int isci_task_abort_task(struct sas_task *task) struct isci_tmf tmf; int ret = 
TMF_RESP_FUNC_FAILED; unsigned long flags; + int target_done_already = 0; /* Get the isci_request reference from the task. Note that * this check does not depend on the pending request list @@ -505,9 +506,11 @@ int isci_task_abort_task(struct sas_task *task) /* If task is already done, the request isn't valid */ if (!(task->task_state_flags & SAS_TASK_STATE_DONE) && (task->task_state_flags & SAS_TASK_AT_INITIATOR) && - old_request) + old_request) { idev = isci_get_device(task->dev->lldd_dev); - + target_done_already = test_bit(IREQ_COMPLETE_IN_TARGET, + &old_request->flags); + } spin_unlock(&task->task_state_lock); spin_unlock_irqrestore(&ihost->scic_lock, flags); @@ -561,7 +564,7 @@ int isci_task_abort_task(struct sas_task *task) if (task->task_proto == SAS_PROTOCOL_SMP || sas_protocol_ata(task->task_proto) || - test_bit(IREQ_COMPLETE_IN_TARGET, &old_request->flags) || + target_done_already || test_bit(IDEV_GONE, &idev->flags)) { spin_unlock_irqrestore(&ihost->scic_lock, flags); diff --git a/drivers/scsi/mvsas/mv_sas.c b/drivers/scsi/mvsas/mv_sas.c index f14665a6293d..6b1b4e91e53f 100644 --- a/drivers/scsi/mvsas/mv_sas.c +++ b/drivers/scsi/mvsas/mv_sas.c @@ -1857,11 +1857,16 @@ int mvs_slot_complete(struct mvs_info *mvi, u32 rx_desc, u32 flags) goto out; } - /* error info record present */ - if (unlikely((rx_desc & RXQ_ERR) && (*(u64 *) slot->response))) { + /* + * error info record present; slot->response is 32 bit aligned but may + * not be 64 bit aligned, so check for zero in two 32 bit reads + */ + if (unlikely((rx_desc & RXQ_ERR) + && (*((u32 *)slot->response) + || *(((u32 *)slot->response) + 1)))) { mv_dprintk("port %d slot %d rx_desc %X has error info" "%016llX.\n", slot->port->sas_port.id, slot_idx, - rx_desc, (u64)(*(u64 *)slot->response)); + rx_desc, get_unaligned_le64(slot->response)); tstat->stat = mvs_slot_err(mvi, task, slot_idx); tstat->resp = SAS_TASK_COMPLETE; goto out; diff --git a/drivers/scsi/mvsas/mv_sas.h b/drivers/scsi/mvsas/mv_sas.h index 
60e2fb7f2dca..d6b19dc80bee 100644 --- a/drivers/scsi/mvsas/mv_sas.h +++ b/drivers/scsi/mvsas/mv_sas.h @@ -39,6 +39,7 @@ #include <linux/irq.h> #include <linux/slab.h> #include <linux/vmalloc.h> +#include <asm/unaligned.h> #include <scsi/libsas.h> #include <scsi/scsi.h> #include <scsi/scsi_tcq.h> diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c index 42ef481db942..ef0a5481b9dd 100644 --- a/drivers/scsi/qla2xxx/qla_iocb.c +++ b/drivers/scsi/qla2xxx/qla_iocb.c @@ -419,6 +419,8 @@ qla2x00_start_scsi(srb_t *sp) __constant_cpu_to_le16(CF_SIMPLE_TAG); break; } + } else { + cmd_pkt->control_flags = __constant_cpu_to_le16(CF_SIMPLE_TAG); } /* Load SCSI command packet. */ @@ -1307,11 +1309,11 @@ qla24xx_build_scsi_crc_2_iocbs(srb_t *sp, struct cmd_type_crc_2 *cmd_pkt, fcp_cmnd->task_attribute = TSK_ORDERED; break; default: - fcp_cmnd->task_attribute = 0; + fcp_cmnd->task_attribute = TSK_SIMPLE; break; } } else { - fcp_cmnd->task_attribute = 0; + fcp_cmnd->task_attribute = TSK_SIMPLE; } cmd_pkt->fcp_rsp_dseg_len = 0; /* Let response come in status iocb */ @@ -1525,7 +1527,12 @@ qla24xx_start_scsi(srb_t *sp) case ORDERED_QUEUE_TAG: cmd_pkt->task = TSK_ORDERED; break; + default: + cmd_pkt->task = TSK_SIMPLE; + break; } + } else { + cmd_pkt->task = TSK_SIMPLE; } /* Load SCSI command packet. 
*/ diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 80f39b8b0223..86fcf2c313ad 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -838,10 +838,17 @@ static int scsi_setup_flush_cmnd(struct scsi_device *sdp, struct request *rq) static void sd_unprep_fn(struct request_queue *q, struct request *rq) { + struct scsi_cmnd *SCpnt = rq->special; + if (rq->cmd_flags & REQ_DISCARD) { free_page((unsigned long)rq->buffer); rq->buffer = NULL; } + if (SCpnt->cmnd != rq->cmd) { + mempool_free(SCpnt->cmnd, sd_cdb_pool); + SCpnt->cmnd = NULL; + SCpnt->cmd_len = 0; + } } /** @@ -1720,21 +1727,6 @@ static int sd_done(struct scsi_cmnd *SCpnt) if (rq_data_dir(SCpnt->request) == READ && scsi_prot_sg_count(SCpnt)) sd_dif_complete(SCpnt, good_bytes); - if (scsi_host_dif_capable(sdkp->device->host, sdkp->protection_type) - == SD_DIF_TYPE2_PROTECTION && SCpnt->cmnd != SCpnt->request->cmd) { - - /* We have to print a failed command here as the - * extended CDB gets freed before scsi_io_completion() - * is called. - */ - if (result) - scsi_print_command(SCpnt); - - mempool_free(SCpnt->cmnd, sd_cdb_pool); - SCpnt->cmnd = NULL; - SCpnt->cmd_len = 0; - } - return good_bytes; } diff --git a/drivers/staging/android/logger.c b/drivers/staging/android/logger.c index 080abf2faf97..a8c344422a77 100644 --- a/drivers/staging/android/logger.c +++ b/drivers/staging/android/logger.c @@ -469,7 +469,7 @@ static ssize_t logger_aio_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t ppos) { struct logger_log *log = file_get_log(iocb->ki_filp); - size_t orig = log->w_off; + size_t orig; struct logger_entry header; struct timespec now; ssize_t ret = 0; @@ -490,6 +490,8 @@ static ssize_t logger_aio_write(struct kiocb *iocb, const struct iovec *iov, mutex_lock(&log->mutex); + orig = log->w_off; + /* * Fix up any readers, pulling them forward to the first readable * entry after (what will be) the new write offset. 
We do this now diff --git a/drivers/staging/comedi/TODO b/drivers/staging/comedi/TODO index b10f739b7e3e..fa8da9aada30 100644 --- a/drivers/staging/comedi/TODO +++ b/drivers/staging/comedi/TODO @@ -9,4 +9,4 @@ TODO: Please send patches to Greg Kroah-Hartman <greg@kroah.com> and copy: Ian Abbott <abbotti@mev.co.uk> - Frank Mori Hess <fmhess@users.sourceforge.net> + H Hartley Sweeten <hsweeten@visionengravers.com> diff --git a/drivers/staging/comedi/comedi_fops.c b/drivers/staging/comedi/comedi_fops.c index ad275567c09a..34b2414e468f 100644 --- a/drivers/staging/comedi/comedi_fops.c +++ b/drivers/staging/comedi/comedi_fops.c @@ -1412,22 +1412,19 @@ static int do_cmd_ioctl(struct comedi_device *dev, DPRINTK("subdevice busy\n"); return -EBUSY; } - s->busy = file; /* make sure channel/gain list isn't too long */ if (cmd.chanlist_len > s->len_chanlist) { DPRINTK("channel/gain list too long %u > %d\n", cmd.chanlist_len, s->len_chanlist); - ret = -EINVAL; - goto cleanup; + return -EINVAL; } /* make sure channel/gain list isn't too short */ if (cmd.chanlist_len < 1) { DPRINTK("channel/gain list too short %u < 1\n", cmd.chanlist_len); - ret = -EINVAL; - goto cleanup; + return -EINVAL; } async->cmd = cmd; @@ -1437,8 +1434,7 @@ static int do_cmd_ioctl(struct comedi_device *dev, kmalloc(async->cmd.chanlist_len * sizeof(int), GFP_KERNEL); if (!async->cmd.chanlist) { DPRINTK("allocation failed\n"); - ret = -ENOMEM; - goto cleanup; + return -ENOMEM; } if (copy_from_user(async->cmd.chanlist, user_chanlist, @@ -1490,6 +1486,9 @@ static int do_cmd_ioctl(struct comedi_device *dev, comedi_set_subdevice_runflags(s, ~0, SRF_USER | SRF_RUNNING); + /* set s->busy _after_ setting SRF_RUNNING flag to avoid race with + * comedi_read() or comedi_write() */ + s->busy = file; ret = s->do_cmd(dev, s); if (ret == 0) return 0; @@ -1704,6 +1703,7 @@ static int do_cancel_ioctl(struct comedi_device *dev, unsigned int arg, void *file) { struct comedi_subdevice *s; + int ret; if (arg >= 
dev->n_subdevices) return -EINVAL; @@ -1720,7 +1720,11 @@ static int do_cancel_ioctl(struct comedi_device *dev, unsigned int arg, if (s->busy != file) return -EBUSY; - return do_cancel(dev, s); + ret = do_cancel(dev, s); + if (comedi_get_subdevice_runflags(s) & SRF_USER) + wake_up_interruptible(&s->async->wait_head); + + return ret; } /* @@ -2052,11 +2056,13 @@ static ssize_t comedi_write(struct file *file, const char __user *buf, if (!comedi_is_subdevice_running(s)) { if (count == 0) { + mutex_lock(&dev->mutex); if (comedi_is_subdevice_in_error(s)) retval = -EPIPE; else retval = 0; do_become_nonbusy(dev, s); + mutex_unlock(&dev->mutex); } break; } @@ -2155,11 +2161,13 @@ static ssize_t comedi_read(struct file *file, char __user *buf, size_t nbytes, if (n == 0) { if (!comedi_is_subdevice_running(s)) { + mutex_lock(&dev->mutex); do_become_nonbusy(dev, s); if (comedi_is_subdevice_in_error(s)) retval = -EPIPE; else retval = 0; + mutex_unlock(&dev->mutex); break; } if (file->f_flags & O_NONBLOCK) { @@ -2197,9 +2205,11 @@ static ssize_t comedi_read(struct file *file, char __user *buf, size_t nbytes, buf += n; break; /* makes device work like a pipe */ } - if (comedi_is_subdevice_idle(s) && - async->buf_read_count - async->buf_write_count == 0) { - do_become_nonbusy(dev, s); + if (comedi_is_subdevice_idle(s)) { + mutex_lock(&dev->mutex); + if (async->buf_read_count - async->buf_write_count == 0) + do_become_nonbusy(dev, s); + mutex_unlock(&dev->mutex); } set_current_state(TASK_RUNNING); remove_wait_queue(&async->wait_head, &wait); diff --git a/drivers/staging/frontier/alphatrack.c b/drivers/staging/frontier/alphatrack.c index 5590ebf1da15..817f837b240d 100644 --- a/drivers/staging/frontier/alphatrack.c +++ b/drivers/staging/frontier/alphatrack.c @@ -827,11 +827,11 @@ static void usb_alphatrack_disconnect(struct usb_interface *intf) mutex_unlock(&dev->mtx); usb_alphatrack_delete(dev); } else { + atomic_set(&dev->writes_pending, 0); dev->intf = NULL; 
mutex_unlock(&dev->mtx); } - atomic_set(&dev->writes_pending, 0); mutex_unlock(&disconnect_mutex); dev_info(&intf->dev, "Alphatrack Surface #%d now disconnected\n", diff --git a/drivers/staging/gdm72xx/gdm_qos.c b/drivers/staging/gdm72xx/gdm_qos.c index b795353e8348..cc3692439a5c 100644 --- a/drivers/staging/gdm72xx/gdm_qos.c +++ b/drivers/staging/gdm72xx/gdm_qos.c @@ -250,8 +250,8 @@ static void send_qos_list(struct nic *nic, struct list_head *head) list_for_each_entry_safe(entry, n, head, list) { list_del(&entry->list); - free_qos_entry(entry); gdm_wimax_send_tx(entry->skb, entry->dev); + free_qos_entry(entry); } } diff --git a/drivers/staging/imx-drm/Kconfig b/drivers/staging/imx-drm/Kconfig index 6156e3f2a3e7..0f75afcc03c4 100644 --- a/drivers/staging/imx-drm/Kconfig +++ b/drivers/staging/imx-drm/Kconfig @@ -33,7 +33,6 @@ config DRM_IMX_TVE config DRM_IMX_LDB tristate "Support for LVDS displays" depends on DRM_IMX - select OF_VIDEOMODE help Choose this to enable the internal LVDS Display Bridge (LDB) found on i.MX53 and i.MX6 processors. 
diff --git a/drivers/staging/tidspbridge/pmgr/dbll.c b/drivers/staging/tidspbridge/pmgr/dbll.c index c191ae203565..41e88abe47af 100644 --- a/drivers/staging/tidspbridge/pmgr/dbll.c +++ b/drivers/staging/tidspbridge/pmgr/dbll.c @@ -1120,8 +1120,11 @@ static int dbll_rmm_alloc(struct dynamic_loader_allocate *this, or DYN_EXTERNAL, then mem granularity information is present within the section name - only process if there are at least three tokens within the section name (just a minor optimization) */ - if (count >= 3) - strict_strtol(sz_last_token, 10, (long *)&req); + if (count >= 3) { + status = kstrtos32(sz_last_token, 10, &req); + if (status) + goto func_cont; + } if ((req == 0) || (req == 1)) { if (strcmp(sz_sec_last_token, "DYN_DARAM") == 0) { diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c index 5ef6508a587c..7ebf91d347f5 100644 --- a/drivers/staging/zram/zram_drv.c +++ b/drivers/staging/zram/zram_drv.c @@ -528,8 +528,11 @@ static void zram_reset_device(struct zram *zram) size_t index; struct zram_meta *meta; - if (!zram->init_done) + down_write(&zram->init_lock); + if (!zram->init_done) { + up_write(&zram->init_lock); return; + } meta = zram->meta; zram->init_done = 0; @@ -550,6 +553,7 @@ static void zram_reset_device(struct zram *zram) zram->disksize = 0; set_capacity(zram->disk, 0); + up_write(&zram->init_lock); } static void zram_init_device(struct zram *zram, struct zram_meta *meta) diff --git a/drivers/thermal/x86_pkg_temp_thermal.c b/drivers/thermal/x86_pkg_temp_thermal.c index 5de56f671a9d..f36950e4134f 100644 --- a/drivers/thermal/x86_pkg_temp_thermal.c +++ b/drivers/thermal/x86_pkg_temp_thermal.c @@ -54,6 +54,8 @@ MODULE_PARM_DESC(notify_delay_ms, * is some wrong values returned by cpuid for number of thresholds. 
*/ #define MAX_NUMBER_OF_TRIPS 2 +/* Limit number of package temp zones */ +#define MAX_PKG_TEMP_ZONE_IDS 256 struct phy_dev_entry { struct list_head list; @@ -394,12 +396,16 @@ static int pkg_temp_thermal_device_add(unsigned int cpu) char buffer[30]; int thres_count; u32 eax, ebx, ecx, edx; + u8 *temp; cpuid(6, &eax, &ebx, &ecx, &edx); thres_count = ebx & 0x07; if (!thres_count) return -ENODEV; + if (topology_physical_package_id(cpu) > MAX_PKG_TEMP_ZONE_IDS) + return -ENODEV; + thres_count = clamp_val(thres_count, 0, MAX_NUMBER_OF_TRIPS); err = get_tj_max(cpu, &tj_max); @@ -417,13 +423,14 @@ static int pkg_temp_thermal_device_add(unsigned int cpu) spin_lock(&pkg_work_lock); if (topology_physical_package_id(cpu) > max_phy_id) max_phy_id = topology_physical_package_id(cpu); - pkg_work_scheduled = krealloc(pkg_work_scheduled, - (max_phy_id+1) * sizeof(u8), GFP_ATOMIC); - if (!pkg_work_scheduled) { + temp = krealloc(pkg_work_scheduled, + (max_phy_id+1) * sizeof(u8), GFP_ATOMIC); + if (!temp) { spin_unlock(&pkg_work_lock); err = -ENOMEM; goto err_ret_free; } + pkg_work_scheduled = temp; pkg_work_scheduled[topology_physical_package_id(cpu)] = 0; spin_unlock(&pkg_work_lock); @@ -511,7 +518,7 @@ static int get_core_online(unsigned int cpu) /* Check if there is already an instance for this package */ if (!phdev) { - if (!cpu_has(c, X86_FEATURE_DTHERM) && + if (!cpu_has(c, X86_FEATURE_DTHERM) || !cpu_has(c, X86_FEATURE_PTS)) return -ENODEV; if (pkg_temp_thermal_device_add(cpu)) @@ -562,7 +569,7 @@ static struct notifier_block pkg_temp_thermal_notifier __refdata = { }; static const struct x86_cpu_id __initconst pkg_temp_thermal_ids[] = { - { X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, X86_FEATURE_DTHERM }, + { X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, X86_FEATURE_PTS }, {} }; MODULE_DEVICE_TABLE(x86cpu, pkg_temp_thermal_ids); @@ -592,7 +599,6 @@ static int __init pkg_temp_thermal_init(void) return 0; err_ret: - get_online_cpus(); for_each_online_cpu(i) 
put_core_offline(i); put_online_cpus(); diff --git a/drivers/tty/serial/8250/8250_early.c b/drivers/tty/serial/8250/8250_early.c index 721904f8efa9..946ddd2b3a54 100644 --- a/drivers/tty/serial/8250/8250_early.c +++ b/drivers/tty/serial/8250/8250_early.c @@ -193,7 +193,8 @@ static int __init parse_options(struct early_serial8250_device *device, if (options) { options++; device->baud = simple_strtoul(options, NULL, 0); - length = min(strcspn(options, " "), sizeof(device->options)); + length = min(strcspn(options, " ") + 1, + sizeof(device->options)); strlcpy(device->options, options, length); } else { device->baud = probe_baud(port); diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig index 5e3d68917ffe..1456673bcca0 100644 --- a/drivers/tty/serial/Kconfig +++ b/drivers/tty/serial/Kconfig @@ -277,7 +277,7 @@ config SERIAL_TEGRA select SERIAL_CORE help Support for the on-chip UARTs on the NVIDIA Tegra series SOCs - providing /dev/ttyHS0, 1, 2, 3 and 4 (note, some machines may not + providing /dev/ttyTHS0, 1, 2, 3 and 4 (note, some machines may not provide all of these ports, depending on how the serial port are enabled). This driver uses the APB DMA to achieve higher baudrate and better performance. 
diff --git a/drivers/tty/synclinkmp.c b/drivers/tty/synclinkmp.c index ff171384ea52..dc6e96996ead 100644 --- a/drivers/tty/synclinkmp.c +++ b/drivers/tty/synclinkmp.c @@ -3478,7 +3478,7 @@ static int alloc_buf_list(SLMP_INFO *info) for ( i = 0; i < info->rx_buf_count; i++ ) { /* calculate and store physical address of this buffer entry */ info->rx_buf_list_ex[i].phys_entry = - info->buffer_list_phys + (i * sizeof(SCABUFSIZE)); + info->buffer_list_phys + (i * SCABUFSIZE); /* calculate and store physical address of */ /* next entry in cirular list of entries */ diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 4191db32f12c..4a8a1d68002c 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -668,6 +668,15 @@ resubmit: static inline int hub_clear_tt_buffer (struct usb_device *hdev, u16 devinfo, u16 tt) { + /* Need to clear both directions for control ep */ + if (((devinfo >> 11) & USB_ENDPOINT_XFERTYPE_MASK) == + USB_ENDPOINT_XFER_CONTROL) { + int status = usb_control_msg(hdev, usb_sndctrlpipe(hdev, 0), + HUB_CLEAR_TT_BUFFER, USB_RT_PORT, + devinfo ^ 0x8000, tt, NULL, 0, 1000); + if (status) + return status; + } return usb_control_msg(hdev, usb_sndctrlpipe(hdev, 0), HUB_CLEAR_TT_BUFFER, USB_RT_PORT, devinfo, tt, NULL, 0, 1000); @@ -2848,6 +2857,15 @@ static int usb_disable_function_remotewakeup(struct usb_device *udev) USB_CTRL_SET_TIMEOUT); } +/* Count of wakeup-enabled devices at or below udev */ +static unsigned wakeup_enabled_descendants(struct usb_device *udev) +{ + struct usb_hub *hub = usb_hub_to_struct_hub(udev); + + return udev->do_remote_wakeup + + (hub ? hub->wakeup_enabled_descendants : 0); +} + /* * usb_port_suspend - suspend a usb device's upstream port * @udev: device that's no longer in active use, not a root hub @@ -2888,8 +2906,8 @@ static int usb_disable_function_remotewakeup(struct usb_device *udev) * Linux (2.6) currently has NO mechanisms to initiate that: no khubd * timer, no SRP, no requests through sysfs. 
* - * If Runtime PM isn't enabled or used, non-SuperSpeed devices really get - * suspended only when their bus goes into global suspend (i.e., the root + * If Runtime PM isn't enabled or used, non-SuperSpeed devices may not get + * suspended until their bus goes into global suspend (i.e., the root * hub is suspended). Nevertheless, we change @udev->state to * USB_STATE_SUSPENDED as this is the device's "logical" state. The actual * upstream port setting is stored in @udev->port_is_suspended. @@ -2960,15 +2978,21 @@ int usb_port_suspend(struct usb_device *udev, pm_message_t msg) /* see 7.1.7.6 */ if (hub_is_superspeed(hub->hdev)) status = hub_set_port_link_state(hub, port1, USB_SS_PORT_LS_U3); - else if (PMSG_IS_AUTO(msg)) - status = set_port_feature(hub->hdev, port1, - USB_PORT_FEAT_SUSPEND); + /* * For system suspend, we do not need to enable the suspend feature * on individual USB-2 ports. The devices will automatically go * into suspend a few ms after the root hub stops sending packets. * The USB 2.0 spec calls this "global suspend". + * + * However, many USB hubs have a bug: They don't relay wakeup requests + * from a downstream port if the port's suspend feature isn't on. + * Therefore we will turn on the suspend feature if udev or any of its + * descendants is enabled for remote wakeup. */ + else if (PMSG_IS_AUTO(msg) || wakeup_enabled_descendants(udev) > 0) + status = set_port_feature(hub->hdev, port1, + USB_PORT_FEAT_SUSPEND); else { really_suspend = false; status = 0; @@ -3003,15 +3027,16 @@ int usb_port_suspend(struct usb_device *udev, pm_message_t msg) if (!PMSG_IS_AUTO(msg)) status = 0; } else { - /* device has up to 10 msec to fully suspend */ dev_dbg(&udev->dev, "usb %ssuspend, wakeup %d\n", (PMSG_IS_AUTO(msg) ? 
"auto-" : ""), udev->do_remote_wakeup); - usb_set_device_state(udev, USB_STATE_SUSPENDED); if (really_suspend) { udev->port_is_suspended = 1; + + /* device has up to 10 msec to fully suspend */ msleep(10); } + usb_set_device_state(udev, USB_STATE_SUSPENDED); } /* @@ -3293,7 +3318,11 @@ static int hub_suspend(struct usb_interface *intf, pm_message_t msg) unsigned port1; int status; - /* Warn if children aren't already suspended */ + /* + * Warn if children aren't already suspended. + * Also, add up the number of wakeup-enabled descendants. + */ + hub->wakeup_enabled_descendants = 0; for (port1 = 1; port1 <= hdev->maxchild; port1++) { struct usb_device *udev; @@ -3303,6 +3332,9 @@ static int hub_suspend(struct usb_interface *intf, pm_message_t msg) if (PMSG_IS_AUTO(msg)) return -EBUSY; } + if (udev) + hub->wakeup_enabled_descendants += + wakeup_enabled_descendants(udev); } if (hdev->do_remote_wakeup && hub->quirk_check_port_auto_suspend) { diff --git a/drivers/usb/core/hub.h b/drivers/usb/core/hub.h index 6508e02b3dac..4e4790dea343 100644 --- a/drivers/usb/core/hub.h +++ b/drivers/usb/core/hub.h @@ -59,6 +59,9 @@ struct usb_hub { struct usb_tt tt; /* Transaction Translator */ unsigned mA_per_port; /* current for each child */ +#ifdef CONFIG_PM + unsigned wakeup_enabled_descendants; +#endif unsigned limited_power:1; unsigned quiescing:1; diff --git a/drivers/usb/dwc3/Kconfig b/drivers/usb/dwc3/Kconfig index 757aa18027d0..2378958ea63e 100644 --- a/drivers/usb/dwc3/Kconfig +++ b/drivers/usb/dwc3/Kconfig @@ -1,6 +1,6 @@ config USB_DWC3 tristate "DesignWare USB3 DRD Core Support" - depends on (USB || USB_GADGET) && GENERIC_HARDIRQS + depends on (USB || USB_GADGET) && GENERIC_HARDIRQS && HAS_DMA select USB_XHCI_PLATFORM if USB_SUPPORT && USB_XHCI_HCD help Say Y or M here if your system has a Dual Role SuperSpeed diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index c35d49d39b76..358375e0b291 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c 
@@ -450,7 +450,7 @@ static int dwc3_probe(struct platform_device *pdev) } if (IS_ERR(dwc->usb3_phy)) { - ret = PTR_ERR(dwc->usb2_phy); + ret = PTR_ERR(dwc->usb3_phy); /* * if -ENXIO is returned, it means PHY layer wasn't diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h index b69d322e3cab..27dad993b007 100644 --- a/drivers/usb/dwc3/core.h +++ b/drivers/usb/dwc3/core.h @@ -759,8 +759,8 @@ struct dwc3 { struct dwc3_event_type { u32 is_devspec:1; - u32 type:6; - u32 reserved8_31:25; + u32 type:7; + u32 reserved8_31:24; } __packed; #define DWC3_DEPEVT_XFERCOMPLETE 0x01 diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index b5e5b35df49c..f77083fedc68 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1584,6 +1584,7 @@ err1: __dwc3_gadget_ep_disable(dwc->eps[0]); err0: + dwc->gadget_driver = NULL; spin_unlock_irqrestore(&dwc->lock, flags); return ret; diff --git a/drivers/usb/gadget/Kconfig b/drivers/usb/gadget/Kconfig index 62f6802f6e0f..8e9368330b10 100644 --- a/drivers/usb/gadget/Kconfig +++ b/drivers/usb/gadget/Kconfig @@ -193,6 +193,7 @@ config USB_FUSB300 Faraday usb device controller FUSB300 driver config USB_FOTG210_UDC + depends on HAS_DMA tristate "Faraday FOTG210 USB Peripheral Controller" help Faraday USB2.0 OTG controller which can be configured as @@ -328,13 +329,14 @@ config USB_S3C_HSUDC config USB_MV_UDC tristate "Marvell USB2.0 Device Controller" - depends on GENERIC_HARDIRQS + depends on GENERIC_HARDIRQS && HAS_DMA help Marvell Socs (including PXA and MMP series) include a high speed USB2.0 OTG controller, which can be configured as high speed or full speed USB peripheral. 
config USB_MV_U3D + depends on HAS_DMA tristate "MARVELL PXA2128 USB 3.0 controller" help MARVELL PXA2128 Processor series include a super speed USB3.0 device @@ -639,6 +641,7 @@ config USB_CONFIGFS_RNDIS depends on USB_CONFIGFS depends on NET select USB_U_ETHER + select USB_U_RNDIS select USB_F_RNDIS help Microsoft Windows XP bundles the "Remote NDIS" (RNDIS) protocol, diff --git a/drivers/usb/gadget/at91_udc.c b/drivers/usb/gadget/at91_udc.c index 073b938f9135..d9a6add0c852 100644 --- a/drivers/usb/gadget/at91_udc.c +++ b/drivers/usb/gadget/at91_udc.c @@ -870,8 +870,8 @@ static void clk_on(struct at91_udc *udc) if (udc->clocked) return; udc->clocked = 1; - clk_enable(udc->iclk); - clk_enable(udc->fclk); + clk_prepare_enable(udc->iclk); + clk_prepare_enable(udc->fclk); } static void clk_off(struct at91_udc *udc) @@ -880,8 +880,8 @@ static void clk_off(struct at91_udc *udc) return; udc->clocked = 0; udc->gadget.speed = USB_SPEED_UNKNOWN; - clk_disable(udc->fclk); - clk_disable(udc->iclk); + clk_disable_unprepare(udc->fclk); + clk_disable_unprepare(udc->iclk); } /* @@ -1725,7 +1725,7 @@ static int at91udc_probe(struct platform_device *pdev) /* init software state */ udc = &controller; udc->gadget.dev.parent = dev; - if (pdev->dev.of_node) + if (IS_ENABLED(CONFIG_OF) && pdev->dev.of_node) at91udc_of_init(udc, pdev->dev.of_node); else memcpy(&udc->board, dev->platform_data, @@ -1782,12 +1782,14 @@ static int at91udc_probe(struct platform_device *pdev) } /* don't do anything until we have both gadget driver and VBUS */ - clk_enable(udc->iclk); + retval = clk_prepare_enable(udc->iclk); + if (retval) + goto fail1; at91_udp_write(udc, AT91_UDP_TXVC, AT91_UDP_TXVC_TXVDIS); at91_udp_write(udc, AT91_UDP_IDR, 0xffffffff); /* Clear all pending interrupts - UDP may be used by bootloader. 
*/ at91_udp_write(udc, AT91_UDP_ICR, 0xffffffff); - clk_disable(udc->iclk); + clk_disable_unprepare(udc->iclk); /* request UDC and maybe VBUS irqs */ udc->udp_irq = platform_get_irq(pdev, 0); diff --git a/drivers/usb/gadget/f_ecm.c b/drivers/usb/gadget/f_ecm.c index 5d3561ea1c15..edab45da3741 100644 --- a/drivers/usb/gadget/f_ecm.c +++ b/drivers/usb/gadget/f_ecm.c @@ -959,8 +959,11 @@ static struct usb_function_instance *ecm_alloc_inst(void) mutex_init(&opts->lock); opts->func_inst.free_func_inst = ecm_free_inst; opts->net = gether_setup_default(); - if (IS_ERR(opts->net)) - return ERR_PTR(PTR_ERR(opts->net)); + if (IS_ERR(opts->net)) { + struct net_device *net = opts->net; + kfree(opts); + return ERR_CAST(net); + } config_group_init_type_name(&opts->func_inst.group, "", &ecm_func_type); diff --git a/drivers/usb/gadget/f_eem.c b/drivers/usb/gadget/f_eem.c index 90ee8022e8d8..d00392d879db 100644 --- a/drivers/usb/gadget/f_eem.c +++ b/drivers/usb/gadget/f_eem.c @@ -593,8 +593,11 @@ static struct usb_function_instance *eem_alloc_inst(void) mutex_init(&opts->lock); opts->func_inst.free_func_inst = eem_free_inst; opts->net = gether_setup_default(); - if (IS_ERR(opts->net)) - return ERR_CAST(opts->net); + if (IS_ERR(opts->net)) { + struct net_device *net = opts->net; + kfree(opts); + return ERR_CAST(net); + } config_group_init_type_name(&opts->func_inst.group, "", &eem_func_type); diff --git a/drivers/usb/gadget/f_ncm.c b/drivers/usb/gadget/f_ncm.c index 952177f7eb9b..1c28fe13328a 100644 --- a/drivers/usb/gadget/f_ncm.c +++ b/drivers/usb/gadget/f_ncm.c @@ -1350,8 +1350,11 @@ static struct usb_function_instance *ncm_alloc_inst(void) mutex_init(&opts->lock); opts->func_inst.free_func_inst = ncm_free_inst; opts->net = gether_setup_default(); - if (IS_ERR(opts->net)) - return ERR_PTR(PTR_ERR(opts->net)); + if (IS_ERR(opts->net)) { + struct net_device *net = opts->net; + kfree(opts); + return ERR_CAST(net); + } config_group_init_type_name(&opts->func_inst.group, "", 
&ncm_func_type); diff --git a/drivers/usb/gadget/f_phonet.c b/drivers/usb/gadget/f_phonet.c index 7944fb0efe3b..1bf26e9f38cd 100644 --- a/drivers/usb/gadget/f_phonet.c +++ b/drivers/usb/gadget/f_phonet.c @@ -656,8 +656,11 @@ static struct usb_function_instance *phonet_alloc_inst(void) opts->func_inst.free_func_inst = phonet_free_inst; opts->net = gphonet_setup_default(); - if (IS_ERR(opts->net)) - return ERR_PTR(PTR_ERR(opts->net)); + if (IS_ERR(opts->net)) { + struct net_device *net = opts->net; + kfree(opts); + return ERR_CAST(net); + } config_group_init_type_name(&opts->func_inst.group, "", &phonet_func_type); diff --git a/drivers/usb/gadget/f_rndis.c b/drivers/usb/gadget/f_rndis.c index 191df35ae69d..717ed7f95639 100644 --- a/drivers/usb/gadget/f_rndis.c +++ b/drivers/usb/gadget/f_rndis.c @@ -963,8 +963,11 @@ static struct usb_function_instance *rndis_alloc_inst(void) mutex_init(&opts->lock); opts->func_inst.free_func_inst = rndis_free_inst; opts->net = gether_setup_default(); - if (IS_ERR(opts->net)) - return ERR_CAST(opts->net); + if (IS_ERR(opts->net)) { + struct net_device *net = opts->net; + kfree(opts); + return ERR_CAST(net); + } config_group_init_type_name(&opts->func_inst.group, "", &rndis_func_type); diff --git a/drivers/usb/gadget/f_subset.c b/drivers/usb/gadget/f_subset.c index 5601e1d96c4f..7c8674fa7e80 100644 --- a/drivers/usb/gadget/f_subset.c +++ b/drivers/usb/gadget/f_subset.c @@ -505,8 +505,11 @@ static struct usb_function_instance *geth_alloc_inst(void) mutex_init(&opts->lock); opts->func_inst.free_func_inst = geth_free_inst; opts->net = gether_setup_default(); - if (IS_ERR(opts->net)) - return ERR_CAST(opts->net); + if (IS_ERR(opts->net)) { + struct net_device *net = opts->net; + kfree(opts); + return ERR_CAST(net); + } config_group_init_type_name(&opts->func_inst.group, "", &gether_func_type); diff --git a/drivers/usb/gadget/fotg210-udc.c b/drivers/usb/gadget/fotg210-udc.c index cce5535b1dc6..10cd18ddd0d4 100644 --- 
a/drivers/usb/gadget/fotg210-udc.c +++ b/drivers/usb/gadget/fotg210-udc.c @@ -1074,7 +1074,7 @@ static struct usb_gadget_ops fotg210_gadget_ops = { .udc_stop = fotg210_udc_stop, }; -static int __exit fotg210_udc_remove(struct platform_device *pdev) +static int fotg210_udc_remove(struct platform_device *pdev) { struct fotg210_udc *fotg210 = dev_get_drvdata(&pdev->dev); @@ -1088,7 +1088,7 @@ static int __exit fotg210_udc_remove(struct platform_device *pdev) return 0; } -static int __init fotg210_udc_probe(struct platform_device *pdev) +static int fotg210_udc_probe(struct platform_device *pdev) { struct resource *res, *ires; struct fotg210_udc *fotg210 = NULL; diff --git a/drivers/usb/gadget/mv_u3d_core.c b/drivers/usb/gadget/mv_u3d_core.c index 07fdb3eaf48a..ec6a2d290398 100644 --- a/drivers/usb/gadget/mv_u3d_core.c +++ b/drivers/usb/gadget/mv_u3d_core.c @@ -1776,7 +1776,7 @@ static int mv_u3d_remove(struct platform_device *dev) kfree(u3d->eps); if (u3d->irq) - free_irq(u3d->irq, &dev->dev); + free_irq(u3d->irq, u3d); if (u3d->cap_regs) iounmap(u3d->cap_regs); @@ -1974,7 +1974,7 @@ static int mv_u3d_probe(struct platform_device *dev) return 0; err_unregister: - free_irq(u3d->irq, &dev->dev); + free_irq(u3d->irq, u3d); err_request_irq: err_get_irq: kfree(u3d->status_req); diff --git a/drivers/usb/gadget/udc-core.c b/drivers/usb/gadget/udc-core.c index ffd8fa541101..c28ac9872030 100644 --- a/drivers/usb/gadget/udc-core.c +++ b/drivers/usb/gadget/udc-core.c @@ -50,6 +50,8 @@ static DEFINE_MUTEX(udc_lock); /* ------------------------------------------------------------------------- */ +#ifdef CONFIG_HAS_DMA + int usb_gadget_map_request(struct usb_gadget *gadget, struct usb_request *req, int is_in) { @@ -99,6 +101,8 @@ void usb_gadget_unmap_request(struct usb_gadget *gadget, } EXPORT_SYMBOL_GPL(usb_gadget_unmap_request); +#endif /* CONFIG_HAS_DMA */ + /* ------------------------------------------------------------------------- */ void usb_gadget_set_state(struct 
usb_gadget *gadget, @@ -194,9 +198,11 @@ int usb_add_gadget_udc_release(struct device *parent, struct usb_gadget *gadget, dev_set_name(&gadget->dev, "gadget"); gadget->dev.parent = parent; +#ifdef CONFIG_HAS_DMA dma_set_coherent_mask(&gadget->dev, parent->coherent_dma_mask); gadget->dev.dma_parms = parent->dma_parms; gadget->dev.dma_mask = parent->dma_mask; +#endif if (release) gadget->dev.release = release; diff --git a/drivers/usb/host/ehci-hub.c b/drivers/usb/host/ehci-hub.c index 2b702772d04d..6dce37555c4f 100644 --- a/drivers/usb/host/ehci-hub.c +++ b/drivers/usb/host/ehci-hub.c @@ -874,6 +874,7 @@ static int ehci_hub_control ( ehci->reset_done[wIndex] = jiffies + msecs_to_jiffies(20); usb_hcd_start_port_resume(&hcd->self, wIndex); + set_bit(wIndex, &ehci->resuming_ports); /* check the port again */ mod_timer(&ehci_to_hcd(ehci)->rh_timer, ehci->reset_done[wIndex]); diff --git a/drivers/usb/host/pci-quirks.h b/drivers/usb/host/pci-quirks.h index 4b8a2092432f..978c849f9c9a 100644 --- a/drivers/usb/host/pci-quirks.h +++ b/drivers/usb/host/pci-quirks.h @@ -13,6 +13,7 @@ void usb_enable_xhci_ports(struct pci_dev *xhci_pdev); void usb_disable_xhci_ports(struct pci_dev *xhci_pdev); void sb800_prefetch(struct device *dev, int on); #else +struct pci_dev; static inline void usb_amd_quirk_pll_disable(void) {} static inline void usb_amd_quirk_pll_enable(void) {} static inline void usb_amd_dev_put(void) {} diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index cc24e39b97d5..f00cb203faea 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -93,7 +93,6 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) } if (pdev->vendor == PCI_VENDOR_ID_INTEL && pdev->device == PCI_DEVICE_ID_INTEL_PANTHERPOINT_XHCI) { - xhci->quirks |= XHCI_SPURIOUS_SUCCESS; xhci->quirks |= XHCI_EP_LIMIT_QUIRK; xhci->limit_active_eps = 64; xhci->quirks |= XHCI_SW_BW_CHECKING; diff --git a/drivers/usb/host/xhci-ring.c 
b/drivers/usb/host/xhci-ring.c index 1e57eafa6910..5b08cd85f8e7 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -434,7 +434,7 @@ static void ring_doorbell_for_active_rings(struct xhci_hcd *xhci, /* A ring has pending URBs if its TD list is not empty */ if (!(ep->ep_state & EP_HAS_STREAMS)) { - if (!(list_empty(&ep->ring->td_list))) + if (ep->ring && !(list_empty(&ep->ring->td_list))) xhci_ring_ep_doorbell(xhci, slot_id, ep_index, 0); return; } diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 2c49f00260ca..41eb4fc33453 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -329,7 +329,7 @@ static void xhci_cleanup_msix(struct xhci_hcd *xhci) return; } -static void xhci_msix_sync_irqs(struct xhci_hcd *xhci) +static void __maybe_unused xhci_msix_sync_irqs(struct xhci_hcd *xhci) { int i; @@ -1181,9 +1181,6 @@ static int xhci_check_args(struct usb_hcd *hcd, struct usb_device *udev, } xhci = hcd_to_xhci(hcd); - if (xhci->xhc_state & XHCI_STATE_HALTED) - return -ENODEV; - if (check_virt_dev) { if (!udev->slot_id || !xhci->devs[udev->slot_id]) { printk(KERN_DEBUG "xHCI %s called with unaddressed " @@ -1199,6 +1196,9 @@ static int xhci_check_args(struct usb_hcd *hcd, struct usb_device *udev, } } + if (xhci->xhc_state & XHCI_STATE_HALTED) + return -ENODEV; + return 1; } @@ -3898,7 +3898,7 @@ int xhci_find_raw_port_number(struct usb_hcd *hcd, int port1) * Issue an Evaluate Context command to change the Maximum Exit Latency in the * slot context. If that succeeds, store the new MEL in the xhci_virt_device. 
*/ -static int xhci_change_max_exit_latency(struct xhci_hcd *xhci, +static int __maybe_unused xhci_change_max_exit_latency(struct xhci_hcd *xhci, struct usb_device *udev, u16 max_exit_latency) { struct xhci_virt_device *virt_dev; @@ -4892,6 +4892,13 @@ int xhci_gen_setup(struct usb_hcd *hcd, xhci_get_quirks_t get_quirks) get_quirks(dev, xhci); + /* In xhci controllers which follow xhci 1.0 spec gives a spurious + * success event after a short transfer. This quirk will ignore such + * spurious event. + */ + if (xhci->hci_version > 0x96) + xhci->quirks |= XHCI_SPURIOUS_SUCCESS; + /* Make sure the HC is halted. */ retval = xhci_halt(xhci); if (retval) diff --git a/drivers/usb/misc/sisusbvga/sisusb.c b/drivers/usb/misc/sisusbvga/sisusb.c index c21386ec5d35..de98906f786d 100644 --- a/drivers/usb/misc/sisusbvga/sisusb.c +++ b/drivers/usb/misc/sisusbvga/sisusb.c @@ -3247,6 +3247,7 @@ static const struct usb_device_id sisusb_table[] = { { USB_DEVICE(0x0711, 0x0903) }, { USB_DEVICE(0x0711, 0x0918) }, { USB_DEVICE(0x0711, 0x0920) }, + { USB_DEVICE(0x0711, 0x0950) }, { USB_DEVICE(0x182d, 0x021c) }, { USB_DEVICE(0x182d, 0x0269) }, { } diff --git a/drivers/usb/phy/phy-omap-usb3.c b/drivers/usb/phy/phy-omap-usb3.c index efe6e1464f45..a2fb30bbb971 100644 --- a/drivers/usb/phy/phy-omap-usb3.c +++ b/drivers/usb/phy/phy-omap-usb3.c @@ -71,9 +71,9 @@ static struct usb_dpll_params omap_usb3_dpll_params[NUM_SYS_CLKS] = { {1250, 5, 4, 20, 0}, /* 12 MHz */ {3125, 20, 4, 20, 0}, /* 16.8 MHz */ {1172, 8, 4, 20, 65537}, /* 19.2 MHz */ + {1000, 7, 4, 10, 0}, /* 20 MHz */ {1250, 12, 4, 20, 0}, /* 26 MHz */ {3125, 47, 4, 20, 92843}, /* 38.4 MHz */ - {1000, 7, 4, 10, 0}, /* 20 MHz */ }; diff --git a/drivers/usb/phy/phy-samsung-usb2.c b/drivers/usb/phy/phy-samsung-usb2.c index 1011c16ade7e..758b86d0fcb3 100644 --- a/drivers/usb/phy/phy-samsung-usb2.c +++ b/drivers/usb/phy/phy-samsung-usb2.c @@ -388,7 +388,7 @@ static int samsung_usb2phy_probe(struct platform_device *pdev) clk = devm_clk_get(dev, 
"otg"); if (IS_ERR(clk)) { - dev_err(dev, "Failed to get otg clock\n"); + dev_err(dev, "Failed to get usbhost/otg clock\n"); return PTR_ERR(clk); } diff --git a/drivers/usb/renesas_usbhs/mod_gadget.c b/drivers/usb/renesas_usbhs/mod_gadget.c index ed4949faa70d..805940c37353 100644 --- a/drivers/usb/renesas_usbhs/mod_gadget.c +++ b/drivers/usb/renesas_usbhs/mod_gadget.c @@ -855,10 +855,6 @@ static int usbhsg_gadget_stop(struct usb_gadget *gadget, struct usbhsg_gpriv *gpriv = usbhsg_gadget_to_gpriv(gadget); struct usbhs_priv *priv = usbhsg_gpriv_to_priv(gpriv); - if (!driver || - !driver->unbind) - return -EINVAL; - usbhsg_try_stop(priv, USBHSG_STATUS_REGISTERD); gpriv->driver = NULL; diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index d6ef2f8da37d..0eae4ba3760e 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -53,6 +53,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x0489, 0xE000) }, /* Pirelli Broadband S.p.A, DP-L10 SIP/GSM Mobile */ { USB_DEVICE(0x0489, 0xE003) }, /* Pirelli Broadband S.p.A, DP-L10 SIP/GSM Mobile */ { USB_DEVICE(0x0745, 0x1000) }, /* CipherLab USB CCD Barcode Scanner 1000 */ + { USB_DEVICE(0x0846, 0x1100) }, /* NetGear Managed Switch M4100 series, M5300 series, M7100 series */ { USB_DEVICE(0x08e6, 0x5501) }, /* Gemalto Prox-PU/CU contactless smartcard reader */ { USB_DEVICE(0x08FD, 0x000A) }, /* Digianswer A/S , ZigBee/802.15.4 MAC Device */ { USB_DEVICE(0x0BED, 0x1100) }, /* MEI (TM) Cashflow-SC Bill/Voucher Acceptor */ @@ -118,6 +119,8 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0x85F8) }, /* Virtenio Preon32 */ { USB_DEVICE(0x10C4, 0x8664) }, /* AC-Services CAN-IF */ { USB_DEVICE(0x10C4, 0x8665) }, /* AC-Services OBD-IF */ + { USB_DEVICE(0x10C4, 0x88A4) }, /* MMB Networks ZigBee USB Device */ + { USB_DEVICE(0x10C4, 0x88A5) }, /* Planet Innovation Ingeni ZigBee USB Device */ { USB_DEVICE(0x10C4, 0xEA60) }, /* Silicon Labs factory default */ { 
USB_DEVICE(0x10C4, 0xEA61) }, /* Silicon Labs factory default */ { USB_DEVICE(0x10C4, 0xEA70) }, /* Silicon Labs factory default */ @@ -148,6 +151,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x17F4, 0xAAAA) }, /* Wavesense Jazz blood glucose meter */ { USB_DEVICE(0x1843, 0x0200) }, /* Vaisala USB Instrument Cable */ { USB_DEVICE(0x18EF, 0xE00F) }, /* ELV USB-I2C-Interface */ + { USB_DEVICE(0x1ADB, 0x0001) }, /* Schweitzer Engineering C662 Cable */ { USB_DEVICE(0x1BE3, 0x07A6) }, /* WAGO 750-923 USB Service Cable */ { USB_DEVICE(0x1E29, 0x0102) }, /* Festo CPX-USB */ { USB_DEVICE(0x1E29, 0x0501) }, /* Festo CMSP */ diff --git a/drivers/usb/serial/mos7840.c b/drivers/usb/serial/mos7840.c index 0a818b238508..603fb70dde80 100644 --- a/drivers/usb/serial/mos7840.c +++ b/drivers/usb/serial/mos7840.c @@ -905,20 +905,20 @@ static int mos7840_open(struct tty_struct *tty, struct usb_serial_port *port) status = mos7840_get_reg_sync(port, mos7840_port->SpRegOffset, &Data); if (status < 0) { dev_dbg(&port->dev, "Reading Spreg failed\n"); - return -1; + goto err; } Data |= 0x80; status = mos7840_set_reg_sync(port, mos7840_port->SpRegOffset, Data); if (status < 0) { dev_dbg(&port->dev, "writing Spreg failed\n"); - return -1; + goto err; } Data &= ~0x80; status = mos7840_set_reg_sync(port, mos7840_port->SpRegOffset, Data); if (status < 0) { dev_dbg(&port->dev, "writing Spreg failed\n"); - return -1; + goto err; } /* End of block to be checked */ @@ -927,7 +927,7 @@ static int mos7840_open(struct tty_struct *tty, struct usb_serial_port *port) &Data); if (status < 0) { dev_dbg(&port->dev, "Reading Controlreg failed\n"); - return -1; + goto err; } Data |= 0x08; /* Driver done bit */ Data |= 0x20; /* rx_disable */ @@ -935,7 +935,7 @@ static int mos7840_open(struct tty_struct *tty, struct usb_serial_port *port) mos7840_port->ControlRegOffset, Data); if (status < 0) { dev_dbg(&port->dev, "writing Controlreg failed\n"); - return -1; + goto err; } /* do register 
settings here */ /* Set all regs to the device default values. */ @@ -946,21 +946,21 @@ static int mos7840_open(struct tty_struct *tty, struct usb_serial_port *port) status = mos7840_set_uart_reg(port, INTERRUPT_ENABLE_REGISTER, Data); if (status < 0) { dev_dbg(&port->dev, "disabling interrupts failed\n"); - return -1; + goto err; } /* Set FIFO_CONTROL_REGISTER to the default value */ Data = 0x00; status = mos7840_set_uart_reg(port, FIFO_CONTROL_REGISTER, Data); if (status < 0) { dev_dbg(&port->dev, "Writing FIFO_CONTROL_REGISTER failed\n"); - return -1; + goto err; } Data = 0xcf; status = mos7840_set_uart_reg(port, FIFO_CONTROL_REGISTER, Data); if (status < 0) { dev_dbg(&port->dev, "Writing FIFO_CONTROL_REGISTER failed\n"); - return -1; + goto err; } Data = 0x03; @@ -1103,6 +1103,15 @@ static int mos7840_open(struct tty_struct *tty, struct usb_serial_port *port) /* mos7840_change_port_settings(mos7840_port,old_termios); */ return 0; +err: + for (j = 0; j < NUM_URBS; ++j) { + urb = mos7840_port->write_urb_pool[j]; + if (!urb) + continue; + kfree(urb->transfer_buffer); + usb_free_urb(urb); + } + return status; } /***************************************************************************** diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 5dd857de05b0..1cf6f125f5f0 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -341,17 +341,12 @@ static void option_instat_callback(struct urb *urb); #define OLIVETTI_VENDOR_ID 0x0b3c #define OLIVETTI_PRODUCT_OLICARD100 0xc000 #define OLIVETTI_PRODUCT_OLICARD145 0xc003 +#define OLIVETTI_PRODUCT_OLICARD200 0xc005 /* Celot products */ #define CELOT_VENDOR_ID 0x211f #define CELOT_PRODUCT_CT680M 0x6801 -/* ONDA Communication vendor id */ -#define ONDA_VENDOR_ID 0x1ee8 - -/* ONDA MT825UP HSDPA 14.2 modem */ -#define ONDA_MT825UP 0x000b - /* Samsung products */ #define SAMSUNG_VENDOR_ID 0x04e8 #define SAMSUNG_PRODUCT_GT_B3730 0x6889 @@ -444,7 +439,8 @@ static void 
option_instat_callback(struct urb *urb); /* Hyundai Petatel Inc. products */ #define PETATEL_VENDOR_ID 0x1ff4 -#define PETATEL_PRODUCT_NP10T 0x600e +#define PETATEL_PRODUCT_NP10T_600A 0x600a +#define PETATEL_PRODUCT_NP10T_600E 0x600e /* TP-LINK Incorporated products */ #define TPLINK_VENDOR_ID 0x2357 @@ -782,6 +778,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE(KYOCERA_VENDOR_ID, KYOCERA_PRODUCT_KPC650) }, { USB_DEVICE(KYOCERA_VENDOR_ID, KYOCERA_PRODUCT_KPC680) }, { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x6613)}, /* Onda H600/ZTE MF330 */ + { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x0023)}, /* ONYX 3G device */ { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x9000)}, /* SIMCom SIM5218 */ { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6280) }, /* BP3-USB & BP3-EXT HSDPA */ { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6008) }, @@ -817,7 +814,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0017, 0xff, 0xff, 0xff), .driver_info = (kernel_ulong_t)&net_intf3_blacklist }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0018, 0xff, 0xff, 0xff) }, - { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0019, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0019, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&net_intf3_blacklist }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0020, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0021, 0xff, 0xff, 0xff), .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, @@ -1256,8 +1254,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE(OLIVETTI_VENDOR_ID, OLIVETTI_PRODUCT_OLICARD100) }, { USB_DEVICE(OLIVETTI_VENDOR_ID, OLIVETTI_PRODUCT_OLICARD145) }, + { USB_DEVICE(OLIVETTI_VENDOR_ID, OLIVETTI_PRODUCT_OLICARD200) }, { USB_DEVICE(CELOT_VENDOR_ID, CELOT_PRODUCT_CT680M) }, /* CT-650 CDMA 450 1xEVDO modem */ - { USB_DEVICE(ONDA_VENDOR_ID, ONDA_MT825UP) }, /* ONDA MT825UP modem */ { 
USB_DEVICE_AND_INTERFACE_INFO(SAMSUNG_VENDOR_ID, SAMSUNG_PRODUCT_GT_B3730, USB_CLASS_CDC_DATA, 0x00, 0x00) }, /* Samsung GT-B3730 LTE USB modem.*/ { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEM600) }, { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEM610) }, @@ -1329,9 +1327,12 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_DC_4COM2, 0xff, 0x02, 0x01) }, { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_DC_4COM2, 0xff, 0x00, 0x00) }, { USB_DEVICE(CELLIENT_VENDOR_ID, CELLIENT_PRODUCT_MEN200) }, - { USB_DEVICE(PETATEL_VENDOR_ID, PETATEL_PRODUCT_NP10T) }, + { USB_DEVICE(PETATEL_VENDOR_ID, PETATEL_PRODUCT_NP10T_600A) }, + { USB_DEVICE(PETATEL_VENDOR_ID, PETATEL_PRODUCT_NP10T_600E) }, { USB_DEVICE(TPLINK_VENDOR_ID, TPLINK_PRODUCT_MA180), .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, + { USB_DEVICE(TPLINK_VENDOR_ID, 0x9000), /* TP-Link MA260 */ + .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, { USB_DEVICE(CHANGHONG_VENDOR_ID, CHANGHONG_PRODUCT_CH690) }, { USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x7d01, 0xff, 0x02, 0x01) }, /* D-Link DWM-156 (variant) */ { USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x7d01, 0xff, 0x00, 0x00) }, /* D-Link DWM-156 (variant) */ @@ -1339,6 +1340,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x7d02, 0xff, 0x00, 0x00) }, { USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x7d03, 0xff, 0x02, 0x01) }, { USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x7d03, 0xff, 0x00, 0x00) }, + { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e01, 0xff, 0xff, 0xff) }, /* D-Link DWM-152/C1 */ + { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e02, 0xff, 0xff, 0xff) }, /* D-Link DWM-156/C1 */ { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, option_ids); diff --git a/drivers/usb/serial/ti_usb_3410_5052.c b/drivers/usb/serial/ti_usb_3410_5052.c index 7182bb774b79..375b5a400b6f 100644 --- a/drivers/usb/serial/ti_usb_3410_5052.c +++ 
b/drivers/usb/serial/ti_usb_3410_5052.c @@ -371,7 +371,7 @@ static int ti_startup(struct usb_serial *serial) usb_set_serial_data(serial, tdev); /* determine device type */ - if (usb_match_id(serial->interface, ti_id_table_3410)) + if (serial->type == &ti_1port_device) tdev->td_is_3410 = 1; dev_dbg(&dev->dev, "%s - device type is %s\n", __func__, tdev->td_is_3410 ? "3410" : "5052"); diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h index 1799335288bd..c015f2c16729 100644 --- a/drivers/usb/storage/unusual_devs.h +++ b/drivers/usb/storage/unusual_devs.h @@ -665,6 +665,13 @@ UNUSUAL_DEV( 0x054c, 0x016a, 0x0000, 0x9999, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_FIX_INQUIRY ), +/* Submitted by Ren Bigcren <bigcren.ren@sonymobile.com> */ +UNUSUAL_DEV( 0x054c, 0x02a5, 0x0100, 0x0100, + "Sony Corp.", + "MicroVault Flash Drive", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_NO_READ_CAPACITY_16 ), + /* floppy reports multiple luns */ UNUSUAL_DEV( 0x055d, 0x2020, 0x0000, 0x0210, "SAMSUNG", diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 027be91db139..969a85960e9f 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -15,7 +15,6 @@ #include <linux/moduleparam.h> #include <linux/mutex.h> #include <linux/workqueue.h> -#include <linux/rcupdate.h> #include <linux/file.h> #include <linux/slab.h> @@ -346,12 +345,11 @@ static void handle_tx(struct vhost_net *net) struct vhost_net_ubuf_ref *uninitialized_var(ubufs); bool zcopy, zcopy_used; - /* TODO: check that we are running from vhost_worker? 
*/ - sock = rcu_dereference_check(vq->private_data, 1); + mutex_lock(&vq->mutex); + sock = vq->private_data; if (!sock) - return; + goto out; - mutex_lock(&vq->mutex); vhost_disable_notify(&net->dev, vq); hdr_size = nvq->vhost_hlen; @@ -461,7 +459,7 @@ static void handle_tx(struct vhost_net *net) break; } } - +out: mutex_unlock(&vq->mutex); } @@ -570,14 +568,14 @@ static void handle_rx(struct vhost_net *net) s16 headcount; size_t vhost_hlen, sock_hlen; size_t vhost_len, sock_len; - /* TODO: check that we are running from vhost_worker? */ - struct socket *sock = rcu_dereference_check(vq->private_data, 1); - - if (!sock) - return; + struct socket *sock; mutex_lock(&vq->mutex); + sock = vq->private_data; + if (!sock) + goto out; vhost_disable_notify(&net->dev, vq); + vhost_hlen = nvq->vhost_hlen; sock_hlen = nvq->sock_hlen; @@ -652,7 +650,7 @@ static void handle_rx(struct vhost_net *net) break; } } - +out: mutex_unlock(&vq->mutex); } @@ -750,8 +748,7 @@ static int vhost_net_enable_vq(struct vhost_net *n, struct vhost_poll *poll = n->poll + (nvq - n->vqs); struct socket *sock; - sock = rcu_dereference_protected(vq->private_data, - lockdep_is_held(&vq->mutex)); + sock = vq->private_data; if (!sock) return 0; @@ -764,10 +761,9 @@ static struct socket *vhost_net_stop_vq(struct vhost_net *n, struct socket *sock; mutex_lock(&vq->mutex); - sock = rcu_dereference_protected(vq->private_data, - lockdep_is_held(&vq->mutex)); + sock = vq->private_data; vhost_net_disable_vq(n, vq); - rcu_assign_pointer(vq->private_data, NULL); + vq->private_data = NULL; mutex_unlock(&vq->mutex); return sock; } @@ -923,8 +919,7 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) } /* start polling new socket */ - oldsock = rcu_dereference_protected(vq->private_data, - lockdep_is_held(&vq->mutex)); + oldsock = vq->private_data; if (sock != oldsock) { ubufs = vhost_net_ubuf_alloc(vq, sock && vhost_sock_zcopy(sock)); @@ -934,7 +929,7 @@ static long 
vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) } vhost_net_disable_vq(n, vq); - rcu_assign_pointer(vq->private_data, sock); + vq->private_data = sock; r = vhost_init_used(vq); if (r) goto err_used; @@ -968,7 +963,7 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) return 0; err_used: - rcu_assign_pointer(vq->private_data, oldsock); + vq->private_data = oldsock; vhost_net_enable_vq(n, vq); if (ubufs) vhost_net_ubuf_put_wait_and_free(ubufs); diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 06adf31a9248..0c27c7df1b09 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -902,19 +902,15 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) int head, ret; u8 target; + mutex_lock(&vq->mutex); /* * We can handle the vq only after the endpoint is setup by calling the * VHOST_SCSI_SET_ENDPOINT ioctl. - * - * TODO: Check that we are running from vhost_worker which acts - * as read-side critical section for vhost kind of RCU. 
- * See the comments in struct vhost_virtqueue in drivers/vhost/vhost.h */ - vs_tpg = rcu_dereference_check(vq->private_data, 1); + vs_tpg = vq->private_data; if (!vs_tpg) - return; + goto out; - mutex_lock(&vq->mutex); vhost_disable_notify(&vs->dev, vq); for (;;) { @@ -1064,6 +1060,7 @@ err_free: vhost_scsi_free_cmd(cmd); err_cmd: vhost_scsi_send_bad_target(vs, vq, head, out); +out: mutex_unlock(&vq->mutex); } @@ -1232,9 +1229,8 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs, sizeof(vs->vs_vhost_wwpn)); for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) { vq = &vs->vqs[i].vq; - /* Flushing the vhost_work acts as synchronize_rcu */ mutex_lock(&vq->mutex); - rcu_assign_pointer(vq->private_data, vs_tpg); + vq->private_data = vs_tpg; vhost_init_used(vq); mutex_unlock(&vq->mutex); } @@ -1313,9 +1309,8 @@ vhost_scsi_clear_endpoint(struct vhost_scsi *vs, if (match) { for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) { vq = &vs->vqs[i].vq; - /* Flushing the vhost_work acts as synchronize_rcu */ mutex_lock(&vq->mutex); - rcu_assign_pointer(vq->private_data, NULL); + vq->private_data = NULL; mutex_unlock(&vq->mutex); } } diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c index a73ea217f24d..339eae85859a 100644 --- a/drivers/vhost/test.c +++ b/drivers/vhost/test.c @@ -13,7 +13,6 @@ #include <linux/module.h> #include <linux/mutex.h> #include <linux/workqueue.h> -#include <linux/rcupdate.h> #include <linux/file.h> #include <linux/slab.h> @@ -200,9 +199,8 @@ static long vhost_test_run(struct vhost_test *n, int test) priv = test ? 
n : NULL; /* start polling new socket */ - oldpriv = rcu_dereference_protected(vq->private_data, - lockdep_is_held(&vq->mutex)); - rcu_assign_pointer(vq->private_data, priv); + oldpriv = vq->private_data; + vq->private_data = priv; r = vhost_init_used(&n->vqs[index]); diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index 42298cd23c73..4465ed5f316d 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -103,14 +103,8 @@ struct vhost_virtqueue { struct iovec iov[UIO_MAXIOV]; struct iovec *indirect; struct vring_used_elem *heads; - /* We use a kind of RCU to access private pointer. - * All readers access it from worker, which makes it possible to - * flush the vhost_work instead of synchronize_rcu. Therefore readers do - * not need to call rcu_read_lock/rcu_read_unlock: the beginning of - * vhost_work execution acts instead of rcu_read_lock() and the end of - * vhost_work execution acts instead of rcu_read_unlock(). - * Writers use virtqueue mutex. */ - void __rcu *private_data; + /* Protected by virtqueue mutex. 
*/ + void *private_data; /* Log write descriptors */ void __user *log_base; struct vhost_log *log; diff --git a/drivers/video/backlight/max8925_bl.c b/drivers/video/backlight/max8925_bl.c index 5ca11b066b7e..886e797f75f9 100644 --- a/drivers/video/backlight/max8925_bl.c +++ b/drivers/video/backlight/max8925_bl.c @@ -101,33 +101,37 @@ static const struct backlight_ops max8925_backlight_ops = { .get_brightness = max8925_backlight_get_brightness, }; -#ifdef CONFIG_OF -static int max8925_backlight_dt_init(struct platform_device *pdev, - struct max8925_backlight_pdata *pdata) +static void max8925_backlight_dt_init(struct platform_device *pdev) { struct device_node *nproot = pdev->dev.parent->of_node, *np; - int dual_string; + struct max8925_backlight_pdata *pdata; + u32 val; + + if (!nproot || !IS_ENABLED(CONFIG_OF)) + return; + + pdata = devm_kzalloc(&pdev->dev, + sizeof(struct max8925_backlight_pdata), + GFP_KERNEL); + if (!pdata) + return; - if (!nproot) - return -ENODEV; np = of_find_node_by_name(nproot, "backlight"); if (!np) { dev_err(&pdev->dev, "failed to find backlight node\n"); - return -ENODEV; + return; } - of_property_read_u32(np, "maxim,max8925-dual-string", &dual_string); - pdata->dual_string = dual_string; - return 0; + if (!of_property_read_u32(np, "maxim,max8925-dual-string", &val)) + pdata->dual_string = val; + + pdev->dev.platform_data = pdata; } -#else -#define max8925_backlight_dt_init(x, y) (-1) -#endif static int max8925_backlight_probe(struct platform_device *pdev) { struct max8925_chip *chip = dev_get_drvdata(pdev->dev.parent); - struct max8925_backlight_pdata *pdata = pdev->dev.platform_data; + struct max8925_backlight_pdata *pdata; struct max8925_backlight_data *data; struct backlight_device *bl; struct backlight_properties props; @@ -170,13 +174,10 @@ static int max8925_backlight_probe(struct platform_device *pdev) platform_set_drvdata(pdev, bl); value = 0; - if (pdev->dev.parent->of_node && !pdata) { - pdata = devm_kzalloc(&pdev->dev, - 
sizeof(struct max8925_backlight_pdata), - GFP_KERNEL); - max8925_backlight_dt_init(pdev, pdata); - } + if (!pdev->dev.platform_data) + max8925_backlight_dt_init(pdev); + pdata = pdev->dev.platform_data; if (pdata) { if (pdata->lxw_scl) value |= (1 << 7); diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 0eda52738ec4..72a5d5b04494 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1223,30 +1223,46 @@ static int fuse_direntplus_link(struct file *file, if (name.name[1] == '.' && name.len == 2) return 0; } + + if (invalid_nodeid(o->nodeid)) + return -EIO; + if (!fuse_valid_type(o->attr.mode)) + return -EIO; + fc = get_fuse_conn(dir); name.hash = full_name_hash(name.name, name.len); dentry = d_lookup(parent, &name); - if (dentry && dentry->d_inode) { + if (dentry) { inode = dentry->d_inode; - if (get_node_id(inode) == o->nodeid) { + if (!inode) { + d_drop(dentry); + } else if (get_node_id(inode) != o->nodeid || + ((o->attr.mode ^ inode->i_mode) & S_IFMT)) { + err = d_invalidate(dentry); + if (err) + goto out; + } else if (is_bad_inode(inode)) { + err = -EIO; + goto out; + } else { struct fuse_inode *fi; fi = get_fuse_inode(inode); spin_lock(&fc->lock); fi->nlookup++; spin_unlock(&fc->lock); + fuse_change_attributes(inode, &o->attr, + entry_attr_timeout(o), + attr_version); + /* * The other branch to 'found' comes via fuse_iget() * which bumps nlookup inside */ goto found; } - err = d_invalidate(dentry); - if (err) - goto out; dput(dentry); - dentry = NULL; } dentry = d_alloc(parent, &name); @@ -1259,25 +1275,30 @@ static int fuse_direntplus_link(struct file *file, if (!inode) goto out; - alias = d_materialise_unique(dentry, inode); - err = PTR_ERR(alias); - if (IS_ERR(alias)) - goto out; + if (S_ISDIR(inode->i_mode)) { + mutex_lock(&fc->inst_mutex); + alias = fuse_d_add_directory(dentry, inode); + mutex_unlock(&fc->inst_mutex); + err = PTR_ERR(alias); + if (IS_ERR(alias)) { + iput(inode); + goto out; + } + } else { + alias = d_splice_alias(inode, dentry); + } + if 
(alias) { dput(dentry); dentry = alias; } found: - fuse_change_attributes(inode, &o->attr, entry_attr_timeout(o), - attr_version); - fuse_change_entry_timeout(dentry, o); err = 0; out: - if (dentry) - dput(dentry); + dput(dentry); return err; } diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index c74d6168db99..3850b018815f 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1118,11 +1118,11 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, len, ((char *)p - (char *)q) + 4); BUG(); } - len = (char *)p - (char *)q - (bmval_len << 2); *q++ = htonl(bmval0); *q++ = htonl(bmval1); if (bmval_len == 3) *q++ = htonl(bmval2); + len = (char *)p - (char *)(q + 1); *q = htonl(len); /* out: */ diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 8ff6a0019b0b..c827acb0e943 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -830,9 +830,10 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, flags = O_WRONLY|O_LARGEFILE; } *filp = dentry_open(&path, flags, current_cred()); - if (IS_ERR(*filp)) + if (IS_ERR(*filp)) { host_err = PTR_ERR(*filp); - else { + *filp = NULL; + } else { host_err = ima_file_check(*filp, may_flags); if (may_flags & NFSD_MAY_64BIT_COOKIE) diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h index 07d735a80a0f..e5869b50dc41 100644 --- a/fs/xfs/xfs_dinode.h +++ b/fs/xfs/xfs_dinode.h @@ -39,6 +39,9 @@ typedef struct xfs_timestamp { * There is a very similar struct icdinode in xfs_inode which matches the * layout of the first 96 bytes of this structure, but is kept in native * format instead of big endian. + * + * Note: di_flushiter is only used by v1/2 inodes - it's effectively a zeroed + * padding field for v3 inodes. 
*/ typedef struct xfs_dinode { __be16 di_magic; /* inode magic # = XFS_DINODE_MAGIC */ diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index b78481f99d9d..bb262c25c8de 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -896,7 +896,6 @@ xfs_dinode_to_disk( to->di_projid_lo = cpu_to_be16(from->di_projid_lo); to->di_projid_hi = cpu_to_be16(from->di_projid_hi); memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad)); - to->di_flushiter = cpu_to_be16(from->di_flushiter); to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec); to->di_atime.t_nsec = cpu_to_be32(from->di_atime.t_nsec); to->di_mtime.t_sec = cpu_to_be32(from->di_mtime.t_sec); @@ -924,6 +923,9 @@ xfs_dinode_to_disk( to->di_lsn = cpu_to_be64(from->di_lsn); memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2)); uuid_copy(&to->di_uuid, &from->di_uuid); + to->di_flushiter = 0; + } else { + to->di_flushiter = cpu_to_be16(from->di_flushiter); } } @@ -1029,10 +1031,14 @@ xfs_dinode_calc_crc( /* * Read the disk inode attributes into the in-core inode structure. * - * If we are initialising a new inode and we are not utilising the - * XFS_MOUNT_IKEEP inode cluster mode, we can simple build the new inode core - * with a random generation number. If we are keeping inodes around, we need to - * read the inode cluster to get the existing generation number off disk. + * For version 5 superblocks, if we are initialising a new inode and we are not + * utilising the XFS_MOUNT_IKEEP inode cluster mode, we can simple build the new + * inode core with a random generation number. If we are keeping inodes around, + * we need to read the inode cluster to get the existing generation number off + * disk. Further, if we are using version 4 superblocks (i.e. v1/v2 inode + * format) then log recovery is dependent on the di_flushiter field being + * initialised from the current on-disk value and hence we must also read the + * inode off disk. 
*/ int xfs_iread( @@ -1054,6 +1060,7 @@ xfs_iread( /* shortcut IO on inode allocation if possible */ if ((iget_flags & XFS_IGET_CREATE) && + xfs_sb_version_hascrc(&mp->m_sb) && !(mp->m_flags & XFS_MOUNT_IKEEP)) { /* initialise the on-disk inode core */ memset(&ip->i_d, 0, sizeof(ip->i_d)); @@ -2882,12 +2889,18 @@ xfs_iflush_int( __func__, ip->i_ino, ip->i_d.di_forkoff, ip); goto corrupt_out; } + /* - * bump the flush iteration count, used to detect flushes which - * postdate a log record during recovery. This is redundant as we now - * log every change and hence this can't happen. Still, it doesn't hurt. + * Inode item log recovery for v1/v2 inodes are dependent on the + * di_flushiter count for correct sequencing. We bump the flush + * iteration count so we can detect flushes which postdate a log record + * during recovery. This is redundant as we now log every change and + * hence this can't happen but we need to still do it to ensure + * backwards compatibility with old kernels that predate logging all + * inode changes. */ - ip->i_d.di_flushiter++; + if (ip->i_d.di_version < 3) + ip->i_d.di_flushiter++; /* * Copy the dirty parts of the inode into the on-disk diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 6fcc910a50b9..7681b19aa5dc 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -2592,8 +2592,16 @@ xlog_recover_inode_pass2( goto error; } - /* Skip replay when the on disk inode is newer than the log one */ - if (dicp->di_flushiter < be16_to_cpu(dip->di_flushiter)) { + /* + * di_flushiter is only valid for v1/2 inodes. All changes for v3 inodes + * are transactional and if ordering is necessary we can determine that + * more accurately by the LSN field in the V3 inode core. 
Don't trust + * the inode versions we might be changing them here - use the + * superblock flag to determine whether we need to look at di_flushiter + * to skip replay when the on disk inode is newer than the log one + */ + if (!xfs_sb_version_hascrc(&mp->m_sb) && + dicp->di_flushiter < be16_to_cpu(dip->di_flushiter)) { /* * Deal with the wrap case, DI_MAX_FLUSH is less * than smaller numbers @@ -2608,6 +2616,7 @@ xlog_recover_inode_pass2( goto error; } } + /* Take the opportunity to reset the flush iteration count */ dicp->di_flushiter = 0; diff --git a/include/acpi/video.h b/include/acpi/video.h index b26dc4fb7ba8..61109f2609fc 100644 --- a/include/acpi/video.h +++ b/include/acpi/video.h @@ -17,21 +17,12 @@ struct acpi_device; #define ACPI_VIDEO_DISPLAY_LEGACY_TV 0x0200 #if (defined CONFIG_ACPI_VIDEO || defined CONFIG_ACPI_VIDEO_MODULE) -extern int __acpi_video_register(bool backlight_quirks); -static inline int acpi_video_register(void) -{ - return __acpi_video_register(false); -} -static inline int acpi_video_register_with_quirks(void) -{ - return __acpi_video_register(true); -} +extern int acpi_video_register(void); extern void acpi_video_unregister(void); extern int acpi_video_get_edid(struct acpi_device *device, int type, int device_id, void **edid); #else static inline int acpi_video_register(void) { return 0; } -static inline int acpi_video_register_with_quirks(void) { return 0; } static inline void acpi_video_unregister(void) { return; } static inline int acpi_video_get_edid(struct acpi_device *device, int type, int device_id, void **edid) diff --git a/include/dt-bindings/clock/vf610-clock.h b/include/dt-bindings/clock/vf610-clock.h index 15e997fa78f2..4aa2b48cd151 100644 --- a/include/dt-bindings/clock/vf610-clock.h +++ b/include/dt-bindings/clock/vf610-clock.h @@ -158,6 +158,8 @@ #define VF610_CLK_GPU_SEL 145 #define VF610_CLK_GPU_EN 146 #define VF610_CLK_GPU2D 147 -#define VF610_CLK_END 148 +#define VF610_CLK_ENET0 148 +#define VF610_CLK_ENET1 149 
+#define VF610_CLK_END 150 #endif /* __DT_BINDINGS_CLOCK_VF610_H */ diff --git a/include/dt-bindings/pinctrl/am33xx.h b/include/dt-bindings/pinctrl/am33xx.h index 469e0325e6f4..2fbc804e1a45 100644 --- a/include/dt-bindings/pinctrl/am33xx.h +++ b/include/dt-bindings/pinctrl/am33xx.h @@ -5,7 +5,7 @@ #ifndef _DT_BINDINGS_PINCTRL_AM33XX_H #define _DT_BINDINGS_PINCTRL_AM33XX_H -#include <include/dt-bindings/pinctrl/omap.h> +#include <dt-bindings/pinctrl/omap.h> /* am33xx specific mux bit defines */ #undef PULL_ENA diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 6ad72f92469c..353ba256f368 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -191,7 +191,6 @@ extern bool wmi_has_guid(const char *guid); #define ACPI_VIDEO_BACKLIGHT_DMI_VIDEO 0x0200 #define ACPI_VIDEO_OUTPUT_SWITCHING_DMI_VENDOR 0x0400 #define ACPI_VIDEO_OUTPUT_SWITCHING_DMI_VIDEO 0x0800 -#define ACPI_VIDEO_SKIP_BACKLIGHT 0x1000 #if defined(CONFIG_ACPI_VIDEO) || defined(CONFIG_ACPI_VIDEO_MODULE) diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 297462b9f41a..e9ac882868c0 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -542,8 +542,7 @@ int cgroup_rm_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor); int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen); -int task_cgroup_path_from_hierarchy(struct task_struct *task, int hierarchy_id, - char *buf, size_t buflen); +int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen); int cgroup_task_count(const struct cgroup *cgrp); diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h index 6e7ec64b69ab..b613ffd402d1 100644 --- a/include/linux/cgroup_subsys.h +++ b/include/linux/cgroup_subsys.h @@ -1,86 +1,55 @@ -/* Add subsystem definitions of the form SUBSYS(<name>) in this - * file. 
Surround each one by a line of comment markers so that - * patches don't collide +/* + * List of cgroup subsystems. + * + * DO NOT ADD ANY SUBSYSTEM WITHOUT EXPLICIT ACKS FROM CGROUP MAINTAINERS. */ - -/* */ - -/* */ - #if IS_SUBSYS_ENABLED(CONFIG_CPUSETS) SUBSYS(cpuset) #endif -/* */ - #if IS_SUBSYS_ENABLED(CONFIG_CGROUP_DEBUG) SUBSYS(debug) #endif -/* */ - #if IS_SUBSYS_ENABLED(CONFIG_CGROUP_SCHED) SUBSYS(cpu_cgroup) #endif -/* */ - #if IS_SUBSYS_ENABLED(CONFIG_CGROUP_CPUACCT) SUBSYS(cpuacct) #endif -/* */ - #if IS_SUBSYS_ENABLED(CONFIG_MEMCG) SUBSYS(mem_cgroup) #endif -/* */ - #if IS_SUBSYS_ENABLED(CONFIG_CGROUP_DEVICE) SUBSYS(devices) #endif -/* */ - #if IS_SUBSYS_ENABLED(CONFIG_CGROUP_FREEZER) SUBSYS(freezer) #endif -/* */ - #if IS_SUBSYS_ENABLED(CONFIG_NET_CLS_CGROUP) SUBSYS(net_cls) #endif -/* */ - #if IS_SUBSYS_ENABLED(CONFIG_BLK_CGROUP) SUBSYS(blkio) #endif -/* */ - #if IS_SUBSYS_ENABLED(CONFIG_CGROUP_PERF) SUBSYS(perf) #endif -/* */ - #if IS_SUBSYS_ENABLED(CONFIG_NETPRIO_CGROUP) SUBSYS(net_prio) #endif -/* */ - #if IS_SUBSYS_ENABLED(CONFIG_CGROUP_HUGETLB) SUBSYS(hugetlb) #endif - -/* */ - -#ifdef CONFIG_CGROUP_BCACHE -SUBSYS(bcache) -#endif - -/* */ +/* + * DO NOT ADD ANY SUBSYSTEM WITHOUT EXPLICIT ACKS FROM CGROUP MAINTAINERS. 
+ */ diff --git a/include/linux/crc-t10dif.h b/include/linux/crc-t10dif.h index b3cb71f0d3b0..a9c96d865ee7 100644 --- a/include/linux/crc-t10dif.h +++ b/include/linux/crc-t10dif.h @@ -3,10 +3,6 @@ #include <linux/types.h> -#define CRC_T10DIF_DIGEST_SIZE 2 -#define CRC_T10DIF_BLOCK_SIZE 1 - -__u16 crc_t10dif_generic(__u16 crc, const unsigned char *buffer, size_t len); __u16 crc_t10dif(unsigned char const *, size_t); #endif diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 1b4d4ee1168f..de7d74ab3de6 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -177,7 +177,11 @@ enum drbd_ret_code { ERR_NEED_APV_100 = 163, ERR_NEED_ALLOW_TWO_PRI = 164, ERR_MD_UNCLEAN = 165, - + ERR_MD_LAYOUT_CONNECTED = 166, + ERR_MD_LAYOUT_TOO_BIG = 167, + ERR_MD_LAYOUT_TOO_SMALL = 168, + ERR_MD_LAYOUT_NO_FIT = 169, + ERR_IMPLICIT_SHRINK = 170, /* insert new ones above this line */ AFTER_LAST_ERR_CODE }; diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index d0d8fac8a6e4..e8c44572b8cb 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -181,6 +181,8 @@ GENL_struct(DRBD_NLA_RESIZE_PARMS, 7, resize_parms, __u64_field(1, DRBD_GENLA_F_MANDATORY, resize_size) __flg_field(2, DRBD_GENLA_F_MANDATORY, resize_force) __flg_field(3, DRBD_GENLA_F_MANDATORY, no_resync) + __u32_field_def(4, 0 /* OPTIONAL */, al_stripes, DRBD_AL_STRIPES_DEF) + __u32_field_def(5, 0 /* OPTIONAL */, al_stripe_size, DRBD_AL_STRIPE_SIZE_DEF) ) GENL_struct(DRBD_NLA_STATE_INFO, 8, state_info, diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 1fedf2b17cc8..17e50bb00521 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -215,4 +215,13 @@ #define DRBD_ALWAYS_ASBP_DEF 0 #define DRBD_USE_RLE_DEF 1 +#define DRBD_AL_STRIPES_MIN 1 +#define DRBD_AL_STRIPES_MAX 1024 +#define DRBD_AL_STRIPES_DEF 1 +#define DRBD_AL_STRIPES_SCALE '1' + +#define DRBD_AL_STRIPE_SIZE_MIN 4 +#define DRBD_AL_STRIPE_SIZE_MAX 16777216 +#define 
DRBD_AL_STRIPE_SIZE_DEF 32 +#define DRBD_AL_STRIPE_SIZE_SCALE 'k' /* kilobytes */ #endif diff --git a/include/linux/edac.h b/include/linux/edac.h index 0b763276f619..5c6d7fbaf89e 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -622,7 +622,7 @@ struct edac_raw_error_desc { */ struct mem_ctl_info { struct device dev; - struct bus_type bus; + struct bus_type *bus; struct list_head link; /* for global list of mem_ctl_info structs */ @@ -742,4 +742,9 @@ struct mem_ctl_info { #endif }; +/* + * Maximum number of memory controllers in the coherent fabric. + */ +#define EDAC_MAX_MCS 16 + #endif diff --git a/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h b/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h index dab34a1deb2c..b6bdcd66c07d 100644 --- a/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h +++ b/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h @@ -103,15 +103,15 @@ #define IMX6Q_GPR1_EXC_MON_MASK BIT(22) #define IMX6Q_GPR1_EXC_MON_OKAY 0x0 #define IMX6Q_GPR1_EXC_MON_SLVE BIT(22) -#define IMX6Q_GPR1_MIPI_IPU2_SEL_MASK BIT(21) -#define IMX6Q_GPR1_MIPI_IPU2_SEL_GASKET 0x0 -#define IMX6Q_GPR1_MIPI_IPU2_SEL_IOMUX BIT(21) -#define IMX6Q_GPR1_MIPI_IPU1_MUX_MASK BIT(20) -#define IMX6Q_GPR1_MIPI_IPU1_MUX_GASKET 0x0 -#define IMX6Q_GPR1_MIPI_IPU1_MUX_IOMUX BIT(20) -#define IMX6Q_GPR1_MIPI_IPU2_MUX_MASK BIT(19) +#define IMX6Q_GPR1_ENET_CLK_SEL_MASK BIT(21) +#define IMX6Q_GPR1_ENET_CLK_SEL_PAD 0 +#define IMX6Q_GPR1_ENET_CLK_SEL_ANATOP BIT(21) +#define IMX6Q_GPR1_MIPI_IPU2_MUX_MASK BIT(20) #define IMX6Q_GPR1_MIPI_IPU2_MUX_GASKET 0x0 -#define IMX6Q_GPR1_MIPI_IPU2_MUX_IOMUX BIT(19) +#define IMX6Q_GPR1_MIPI_IPU2_MUX_IOMUX BIT(20) +#define IMX6Q_GPR1_MIPI_IPU1_MUX_MASK BIT(19) +#define IMX6Q_GPR1_MIPI_IPU1_MUX_GASKET 0x0 +#define IMX6Q_GPR1_MIPI_IPU1_MUX_IOMUX BIT(19) #define IMX6Q_GPR1_PCIE_TEST_PD BIT(18) #define IMX6Q_GPR1_IPU_VPU_MUX_MASK BIT(17) #define IMX6Q_GPR1_IPU_VPU_MUX_IPU1 0x0 @@ -279,41 +279,88 @@ #define IMX6Q_GPR13_CAN2_STOP_REQ BIT(29) #define IMX6Q_GPR13_CAN1_STOP_REQ 
BIT(28) #define IMX6Q_GPR13_ENET_STOP_REQ BIT(27) -#define IMX6Q_GPR13_SATA_PHY_8_MASK (0x7 << 24) -#define IMX6Q_GPR13_SATA_PHY_8_0_5_DB (0x0 << 24) -#define IMX6Q_GPR13_SATA_PHY_8_1_0_DB (0x1 << 24) -#define IMX6Q_GPR13_SATA_PHY_8_1_5_DB (0x2 << 24) -#define IMX6Q_GPR13_SATA_PHY_8_2_0_DB (0x3 << 24) -#define IMX6Q_GPR13_SATA_PHY_8_2_5_DB (0x4 << 24) -#define IMX6Q_GPR13_SATA_PHY_8_3_0_DB (0x5 << 24) -#define IMX6Q_GPR13_SATA_PHY_8_3_5_DB (0x6 << 24) -#define IMX6Q_GPR13_SATA_PHY_8_4_0_DB (0x7 << 24) -#define IMX6Q_GPR13_SATA_PHY_7_MASK (0x1f << 19) -#define IMX6Q_GPR13_SATA_PHY_7_SATA1I (0x10 << 19) -#define IMX6Q_GPR13_SATA_PHY_7_SATA1M (0x10 << 19) -#define IMX6Q_GPR13_SATA_PHY_7_SATA1X (0x1a << 19) -#define IMX6Q_GPR13_SATA_PHY_7_SATA2I (0x12 << 19) -#define IMX6Q_GPR13_SATA_PHY_7_SATA2M (0x12 << 19) -#define IMX6Q_GPR13_SATA_PHY_7_SATA2X (0x1a << 19) -#define IMX6Q_GPR13_SATA_PHY_6_MASK (0x7 << 16) -#define IMX6Q_GPR13_SATA_SPEED_MASK BIT(15) -#define IMX6Q_GPR13_SATA_SPEED_1P5G 0x0 -#define IMX6Q_GPR13_SATA_SPEED_3P0G BIT(15) -#define IMX6Q_GPR13_SATA_PHY_5 BIT(14) -#define IMX6Q_GPR13_SATA_PHY_4_MASK (0x7 << 11) -#define IMX6Q_GPR13_SATA_PHY_4_16_16 (0x0 << 11) -#define IMX6Q_GPR13_SATA_PHY_4_14_16 (0x1 << 11) -#define IMX6Q_GPR13_SATA_PHY_4_12_16 (0x2 << 11) -#define IMX6Q_GPR13_SATA_PHY_4_10_16 (0x3 << 11) -#define IMX6Q_GPR13_SATA_PHY_4_9_16 (0x4 << 11) -#define IMX6Q_GPR13_SATA_PHY_4_8_16 (0x5 << 11) -#define IMX6Q_GPR13_SATA_PHY_3_MASK (0xf << 7) -#define IMX6Q_GPR13_SATA_PHY_3_OFF 0x7 -#define IMX6Q_GPR13_SATA_PHY_2_MASK (0x1f << 2) -#define IMX6Q_GPR13_SATA_PHY_2_OFF 0x2 -#define IMX6Q_GPR13_SATA_PHY_1_MASK (0x3 << 0) -#define IMX6Q_GPR13_SATA_PHY_1_FAST (0x0 << 0) -#define IMX6Q_GPR13_SATA_PHY_1_MED (0x1 << 0) -#define IMX6Q_GPR13_SATA_PHY_1_SLOW (0x2 << 0) - +#define IMX6Q_GPR13_SATA_RX_EQ_VAL_MASK (0x7 << 24) +#define IMX6Q_GPR13_SATA_RX_EQ_VAL_0_5_DB (0x0 << 24) +#define IMX6Q_GPR13_SATA_RX_EQ_VAL_1_0_DB (0x1 << 24) +#define 
IMX6Q_GPR13_SATA_RX_EQ_VAL_1_5_DB (0x2 << 24) +#define IMX6Q_GPR13_SATA_RX_EQ_VAL_2_0_DB (0x3 << 24) +#define IMX6Q_GPR13_SATA_RX_EQ_VAL_2_5_DB (0x4 << 24) +#define IMX6Q_GPR13_SATA_RX_EQ_VAL_3_0_DB (0x5 << 24) +#define IMX6Q_GPR13_SATA_RX_EQ_VAL_3_5_DB (0x6 << 24) +#define IMX6Q_GPR13_SATA_RX_EQ_VAL_4_0_DB (0x7 << 24) +#define IMX6Q_GPR13_SATA_RX_LOS_LVL_MASK (0x1f << 19) +#define IMX6Q_GPR13_SATA_RX_LOS_LVL_SATA1I (0x10 << 19) +#define IMX6Q_GPR13_SATA_RX_LOS_LVL_SATA1M (0x10 << 19) +#define IMX6Q_GPR13_SATA_RX_LOS_LVL_SATA1X (0x1a << 19) +#define IMX6Q_GPR13_SATA_RX_LOS_LVL_SATA2I (0x12 << 19) +#define IMX6Q_GPR13_SATA_RX_LOS_LVL_SATA2M (0x12 << 19) +#define IMX6Q_GPR13_SATA_RX_LOS_LVL_SATA2X (0x1a << 19) +#define IMX6Q_GPR13_SATA_RX_DPLL_MODE_MASK (0x7 << 16) +#define IMX6Q_GPR13_SATA_RX_DPLL_MODE_1P_1F (0x0 << 16) +#define IMX6Q_GPR13_SATA_RX_DPLL_MODE_2P_2F (0x1 << 16) +#define IMX6Q_GPR13_SATA_RX_DPLL_MODE_1P_4F (0x2 << 16) +#define IMX6Q_GPR13_SATA_RX_DPLL_MODE_2P_4F (0x3 << 16) +#define IMX6Q_GPR13_SATA_SPD_MODE_MASK BIT(15) +#define IMX6Q_GPR13_SATA_SPD_MODE_1P5G 0x0 +#define IMX6Q_GPR13_SATA_SPD_MODE_3P0G BIT(15) +#define IMX6Q_GPR13_SATA_MPLL_SS_EN BIT(14) +#define IMX6Q_GPR13_SATA_TX_ATTEN_MASK (0x7 << 11) +#define IMX6Q_GPR13_SATA_TX_ATTEN_16_16 (0x0 << 11) +#define IMX6Q_GPR13_SATA_TX_ATTEN_14_16 (0x1 << 11) +#define IMX6Q_GPR13_SATA_TX_ATTEN_12_16 (0x2 << 11) +#define IMX6Q_GPR13_SATA_TX_ATTEN_10_16 (0x3 << 11) +#define IMX6Q_GPR13_SATA_TX_ATTEN_9_16 (0x4 << 11) +#define IMX6Q_GPR13_SATA_TX_ATTEN_8_16 (0x5 << 11) +#define IMX6Q_GPR13_SATA_TX_BOOST_MASK (0xf << 7) +#define IMX6Q_GPR13_SATA_TX_BOOST_0_00_DB (0x0 << 7) +#define IMX6Q_GPR13_SATA_TX_BOOST_0_37_DB (0x1 << 7) +#define IMX6Q_GPR13_SATA_TX_BOOST_0_74_DB (0x2 << 7) +#define IMX6Q_GPR13_SATA_TX_BOOST_1_11_DB (0x3 << 7) +#define IMX6Q_GPR13_SATA_TX_BOOST_1_48_DB (0x4 << 7) +#define IMX6Q_GPR13_SATA_TX_BOOST_1_85_DB (0x5 << 7) +#define IMX6Q_GPR13_SATA_TX_BOOST_2_22_DB (0x6 << 7) +#define 
IMX6Q_GPR13_SATA_TX_BOOST_2_59_DB (0x7 << 7) +#define IMX6Q_GPR13_SATA_TX_BOOST_2_96_DB (0x8 << 7) +#define IMX6Q_GPR13_SATA_TX_BOOST_3_33_DB (0x9 << 7) +#define IMX6Q_GPR13_SATA_TX_BOOST_3_70_DB (0xa << 7) +#define IMX6Q_GPR13_SATA_TX_BOOST_4_07_DB (0xb << 7) +#define IMX6Q_GPR13_SATA_TX_BOOST_4_44_DB (0xc << 7) +#define IMX6Q_GPR13_SATA_TX_BOOST_4_81_DB (0xd << 7) +#define IMX6Q_GPR13_SATA_TX_BOOST_5_28_DB (0xe << 7) +#define IMX6Q_GPR13_SATA_TX_BOOST_5_75_DB (0xf << 7) +#define IMX6Q_GPR13_SATA_TX_LVL_MASK (0x1f << 2) +#define IMX6Q_GPR13_SATA_TX_LVL_0_937_V (0x00 << 2) +#define IMX6Q_GPR13_SATA_TX_LVL_0_947_V (0x01 << 2) +#define IMX6Q_GPR13_SATA_TX_LVL_0_957_V (0x02 << 2) +#define IMX6Q_GPR13_SATA_TX_LVL_0_966_V (0x03 << 2) +#define IMX6Q_GPR13_SATA_TX_LVL_0_976_V (0x04 << 2) +#define IMX6Q_GPR13_SATA_TX_LVL_0_986_V (0x05 << 2) +#define IMX6Q_GPR13_SATA_TX_LVL_0_996_V (0x06 << 2) +#define IMX6Q_GPR13_SATA_TX_LVL_1_005_V (0x07 << 2) +#define IMX6Q_GPR13_SATA_TX_LVL_1_015_V (0x08 << 2) +#define IMX6Q_GPR13_SATA_TX_LVL_1_025_V (0x09 << 2) +#define IMX6Q_GPR13_SATA_TX_LVL_1_035_V (0x0a << 2) +#define IMX6Q_GPR13_SATA_TX_LVL_1_045_V (0x0b << 2) +#define IMX6Q_GPR13_SATA_TX_LVL_1_054_V (0x0c << 2) +#define IMX6Q_GPR13_SATA_TX_LVL_1_064_V (0x0d << 2) +#define IMX6Q_GPR13_SATA_TX_LVL_1_074_V (0x0e << 2) +#define IMX6Q_GPR13_SATA_TX_LVL_1_084_V (0x0f << 2) +#define IMX6Q_GPR13_SATA_TX_LVL_1_094_V (0x10 << 2) +#define IMX6Q_GPR13_SATA_TX_LVL_1_104_V (0x11 << 2) +#define IMX6Q_GPR13_SATA_TX_LVL_1_113_V (0x12 << 2) +#define IMX6Q_GPR13_SATA_TX_LVL_1_123_V (0x13 << 2) +#define IMX6Q_GPR13_SATA_TX_LVL_1_133_V (0x14 << 2) +#define IMX6Q_GPR13_SATA_TX_LVL_1_143_V (0x15 << 2) +#define IMX6Q_GPR13_SATA_TX_LVL_1_152_V (0x16 << 2) +#define IMX6Q_GPR13_SATA_TX_LVL_1_162_V (0x17 << 2) +#define IMX6Q_GPR13_SATA_TX_LVL_1_172_V (0x18 << 2) +#define IMX6Q_GPR13_SATA_TX_LVL_1_182_V (0x19 << 2) +#define IMX6Q_GPR13_SATA_TX_LVL_1_191_V (0x1a << 2) +#define IMX6Q_GPR13_SATA_TX_LVL_1_201_V 
(0x1b << 2) +#define IMX6Q_GPR13_SATA_TX_LVL_1_211_V (0x1c << 2) +#define IMX6Q_GPR13_SATA_TX_LVL_1_221_V (0x1d << 2) +#define IMX6Q_GPR13_SATA_TX_LVL_1_230_V (0x1e << 2) +#define IMX6Q_GPR13_SATA_TX_LVL_1_240_V (0x1f << 2) +#define IMX6Q_GPR13_SATA_MPLL_CLK_EN BIT(1) +#define IMX6Q_GPR13_SATA_TX_EDGE_RATE BIT(0) #endif /* __LINUX_IMX6Q_IOMUXC_GPR_H */ diff --git a/include/linux/platform_data/mmc-pxamci.h b/include/linux/platform_data/mmc-pxamci.h index 9eb515bb799d..1706b3597ce0 100644 --- a/include/linux/platform_data/mmc-pxamci.h +++ b/include/linux/platform_data/mmc-pxamci.h @@ -12,7 +12,7 @@ struct pxamci_platform_data { unsigned long detect_delay_ms; /* delay in millisecond before detecting cards after interrupt */ int (*init)(struct device *, irq_handler_t , void *); int (*get_ro)(struct device *); - void (*setpower)(struct device *, unsigned int); + int (*setpower)(struct device *, unsigned int); void (*exit)(struct device *, void *); int gpio_card_detect; /* gpio detecting card insertion */ int gpio_card_ro; /* gpio detecting read only toggle */ diff --git a/include/linux/shdma-base.h b/include/linux/shdma-base.h index 382cf710ca9a..5b1c9848124c 100644 --- a/include/linux/shdma-base.h +++ b/include/linux/shdma-base.h @@ -124,6 +124,10 @@ void shdma_chan_remove(struct shdma_chan *schan); int shdma_init(struct device *dev, struct shdma_dev *sdev, int chan_num); void shdma_cleanup(struct shdma_dev *sdev); +#if IS_ENABLED(CONFIG_SH_DMAE_BASE) bool shdma_chan_filter(struct dma_chan *chan, void *arg); +#else +#define shdma_chan_filter NULL +#endif #endif diff --git a/include/linux/usb.h b/include/linux/usb.h index a232b7ece1f6..0eec2689b955 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -367,17 +367,6 @@ struct usb_bus { /* ----------------------------------------------------------------------- */ -/* This is arbitrary. - * From USB 2.0 spec Table 11-13, offset 7, a hub can - * have up to 255 ports. The most yet reported is 10. 
- * - * Current Wireless USB host hardware (Intel i1480 for example) allows - * up to 22 devices to connect. Upcoming hardware might raise that - * limit. Because the arrays need to add a bit for hub status data, we - * do 31, so plus one evens out to four bytes. - */ -#define USB_MAXCHILDREN (31) - struct usb_tt; enum usb_device_removable { diff --git a/include/trace/events/bcache.h b/include/trace/events/bcache.h index 3cc5a0b278c3..5ebda976ea93 100644 --- a/include/trace/events/bcache.h +++ b/include/trace/events/bcache.h @@ -9,9 +9,7 @@ struct search; DECLARE_EVENT_CLASS(bcache_request, - TP_PROTO(struct search *s, struct bio *bio), - TP_ARGS(s, bio), TP_STRUCT__entry( @@ -22,7 +20,6 @@ DECLARE_EVENT_CLASS(bcache_request, __field(dev_t, orig_sector ) __field(unsigned int, nr_sector ) __array(char, rwbs, 6 ) - __array(char, comm, TASK_COMM_LEN ) ), TP_fast_assign( @@ -33,36 +30,66 @@ DECLARE_EVENT_CLASS(bcache_request, __entry->orig_sector = bio->bi_sector - 16; __entry->nr_sector = bio->bi_size >> 9; blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); - memcpy(__entry->comm, current->comm, TASK_COMM_LEN); ), - TP_printk("%d,%d %s %llu + %u [%s] (from %d,%d @ %llu)", + TP_printk("%d,%d %s %llu + %u (from %d,%d @ %llu)", MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->rwbs, - (unsigned long long)__entry->sector, - __entry->nr_sector, __entry->comm, - __entry->orig_major, __entry->orig_minor, + __entry->rwbs, (unsigned long long)__entry->sector, + __entry->nr_sector, __entry->orig_major, __entry->orig_minor, (unsigned long long)__entry->orig_sector) ); -DEFINE_EVENT(bcache_request, bcache_request_start, +DECLARE_EVENT_CLASS(bkey, + TP_PROTO(struct bkey *k), + TP_ARGS(k), - TP_PROTO(struct search *s, struct bio *bio), + TP_STRUCT__entry( + __field(u32, size ) + __field(u32, inode ) + __field(u64, offset ) + __field(bool, dirty ) + ), - TP_ARGS(s, bio) + TP_fast_assign( + __entry->inode = KEY_INODE(k); + __entry->offset = KEY_OFFSET(k); + __entry->size = 
KEY_SIZE(k); + __entry->dirty = KEY_DIRTY(k); + ), + + TP_printk("%u:%llu len %u dirty %u", __entry->inode, + __entry->offset, __entry->size, __entry->dirty) ); -DEFINE_EVENT(bcache_request, bcache_request_end, +DECLARE_EVENT_CLASS(btree_node, + TP_PROTO(struct btree *b), + TP_ARGS(b), + + TP_STRUCT__entry( + __field(size_t, bucket ) + ), + TP_fast_assign( + __entry->bucket = PTR_BUCKET_NR(b->c, &b->key, 0); + ), + + TP_printk("bucket %zu", __entry->bucket) +); + +/* request.c */ + +DEFINE_EVENT(bcache_request, bcache_request_start, TP_PROTO(struct search *s, struct bio *bio), + TP_ARGS(s, bio) +); +DEFINE_EVENT(bcache_request, bcache_request_end, + TP_PROTO(struct search *s, struct bio *bio), TP_ARGS(s, bio) ); DECLARE_EVENT_CLASS(bcache_bio, - TP_PROTO(struct bio *bio), - TP_ARGS(bio), TP_STRUCT__entry( @@ -70,7 +97,6 @@ DECLARE_EVENT_CLASS(bcache_bio, __field(sector_t, sector ) __field(unsigned int, nr_sector ) __array(char, rwbs, 6 ) - __array(char, comm, TASK_COMM_LEN ) ), TP_fast_assign( @@ -78,191 +104,328 @@ DECLARE_EVENT_CLASS(bcache_bio, __entry->sector = bio->bi_sector; __entry->nr_sector = bio->bi_size >> 9; blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); - memcpy(__entry->comm, current->comm, TASK_COMM_LEN); ), - TP_printk("%d,%d %s %llu + %u [%s]", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->rwbs, - (unsigned long long)__entry->sector, - __entry->nr_sector, __entry->comm) + TP_printk("%d,%d %s %llu + %u", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, + (unsigned long long)__entry->sector, __entry->nr_sector) ); - -DEFINE_EVENT(bcache_bio, bcache_passthrough, - +DEFINE_EVENT(bcache_bio, bcache_bypass_sequential, TP_PROTO(struct bio *bio), + TP_ARGS(bio) +); +DEFINE_EVENT(bcache_bio, bcache_bypass_congested, + TP_PROTO(struct bio *bio), TP_ARGS(bio) ); -DEFINE_EVENT(bcache_bio, bcache_cache_hit, +TRACE_EVENT(bcache_read, + TP_PROTO(struct bio *bio, bool hit, bool bypass), + TP_ARGS(bio, hit, bypass), - TP_PROTO(struct bio 
*bio), + TP_STRUCT__entry( + __field(dev_t, dev ) + __field(sector_t, sector ) + __field(unsigned int, nr_sector ) + __array(char, rwbs, 6 ) + __field(bool, cache_hit ) + __field(bool, bypass ) + ), - TP_ARGS(bio) + TP_fast_assign( + __entry->dev = bio->bi_bdev->bd_dev; + __entry->sector = bio->bi_sector; + __entry->nr_sector = bio->bi_size >> 9; + blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); + __entry->cache_hit = hit; + __entry->bypass = bypass; + ), + + TP_printk("%d,%d %s %llu + %u hit %u bypass %u", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->rwbs, (unsigned long long)__entry->sector, + __entry->nr_sector, __entry->cache_hit, __entry->bypass) ); -DEFINE_EVENT(bcache_bio, bcache_cache_miss, +TRACE_EVENT(bcache_write, + TP_PROTO(struct bio *bio, bool writeback, bool bypass), + TP_ARGS(bio, writeback, bypass), - TP_PROTO(struct bio *bio), + TP_STRUCT__entry( + __field(dev_t, dev ) + __field(sector_t, sector ) + __field(unsigned int, nr_sector ) + __array(char, rwbs, 6 ) + __field(bool, writeback ) + __field(bool, bypass ) + ), - TP_ARGS(bio) + TP_fast_assign( + __entry->dev = bio->bi_bdev->bd_dev; + __entry->sector = bio->bi_sector; + __entry->nr_sector = bio->bi_size >> 9; + blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); + __entry->writeback = writeback; + __entry->bypass = bypass; + ), + + TP_printk("%d,%d %s %llu + %u hit %u bypass %u", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->rwbs, (unsigned long long)__entry->sector, + __entry->nr_sector, __entry->writeback, __entry->bypass) ); DEFINE_EVENT(bcache_bio, bcache_read_retry, - TP_PROTO(struct bio *bio), - TP_ARGS(bio) ); -DEFINE_EVENT(bcache_bio, bcache_writethrough, +DEFINE_EVENT(bkey, bcache_cache_insert, + TP_PROTO(struct bkey *k), + TP_ARGS(k) +); - TP_PROTO(struct bio *bio), +/* Journal */ - TP_ARGS(bio) -); +DECLARE_EVENT_CLASS(cache_set, + TP_PROTO(struct cache_set *c), + TP_ARGS(c), -DEFINE_EVENT(bcache_bio, bcache_writeback, + TP_STRUCT__entry( + 
__array(char, uuid, 16 ) + ), - TP_PROTO(struct bio *bio), + TP_fast_assign( + memcpy(__entry->uuid, c->sb.set_uuid, 16); + ), - TP_ARGS(bio) + TP_printk("%pU", __entry->uuid) ); -DEFINE_EVENT(bcache_bio, bcache_write_skip, - - TP_PROTO(struct bio *bio), +DEFINE_EVENT(bkey, bcache_journal_replay_key, + TP_PROTO(struct bkey *k), + TP_ARGS(k) +); - TP_ARGS(bio) +DEFINE_EVENT(cache_set, bcache_journal_full, + TP_PROTO(struct cache_set *c), + TP_ARGS(c) ); -DEFINE_EVENT(bcache_bio, bcache_btree_read, +DEFINE_EVENT(cache_set, bcache_journal_entry_full, + TP_PROTO(struct cache_set *c), + TP_ARGS(c) +); +DEFINE_EVENT(bcache_bio, bcache_journal_write, TP_PROTO(struct bio *bio), - TP_ARGS(bio) ); -DEFINE_EVENT(bcache_bio, bcache_btree_write, +/* Btree */ - TP_PROTO(struct bio *bio), +DEFINE_EVENT(cache_set, bcache_btree_cache_cannibalize, + TP_PROTO(struct cache_set *c), + TP_ARGS(c) +); - TP_ARGS(bio) +DEFINE_EVENT(btree_node, bcache_btree_read, + TP_PROTO(struct btree *b), + TP_ARGS(b) ); -DEFINE_EVENT(bcache_bio, bcache_write_dirty, +TRACE_EVENT(bcache_btree_write, + TP_PROTO(struct btree *b), + TP_ARGS(b), - TP_PROTO(struct bio *bio), + TP_STRUCT__entry( + __field(size_t, bucket ) + __field(unsigned, block ) + __field(unsigned, keys ) + ), - TP_ARGS(bio) + TP_fast_assign( + __entry->bucket = PTR_BUCKET_NR(b->c, &b->key, 0); + __entry->block = b->written; + __entry->keys = b->sets[b->nsets].data->keys; + ), + + TP_printk("bucket %zu", __entry->bucket) ); -DEFINE_EVENT(bcache_bio, bcache_read_dirty, +DEFINE_EVENT(btree_node, bcache_btree_node_alloc, + TP_PROTO(struct btree *b), + TP_ARGS(b) +); - TP_PROTO(struct bio *bio), +DEFINE_EVENT(btree_node, bcache_btree_node_alloc_fail, + TP_PROTO(struct btree *b), + TP_ARGS(b) +); - TP_ARGS(bio) +DEFINE_EVENT(btree_node, bcache_btree_node_free, + TP_PROTO(struct btree *b), + TP_ARGS(b) ); -DEFINE_EVENT(bcache_bio, bcache_write_moving, +TRACE_EVENT(bcache_btree_gc_coalesce, + TP_PROTO(unsigned nodes), + TP_ARGS(nodes), - 
TP_PROTO(struct bio *bio), + TP_STRUCT__entry( + __field(unsigned, nodes ) + ), - TP_ARGS(bio) + TP_fast_assign( + __entry->nodes = nodes; + ), + + TP_printk("coalesced %u nodes", __entry->nodes) ); -DEFINE_EVENT(bcache_bio, bcache_read_moving, +DEFINE_EVENT(cache_set, bcache_gc_start, + TP_PROTO(struct cache_set *c), + TP_ARGS(c) +); - TP_PROTO(struct bio *bio), +DEFINE_EVENT(cache_set, bcache_gc_end, + TP_PROTO(struct cache_set *c), + TP_ARGS(c) +); - TP_ARGS(bio) +DEFINE_EVENT(bkey, bcache_gc_copy, + TP_PROTO(struct bkey *k), + TP_ARGS(k) ); -DEFINE_EVENT(bcache_bio, bcache_journal_write, +DEFINE_EVENT(bkey, bcache_gc_copy_collision, + TP_PROTO(struct bkey *k), + TP_ARGS(k) +); - TP_PROTO(struct bio *bio), +TRACE_EVENT(bcache_btree_insert_key, + TP_PROTO(struct btree *b, struct bkey *k, unsigned op, unsigned status), + TP_ARGS(b, k, op, status), - TP_ARGS(bio) -); + TP_STRUCT__entry( + __field(u64, btree_node ) + __field(u32, btree_level ) + __field(u32, inode ) + __field(u64, offset ) + __field(u32, size ) + __field(u8, dirty ) + __field(u8, op ) + __field(u8, status ) + ), -DECLARE_EVENT_CLASS(bcache_cache_bio, + TP_fast_assign( + __entry->btree_node = PTR_BUCKET_NR(b->c, &b->key, 0); + __entry->btree_level = b->level; + __entry->inode = KEY_INODE(k); + __entry->offset = KEY_OFFSET(k); + __entry->size = KEY_SIZE(k); + __entry->dirty = KEY_DIRTY(k); + __entry->op = op; + __entry->status = status; + ), - TP_PROTO(struct bio *bio, - sector_t orig_sector, - struct block_device* orig_bdev), + TP_printk("%u for %u at %llu(%u): %u:%llu len %u dirty %u", + __entry->status, __entry->op, + __entry->btree_node, __entry->btree_level, + __entry->inode, __entry->offset, + __entry->size, __entry->dirty) +); - TP_ARGS(bio, orig_sector, orig_bdev), +DECLARE_EVENT_CLASS(btree_split, + TP_PROTO(struct btree *b, unsigned keys), + TP_ARGS(b, keys), TP_STRUCT__entry( - __field(dev_t, dev ) - __field(dev_t, orig_dev ) - __field(sector_t, sector ) - __field(sector_t, orig_sector ) - 
__field(unsigned int, nr_sector ) - __array(char, rwbs, 6 ) - __array(char, comm, TASK_COMM_LEN ) + __field(size_t, bucket ) + __field(unsigned, keys ) ), TP_fast_assign( - __entry->dev = bio->bi_bdev->bd_dev; - __entry->orig_dev = orig_bdev->bd_dev; - __entry->sector = bio->bi_sector; - __entry->orig_sector = orig_sector; - __entry->nr_sector = bio->bi_size >> 9; - blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); - memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + __entry->bucket = PTR_BUCKET_NR(b->c, &b->key, 0); + __entry->keys = keys; ), - TP_printk("%d,%d %s %llu + %u [%s] (from %d,%d %llu)", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->rwbs, - (unsigned long long)__entry->sector, - __entry->nr_sector, __entry->comm, - MAJOR(__entry->orig_dev), MINOR(__entry->orig_dev), - (unsigned long long)__entry->orig_sector) + TP_printk("bucket %zu keys %u", __entry->bucket, __entry->keys) ); -DEFINE_EVENT(bcache_cache_bio, bcache_cache_insert, - - TP_PROTO(struct bio *bio, - sector_t orig_sector, - struct block_device *orig_bdev), +DEFINE_EVENT(btree_split, bcache_btree_node_split, + TP_PROTO(struct btree *b, unsigned keys), + TP_ARGS(b, keys) +); - TP_ARGS(bio, orig_sector, orig_bdev) +DEFINE_EVENT(btree_split, bcache_btree_node_compact, + TP_PROTO(struct btree *b, unsigned keys), + TP_ARGS(b, keys) ); -DECLARE_EVENT_CLASS(bcache_gc, +DEFINE_EVENT(btree_node, bcache_btree_set_root, + TP_PROTO(struct btree *b), + TP_ARGS(b) +); - TP_PROTO(uint8_t *uuid), +/* Allocator */ - TP_ARGS(uuid), +TRACE_EVENT(bcache_alloc_invalidate, + TP_PROTO(struct cache *ca), + TP_ARGS(ca), TP_STRUCT__entry( - __field(uint8_t *, uuid) + __field(unsigned, free ) + __field(unsigned, free_inc ) + __field(unsigned, free_inc_size ) + __field(unsigned, unused ) ), TP_fast_assign( - __entry->uuid = uuid; + __entry->free = fifo_used(&ca->free); + __entry->free_inc = fifo_used(&ca->free_inc); + __entry->free_inc_size = ca->free_inc.size; + __entry->unused = 
fifo_used(&ca->unused); ), - TP_printk("%pU", __entry->uuid) + TP_printk("free %u free_inc %u/%u unused %u", __entry->free, + __entry->free_inc, __entry->free_inc_size, __entry->unused) ); +TRACE_EVENT(bcache_alloc_fail, + TP_PROTO(struct cache *ca), + TP_ARGS(ca), -DEFINE_EVENT(bcache_gc, bcache_gc_start, + TP_STRUCT__entry( + __field(unsigned, free ) + __field(unsigned, free_inc ) + __field(unsigned, unused ) + __field(unsigned, blocked ) + ), - TP_PROTO(uint8_t *uuid), + TP_fast_assign( + __entry->free = fifo_used(&ca->free); + __entry->free_inc = fifo_used(&ca->free_inc); + __entry->unused = fifo_used(&ca->unused); + __entry->blocked = atomic_read(&ca->set->prio_blocked); + ), - TP_ARGS(uuid) + TP_printk("free %u free_inc %u unused %u blocked %u", __entry->free, + __entry->free_inc, __entry->unused, __entry->blocked) ); -DEFINE_EVENT(bcache_gc, bcache_gc_end, +/* Background writeback */ - TP_PROTO(uint8_t *uuid), +DEFINE_EVENT(bkey, bcache_writeback, + TP_PROTO(struct bkey *k), + TP_ARGS(k) +); - TP_ARGS(uuid) +DEFINE_EVENT(bkey, bcache_writeback_collision, + TP_PROTO(struct bkey *k), + TP_ARGS(k) ); #endif /* _TRACE_BCACHE_H */ diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index d615f78cc6b6..41a6643e2136 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -670,10 +670,6 @@ perf_trace_##call(void *__data, proto) \ sizeof(u64)); \ __entry_size -= sizeof(u32); \ \ - if (WARN_ONCE(__entry_size > PERF_MAX_TRACE_SIZE, \ - "profile buffer not large enough")) \ - return; \ - \ entry = (struct ftrace_raw_##call *)perf_trace_buf_prepare( \ __entry_size, event_call->event.type, &__regs, &rctx); \ if (!entry) \ diff --git a/include/uapi/linux/usb/ch11.h b/include/uapi/linux/usb/ch11.h index 7692dc69ccf7..331499d597fa 100644 --- a/include/uapi/linux/usb/ch11.h +++ b/include/uapi/linux/usb/ch11.h @@ -11,6 +11,17 @@ #include <linux/types.h> /* __u8 etc */ +/* This is arbitrary. 
+ * From USB 2.0 spec Table 11-13, offset 7, a hub can + * have up to 255 ports. The most yet reported is 10. + * + * Current Wireless USB host hardware (Intel i1480 for example) allows + * up to 22 devices to connect. Upcoming hardware might raise that + * limit. Because the arrays need to add a bit for hub status data, we + * use 31, so plus one evens out to four bytes. + */ +#define USB_MAXCHILDREN 31 + /* * Hub request types */ diff --git a/include/xen/interface/io/blkif.h b/include/xen/interface/io/blkif.h index ffd4652de91c..65e12099ef89 100644 --- a/include/xen/interface/io/blkif.h +++ b/include/xen/interface/io/blkif.h @@ -103,12 +103,46 @@ typedef uint64_t blkif_sector_t; #define BLKIF_OP_DISCARD 5 /* + * Recognized if "feature-max-indirect-segments" is present in the backend + * xenbus info. The "feature-max-indirect-segments" node contains the maximum + * number of segments allowed by the backend per request. If the node is + * present, the frontend might use blkif_request_indirect structs in order to + * issue requests with more than BLKIF_MAX_SEGMENTS_PER_REQUEST (11). The + * maximum number of indirect segments is fixed by the backend, but the + * frontend can issue requests with any number of indirect segments as long as + * it's less than the number provided by the backend. The indirect_grefs field + * in blkif_request_indirect should be filled by the frontend with the + * grant references of the pages that are holding the indirect segments. + * These pages are filled with an array of blkif_request_segment_aligned + * that hold the information about the segments. The number of indirect + * pages to use is determined by the maximum number of segments + * an indirect request contains. Every indirect page can contain a maximum + * of 512 segments (PAGE_SIZE/sizeof(blkif_request_segment_aligned)), + * so to calculate the number of indirect pages to use we have to do + * ceil(indirect_segments/512). 
+ * + * If a backend does not recognize BLKIF_OP_INDIRECT, it should *not* + * create the "feature-max-indirect-segments" node! + */ +#define BLKIF_OP_INDIRECT 6 + +/* * Maximum scatter/gather segments per request. * This is carefully chosen so that sizeof(struct blkif_ring) <= PAGE_SIZE. * NB. This could be 12 if the ring indexes weren't stored in the same page. */ #define BLKIF_MAX_SEGMENTS_PER_REQUEST 11 +#define BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST 8 + +struct blkif_request_segment_aligned { + grant_ref_t gref; /* reference to I/O buffer frame */ + /* @first_sect: first sector in frame to transfer (inclusive). */ + /* @last_sect: last sector in frame to transfer (inclusive). */ + uint8_t first_sect, last_sect; + uint16_t _pad; /* padding to make it 8 bytes, so it's cache-aligned */ +} __attribute__((__packed__)); + struct blkif_request_rw { uint8_t nr_segments; /* number of segments */ blkif_vdev_t handle; /* only for read/write requests */ @@ -147,12 +181,31 @@ struct blkif_request_other { uint64_t id; /* private guest value, echoed in resp */ } __attribute__((__packed__)); +struct blkif_request_indirect { + uint8_t indirect_op; + uint16_t nr_segments; +#ifdef CONFIG_X86_64 + uint32_t _pad1; /* offsetof(blkif_...,u.indirect.id) == 8 */ +#endif + uint64_t id; + blkif_sector_t sector_number; + blkif_vdev_t handle; + uint16_t _pad2; + grant_ref_t indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST]; +#ifdef CONFIG_X86_64 + uint32_t _pad3; /* make it 64 byte aligned */ +#else + uint64_t _pad3; /* make it 64 byte aligned */ +#endif +} __attribute__((__packed__)); + struct blkif_request { uint8_t operation; /* BLKIF_OP_??? 
*/ union { struct blkif_request_rw rw; struct blkif_request_discard discard; struct blkif_request_other other; + struct blkif_request_indirect indirect; } u; } __attribute__((__packed__)); diff --git a/include/xen/interface/io/ring.h b/include/xen/interface/io/ring.h index 75271b9a8f61..7d28aff605c7 100644 --- a/include/xen/interface/io/ring.h +++ b/include/xen/interface/io/ring.h @@ -188,6 +188,11 @@ struct __name##_back_ring { \ #define RING_REQUEST_CONS_OVERFLOW(_r, _cons) \ (((_cons) - (_r)->rsp_prod_pvt) >= RING_SIZE(_r)) +/* Ill-behaved frontend determination: Can there be this many requests? */ +#define RING_REQUEST_PROD_OVERFLOW(_r, _prod) \ + (((_prod) - (_r)->rsp_prod_pvt) > RING_SIZE(_r)) + + #define RING_PUSH_REQUESTS(_r) do { \ wmb(); /* back sees requests /before/ updated producer index */ \ (_r)->sring->req_prod = (_r)->req_prod_pvt; \ diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 0e0b20b8c5db..789ec4683db3 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1845,36 +1845,43 @@ out: EXPORT_SYMBOL_GPL(cgroup_path); /** - * task_cgroup_path_from_hierarchy - cgroup path of a task on a hierarchy + * task_cgroup_path - cgroup path of a task in the first cgroup hierarchy * @task: target task - * @hierarchy_id: the hierarchy to look up @task's cgroup from * @buf: the buffer to write the path into * @buflen: the length of the buffer * - * Determine @task's cgroup on the hierarchy specified by @hierarchy_id and - * copy its path into @buf. This function grabs cgroup_mutex and shouldn't - * be used inside locks used by cgroup controller callbacks. + * Determine @task's cgroup on the first (the one with the lowest non-zero + * hierarchy_id) cgroup hierarchy and copy its path into @buf. This + * function grabs cgroup_mutex and shouldn't be used inside locks used by + * cgroup controller callbacks. + * + * Returns 0 on success, fails with -%ENAMETOOLONG if @buflen is too short. 
*/ -int task_cgroup_path_from_hierarchy(struct task_struct *task, int hierarchy_id, - char *buf, size_t buflen) +int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen) { struct cgroupfs_root *root; - struct cgroup *cgrp = NULL; - int ret = -ENOENT; + struct cgroup *cgrp; + int hierarchy_id = 1, ret = 0; + + if (buflen < 2) + return -ENAMETOOLONG; mutex_lock(&cgroup_mutex); - root = idr_find(&cgroup_hierarchy_idr, hierarchy_id); + root = idr_get_next(&cgroup_hierarchy_idr, &hierarchy_id); + if (root) { cgrp = task_cgroup_from_root(task, root); ret = cgroup_path(cgrp, buf, buflen); + } else { + /* if no hierarchy exists, everyone is in "/" */ + memcpy(buf, "/", 2); } mutex_unlock(&cgroup_mutex); - return ret; } -EXPORT_SYMBOL_GPL(task_cgroup_path_from_hierarchy); +EXPORT_SYMBOL_GPL(task_cgroup_path); /* * Control Group taskset diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 67708f46baae..8ce9eefc5bb4 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1441,12 +1441,22 @@ ftrace_hash_move(struct ftrace_ops *ops, int enable, * the hashes are freed with call_rcu_sched(). */ static int -ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip) +ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs) { struct ftrace_hash *filter_hash; struct ftrace_hash *notrace_hash; int ret; +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS + /* + * There's a small race when adding ops that the ftrace handler + * that wants regs, may be called without them. We can not + * allow that handler to be called if regs is NULL. 
+ */ + if (regs == NULL && (ops->flags & FTRACE_OPS_FL_SAVE_REGS)) + return 0; +#endif + filter_hash = rcu_dereference_raw_notrace(ops->filter_hash); notrace_hash = rcu_dereference_raw_notrace(ops->notrace_hash); @@ -4218,7 +4228,7 @@ static inline void ftrace_startup_enable(int command) { } # define ftrace_shutdown_sysctl() do { } while (0) static inline int -ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip) +ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs) { return 1; } @@ -4241,7 +4251,7 @@ ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip, do_for_each_ftrace_op(op, ftrace_control_list) { if (!(op->flags & FTRACE_OPS_FL_STUB) && !ftrace_function_local_disabled(op) && - ftrace_ops_test(op, ip)) + ftrace_ops_test(op, ip, regs)) op->func(ip, parent_ip, op, regs); } while_for_each_ftrace_op(op); trace_recursion_clear(TRACE_CONTROL_BIT); @@ -4274,7 +4284,7 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, */ preempt_disable_notrace(); do_for_each_ftrace_op(op, ftrace_ops_list) { - if (ftrace_ops_test(op, ip)) + if (ftrace_ops_test(op, ip, regs)) op->func(ip, parent_ip, op, regs); } while_for_each_ftrace_op(op); preempt_enable_notrace(); diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index e444ff88f0a4..cc2f66f68dc5 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -36,11 +36,11 @@ int ring_buffer_print_entry_header(struct trace_seq *s) { int ret; - ret = trace_seq_printf(s, "# compressed entry header\n"); - ret = trace_seq_printf(s, "\ttype_len : 5 bits\n"); - ret = trace_seq_printf(s, "\ttime_delta : 27 bits\n"); - ret = trace_seq_printf(s, "\tarray : 32 bits\n"); - ret = trace_seq_printf(s, "\n"); + ret = trace_seq_puts(s, "# compressed entry header\n"); + ret = trace_seq_puts(s, "\ttype_len : 5 bits\n"); + ret = trace_seq_puts(s, "\ttime_delta : 27 bits\n"); + ret = trace_seq_puts(s, "\tarray : 32 bits\n"); + ret = trace_seq_putc(s, '\n'); ret = 
trace_seq_printf(s, "\tpadding : type == %d\n", RINGBUF_TYPE_PADDING); ret = trace_seq_printf(s, "\ttime_extend : type == %d\n", @@ -1066,7 +1066,7 @@ static int rb_check_list(struct ring_buffer_per_cpu *cpu_buffer, } /** - * check_pages - integrity check of buffer pages + * rb_check_pages - integrity check of buffer pages * @cpu_buffer: CPU buffer with pages to test * * As a safety measure we check to make sure the data pages have not @@ -1258,7 +1258,7 @@ static int rb_cpu_notify(struct notifier_block *self, #endif /** - * ring_buffer_alloc - allocate a new ring_buffer + * __ring_buffer_alloc - allocate a new ring_buffer * @size: the size in bytes per cpu that is needed. * @flags: attributes to set for the ring buffer. * @@ -1607,6 +1607,7 @@ static void update_pages_handler(struct work_struct *work) * ring_buffer_resize - resize the ring buffer * @buffer: the buffer to resize. * @size: the new size. + * @cpu_id: the cpu buffer to resize * * Minimum size is 2 * BUF_PAGE_SIZE. * @@ -3956,11 +3957,11 @@ EXPORT_SYMBOL_GPL(ring_buffer_consume); * expected. * * After a sequence of ring_buffer_read_prepare calls, the user is - * expected to make at least one call to ring_buffer_prepare_sync. + * expected to make at least one call to ring_buffer_read_prepare_sync. * Afterwards, ring_buffer_read_start is invoked to get things going * for real. * - * This overall must be paired with ring_buffer_finish. + * This overall must be paired with ring_buffer_read_finish. */ struct ring_buffer_iter * ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu) @@ -4009,7 +4010,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_read_prepare_sync); * an intervening ring_buffer_read_prepare_sync must have been * performed. * - * Must be paired with ring_buffer_finish. + * Must be paired with ring_buffer_read_finish. 
*/ void ring_buffer_read_start(struct ring_buffer_iter *iter) @@ -4031,7 +4032,7 @@ ring_buffer_read_start(struct ring_buffer_iter *iter) EXPORT_SYMBOL_GPL(ring_buffer_read_start); /** - * ring_buffer_finish - finish reading the iterator of the buffer + * ring_buffer_read_finish - finish reading the iterator of the buffer * @iter: The iterator retrieved by ring_buffer_start * * This re-enables the recording to the buffer, and frees the @@ -4346,6 +4347,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu); /** * ring_buffer_alloc_read_page - allocate a page to read from buffer * @buffer: the buffer to allocate for. + * @cpu: the cpu buffer to allocate. * * This function is used in conjunction with ring_buffer_read_page. * When reading a full page from the ring buffer, these functions @@ -4403,7 +4405,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_free_read_page); * to swap with a page in the ring buffer. * * for example: - * rpage = ring_buffer_alloc_read_page(buffer); + * rpage = ring_buffer_alloc_read_page(buffer, cpu); * if (!rpage) * return error; * ret = ring_buffer_read_page(buffer, &rpage, len, cpu, 0); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 0cd500bffd9b..882ec1dd1515 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1224,18 +1224,17 @@ void tracing_reset_current(int cpu) tracing_reset(&global_trace.trace_buffer, cpu); } +/* Must have trace_types_lock held */ void tracing_reset_all_online_cpus(void) { struct trace_array *tr; - mutex_lock(&trace_types_lock); list_for_each_entry(tr, &ftrace_trace_arrays, list) { tracing_reset_online_cpus(&tr->trace_buffer); #ifdef CONFIG_TRACER_MAX_TRACE tracing_reset_online_cpus(&tr->max_buffer); #endif } - mutex_unlock(&trace_types_lock); } #define SAVED_CMDLINES 128 @@ -2843,6 +2842,17 @@ static int s_show(struct seq_file *m, void *v) return 0; } +/* + * Should be used after trace_array_get(), trace_types_lock + * ensures that i_cdev was already initialized. 
+ */ +static inline int tracing_get_cpu(struct inode *inode) +{ + if (inode->i_cdev) /* See trace_create_cpu_file() */ + return (long)inode->i_cdev - 1; + return RING_BUFFER_ALL_CPUS; +} + static const struct seq_operations tracer_seq_ops = { .start = s_start, .next = s_next, @@ -2851,9 +2861,9 @@ static const struct seq_operations tracer_seq_ops = { }; static struct trace_iterator * -__tracing_open(struct trace_array *tr, struct trace_cpu *tc, - struct inode *inode, struct file *file, bool snapshot) +__tracing_open(struct inode *inode, struct file *file, bool snapshot) { + struct trace_array *tr = inode->i_private; struct trace_iterator *iter; int cpu; @@ -2894,8 +2904,8 @@ __tracing_open(struct trace_array *tr, struct trace_cpu *tc, iter->trace_buffer = &tr->trace_buffer; iter->snapshot = snapshot; iter->pos = -1; + iter->cpu_file = tracing_get_cpu(inode); mutex_init(&iter->mutex); - iter->cpu_file = tc->cpu; /* Notify the tracer early; before we stop tracing. */ if (iter->trace && iter->trace->open) @@ -2971,45 +2981,22 @@ static int tracing_open_generic_tr(struct inode *inode, struct file *filp) filp->private_data = inode->i_private; return 0; - -} - -static int tracing_open_generic_tc(struct inode *inode, struct file *filp) -{ - struct trace_cpu *tc = inode->i_private; - struct trace_array *tr = tc->tr; - - if (tracing_disabled) - return -ENODEV; - - if (trace_array_get(tr) < 0) - return -ENODEV; - - filp->private_data = inode->i_private; - - return 0; - } static int tracing_release(struct inode *inode, struct file *file) { + struct trace_array *tr = inode->i_private; struct seq_file *m = file->private_data; struct trace_iterator *iter; - struct trace_array *tr; int cpu; - /* Writes do not use seq_file, need to grab tr from inode */ if (!(file->f_mode & FMODE_READ)) { - struct trace_cpu *tc = inode->i_private; - - trace_array_put(tc->tr); + trace_array_put(tr); return 0; } + /* Writes do not use seq_file */ iter = m->private; - tr = iter->tr; - 
trace_array_put(tr); - mutex_lock(&trace_types_lock); for_each_tracing_cpu(cpu) { @@ -3023,6 +3010,9 @@ static int tracing_release(struct inode *inode, struct file *file) if (!iter->snapshot) /* reenable tracing if it was previously enabled */ tracing_start_tr(tr); + + __trace_array_put(tr); + mutex_unlock(&trace_types_lock); mutex_destroy(&iter->mutex); @@ -3042,15 +3032,6 @@ static int tracing_release_generic_tr(struct inode *inode, struct file *file) return 0; } -static int tracing_release_generic_tc(struct inode *inode, struct file *file) -{ - struct trace_cpu *tc = inode->i_private; - struct trace_array *tr = tc->tr; - - trace_array_put(tr); - return 0; -} - static int tracing_single_release_tr(struct inode *inode, struct file *file) { struct trace_array *tr = inode->i_private; @@ -3062,8 +3043,7 @@ static int tracing_single_release_tr(struct inode *inode, struct file *file) static int tracing_open(struct inode *inode, struct file *file) { - struct trace_cpu *tc = inode->i_private; - struct trace_array *tr = tc->tr; + struct trace_array *tr = inode->i_private; struct trace_iterator *iter; int ret = 0; @@ -3071,16 +3051,17 @@ static int tracing_open(struct inode *inode, struct file *file) return -ENODEV; /* If this file was open for write, then erase contents */ - if ((file->f_mode & FMODE_WRITE) && - (file->f_flags & O_TRUNC)) { - if (tc->cpu == RING_BUFFER_ALL_CPUS) + if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) { + int cpu = tracing_get_cpu(inode); + + if (cpu == RING_BUFFER_ALL_CPUS) tracing_reset_online_cpus(&tr->trace_buffer); else - tracing_reset(&tr->trace_buffer, tc->cpu); + tracing_reset(&tr->trace_buffer, cpu); } if (file->f_mode & FMODE_READ) { - iter = __tracing_open(tr, tc, inode, file, false); + iter = __tracing_open(inode, file, false); if (IS_ERR(iter)) ret = PTR_ERR(iter); else if (trace_flags & TRACE_ITER_LATENCY_FMT) @@ -3447,6 +3428,7 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf, static int 
tracing_trace_options_open(struct inode *inode, struct file *file) { struct trace_array *tr = inode->i_private; + int ret; if (tracing_disabled) return -ENODEV; @@ -3454,7 +3436,11 @@ static int tracing_trace_options_open(struct inode *inode, struct file *file) if (trace_array_get(tr) < 0) return -ENODEV; - return single_open(file, tracing_trace_options_show, inode->i_private); + ret = single_open(file, tracing_trace_options_show, inode->i_private); + if (ret < 0) + trace_array_put(tr); + + return ret; } static const struct file_operations tracing_iter_fops = { @@ -3537,14 +3523,14 @@ static const char readme_msg[] = "\n snapshot\t\t- Like 'trace' but shows the content of the static snapshot buffer\n" "\t\t\t Read the contents for more information\n" #endif -#ifdef CONFIG_STACKTRACE +#ifdef CONFIG_STACK_TRACER " stack_trace\t\t- Shows the max stack trace when active\n" " stack_max_size\t- Shows current max stack size that was traced\n" "\t\t\t Write into this file to reset the max size (trigger a new trace)\n" #ifdef CONFIG_DYNAMIC_FTRACE " stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace traces\n" #endif -#endif /* CONFIG_STACKTRACE */ +#endif /* CONFIG_STACK_TRACER */ ; static ssize_t @@ -3941,8 +3927,7 @@ tracing_max_lat_write(struct file *filp, const char __user *ubuf, static int tracing_open_pipe(struct inode *inode, struct file *filp) { - struct trace_cpu *tc = inode->i_private; - struct trace_array *tr = tc->tr; + struct trace_array *tr = inode->i_private; struct trace_iterator *iter; int ret = 0; @@ -3958,6 +3943,7 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) iter = kzalloc(sizeof(*iter), GFP_KERNEL); if (!iter) { ret = -ENOMEM; + __trace_array_put(tr); goto out; } @@ -3987,9 +3973,9 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) if (trace_clocks[tr->clock_id].in_ns) iter->iter_flags |= TRACE_FILE_TIME_IN_NS; - iter->cpu_file = tc->cpu; - iter->tr = tc->tr; - iter->trace_buffer = 
&tc->tr->trace_buffer; + iter->tr = tr; + iter->trace_buffer = &tr->trace_buffer; + iter->cpu_file = tracing_get_cpu(inode); mutex_init(&iter->mutex); filp->private_data = iter; @@ -4012,8 +3998,7 @@ fail: static int tracing_release_pipe(struct inode *inode, struct file *file) { struct trace_iterator *iter = file->private_data; - struct trace_cpu *tc = inode->i_private; - struct trace_array *tr = tc->tr; + struct trace_array *tr = inode->i_private; mutex_lock(&trace_types_lock); @@ -4366,15 +4351,16 @@ static ssize_t tracing_entries_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { - struct trace_cpu *tc = filp->private_data; - struct trace_array *tr = tc->tr; + struct inode *inode = file_inode(filp); + struct trace_array *tr = inode->i_private; + int cpu = tracing_get_cpu(inode); char buf[64]; int r = 0; ssize_t ret; mutex_lock(&trace_types_lock); - if (tc->cpu == RING_BUFFER_ALL_CPUS) { + if (cpu == RING_BUFFER_ALL_CPUS) { int cpu, buf_size_same; unsigned long size; @@ -4401,7 +4387,7 @@ tracing_entries_read(struct file *filp, char __user *ubuf, } else r = sprintf(buf, "X\n"); } else - r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, tc->cpu)->entries >> 10); + r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10); mutex_unlock(&trace_types_lock); @@ -4413,7 +4399,8 @@ static ssize_t tracing_entries_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { - struct trace_cpu *tc = filp->private_data; + struct inode *inode = file_inode(filp); + struct trace_array *tr = inode->i_private; unsigned long val; int ret; @@ -4427,8 +4414,7 @@ tracing_entries_write(struct file *filp, const char __user *ubuf, /* value is in KB */ val <<= 10; - - ret = tracing_resize_ring_buffer(tc->tr, val, tc->cpu); + ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode)); if (ret < 0) return ret; @@ -4689,8 +4675,7 @@ struct ftrace_buffer_info { #ifdef CONFIG_TRACER_SNAPSHOT static int 
tracing_snapshot_open(struct inode *inode, struct file *file) { - struct trace_cpu *tc = inode->i_private; - struct trace_array *tr = tc->tr; + struct trace_array *tr = inode->i_private; struct trace_iterator *iter; struct seq_file *m; int ret = 0; @@ -4699,26 +4684,29 @@ static int tracing_snapshot_open(struct inode *inode, struct file *file) return -ENODEV; if (file->f_mode & FMODE_READ) { - iter = __tracing_open(tr, tc, inode, file, true); + iter = __tracing_open(inode, file, true); if (IS_ERR(iter)) ret = PTR_ERR(iter); } else { /* Writes still need the seq_file to hold the private data */ + ret = -ENOMEM; m = kzalloc(sizeof(*m), GFP_KERNEL); if (!m) - return -ENOMEM; + goto out; iter = kzalloc(sizeof(*iter), GFP_KERNEL); if (!iter) { kfree(m); - return -ENOMEM; + goto out; } + ret = 0; + iter->tr = tr; - iter->trace_buffer = &tc->tr->max_buffer; - iter->cpu_file = tc->cpu; + iter->trace_buffer = &tr->max_buffer; + iter->cpu_file = tracing_get_cpu(inode); m->private = iter; file->private_data = m; } - +out: if (ret < 0) trace_array_put(tr); @@ -4873,11 +4861,11 @@ static const struct file_operations tracing_pipe_fops = { }; static const struct file_operations tracing_entries_fops = { - .open = tracing_open_generic_tc, + .open = tracing_open_generic_tr, .read = tracing_entries_read, .write = tracing_entries_write, .llseek = generic_file_llseek, - .release = tracing_release_generic_tc, + .release = tracing_release_generic_tr, }; static const struct file_operations tracing_total_entries_fops = { @@ -4929,8 +4917,7 @@ static const struct file_operations snapshot_raw_fops = { static int tracing_buffers_open(struct inode *inode, struct file *filp) { - struct trace_cpu *tc = inode->i_private; - struct trace_array *tr = tc->tr; + struct trace_array *tr = inode->i_private; struct ftrace_buffer_info *info; int ret; @@ -4948,10 +4935,8 @@ static int tracing_buffers_open(struct inode *inode, struct file *filp) mutex_lock(&trace_types_lock); - tr->ref++; - info->iter.tr = 
tr; - info->iter.cpu_file = tc->cpu; + info->iter.cpu_file = tracing_get_cpu(inode); info->iter.trace = tr->current_trace; info->iter.trace_buffer = &tr->trace_buffer; info->spare = NULL; @@ -5268,14 +5253,14 @@ static ssize_t tracing_stats_read(struct file *filp, char __user *ubuf, size_t count, loff_t *ppos) { - struct trace_cpu *tc = filp->private_data; - struct trace_array *tr = tc->tr; + struct inode *inode = file_inode(filp); + struct trace_array *tr = inode->i_private; struct trace_buffer *trace_buf = &tr->trace_buffer; + int cpu = tracing_get_cpu(inode); struct trace_seq *s; unsigned long cnt; unsigned long long t; unsigned long usec_rem; - int cpu = tc->cpu; s = kmalloc(sizeof(*s), GFP_KERNEL); if (!s) @@ -5328,9 +5313,10 @@ tracing_stats_read(struct file *filp, char __user *ubuf, } static const struct file_operations tracing_stats_fops = { - .open = tracing_open_generic, + .open = tracing_open_generic_tr, .read = tracing_stats_read, .llseek = generic_file_llseek, + .release = tracing_release_generic_tr, }; #ifdef CONFIG_DYNAMIC_FTRACE @@ -5519,10 +5505,20 @@ static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu) return tr->percpu_dir; } +static struct dentry * +trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent, + void *data, long cpu, const struct file_operations *fops) +{ + struct dentry *ret = trace_create_file(name, mode, parent, data, fops); + + if (ret) /* See tracing_get_cpu() */ + ret->d_inode->i_cdev = (void *)(cpu + 1); + return ret; +} + static void tracing_init_debugfs_percpu(struct trace_array *tr, long cpu) { - struct trace_array_cpu *data = per_cpu_ptr(tr->trace_buffer.data, cpu); struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu); struct dentry *d_cpu; char cpu_dir[30]; /* 30 characters should be more than enough */ @@ -5538,28 +5534,28 @@ tracing_init_debugfs_percpu(struct trace_array *tr, long cpu) } /* per cpu trace_pipe */ - trace_create_file("trace_pipe", 0444, d_cpu, - (void 
*)&data->trace_cpu, &tracing_pipe_fops); + trace_create_cpu_file("trace_pipe", 0444, d_cpu, + tr, cpu, &tracing_pipe_fops); /* per cpu trace */ - trace_create_file("trace", 0644, d_cpu, - (void *)&data->trace_cpu, &tracing_fops); + trace_create_cpu_file("trace", 0644, d_cpu, + tr, cpu, &tracing_fops); - trace_create_file("trace_pipe_raw", 0444, d_cpu, - (void *)&data->trace_cpu, &tracing_buffers_fops); + trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu, + tr, cpu, &tracing_buffers_fops); - trace_create_file("stats", 0444, d_cpu, - (void *)&data->trace_cpu, &tracing_stats_fops); + trace_create_cpu_file("stats", 0444, d_cpu, + tr, cpu, &tracing_stats_fops); - trace_create_file("buffer_size_kb", 0444, d_cpu, - (void *)&data->trace_cpu, &tracing_entries_fops); + trace_create_cpu_file("buffer_size_kb", 0444, d_cpu, + tr, cpu, &tracing_entries_fops); #ifdef CONFIG_TRACER_SNAPSHOT - trace_create_file("snapshot", 0644, d_cpu, - (void *)&data->trace_cpu, &snapshot_fops); + trace_create_cpu_file("snapshot", 0644, d_cpu, + tr, cpu, &snapshot_fops); - trace_create_file("snapshot_raw", 0444, d_cpu, - (void *)&data->trace_cpu, &snapshot_raw_fops); + trace_create_cpu_file("snapshot_raw", 0444, d_cpu, + tr, cpu, &snapshot_raw_fops); #endif } @@ -5868,17 +5864,6 @@ struct dentry *trace_instance_dir; static void init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer); -static void init_trace_buffers(struct trace_array *tr, struct trace_buffer *buf) -{ - int cpu; - - for_each_tracing_cpu(cpu) { - memset(per_cpu_ptr(buf->data, cpu), 0, sizeof(struct trace_array_cpu)); - per_cpu_ptr(buf->data, cpu)->trace_cpu.cpu = cpu; - per_cpu_ptr(buf->data, cpu)->trace_cpu.tr = tr; - } -} - static int allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size) { @@ -5896,8 +5881,6 @@ allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size return -ENOMEM; } - init_trace_buffers(tr, buf); - /* Allocate the first page for all buffers */ 
set_buffer_entries(&tr->trace_buffer, ring_buffer_size(tr->trace_buffer.buffer, 0)); @@ -5964,17 +5947,15 @@ static int new_instance_create(const char *name) if (allocate_trace_buffers(tr, trace_buf_size) < 0) goto out_free_tr; - /* Holder for file callbacks */ - tr->trace_cpu.cpu = RING_BUFFER_ALL_CPUS; - tr->trace_cpu.tr = tr; - tr->dir = debugfs_create_dir(name, trace_instance_dir); if (!tr->dir) goto out_free_tr; ret = event_trace_add_tracer(tr->dir, tr); - if (ret) + if (ret) { + debugfs_remove_recursive(tr->dir); goto out_free_tr; + } init_tracer_debugfs(tr, tr->dir); @@ -6120,13 +6101,13 @@ init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer) tr, &tracing_iter_fops); trace_create_file("trace", 0644, d_tracer, - (void *)&tr->trace_cpu, &tracing_fops); + tr, &tracing_fops); trace_create_file("trace_pipe", 0444, d_tracer, - (void *)&tr->trace_cpu, &tracing_pipe_fops); + tr, &tracing_pipe_fops); trace_create_file("buffer_size_kb", 0644, d_tracer, - (void *)&tr->trace_cpu, &tracing_entries_fops); + tr, &tracing_entries_fops); trace_create_file("buffer_total_size_kb", 0444, d_tracer, tr, &tracing_total_entries_fops); @@ -6141,11 +6122,11 @@ init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer) &trace_clock_fops); trace_create_file("tracing_on", 0644, d_tracer, - tr, &rb_simple_fops); + tr, &rb_simple_fops); #ifdef CONFIG_TRACER_SNAPSHOT trace_create_file("snapshot", 0644, d_tracer, - (void *)&tr->trace_cpu, &snapshot_fops); + tr, &snapshot_fops); #endif for_each_tracing_cpu(cpu) @@ -6439,10 +6420,6 @@ __init static int tracer_alloc_buffers(void) global_trace.flags = TRACE_ARRAY_FL_GLOBAL; - /* Holder for file callbacks */ - global_trace.trace_cpu.cpu = RING_BUFFER_ALL_CPUS; - global_trace.trace_cpu.tr = &global_trace; - INIT_LIST_HEAD(&global_trace.systems); INIT_LIST_HEAD(&global_trace.events); list_add(&global_trace.list, &ftrace_trace_arrays); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 
4a4f6e1828b6..afaae41b0a02 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -130,19 +130,12 @@ enum trace_flag_type { struct trace_array; -struct trace_cpu { - struct trace_array *tr; - struct dentry *dir; - int cpu; -}; - /* * The CPU trace array - it consists of thousands of trace entries * plus some other descriptor data: (for example which task started * the trace, etc.) */ struct trace_array_cpu { - struct trace_cpu trace_cpu; atomic_t disabled; void *buffer_page; /* ring buffer spare */ @@ -196,7 +189,6 @@ struct trace_array { bool allocated_snapshot; #endif int buffer_disabled; - struct trace_cpu trace_cpu; /* place holder */ #ifdef CONFIG_FTRACE_SYSCALLS int sys_refcount_enter; int sys_refcount_exit; @@ -214,7 +206,6 @@ struct trace_array { struct dentry *event_dir; struct list_head systems; struct list_head events; - struct task_struct *waiter; int ref; }; @@ -680,6 +671,15 @@ extern int trace_selftest_startup_sched_switch(struct tracer *trace, struct trace_array *tr); extern int trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr); +/* + * Tracer data references selftest functions that only occur + * on boot up. These can be __init functions. Thus, when selftests + * are enabled, then the tracers need to reference __init functions. + */ +#define __tracer_data __refdata +#else +/* Tracers are seldom changed. Optimize when selftests are disabled. 
*/ +#define __tracer_data __read_mostly #endif /* CONFIG_FTRACE_STARTUP_TEST */ extern void *head_page(struct trace_array_cpu *data); diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 84b1e045faba..80c36bcf66e8 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -236,6 +236,10 @@ __kprobes void *perf_trace_buf_prepare(int size, unsigned short type, BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long)); + if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, + "perf buffer not large enough")) + return NULL; + pc = preempt_count(); *rctxp = perf_swevent_get_recursion_context(); @@ -266,6 +270,10 @@ perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip, struct pt_regs regs; int rctx; + head = this_cpu_ptr(event_function.perf_events); + if (hlist_empty(head)) + return; + #define ENTRY_SIZE (ALIGN(sizeof(struct ftrace_entry) + sizeof(u32), \ sizeof(u64)) - sizeof(u32)) @@ -279,8 +287,6 @@ perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip, entry->ip = ip; entry->parent_ip = parent_ip; - - head = this_cpu_ptr(event_function.perf_events); perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, 0, 1, ®s, head, NULL); diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 7d854290bf81..898f868833f2 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -826,59 +826,33 @@ enum { static void *f_next(struct seq_file *m, void *v, loff_t *pos) { struct ftrace_event_call *call = m->private; - struct ftrace_event_field *field; struct list_head *common_head = &ftrace_common_fields; struct list_head *head = trace_get_fields(call); + struct list_head *node = v; (*pos)++; switch ((unsigned long)v) { case FORMAT_HEADER: - if (unlikely(list_empty(common_head))) - return NULL; - - field = list_entry(common_head->prev, - struct ftrace_event_field, link); - return field; + node = common_head; + break; case FORMAT_FIELD_SEPERATOR: - if (unlikely(list_empty(head))) 
- return NULL; - - field = list_entry(head->prev, struct ftrace_event_field, link); - return field; + node = head; + break; case FORMAT_PRINTFMT: /* all done */ return NULL; } - field = v; - if (field->link.prev == common_head) + node = node->prev; + if (node == common_head) return (void *)FORMAT_FIELD_SEPERATOR; - else if (field->link.prev == head) + else if (node == head) return (void *)FORMAT_PRINTFMT; - - field = list_entry(field->link.prev, struct ftrace_event_field, link); - - return field; -} - -static void *f_start(struct seq_file *m, loff_t *pos) -{ - loff_t l = 0; - void *p; - - /* Start by showing the header */ - if (!*pos) - return (void *)FORMAT_HEADER; - - p = (void *)FORMAT_HEADER; - do { - p = f_next(m, p, &l); - } while (p && l < *pos); - - return p; + else + return node; } static int f_show(struct seq_file *m, void *v) @@ -904,8 +878,7 @@ static int f_show(struct seq_file *m, void *v) return 0; } - field = v; - + field = list_entry(v, struct ftrace_event_field, link); /* * Smartly shows the array type(except dynamic array). 
* Normal: @@ -932,6 +905,17 @@ static int f_show(struct seq_file *m, void *v) return 0; } +static void *f_start(struct seq_file *m, loff_t *pos) +{ + void *p = (void *)FORMAT_HEADER; + loff_t l = 0; + + while (l < *pos && p) + p = f_next(m, p, &l); + + return p; +} + static void f_stop(struct seq_file *m, void *p) { } @@ -963,23 +947,14 @@ static ssize_t event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { struct ftrace_event_call *call = filp->private_data; - struct trace_seq *s; - int r; + char buf[32]; + int len; if (*ppos) return 0; - s = kmalloc(sizeof(*s), GFP_KERNEL); - if (!s) - return -ENOMEM; - - trace_seq_init(s); - trace_seq_printf(s, "%d\n", call->event.type); - - r = simple_read_from_buffer(ubuf, cnt, ppos, - s->buffer, s->len); - kfree(s); - return r; + len = sprintf(buf, "%d\n", call->event.type); + return simple_read_from_buffer(ubuf, cnt, ppos, buf, len); } static ssize_t @@ -1218,6 +1193,7 @@ show_header(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) static int ftrace_event_avail_open(struct inode *inode, struct file *file); static int ftrace_event_set_open(struct inode *inode, struct file *file); +static int ftrace_event_release(struct inode *inode, struct file *file); static const struct seq_operations show_event_seq_ops = { .start = t_start, @@ -1245,7 +1221,7 @@ static const struct file_operations ftrace_set_event_fops = { .read = seq_read, .write = ftrace_event_write, .llseek = seq_lseek, - .release = seq_release, + .release = ftrace_event_release, }; static const struct file_operations ftrace_enable_fops = { @@ -1323,6 +1299,15 @@ ftrace_event_open(struct inode *inode, struct file *file, return ret; } +static int ftrace_event_release(struct inode *inode, struct file *file) +{ + struct trace_array *tr = inode->i_private; + + trace_array_put(tr); + + return seq_release(inode, file); +} + static int ftrace_event_avail_open(struct inode *inode, struct file *file) { @@ -1336,12 +1321,19 @@ 
ftrace_event_set_open(struct inode *inode, struct file *file) { const struct seq_operations *seq_ops = &show_set_event_seq_ops; struct trace_array *tr = inode->i_private; + int ret; + + if (trace_array_get(tr) < 0) + return -ENODEV; if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) ftrace_clear_events(tr); - return ftrace_event_open(inode, file, seq_ops); + ret = ftrace_event_open(inode, file, seq_ops); + if (ret < 0) + trace_array_put(tr); + return ret; } static struct event_subsystem * diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 0d883dc057d6..0c7b75a8acc8 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -646,7 +646,7 @@ void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s) if (filter && filter->filter_string) trace_seq_printf(s, "%s\n", filter->filter_string); else - trace_seq_printf(s, "none\n"); + trace_seq_puts(s, "none\n"); mutex_unlock(&event_mutex); } @@ -660,7 +660,7 @@ void print_subsystem_event_filter(struct event_subsystem *system, if (filter && filter->filter_string) trace_seq_printf(s, "%s\n", filter->filter_string); else - trace_seq_printf(s, DEFAULT_SYS_FILTER_MESSAGE "\n"); + trace_seq_puts(s, DEFAULT_SYS_FILTER_MESSAGE "\n"); mutex_unlock(&event_mutex); } diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index b863f93b30f3..38fe1483c508 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -199,7 +199,7 @@ static int func_set_flag(u32 old_flags, u32 bit, int set) return 0; } -static struct tracer function_trace __read_mostly = +static struct tracer function_trace __tracer_data = { .name = "function", .init = function_trace_init, diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 8388bc99f2ee..b5c09242683d 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -446,7 +446,7 @@ 
print_graph_proc(struct trace_seq *s, pid_t pid) /* First spaces to align center */ for (i = 0; i < spaces / 2; i++) { - ret = trace_seq_printf(s, " "); + ret = trace_seq_putc(s, ' '); if (!ret) return TRACE_TYPE_PARTIAL_LINE; } @@ -457,7 +457,7 @@ print_graph_proc(struct trace_seq *s, pid_t pid) /* Last spaces to align center */ for (i = 0; i < spaces - (spaces / 2); i++) { - ret = trace_seq_printf(s, " "); + ret = trace_seq_putc(s, ' '); if (!ret) return TRACE_TYPE_PARTIAL_LINE; } @@ -503,7 +503,7 @@ verif_pid(struct trace_seq *s, pid_t pid, int cpu, struct fgraph_data *data) ------------------------------------------ */ - ret = trace_seq_printf(s, + ret = trace_seq_puts(s, " ------------------------------------------\n"); if (!ret) return TRACE_TYPE_PARTIAL_LINE; @@ -516,7 +516,7 @@ verif_pid(struct trace_seq *s, pid_t pid, int cpu, struct fgraph_data *data) if (ret == TRACE_TYPE_PARTIAL_LINE) return TRACE_TYPE_PARTIAL_LINE; - ret = trace_seq_printf(s, " => "); + ret = trace_seq_puts(s, " => "); if (!ret) return TRACE_TYPE_PARTIAL_LINE; @@ -524,7 +524,7 @@ verif_pid(struct trace_seq *s, pid_t pid, int cpu, struct fgraph_data *data) if (ret == TRACE_TYPE_PARTIAL_LINE) return TRACE_TYPE_PARTIAL_LINE; - ret = trace_seq_printf(s, + ret = trace_seq_puts(s, "\n ------------------------------------------\n\n"); if (!ret) return TRACE_TYPE_PARTIAL_LINE; @@ -645,7 +645,7 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr, ret = print_graph_proc(s, pid); if (ret == TRACE_TYPE_PARTIAL_LINE) return TRACE_TYPE_PARTIAL_LINE; - ret = trace_seq_printf(s, " | "); + ret = trace_seq_puts(s, " | "); if (!ret) return TRACE_TYPE_PARTIAL_LINE; } @@ -657,9 +657,9 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr, return ret; if (type == TRACE_GRAPH_ENT) - ret = trace_seq_printf(s, "==========>"); + ret = trace_seq_puts(s, "==========>"); else - ret = trace_seq_printf(s, "<=========="); + ret = trace_seq_puts(s, "<=========="); if (!ret) return 
TRACE_TYPE_PARTIAL_LINE; @@ -668,7 +668,7 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr, if (ret != TRACE_TYPE_HANDLED) return ret; - ret = trace_seq_printf(s, "\n"); + ret = trace_seq_putc(s, '\n'); if (!ret) return TRACE_TYPE_PARTIAL_LINE; @@ -705,13 +705,13 @@ trace_print_graph_duration(unsigned long long duration, struct trace_seq *s) len += strlen(nsecs_str); } - ret = trace_seq_printf(s, " us "); + ret = trace_seq_puts(s, " us "); if (!ret) return TRACE_TYPE_PARTIAL_LINE; /* Print remaining spaces to fit the row's width */ for (i = len; i < 7; i++) { - ret = trace_seq_printf(s, " "); + ret = trace_seq_putc(s, ' '); if (!ret) return TRACE_TYPE_PARTIAL_LINE; } @@ -731,13 +731,13 @@ print_graph_duration(unsigned long long duration, struct trace_seq *s, /* No real adata, just filling the column with spaces */ switch (duration) { case DURATION_FILL_FULL: - ret = trace_seq_printf(s, " | "); + ret = trace_seq_puts(s, " | "); return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; case DURATION_FILL_START: - ret = trace_seq_printf(s, " "); + ret = trace_seq_puts(s, " "); return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; case DURATION_FILL_END: - ret = trace_seq_printf(s, " |"); + ret = trace_seq_puts(s, " |"); return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; } @@ -745,10 +745,10 @@ print_graph_duration(unsigned long long duration, struct trace_seq *s, if (flags & TRACE_GRAPH_PRINT_OVERHEAD) { /* Duration exceeded 100 msecs */ if (duration > 100000ULL) - ret = trace_seq_printf(s, "! "); + ret = trace_seq_puts(s, "! "); /* Duration exceeded 10 msecs */ else if (duration > 10000ULL) - ret = trace_seq_printf(s, "+ "); + ret = trace_seq_puts(s, "+ "); } /* @@ -757,7 +757,7 @@ print_graph_duration(unsigned long long duration, struct trace_seq *s, * to fill out the space. 
*/ if (ret == -1) - ret = trace_seq_printf(s, " "); + ret = trace_seq_puts(s, " "); /* Catching here any failure happenned above */ if (!ret) @@ -767,7 +767,7 @@ print_graph_duration(unsigned long long duration, struct trace_seq *s, if (ret != TRACE_TYPE_HANDLED) return ret; - ret = trace_seq_printf(s, "| "); + ret = trace_seq_puts(s, "| "); if (!ret) return TRACE_TYPE_PARTIAL_LINE; @@ -817,7 +817,7 @@ print_graph_entry_leaf(struct trace_iterator *iter, /* Function */ for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) { - ret = trace_seq_printf(s, " "); + ret = trace_seq_putc(s, ' '); if (!ret) return TRACE_TYPE_PARTIAL_LINE; } @@ -858,7 +858,7 @@ print_graph_entry_nested(struct trace_iterator *iter, /* Function */ for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) { - ret = trace_seq_printf(s, " "); + ret = trace_seq_putc(s, ' '); if (!ret) return TRACE_TYPE_PARTIAL_LINE; } @@ -917,7 +917,7 @@ print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s, if (ret == TRACE_TYPE_PARTIAL_LINE) return TRACE_TYPE_PARTIAL_LINE; - ret = trace_seq_printf(s, " | "); + ret = trace_seq_puts(s, " | "); if (!ret) return TRACE_TYPE_PARTIAL_LINE; } @@ -1117,7 +1117,7 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, /* Closing brace */ for (i = 0; i < trace->depth * TRACE_GRAPH_INDENT; i++) { - ret = trace_seq_printf(s, " "); + ret = trace_seq_putc(s, ' '); if (!ret) return TRACE_TYPE_PARTIAL_LINE; } @@ -1129,7 +1129,7 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, * belongs to, write out the function name. 
*/ if (func_match) { - ret = trace_seq_printf(s, "}\n"); + ret = trace_seq_puts(s, "}\n"); if (!ret) return TRACE_TYPE_PARTIAL_LINE; } else { @@ -1179,13 +1179,13 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent, /* Indentation */ if (depth > 0) for (i = 0; i < (depth + 1) * TRACE_GRAPH_INDENT; i++) { - ret = trace_seq_printf(s, " "); + ret = trace_seq_putc(s, ' '); if (!ret) return TRACE_TYPE_PARTIAL_LINE; } /* The comment */ - ret = trace_seq_printf(s, "/* "); + ret = trace_seq_puts(s, "/* "); if (!ret) return TRACE_TYPE_PARTIAL_LINE; @@ -1216,7 +1216,7 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent, s->len--; } - ret = trace_seq_printf(s, " */\n"); + ret = trace_seq_puts(s, " */\n"); if (!ret) return TRACE_TYPE_PARTIAL_LINE; @@ -1448,7 +1448,7 @@ static struct trace_event graph_trace_ret_event = { .funcs = &graph_functions }; -static struct tracer graph_trace __read_mostly = { +static struct tracer graph_trace __tracer_data = { .name = "function_graph", .open = graph_trace_open, .pipe_open = graph_trace_open, diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 7ed6976493c8..3811487e7a7a 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -243,11 +243,11 @@ find_event_file_link(struct trace_probe *tp, struct ftrace_event_file *file) static int disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file) { + struct event_file_link *link = NULL; + int wait = 0; int ret = 0; if (file) { - struct event_file_link *link; - link = find_event_file_link(tp, file); if (!link) { ret = -EINVAL; @@ -255,10 +255,7 @@ disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file) } list_del_rcu(&link->list); - /* synchronize with kprobe_trace_func/kretprobe_trace_func */ - synchronize_sched(); - kfree(link); - + wait = 1; if (!list_empty(&tp->files)) goto out; @@ -271,8 +268,22 @@ disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file) 
disable_kretprobe(&tp->rp); else disable_kprobe(&tp->rp.kp); + wait = 1; } out: + if (wait) { + /* + * Synchronize with kprobe_trace_func/kretprobe_trace_func + * to ensure disabled (all running handlers are finished). + * This is not only for kfree(), but also the caller, + * trace_remove_event_call() supposes it for releasing + * event_call related objects, which will be accessed in + * the kprobe_trace_func/kretprobe_trace_func. + */ + synchronize_sched(); + kfree(link); /* Ignored if link == NULL */ + } + return ret; } @@ -1087,9 +1098,6 @@ kprobe_perf_func(struct trace_probe *tp, struct pt_regs *regs) __size = sizeof(*entry) + tp->size + dsize; size = ALIGN(__size + sizeof(u32), sizeof(u64)); size -= sizeof(u32); - if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, - "profile buffer not large enough")) - return; entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx); if (!entry) @@ -1120,9 +1128,6 @@ kretprobe_perf_func(struct trace_probe *tp, struct kretprobe_instance *ri, __size = sizeof(*entry) + tp->size + dsize; size = ALIGN(__size + sizeof(u32), sizeof(u64)); size -= sizeof(u32); - if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, - "profile buffer not large enough")) - return; entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx); if (!entry) diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index a5e8f4878bfa..b3dcfb2f0fef 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -90,7 +90,7 @@ static int mmio_print_pcidev(struct trace_seq *s, const struct pci_dev *dev) if (drv) ret += trace_seq_printf(s, " %s\n", drv->name); else - ret += trace_seq_printf(s, " \n"); + ret += trace_seq_puts(s, " \n"); return ret; } @@ -107,7 +107,7 @@ static void mmio_pipe_open(struct trace_iterator *iter) struct header_iter *hiter; struct trace_seq *s = &iter->seq; - trace_seq_printf(s, "VERSION 20070824\n"); + trace_seq_puts(s, "VERSION 20070824\n"); hiter = kzalloc(sizeof(*hiter), GFP_KERNEL); if 
(!hiter) @@ -209,7 +209,7 @@ static enum print_line_t mmio_print_rw(struct trace_iterator *iter) (rw->value >> 0) & 0xff, rw->pc, 0); break; default: - ret = trace_seq_printf(s, "rw what?\n"); + ret = trace_seq_puts(s, "rw what?\n"); break; } if (ret) @@ -245,7 +245,7 @@ static enum print_line_t mmio_print_map(struct trace_iterator *iter) secs, usec_rem, m->map_id, 0UL, 0); break; default: - ret = trace_seq_printf(s, "map what?\n"); + ret = trace_seq_puts(s, "map what?\n"); break; } if (ret) diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index bb922d9ee51b..34e7cbac0c9c 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -78,7 +78,7 @@ enum print_line_t trace_print_printk_msg_only(struct trace_iterator *iter) trace_assign_type(field, entry); - ret = trace_seq_printf(s, "%s", field->buf); + ret = trace_seq_puts(s, field->buf); if (!ret) return TRACE_TYPE_PARTIAL_LINE; @@ -558,14 +558,14 @@ seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s, if (ret) ret = trace_seq_puts(s, "??"); if (ret) - ret = trace_seq_puts(s, "\n"); + ret = trace_seq_putc(s, '\n'); continue; } if (!ret) break; if (ret) ret = seq_print_user_ip(s, mm, ip, sym_flags); - ret = trace_seq_puts(s, "\n"); + ret = trace_seq_putc(s, '\n'); } if (mm) @@ -579,7 +579,7 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags) int ret; if (!ip) - return trace_seq_printf(s, "0"); + return trace_seq_putc(s, '0'); if (sym_flags & TRACE_ITER_SYM_OFFSET) ret = seq_print_sym_offset(s, "%s", ip); @@ -964,14 +964,14 @@ static enum print_line_t trace_fn_trace(struct trace_iterator *iter, int flags, goto partial; if ((flags & TRACE_ITER_PRINT_PARENT) && field->parent_ip) { - if (!trace_seq_printf(s, " <-")) + if (!trace_seq_puts(s, " <-")) goto partial; if (!seq_print_ip_sym(s, field->parent_ip, flags)) goto partial; } - if (!trace_seq_printf(s, "\n")) + if (!trace_seq_putc(s, '\n')) goto partial; return 
TRACE_TYPE_HANDLED; @@ -1210,7 +1210,7 @@ static enum print_line_t trace_stack_print(struct trace_iterator *iter, if (!seq_print_ip_sym(s, *p, flags)) goto partial; - if (!trace_seq_puts(s, "\n")) + if (!trace_seq_putc(s, '\n')) goto partial; } diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 322e16461072..8fd03657bc7d 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -175,7 +175,7 @@ print_syscall_exit(struct trace_iterator *iter, int flags, entry = syscall_nr_to_meta(syscall); if (!entry) { - trace_seq_printf(s, "\n"); + trace_seq_putc(s, '\n'); return TRACE_TYPE_HANDLED; } @@ -566,15 +566,15 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) if (!sys_data) return; + head = this_cpu_ptr(sys_data->enter_event->perf_events); + if (hlist_empty(head)) + return; + /* get the size after alignment with the u32 buffer size field */ size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec); size = ALIGN(size + sizeof(u32), sizeof(u64)); size -= sizeof(u32); - if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, - "perf buffer not large enough")) - return; - rec = (struct syscall_trace_enter *)perf_trace_buf_prepare(size, sys_data->enter_event->event.type, regs, &rctx); if (!rec) @@ -583,8 +583,6 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) rec->nr = syscall_nr; syscall_get_arguments(current, regs, 0, sys_data->nb_args, (unsigned long *)&rec->args); - - head = this_cpu_ptr(sys_data->enter_event->perf_events); perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head, NULL); } @@ -642,18 +640,14 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) if (!sys_data) return; + head = this_cpu_ptr(sys_data->exit_event->perf_events); + if (hlist_empty(head)) + return; + /* We can probably do that at build time */ size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64)); size -= sizeof(u32); - /* - * Impossible, but be paranoid with the 
future - * How to put this check outside runtime? - */ - if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, - "exit event has grown above perf buffer size")) - return; - rec = (struct syscall_trace_exit *)perf_trace_buf_prepare(size, sys_data->exit_event->event.type, regs, &rctx); if (!rec) @@ -661,8 +655,6 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) rec->nr = syscall_nr; rec->ret = syscall_get_return_value(current, regs); - - head = this_cpu_ptr(sys_data->exit_event->perf_events); perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head, NULL); } diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index d5d0cd368a56..a23d2d71188e 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -818,8 +818,6 @@ static void uprobe_perf_print(struct trace_uprobe *tu, size = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); size = ALIGN(size + tu->size + sizeof(u32), sizeof(u64)) - sizeof(u32); - if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, "profile buffer not large enough")) - return; preempt_disable(); head = this_cpu_ptr(call->perf_events); diff --git a/kernel/wait.c b/kernel/wait.c index ce0daa320a26..dec68bd4e9d8 100644 --- a/kernel/wait.c +++ b/kernel/wait.c @@ -333,7 +333,8 @@ int __wait_on_atomic_t(wait_queue_head_t *wq, struct wait_bit_queue *q, prepare_to_wait(wq, &q->wait, mode); val = q->key.flags; if (atomic_read(val) == 0) - ret = (*action)(val); + break; + ret = (*action)(val); } while (!ret && atomic_read(val) != 0); finish_wait(wq, &q->wait); return ret; diff --git a/lib/Kconfig b/lib/Kconfig index 35da51359d40..71d9f81f6eed 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -66,8 +66,6 @@ config CRC16 config CRC_T10DIF tristate "CRC calculation for the T10 Data Integrity Field" - select CRYPTO - select CRYPTO_CRCT10DIF help This option is only needed if a module that's not in the kernel tree needs to calculate CRC checks for use with the diff --git a/lib/crc-t10dif.c b/lib/crc-t10dif.c index 
fe3428c07b47..fbbd66ed86cd 100644 --- a/lib/crc-t10dif.c +++ b/lib/crc-t10dif.c @@ -11,44 +11,57 @@ #include <linux/types.h> #include <linux/module.h> #include <linux/crc-t10dif.h> -#include <linux/err.h> -#include <linux/init.h> -#include <crypto/hash.h> -static struct crypto_shash *crct10dif_tfm; +/* Table generated using the following polynomium: + * x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x + 1 + * gt: 0x8bb7 + */ +static const __u16 t10_dif_crc_table[256] = { + 0x0000, 0x8BB7, 0x9CD9, 0x176E, 0xB205, 0x39B2, 0x2EDC, 0xA56B, + 0xEFBD, 0x640A, 0x7364, 0xF8D3, 0x5DB8, 0xD60F, 0xC161, 0x4AD6, + 0x54CD, 0xDF7A, 0xC814, 0x43A3, 0xE6C8, 0x6D7F, 0x7A11, 0xF1A6, + 0xBB70, 0x30C7, 0x27A9, 0xAC1E, 0x0975, 0x82C2, 0x95AC, 0x1E1B, + 0xA99A, 0x222D, 0x3543, 0xBEF4, 0x1B9F, 0x9028, 0x8746, 0x0CF1, + 0x4627, 0xCD90, 0xDAFE, 0x5149, 0xF422, 0x7F95, 0x68FB, 0xE34C, + 0xFD57, 0x76E0, 0x618E, 0xEA39, 0x4F52, 0xC4E5, 0xD38B, 0x583C, + 0x12EA, 0x995D, 0x8E33, 0x0584, 0xA0EF, 0x2B58, 0x3C36, 0xB781, + 0xD883, 0x5334, 0x445A, 0xCFED, 0x6A86, 0xE131, 0xF65F, 0x7DE8, + 0x373E, 0xBC89, 0xABE7, 0x2050, 0x853B, 0x0E8C, 0x19E2, 0x9255, + 0x8C4E, 0x07F9, 0x1097, 0x9B20, 0x3E4B, 0xB5FC, 0xA292, 0x2925, + 0x63F3, 0xE844, 0xFF2A, 0x749D, 0xD1F6, 0x5A41, 0x4D2F, 0xC698, + 0x7119, 0xFAAE, 0xEDC0, 0x6677, 0xC31C, 0x48AB, 0x5FC5, 0xD472, + 0x9EA4, 0x1513, 0x027D, 0x89CA, 0x2CA1, 0xA716, 0xB078, 0x3BCF, + 0x25D4, 0xAE63, 0xB90D, 0x32BA, 0x97D1, 0x1C66, 0x0B08, 0x80BF, + 0xCA69, 0x41DE, 0x56B0, 0xDD07, 0x786C, 0xF3DB, 0xE4B5, 0x6F02, + 0x3AB1, 0xB106, 0xA668, 0x2DDF, 0x88B4, 0x0303, 0x146D, 0x9FDA, + 0xD50C, 0x5EBB, 0x49D5, 0xC262, 0x6709, 0xECBE, 0xFBD0, 0x7067, + 0x6E7C, 0xE5CB, 0xF2A5, 0x7912, 0xDC79, 0x57CE, 0x40A0, 0xCB17, + 0x81C1, 0x0A76, 0x1D18, 0x96AF, 0x33C4, 0xB873, 0xAF1D, 0x24AA, + 0x932B, 0x189C, 0x0FF2, 0x8445, 0x212E, 0xAA99, 0xBDF7, 0x3640, + 0x7C96, 0xF721, 0xE04F, 0x6BF8, 0xCE93, 0x4524, 0x524A, 0xD9FD, + 0xC7E6, 0x4C51, 0x5B3F, 0xD088, 0x75E3, 0xFE54, 0xE93A, 
0x628D, + 0x285B, 0xA3EC, 0xB482, 0x3F35, 0x9A5E, 0x11E9, 0x0687, 0x8D30, + 0xE232, 0x6985, 0x7EEB, 0xF55C, 0x5037, 0xDB80, 0xCCEE, 0x4759, + 0x0D8F, 0x8638, 0x9156, 0x1AE1, 0xBF8A, 0x343D, 0x2353, 0xA8E4, + 0xB6FF, 0x3D48, 0x2A26, 0xA191, 0x04FA, 0x8F4D, 0x9823, 0x1394, + 0x5942, 0xD2F5, 0xC59B, 0x4E2C, 0xEB47, 0x60F0, 0x779E, 0xFC29, + 0x4BA8, 0xC01F, 0xD771, 0x5CC6, 0xF9AD, 0x721A, 0x6574, 0xEEC3, + 0xA415, 0x2FA2, 0x38CC, 0xB37B, 0x1610, 0x9DA7, 0x8AC9, 0x017E, + 0x1F65, 0x94D2, 0x83BC, 0x080B, 0xAD60, 0x26D7, 0x31B9, 0xBA0E, + 0xF0D8, 0x7B6F, 0x6C01, 0xE7B6, 0x42DD, 0xC96A, 0xDE04, 0x55B3 +}; __u16 crc_t10dif(const unsigned char *buffer, size_t len) { - struct { - struct shash_desc shash; - char ctx[2]; - } desc; - int err; - - desc.shash.tfm = crct10dif_tfm; - desc.shash.flags = 0; - *(__u16 *)desc.ctx = 0; + __u16 crc = 0; + unsigned int i; - err = crypto_shash_update(&desc.shash, buffer, len); - BUG_ON(err); + for (i = 0 ; i < len ; i++) + crc = (crc << 8) ^ t10_dif_crc_table[((crc >> 8) ^ buffer[i]) & 0xff]; - return *(__u16 *)desc.ctx; + return crc; } EXPORT_SYMBOL(crc_t10dif); -static int __init crc_t10dif_mod_init(void) -{ - crct10dif_tfm = crypto_alloc_shash("crct10dif", 0, 0); - return PTR_RET(crct10dif_tfm); -} - -static void __exit crc_t10dif_mod_fini(void) -{ - crypto_free_shash(crct10dif_tfm); -} - -module_init(crc_t10dif_mod_init); -module_exit(crc_t10dif_mod_fini); - MODULE_DESCRIPTION("T10 DIF CRC calculation"); MODULE_LICENSE("GPL"); diff --git a/lib/mpi/longlong.h b/lib/mpi/longlong.h index d411355f238e..aac511417ad1 100644 --- a/lib/mpi/longlong.h +++ b/lib/mpi/longlong.h @@ -151,15 +151,12 @@ do { \ #endif /* __a29k__ */ #if defined(__alpha) && W_TYPE_SIZE == 64 -#define umul_ppmm(ph, pl, m0, m1) \ -do { \ - UDItype __m0 = (m0), __m1 = (m1); \ - __asm__ ("umulh %r1,%2,%0" \ - : "=r" ((UDItype) ph) \ - : "%rJ" (__m0), \ - "rI" (__m1)); \ - (pl) = __m0 * __m1; \ - } while (0) +#define umul_ppmm(ph, pl, m0, m1) \ +do { \ + UDItype __m0 = (m0), 
__m1 = (m1); \ + (ph) = __builtin_alpha_umulh(__m0, __m1); \ + (pl) = __m0 * __m1; \ +} while (0) #define UMUL_TIME 46 #ifndef LONGLONG_STANDALONE #define udiv_qrnnd(q, r, n1, n0, d) \ @@ -167,7 +164,7 @@ do { UDItype __r; \ (q) = __udiv_qrnnd(&__r, (n1), (n0), (d)); \ (r) = __r; \ } while (0) -extern UDItype __udiv_qrnnd(); +extern UDItype __udiv_qrnnd(UDItype *, UDItype, UDItype, UDItype); #define UDIV_TIME 220 #endif /* LONGLONG_STANDALONE */ #endif /* __alpha */ diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c index e2f83591161b..92b9b4324372 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -417,9 +417,11 @@ static void stac_update_outputs(struct hda_codec *codec) val &= ~spec->eapd_mask; else val |= spec->eapd_mask; - if (spec->gpio_data != val) + if (spec->gpio_data != val) { + spec->gpio_data = val; stac_gpio_set(codec, spec->gpio_mask, spec->gpio_dir, val); + } } } @@ -3231,7 +3233,7 @@ static const struct hda_fixup stac927x_fixups[] = { /* configure the analog microphone on some laptops */ { 0x0c, 0x90a79130 }, /* correct the front output jack as a hp out */ - { 0x0f, 0x0227011f }, + { 0x0f, 0x0221101f }, /* correct the front input jack as a mic */ { 0x0e, 0x02a79130 }, {} @@ -3612,20 +3614,18 @@ static int stac_parse_auto_config(struct hda_codec *codec) static int stac_init(struct hda_codec *codec) { struct sigmatel_spec *spec = codec->spec; - unsigned int gpio; int i; /* override some hints */ stac_store_hints(codec); /* set up GPIO */ - gpio = spec->gpio_data; /* turn on EAPD statically when spec->eapd_switch isn't set. 
* otherwise, unsol event will turn it on/off dynamically */ if (!spec->eapd_switch) - gpio |= spec->eapd_mask; - stac_gpio_set(codec, spec->gpio_mask, spec->gpio_dir, gpio); + spec->gpio_data |= spec->eapd_mask; + stac_gpio_set(codec, spec->gpio_mask, spec->gpio_dir, spec->gpio_data); snd_hda_gen_init(codec); @@ -3915,6 +3915,7 @@ static void stac_setup_gpio(struct hda_codec *codec) { struct sigmatel_spec *spec = codec->spec; + spec->gpio_mask |= spec->eapd_mask; if (spec->gpio_led) { if (!spec->vref_mute_led_nid) { spec->gpio_mask |= spec->gpio_led; diff --git a/sound/soc/cirrus/ep93xx-ac97.c b/sound/soc/cirrus/ep93xx-ac97.c index ac73c607410a..04491f0e8d1b 100644 --- a/sound/soc/cirrus/ep93xx-ac97.c +++ b/sound/soc/cirrus/ep93xx-ac97.c @@ -102,13 +102,13 @@ static struct ep93xx_ac97_info *ep93xx_ac97_info; static struct ep93xx_dma_data ep93xx_ac97_pcm_out = { .name = "ac97-pcm-out", - .dma_port = EP93XX_DMA_AAC1, + .port = EP93XX_DMA_AAC1, .direction = DMA_MEM_TO_DEV, }; static struct ep93xx_dma_data ep93xx_ac97_pcm_in = { .name = "ac97-pcm-in", - .dma_port = EP93XX_DMA_AAC1, + .port = EP93XX_DMA_AAC1, .direction = DMA_DEV_TO_MEM, }; diff --git a/sound/soc/codecs/max98088.c b/sound/soc/codecs/max98088.c index 3eeada57e87d..566a367c94fa 100644 --- a/sound/soc/codecs/max98088.c +++ b/sound/soc/codecs/max98088.c @@ -1612,7 +1612,7 @@ static int max98088_dai2_digital_mute(struct snd_soc_dai *codec_dai, int mute) static void max98088_sync_cache(struct snd_soc_codec *codec) { - u16 *reg_cache = codec->reg_cache; + u8 *reg_cache = codec->reg_cache; int i; if (!codec->cache_sync) diff --git a/sound/soc/codecs/sgtl5000.c b/sound/soc/codecs/sgtl5000.c index d659d3adcfb3..6c8a9e7bee25 100644 --- a/sound/soc/codecs/sgtl5000.c +++ b/sound/soc/codecs/sgtl5000.c @@ -1527,6 +1527,9 @@ static int sgtl5000_i2c_probe(struct i2c_client *client, if (IS_ERR(sgtl5000->mclk)) { ret = PTR_ERR(sgtl5000->mclk); dev_err(&client->dev, "Failed to get mclock: %d\n", ret); + /* Defer the probe 
to see if the clk will be provided later */ + if (ret == -ENOENT) + return -EPROBE_DEFER; return ret; } diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 0ec070cf7231..d82ee386eab5 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -3908,10 +3908,8 @@ int snd_soc_add_platform(struct device *dev, struct snd_soc_platform *platform, { /* create platform component name */ platform->name = fmt_single_name(dev, &platform->id); - if (platform->name == NULL) { - kfree(platform); + if (platform->name == NULL) return -ENOMEM; - } platform->dev = dev; platform->driver = platform_drv; diff --git a/sound/soc/tegra/tegra20_ac97.c b/sound/soc/tegra/tegra20_ac97.c index e58233f7df61..6c486625321b 100644 --- a/sound/soc/tegra/tegra20_ac97.c +++ b/sound/soc/tegra/tegra20_ac97.c @@ -389,9 +389,9 @@ static int tegra20_ac97_platform_probe(struct platform_device *pdev) ac97->capture_dma_data.slave_id = of_dma[1]; ac97->playback_dma_data.addr = mem->start + TEGRA20_AC97_FIFO_TX1; - ac97->capture_dma_data.addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; - ac97->capture_dma_data.maxburst = 4; - ac97->capture_dma_data.slave_id = of_dma[0]; + ac97->playback_dma_data.addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; + ac97->playback_dma_data.maxburst = 4; + ac97->playback_dma_data.slave_id = of_dma[1]; ret = tegra_asoc_utils_init(&ac97->util_data, &pdev->dev); if (ret) diff --git a/sound/soc/tegra/tegra20_spdif.c b/sound/soc/tegra/tegra20_spdif.c index 5eaa12cdc6eb..551b3c93ce93 100644 --- a/sound/soc/tegra/tegra20_spdif.c +++ b/sound/soc/tegra/tegra20_spdif.c @@ -323,8 +323,8 @@ static int tegra20_spdif_platform_probe(struct platform_device *pdev) } spdif->playback_dma_data.addr = mem->start + TEGRA20_SPDIF_DATA_OUT; - spdif->capture_dma_data.addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; - spdif->capture_dma_data.maxburst = 4; + spdif->playback_dma_data.addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; + spdif->playback_dma_data.maxburst = 4; spdif->playback_dma_data.slave_id = dmareq->start; 
pm_runtime_enable(&pdev->dev); diff --git a/sound/usb/6fire/pcm.c b/sound/usb/6fire/pcm.c index 2aa4e13063a8..3d2551cc10f2 100644 --- a/sound/usb/6fire/pcm.c +++ b/sound/usb/6fire/pcm.c @@ -543,7 +543,7 @@ static snd_pcm_uframes_t usb6fire_pcm_pointer( snd_pcm_uframes_t ret; if (rt->panic || !sub) - return SNDRV_PCM_STATE_XRUN; + return SNDRV_PCM_POS_XRUN; spin_lock_irqsave(&sub->lock, flags); ret = sub->dma_off; diff --git a/sound/usb/hiface/pcm.c b/sound/usb/hiface/pcm.c index 6430ed2a9f65..c21a3df9a0df 100644 --- a/sound/usb/hiface/pcm.c +++ b/sound/usb/hiface/pcm.c @@ -503,7 +503,7 @@ static snd_pcm_uframes_t hiface_pcm_pointer(struct snd_pcm_substream *alsa_sub) snd_pcm_uframes_t dma_offset; if (rt->panic || !sub) - return SNDRV_PCM_STATE_XRUN; + return SNDRV_PCM_POS_XRUN; spin_lock_irqsave(&sub->lock, flags); dma_offset = sub->dma_off; diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c index ca9fa4d32e07..07819bfa7dba 100644 --- a/tools/hv/hv_kvp_daemon.c +++ b/tools/hv/hv_kvp_daemon.c @@ -1026,9 +1026,10 @@ kvp_get_ip_info(int family, char *if_name, int op, if (sn_offset == 0) strcpy(sn_str, cidr_mask); - else + else { + strcat((char *)ip_buffer->sub_net, ";"); strcat(sn_str, cidr_mask); - strcat((char *)ip_buffer->sub_net, ";"); + } sn_offset += strlen(sn_str) + 1; } |