summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/ABI/testing/sysfs-kernel-slab13
-rw-r--r--Documentation/admin-guide/cgroup-v1/memory.rst4
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt2
-rw-r--r--Documentation/devicetree/bindings/i2c/brcm,bcm2835-i2c.txt4
-rw-r--r--Documentation/devicetree/bindings/i2c/renesas,i2c.txt (renamed from Documentation/devicetree/bindings/i2c/i2c-rcar.txt)0
-rw-r--r--Documentation/devicetree/bindings/i2c/renesas,iic-emev2.txt (renamed from Documentation/devicetree/bindings/i2c/i2c-emev2.txt)0
-rw-r--r--Documentation/devicetree/bindings/i2c/renesas,iic.txt (renamed from Documentation/devicetree/bindings/i2c/i2c-sh_mobile.txt)0
-rw-r--r--Documentation/devicetree/bindings/i2c/renesas,riic.txt (renamed from Documentation/devicetree/bindings/i2c/i2c-riic.txt)0
-rw-r--r--MAINTAINERS14
-rw-r--r--arch/Kconfig11
-rw-r--r--arch/alpha/include/asm/pgalloc.h2
-rw-r--r--arch/alpha/include/asm/pgtable.h5
-rw-r--r--arch/arc/include/asm/pgalloc.h1
-rw-r--r--arch/arc/include/asm/pgtable.h5
-rw-r--r--arch/arm/Kconfig1
-rw-r--r--arch/arm/include/asm/pgalloc.h2
-rw-r--r--arch/arm/include/asm/pgtable-nommu.h5
-rw-r--r--arch/arm/include/asm/pgtable.h2
-rw-r--r--arch/arm/include/asm/processor.h2
-rw-r--r--arch/arm/kernel/process.c5
-rw-r--r--arch/arm/mm/flush.c7
-rw-r--r--arch/arm/mm/mmap.c52
-rw-r--r--arch/arm64/Kconfig2
-rw-r--r--arch/arm64/include/asm/pgalloc.h2
-rw-r--r--arch/arm64/include/asm/pgtable.h2
-rw-r--r--arch/arm64/include/asm/processor.h2
-rw-r--r--arch/arm64/kernel/process.c8
-rw-r--r--arch/arm64/mm/flush.c3
-rw-r--r--arch/arm64/mm/mmap.c72
-rw-r--r--arch/arm64/mm/pgd.c2
-rw-r--r--arch/c6x/include/asm/pgtable.h5
-rw-r--r--arch/csky/include/asm/pgalloc.h2
-rw-r--r--arch/csky/include/asm/pgtable.h5
-rw-r--r--arch/h8300/include/asm/pgtable.h6
-rw-r--r--arch/hexagon/include/asm/pgalloc.h2
-rw-r--r--arch/hexagon/include/asm/pgtable.h3
-rw-r--r--arch/hexagon/mm/Makefile2
-rw-r--r--arch/hexagon/mm/pgalloc.c10
-rw-r--r--arch/ia64/Kconfig4
-rw-r--r--arch/ia64/include/asm/pgalloc.h52
-rw-r--r--arch/ia64/include/asm/pgtable.h5
-rw-r--r--arch/ia64/kernel/irq_ia64.c1
-rw-r--r--arch/ia64/mm/contig.c1
-rw-r--r--arch/ia64/mm/discontig.c2
-rw-r--r--arch/ia64/mm/init.c2
-rw-r--r--arch/m68k/include/asm/pgtable_mm.h7
-rw-r--r--arch/m68k/include/asm/pgtable_no.h7
-rw-r--r--arch/microblaze/Kconfig3
-rw-r--r--arch/microblaze/boot/dts/system.dts16
-rw-r--r--arch/microblaze/configs/mmu_defconfig22
-rw-r--r--arch/microblaze/configs/nommu_defconfig25
-rw-r--r--arch/microblaze/include/asm/io.h1
-rw-r--r--arch/microblaze/include/asm/pgalloc.h122
-rw-r--r--arch/microblaze/include/asm/pgtable.h7
-rw-r--r--arch/microblaze/include/asm/uaccess.h42
-rw-r--r--arch/microblaze/kernel/reset.c87
-rw-r--r--arch/microblaze/mm/consistent.c221
-rw-r--r--arch/microblaze/mm/pgtable.c4
-rw-r--r--arch/mips/Kconfig2
-rw-r--r--arch/mips/include/asm/pgalloc.h2
-rw-r--r--arch/mips/include/asm/pgtable.h5
-rw-r--r--arch/mips/include/asm/processor.h5
-rw-r--r--arch/mips/mm/mmap.c84
-rw-r--r--arch/nds32/include/asm/pgalloc.h2
-rw-r--r--arch/nds32/include/asm/pgtable.h2
-rw-r--r--arch/nios2/include/asm/pgalloc.h2
-rw-r--r--arch/nios2/include/asm/pgtable.h2
-rw-r--r--arch/openrisc/include/asm/pgalloc.h2
-rw-r--r--arch/openrisc/include/asm/pgtable.h5
-rw-r--r--arch/parisc/include/asm/pgalloc.h2
-rw-r--r--arch/parisc/include/asm/pgtable.h2
-rw-r--r--arch/powerpc/include/asm/pgalloc.h2
-rw-r--r--arch/powerpc/include/asm/pgtable.h1
-rw-r--r--arch/powerpc/mm/book3s64/hash_utils.c2
-rw-r--r--arch/powerpc/mm/book3s64/iommu_api.c7
-rw-r--r--arch/powerpc/mm/hugetlbpage.c2
-rw-r--r--arch/powerpc/platforms/cell/spufs/inode.c207
-rw-r--r--arch/riscv/Kconfig12
-rw-r--r--arch/riscv/include/asm/pgalloc.h4
-rw-r--r--arch/riscv/include/asm/pgtable.h5
-rw-r--r--arch/s390/hypfs/inode.c137
-rw-r--r--arch/s390/include/asm/pgtable.h6
-rw-r--r--arch/sh/include/asm/pgalloc.h44
-rw-r--r--arch/sh/include/asm/pgtable.h5
-rw-r--r--arch/sh/mm/Kconfig3
-rw-r--r--arch/sh/mm/nommu.c4
-rw-r--r--arch/sparc/include/asm/pgalloc_32.h2
-rw-r--r--arch/sparc/include/asm/pgalloc_64.h2
-rw-r--r--arch/sparc/include/asm/pgtable_32.h5
-rw-r--r--arch/sparc/include/asm/pgtable_64.h1
-rw-r--r--arch/sparc/mm/init_32.c1
-rw-r--r--arch/um/include/asm/pgalloc.h2
-rw-r--r--arch/um/include/asm/pgtable.h2
-rw-r--r--arch/unicore32/include/asm/pgalloc.h2
-rw-r--r--arch/unicore32/include/asm/pgtable.h2
-rw-r--r--arch/x86/include/asm/pgtable_32.h2
-rw-r--r--arch/x86/include/asm/pgtable_64.h3
-rw-r--r--arch/x86/mm/pgtable.c6
-rw-r--r--arch/xtensa/include/asm/pgtable.h1
-rw-r--r--arch/xtensa/include/asm/tlbflush.h3
-rw-r--r--block/bfq-iosched.c35
-rw-r--r--block/blk-core.c7
-rw-r--r--block/blk-integrity.c11
-rw-r--r--block/blk-mq.c6
-rw-r--r--block/bsg-lib.c10
-rw-r--r--block/t10-pi.c169
-rw-r--r--drivers/acpi/acpi_apd.c7
-rw-r--r--drivers/ata/libahci_platform.c9
-rw-r--r--drivers/base/memory.c44
-rw-r--r--drivers/base/node.c55
-rw-r--r--drivers/block/nbd.c108
-rw-r--r--drivers/block/pktcdvd.c1
-rw-r--r--drivers/crypto/chelsio/chtls/chtls_io.c5
-rw-r--r--drivers/gpu/drm/bridge/adv7511/adv7511_drv.c18
-rw-r--r--drivers/gpu/drm/via/via_dmablit.c10
-rw-r--r--drivers/hid/hid-hyperv.c4
-rw-r--r--drivers/hv/channel_mgmt.c161
-rw-r--r--drivers/hv/connection.c8
-rw-r--r--drivers/hv/hv.c66
-rw-r--r--drivers/hv/hv_balloon.c143
-rw-r--r--drivers/hv/hyperv_vmbus.h30
-rw-r--r--drivers/hv/vmbus_drv.c265
-rw-r--r--drivers/i2c/busses/Kconfig17
-rw-r--r--drivers/i2c/busses/Makefile1
-rw-r--r--drivers/i2c/busses/i2c-axxia.c152
-rw-r--r--drivers/i2c/busses/i2c-bcm-iproc.c6
-rw-r--r--drivers/i2c/busses/i2c-bcm2835.c11
-rw-r--r--drivers/i2c/busses/i2c-cht-wc.c49
-rw-r--r--drivers/i2c/busses/i2c-designware-master.c14
-rw-r--r--drivers/i2c/busses/i2c-designware-pcidrv.c41
-rw-r--r--drivers/i2c/busses/i2c-designware-platdrv.c22
-rw-r--r--drivers/i2c/busses/i2c-exynos5.c4
-rw-r--r--drivers/i2c/busses/i2c-fsi.c4
-rw-r--r--drivers/i2c/busses/i2c-hix5hd2.c3
-rw-r--r--drivers/i2c/busses/i2c-i801.c145
-rw-r--r--drivers/i2c/busses/i2c-icy.c230
-rw-r--r--drivers/i2c/busses/i2c-imx-lpi2c.c4
-rw-r--r--drivers/i2c/busses/i2c-imx.c17
-rw-r--r--drivers/i2c/busses/i2c-ismt.c2
-rw-r--r--drivers/i2c/busses/i2c-mxs.c4
-rw-r--r--drivers/i2c/busses/i2c-ocores.c5
-rw-r--r--drivers/i2c/busses/i2c-piix4.c34
-rw-r--r--drivers/i2c/busses/i2c-sprd.c25
-rw-r--r--drivers/i2c/busses/i2c-stm32f7.c2
-rw-r--r--drivers/i2c/busses/i2c-synquacer.c2
-rw-r--r--drivers/i2c/busses/i2c-taos-evm.c2
-rw-r--r--drivers/i2c/busses/i2c-tegra.c56
-rw-r--r--drivers/i2c/busses/i2c-uniphier-f.c26
-rw-r--r--drivers/i2c/busses/i2c-uniphier.c22
-rw-r--r--drivers/i2c/i2c-core-base.c20
-rw-r--r--drivers/i2c/i2c-slave-eeprom.c42
-rw-r--r--drivers/infiniband/core/umem.c5
-rw-r--r--drivers/infiniband/hw/hfi1/user_pages.c5
-rw-r--r--drivers/infiniband/hw/qib/qib_user_pages.c5
-rw-r--r--drivers/infiniband/hw/usnic/usnic_uiom.c5
-rw-r--r--drivers/infiniband/sw/siw/siw_mem.c10
-rw-r--r--drivers/md/dm-integrity.c10
-rw-r--r--drivers/media/i2c/adv748x/adv748x-core.c6
-rw-r--r--drivers/media/i2c/adv7604.c22
-rw-r--r--drivers/misc/eeprom/at24.c1
-rw-r--r--drivers/nvme/host/core.c9
-rw-r--r--drivers/platform/x86/Kconfig1
-rw-r--r--drivers/platform/x86/i2c-multi-instantiate.c2
-rw-r--r--drivers/platform/x86/pmc_atom.c7
-rw-r--r--drivers/scsi/sd.c8
-rw-r--r--drivers/staging/android/ion/ion_system_heap.c4
-rw-r--r--drivers/target/tcm_fc/tfc_io.c3
-rw-r--r--drivers/usb/gadget/function/f_fs.c233
-rw-r--r--drivers/vfio/vfio_iommu_spapr_tce.c8
-rw-r--r--drivers/watchdog/iTCO_wdt.c26
-rw-r--r--fs/binfmt_elf.c20
-rw-r--r--fs/fat/dir.c13
-rw-r--r--fs/fat/fatent.c3
-rw-r--r--fs/gfs2/incore.h8
-rw-r--r--fs/gfs2/ops_fstype.c495
-rw-r--r--fs/gfs2/super.c333
-rw-r--r--fs/gfs2/super.h3
-rw-r--r--fs/inode.c3
-rw-r--r--fs/io_uring.c243
-rw-r--r--fs/iomap/direct-io.c24
-rw-r--r--fs/jbd2/journal.c2
-rw-r--r--fs/jbd2/transaction.c12
-rw-r--r--fs/ocfs2/alloc.c20
-rw-r--r--fs/ocfs2/aops.c13
-rw-r--r--fs/ocfs2/blockcheck.c26
-rw-r--r--fs/ocfs2/cluster/heartbeat.c103
-rw-r--r--fs/ocfs2/dir.c3
-rw-r--r--fs/ocfs2/dlm/dlmcommon.h1
-rw-r--r--fs/ocfs2/dlm/dlmdebug.c55
-rw-r--r--fs/ocfs2/dlm/dlmdebug.h16
-rw-r--r--fs/ocfs2/dlm/dlmdomain.c7
-rw-r--r--fs/ocfs2/dlm/dlmunlock.c23
-rw-r--r--fs/ocfs2/dlmglue.c27
-rw-r--r--fs/ocfs2/extent_map.c3
-rw-r--r--fs/ocfs2/file.c13
-rw-r--r--fs/ocfs2/inode.c2
-rw-r--r--fs/ocfs2/journal.h42
-rw-r--r--fs/ocfs2/namei.c2
-rw-r--r--fs/ocfs2/ocfs2.h3
-rw-r--r--fs/ocfs2/super.c10
-rw-r--r--fs/open.c8
-rw-r--r--fs/proc/meminfo.c8
-rw-r--r--fs/proc/task_mmu.c6
-rw-r--r--fs/xfs/xfs_file.c14
-rw-r--r--include/asm-generic/pgalloc.h5
-rw-r--r--include/asm-generic/pgtable.h7
-rw-r--r--include/linux/blkdev.h4
-rw-r--r--include/linux/compaction.h22
-rw-r--r--include/linux/fs.h32
-rw-r--r--include/linux/huge_mm.h9
-rw-r--r--include/linux/hugetlb.h2
-rw-r--r--include/linux/hyperv.h16
-rw-r--r--include/linux/i2c.h2
-rw-r--r--include/linux/iomap.h10
-rw-r--r--include/linux/jbd2.h2
-rw-r--r--include/linux/khugepaged.h12
-rw-r--r--include/linux/memcontrol.h23
-rw-r--r--include/linux/memory.h7
-rw-r--r--include/linux/mm.h37
-rw-r--r--include/linux/mm_types.h1
-rw-r--r--include/linux/mmzone.h14
-rw-r--r--include/linux/page_ext.h1
-rw-r--r--include/linux/pagemap.h10
-rw-r--r--include/linux/quicklist.h94
-rw-r--r--include/linux/shrinker.h7
-rw-r--r--include/linux/slab.h62
-rw-r--r--include/linux/t10-pi.h14
-rw-r--r--include/linux/vmalloc.h20
-rw-r--r--include/linux/zpool.h3
-rw-r--r--include/uapi/linux/io_uring.h2
-rw-r--r--init/main.c6
-rw-r--r--kernel/bpf/inode.c92
-rw-r--r--kernel/events/uprobes.c81
-rw-r--r--kernel/resource.c4
-rw-r--r--kernel/sched/idle.c1
-rw-r--r--kernel/sysctl.c6
-rw-r--r--lib/Kconfig.debug15
-rw-r--r--lib/Kconfig.kasan8
-rw-r--r--lib/iov_iter.c2
-rw-r--r--lib/show_mem.c5
-rw-r--r--lib/test_kasan.c41
-rw-r--r--mm/Kconfig16
-rw-r--r--mm/Kconfig.debug4
-rw-r--r--mm/Makefile4
-rw-r--r--mm/compaction.c50
-rw-r--r--mm/filemap.c168
-rw-r--r--mm/gup.c125
-rw-r--r--mm/huge_memory.c123
-rw-r--r--mm/hugetlb.c89
-rw-r--r--mm/hugetlb_cgroup.c2
-rw-r--r--mm/init-mm.c2
-rw-r--r--mm/kasan/common.c32
-rw-r--r--mm/kasan/kasan.h14
-rw-r--r--mm/kasan/report.c44
-rw-r--r--mm/kasan/tags_report.c24
-rw-r--r--mm/khugepaged.c366
-rw-r--r--mm/kmemleak.c326
-rw-r--r--mm/ksm.c18
-rw-r--r--mm/madvise.c52
-rw-r--r--mm/memcontrol.c188
-rw-r--r--mm/memfd.c2
-rw-r--r--mm/memory.c13
-rw-r--r--mm/memory_hotplug.c103
-rw-r--r--mm/mempolicy.c4
-rw-r--r--mm/migrate.c13
-rw-r--r--mm/mmap.c12
-rw-r--r--mm/mmu_gather.c2
-rw-r--r--mm/nommu.c2
-rw-r--r--mm/oom_kill.c24
-rw-r--r--mm/page_alloc.c27
-rw-r--r--mm/page_owner.c123
-rw-r--r--mm/page_poison.c2
-rw-r--r--mm/page_vma_mapped.c3
-rw-r--r--mm/quicklist.c103
-rw-r--r--mm/rmap.c25
-rw-r--r--mm/shmem.c12
-rw-r--r--mm/slab.h64
-rw-r--r--mm/slab_common.c37
-rw-r--r--mm/slob.c2
-rw-r--r--mm/slub.c22
-rw-r--r--mm/sparse.c25
-rw-r--r--mm/swap.c16
-rw-r--r--mm/swap_state.c6
-rw-r--r--mm/util.c122
-rw-r--r--mm/vmalloc.c84
-rw-r--r--mm/vmscan.c149
-rw-r--r--mm/vmstat.c2
-rw-r--r--mm/z3fold.c154
-rw-r--r--mm/zpool.c16
-rw-r--r--mm/zsmalloc.c23
-rw-r--r--mm/zswap.c15
-rw-r--r--net/xdp/xdp_umem.c9
-rw-r--r--net/xdp/xsk.c2
-rw-r--r--sound/firewire/dice/dice-alesis.c2
-rw-r--r--sound/pci/hda/hda_intel.c3
-rw-r--r--sound/pci/hda/patch_analog.c1
-rw-r--r--sound/pci/hda/patch_realtek.c28
-rw-r--r--sound/soc/atmel/atmel_ssc_dai.c12
-rw-r--r--sound/soc/atmel/atmel_ssc_dai.h1
-rw-r--r--sound/soc/codecs/pcm3168a.c3
-rw-r--r--sound/soc/fsl/fsl_sai.c15
-rw-r--r--sound/soc/fsl/fsl_sai.h1
-rw-r--r--sound/soc/sh/rcar/ssi.c10
-rw-r--r--sound/soc/soc-core.c2
-rw-r--r--sound/soc/ti/Kconfig11
-rw-r--r--sound/usb/quirks.c2
-rw-r--r--tools/hv/Build3
-rw-r--r--tools/hv/Makefile51
-rw-r--r--tools/power/x86/intel-speed-select/isst-config.c122
-rw-r--r--tools/power/x86/intel-speed-select/isst-core.c25
-rw-r--r--tools/power/x86/intel-speed-select/isst-display.c71
-rw-r--r--tools/power/x86/intel-speed-select/isst.h10
-rw-r--r--usr/Makefile3
313 files changed, 5296 insertions, 4180 deletions
diff --git a/Documentation/ABI/testing/sysfs-kernel-slab b/Documentation/ABI/testing/sysfs-kernel-slab
index 29601d93a1c2..ed35833ad7f0 100644
--- a/Documentation/ABI/testing/sysfs-kernel-slab
+++ b/Documentation/ABI/testing/sysfs-kernel-slab
@@ -429,10 +429,15 @@ KernelVersion: 2.6.22
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
Christoph Lameter <cl@linux-foundation.org>
Description:
- The shrink file is written when memory should be reclaimed from
- a cache. Empty partial slabs are freed and the partial list is
- sorted so the slabs with the fewest available objects are used
- first.
+ The shrink file is used to reclaim unused slab cache
+ memory from a cache. Empty per-cpu or partial slabs
+ are freed and the partial list is sorted so the slabs
+ with the fewest available objects are used first.
+ It only accepts a value of "1" on write for shrinking
+ the cache. Other input values are considered invalid.
+ Shrinking slab caches might be expensive and can
+ adversely impact other running applications. So it
+ should be used with care.
What: /sys/kernel/slab/cache/slab_size
Date: May 2007
diff --git a/Documentation/admin-guide/cgroup-v1/memory.rst b/Documentation/admin-guide/cgroup-v1/memory.rst
index 41bdc038dad9..0ae4f564c2d6 100644
--- a/Documentation/admin-guide/cgroup-v1/memory.rst
+++ b/Documentation/admin-guide/cgroup-v1/memory.rst
@@ -85,8 +85,10 @@ Brief summary of control files.
memory.oom_control set/show oom controls.
memory.numa_stat show the number of memory usage per numa
node
-
memory.kmem.limit_in_bytes set/show hard limit for kernel memory
+ This knob is deprecated and shouldn't be
+ used. It is planned that this be removed in
+ the foreseeable future.
memory.kmem.usage_in_bytes show current kernel memory allocation
memory.kmem.failcnt show the number of kernel memory usage
hits limits
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 254d8a369f32..944e03e29f65 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -809,6 +809,8 @@
enables the feature at boot time. By default, it is
disabled and the system will work mostly the same as a
kernel built without CONFIG_DEBUG_PAGEALLOC.
+ Note: to get most of debug_pagealloc error reports, it's
+ useful to also enable the page_owner functionality.
on: enable the feature
debugpat [X86] Enable PAT debugging
diff --git a/Documentation/devicetree/bindings/i2c/brcm,bcm2835-i2c.txt b/Documentation/devicetree/bindings/i2c/brcm,bcm2835-i2c.txt
index e9de3756752b..c9a6587fe4bb 100644
--- a/Documentation/devicetree/bindings/i2c/brcm,bcm2835-i2c.txt
+++ b/Documentation/devicetree/bindings/i2c/brcm,bcm2835-i2c.txt
@@ -1,7 +1,9 @@
Broadcom BCM2835 I2C controller
Required properties:
-- compatible : Should be "brcm,bcm2835-i2c".
+- compatible : Should be one of:
+ "brcm,bcm2711-i2c"
+ "brcm,bcm2835-i2c"
- reg: Should contain register location and length.
- interrupts: Should contain interrupt.
- clocks : The clock feeding the I2C controller.
diff --git a/Documentation/devicetree/bindings/i2c/i2c-rcar.txt b/Documentation/devicetree/bindings/i2c/renesas,i2c.txt
index 3ee5e8f6ee01..3ee5e8f6ee01 100644
--- a/Documentation/devicetree/bindings/i2c/i2c-rcar.txt
+++ b/Documentation/devicetree/bindings/i2c/renesas,i2c.txt
diff --git a/Documentation/devicetree/bindings/i2c/i2c-emev2.txt b/Documentation/devicetree/bindings/i2c/renesas,iic-emev2.txt
index 5ed1ea1c7e14..5ed1ea1c7e14 100644
--- a/Documentation/devicetree/bindings/i2c/i2c-emev2.txt
+++ b/Documentation/devicetree/bindings/i2c/renesas,iic-emev2.txt
diff --git a/Documentation/devicetree/bindings/i2c/i2c-sh_mobile.txt b/Documentation/devicetree/bindings/i2c/renesas,iic.txt
index 202602e6e837..202602e6e837 100644
--- a/Documentation/devicetree/bindings/i2c/i2c-sh_mobile.txt
+++ b/Documentation/devicetree/bindings/i2c/renesas,iic.txt
diff --git a/Documentation/devicetree/bindings/i2c/i2c-riic.txt b/Documentation/devicetree/bindings/i2c/renesas,riic.txt
index e26fe3ad86a9..e26fe3ad86a9 100644
--- a/Documentation/devicetree/bindings/i2c/i2c-riic.txt
+++ b/Documentation/devicetree/bindings/i2c/renesas,riic.txt
diff --git a/MAINTAINERS b/MAINTAINERS
index ec807d446854..a97f1be63b9d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7865,6 +7865,12 @@ S: Maintained
F: drivers/mfd/lpc_ich.c
F: drivers/gpio/gpio-ich.c
+ICY I2C DRIVER
+M: Max Staudt <max@enpas.org>
+L: linux-i2c@vger.kernel.org
+S: Maintained
+F: drivers/i2c/busses/i2c-icy.c
+
IDE SUBSYSTEM
M: "David S. Miller" <davem@davemloft.net>
L: linux-ide@vger.kernel.org
@@ -13791,7 +13797,7 @@ F: drivers/clk/renesas/
RENESAS EMEV2 I2C DRIVER
M: Wolfram Sang <wsa+renesas@sang-engineering.com>
S: Supported
-F: Documentation/devicetree/bindings/i2c/i2c-emev2.txt
+F: Documentation/devicetree/bindings/i2c/renesas,iic-emev2.txt
F: drivers/i2c/busses/i2c-emev2.c
RENESAS ETHERNET DRIVERS
@@ -13813,15 +13819,15 @@ F: drivers/iio/adc/rcar-gyroadc.c
RENESAS R-CAR I2C DRIVERS
M: Wolfram Sang <wsa+renesas@sang-engineering.com>
S: Supported
-F: Documentation/devicetree/bindings/i2c/i2c-rcar.txt
-F: Documentation/devicetree/bindings/i2c/i2c-sh_mobile.txt
+F: Documentation/devicetree/bindings/i2c/renesas,i2c.txt
+F: Documentation/devicetree/bindings/i2c/renesas,iic.txt
F: drivers/i2c/busses/i2c-rcar.c
F: drivers/i2c/busses/i2c-sh_mobile.c
RENESAS RIIC DRIVER
M: Chris Brandt <chris.brandt@renesas.com>
S: Supported
-F: Documentation/devicetree/bindings/i2c/i2c-riic.txt
+F: Documentation/devicetree/bindings/i2c/renesas,riic.txt
F: drivers/i2c/busses/i2c-riic.c
RENESAS USB PHY DRIVER
diff --git a/arch/Kconfig b/arch/Kconfig
index 0fcf8ec1e098..5f8a5d84dbbe 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -706,6 +706,17 @@ config HAVE_ARCH_COMPAT_MMAP_BASES
and vice-versa 32-bit applications to call 64-bit mmap().
Required for applications doing different bitness syscalls.
+# This allows to use a set of generic functions to determine mmap base
+# address by giving priority to top-down scheme only if the process
+# is not in legacy mode (compat task, unlimited stack size or
+# sysctl_legacy_va_layout).
+# Architecture that selects this option can provide its own version of:
+# - STACK_RND_MASK
+config ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
+ bool
+ depends on MMU
+ select ARCH_HAS_ELF_RANDOMIZE
+
config HAVE_COPY_THREAD_TLS
bool
help
diff --git a/arch/alpha/include/asm/pgalloc.h b/arch/alpha/include/asm/pgalloc.h
index 71ded3b7d82d..eb91f1e85629 100644
--- a/arch/alpha/include/asm/pgalloc.h
+++ b/arch/alpha/include/asm/pgalloc.h
@@ -53,6 +53,4 @@ pmd_free(struct mm_struct *mm, pmd_t *pmd)
free_page((unsigned long)pmd);
}
-#define check_pgt_cache() do { } while (0)
-
#endif /* _ALPHA_PGALLOC_H */
diff --git a/arch/alpha/include/asm/pgtable.h b/arch/alpha/include/asm/pgtable.h
index 89c2032f9960..065b57f408c3 100644
--- a/arch/alpha/include/asm/pgtable.h
+++ b/arch/alpha/include/asm/pgtable.h
@@ -359,11 +359,6 @@ extern void paging_init(void);
#include <asm-generic/pgtable.h>
-/*
- * No page table caches to initialise
- */
-#define pgtable_cache_init() do { } while (0)
-
/* We have our own get_unmapped_area to cope with ADDR_LIMIT_32BIT. */
#define HAVE_ARCH_UNMAPPED_AREA
diff --git a/arch/arc/include/asm/pgalloc.h b/arch/arc/include/asm/pgalloc.h
index 9bdb8ed5b0db..4751f2251cd9 100644
--- a/arch/arc/include/asm/pgalloc.h
+++ b/arch/arc/include/asm/pgalloc.h
@@ -129,7 +129,6 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t ptep)
#define __pte_free_tlb(tlb, pte, addr) pte_free((tlb)->mm, pte)
-#define check_pgt_cache() do { } while (0)
#define pmd_pgtable(pmd) ((pgtable_t) pmd_page_vaddr(pmd))
#endif /* _ASM_ARC_PGALLOC_H */
diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h
index 1d87c18a2976..7addd0301c51 100644
--- a/arch/arc/include/asm/pgtable.h
+++ b/arch/arc/include/asm/pgtable.h
@@ -395,11 +395,6 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
/* to cope with aliasing VIPT cache */
#define HAVE_ARCH_UNMAPPED_AREA
-/*
- * No page table caches to initialise
- */
-#define pgtable_cache_init() do { } while (0)
-
#endif /* __ASSEMBLY__ */
#endif
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 229f2cdd81ca..8a50efb559f3 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -34,6 +34,7 @@ config ARM
select ARCH_SUPPORTS_ATOMIC_RMW
select ARCH_USE_BUILTIN_BSWAP
select ARCH_USE_CMPXCHG_LOCKREF
+ select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
select ARCH_WANT_IPC_PARSE_VERSION
select BINFMT_FLAT_ARGVP_ENVP_ON_STACK
select BUILDTIME_EXTABLE_SORT if MMU
diff --git a/arch/arm/include/asm/pgalloc.h b/arch/arm/include/asm/pgalloc.h
index a2a68b751971..069da393110c 100644
--- a/arch/arm/include/asm/pgalloc.h
+++ b/arch/arm/include/asm/pgalloc.h
@@ -15,8 +15,6 @@
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
-#define check_pgt_cache() do { } while (0)
-
#ifdef CONFIG_MMU
#define _PAGE_USER_TABLE (PMD_TYPE_TABLE | PMD_BIT4 | PMD_DOMAIN(DOMAIN_USER))
diff --git a/arch/arm/include/asm/pgtable-nommu.h b/arch/arm/include/asm/pgtable-nommu.h
index d0de24f06724..010fa1a35a68 100644
--- a/arch/arm/include/asm/pgtable-nommu.h
+++ b/arch/arm/include/asm/pgtable-nommu.h
@@ -71,11 +71,6 @@ typedef pte_t *pte_addr_t;
extern unsigned int kobjsize(const void *objp);
/*
- * No page table caches to initialise.
- */
-#define pgtable_cache_init() do { } while (0)
-
-/*
* All 32bit addresses are effectively valid for vmalloc...
* Sort of meaningless for non-VM targets.
*/
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index f2e990dc27e7..3ae120cd1715 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -368,8 +368,6 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
#define HAVE_ARCH_UNMAPPED_AREA
#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
-#define pgtable_cache_init() do { } while (0)
-
#endif /* !__ASSEMBLY__ */
#endif /* CONFIG_MMU */
diff --git a/arch/arm/include/asm/processor.h b/arch/arm/include/asm/processor.h
index 20c2f42454b8..614bf829e454 100644
--- a/arch/arm/include/asm/processor.h
+++ b/arch/arm/include/asm/processor.h
@@ -140,8 +140,6 @@ static inline void prefetchw(const void *ptr)
#endif
#endif
-#define HAVE_ARCH_PICK_MMAP_LAYOUT
-
#endif
#endif /* __ASM_ARM_PROCESSOR_H */
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index f934a6739fc0..9485acc520a4 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -319,11 +319,6 @@ unsigned long get_wchan(struct task_struct *p)
return 0;
}
-unsigned long arch_randomize_brk(struct mm_struct *mm)
-{
- return randomize_page(mm->brk, 0x02000000);
-}
-
#ifdef CONFIG_MMU
#ifdef CONFIG_KUSER_HELPERS
/*
diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c
index 6ecbda87ee46..6d89db7895d1 100644
--- a/arch/arm/mm/flush.c
+++ b/arch/arm/mm/flush.c
@@ -204,18 +204,17 @@ void __flush_dcache_page(struct address_space *mapping, struct page *page)
* coherent with the kernels mapping.
*/
if (!PageHighMem(page)) {
- size_t page_size = PAGE_SIZE << compound_order(page);
- __cpuc_flush_dcache_area(page_address(page), page_size);
+ __cpuc_flush_dcache_area(page_address(page), page_size(page));
} else {
unsigned long i;
if (cache_is_vipt_nonaliasing()) {
- for (i = 0; i < (1 << compound_order(page)); i++) {
+ for (i = 0; i < compound_nr(page); i++) {
void *addr = kmap_atomic(page + i);
__cpuc_flush_dcache_area(addr, PAGE_SIZE);
kunmap_atomic(addr);
}
} else {
- for (i = 0; i < (1 << compound_order(page)); i++) {
+ for (i = 0; i < compound_nr(page); i++) {
void *addr = kmap_high_get(page + i);
if (addr) {
__cpuc_flush_dcache_area(addr, PAGE_SIZE);
diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c
index f866870db749..b8d912ac9e61 100644
--- a/arch/arm/mm/mmap.c
+++ b/arch/arm/mm/mmap.c
@@ -17,33 +17,6 @@
((((addr)+SHMLBA-1)&~(SHMLBA-1)) + \
(((pgoff)<<PAGE_SHIFT) & (SHMLBA-1)))
-/* gap between mmap and stack */
-#define MIN_GAP (128*1024*1024UL)
-#define MAX_GAP ((TASK_SIZE)/6*5)
-
-static int mmap_is_legacy(struct rlimit *rlim_stack)
-{
- if (current->personality & ADDR_COMPAT_LAYOUT)
- return 1;
-
- if (rlim_stack->rlim_cur == RLIM_INFINITY)
- return 1;
-
- return sysctl_legacy_va_layout;
-}
-
-static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack)
-{
- unsigned long gap = rlim_stack->rlim_cur;
-
- if (gap < MIN_GAP)
- gap = MIN_GAP;
- else if (gap > MAX_GAP)
- gap = MAX_GAP;
-
- return PAGE_ALIGN(TASK_SIZE - gap - rnd);
-}
-
/*
* We need to ensure that shared mappings are correctly aligned to
* avoid aliasing issues with VIPT caches. We need to ensure that
@@ -171,31 +144,6 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
return addr;
}
-unsigned long arch_mmap_rnd(void)
-{
- unsigned long rnd;
-
- rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1);
-
- return rnd << PAGE_SHIFT;
-}
-
-void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
-{
- unsigned long random_factor = 0UL;
-
- if (current->flags & PF_RANDOMIZE)
- random_factor = arch_mmap_rnd();
-
- if (mmap_is_legacy(rlim_stack)) {
- mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
- mm->get_unmapped_area = arch_get_unmapped_area;
- } else {
- mm->mmap_base = mmap_base(random_factor, rlim_stack);
- mm->get_unmapped_area = arch_get_unmapped_area_topdown;
- }
-}
-
/*
* You really shouldn't be using read() or write() on /dev/mem. This
* might go away in the future.
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 37c610963eee..866e05882799 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -15,7 +15,6 @@ config ARM64
select ARCH_HAS_DMA_COHERENT_TO_PFN
select ARCH_HAS_DMA_PREP_COHERENT
select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
- select ARCH_HAS_ELF_RANDOMIZE
select ARCH_HAS_FAST_MULTIPLIER
select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_GCOV_PROFILE_ALL
@@ -71,6 +70,7 @@ config ARM64
select ARCH_SUPPORTS_INT128 if GCC_VERSION >= 50000 || CC_IS_CLANG
select ARCH_SUPPORTS_NUMA_BALANCING
select ARCH_WANT_COMPAT_IPC_PARSE_VERSION if COMPAT
+ select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
select ARCH_WANT_FRAME_POINTERS
select ARCH_WANT_HUGE_PMD_SHARE if ARM64_4K_PAGES || (ARM64_16K_PAGES && !ARM64_VA_BITS_36)
select ARCH_HAS_UBSAN_SANITIZE_ALL
diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
index 14d0bc44d451..172d76fa0245 100644
--- a/arch/arm64/include/asm/pgalloc.h
+++ b/arch/arm64/include/asm/pgalloc.h
@@ -15,8 +15,6 @@
#include <asm-generic/pgalloc.h> /* for pte_{alloc,free}_one */
-#define check_pgt_cache() do { } while (0)
-
#define PGD_SIZE (PTRS_PER_PGD * sizeof(pgd_t))
#if CONFIG_PGTABLE_LEVELS > 2
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 57427d17580e..7576df00eb50 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -861,8 +861,6 @@ extern int kern_addr_valid(unsigned long addr);
#include <asm-generic/pgtable.h>
-static inline void pgtable_cache_init(void) { }
-
/*
* On AArch64, the cache coherency is handled via the set_pte_at() function.
*/
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index c67848c55009..5623685c7d13 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -280,8 +280,6 @@ static inline void spin_lock_prefetch(const void *ptr)
"nop") : : "p" (ptr));
}
-#define HAVE_ARCH_PICK_MMAP_LAYOUT
-
extern unsigned long __ro_after_init signal_minsigstksz; /* sigframe size */
extern void __init minsigstksz_setup(void);
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 03689c0beb34..a47462def04b 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -557,14 +557,6 @@ unsigned long arch_align_stack(unsigned long sp)
return sp & ~0xf;
}
-unsigned long arch_randomize_brk(struct mm_struct *mm)
-{
- if (is_compat_task())
- return randomize_page(mm->brk, SZ_32M);
- else
- return randomize_page(mm->brk, SZ_1G);
-}
-
/*
* Called from setup_new_exec() after (COMPAT_)SET_PERSONALITY.
*/
diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c
index dc19300309d2..ac485163a4a7 100644
--- a/arch/arm64/mm/flush.c
+++ b/arch/arm64/mm/flush.c
@@ -56,8 +56,7 @@ void __sync_icache_dcache(pte_t pte)
struct page *page = pte_page(pte);
if (!test_and_set_bit(PG_dcache_clean, &page->flags))
- sync_icache_aliases(page_address(page),
- PAGE_SIZE << compound_order(page));
+ sync_icache_aliases(page_address(page), page_size(page));
}
EXPORT_SYMBOL_GPL(__sync_icache_dcache);
diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c
index b050641b5139..3028bacbc4e9 100644
--- a/arch/arm64/mm/mmap.c
+++ b/arch/arm64/mm/mmap.c
@@ -21,78 +21,6 @@
#include <asm/cputype.h>
/*
- * Leave enough space between the mmap area and the stack to honour ulimit in
- * the face of randomisation.
- */
-#define MIN_GAP (SZ_128M)
-#define MAX_GAP (STACK_TOP/6*5)
-
-static int mmap_is_legacy(struct rlimit *rlim_stack)
-{
- if (current->personality & ADDR_COMPAT_LAYOUT)
- return 1;
-
- if (rlim_stack->rlim_cur == RLIM_INFINITY)
- return 1;
-
- return sysctl_legacy_va_layout;
-}
-
-unsigned long arch_mmap_rnd(void)
-{
- unsigned long rnd;
-
-#ifdef CONFIG_COMPAT
- if (test_thread_flag(TIF_32BIT))
- rnd = get_random_long() & ((1UL << mmap_rnd_compat_bits) - 1);
- else
-#endif
- rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1);
- return rnd << PAGE_SHIFT;
-}
-
-static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack)
-{
- unsigned long gap = rlim_stack->rlim_cur;
- unsigned long pad = (STACK_RND_MASK << PAGE_SHIFT) + stack_guard_gap;
-
- /* Values close to RLIM_INFINITY can overflow. */
- if (gap + pad > gap)
- gap += pad;
-
- if (gap < MIN_GAP)
- gap = MIN_GAP;
- else if (gap > MAX_GAP)
- gap = MAX_GAP;
-
- return PAGE_ALIGN(STACK_TOP - gap - rnd);
-}
-
-/*
- * This function, called very early during the creation of a new process VM
- * image, sets up which VM layout function to use:
- */
-void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
-{
- unsigned long random_factor = 0UL;
-
- if (current->flags & PF_RANDOMIZE)
- random_factor = arch_mmap_rnd();
-
- /*
- * Fall back to the standard layout if the personality bit is set, or
- * if the expected stack growth is unlimited:
- */
- if (mmap_is_legacy(rlim_stack)) {
- mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
- mm->get_unmapped_area = arch_get_unmapped_area;
- } else {
- mm->mmap_base = mmap_base(random_factor, rlim_stack);
- mm->get_unmapped_area = arch_get_unmapped_area_topdown;
- }
-}
-
-/*
* You really shouldn't be using read() or write() on /dev/mem. This might go
* away in the future.
*/
diff --git a/arch/arm64/mm/pgd.c b/arch/arm64/mm/pgd.c
index 7548f9ca1f11..4a64089e5771 100644
--- a/arch/arm64/mm/pgd.c
+++ b/arch/arm64/mm/pgd.c
@@ -35,7 +35,7 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd)
kmem_cache_free(pgd_cache, pgd);
}
-void __init pgd_cache_init(void)
+void __init pgtable_cache_init(void)
{
if (PGD_SIZE == PAGE_SIZE)
return;
diff --git a/arch/c6x/include/asm/pgtable.h b/arch/c6x/include/asm/pgtable.h
index 0bd805964ea6..0b6919c00413 100644
--- a/arch/c6x/include/asm/pgtable.h
+++ b/arch/c6x/include/asm/pgtable.h
@@ -60,11 +60,6 @@ extern unsigned long empty_zero_page;
#define swapper_pg_dir ((pgd_t *) 0)
/*
- * No page table caches to initialise
- */
-#define pgtable_cache_init() do { } while (0)
-
-/*
* c6x is !MMU, so define the simpliest implementation
*/
#define pgprot_writecombine pgprot_noncached
diff --git a/arch/csky/include/asm/pgalloc.h b/arch/csky/include/asm/pgalloc.h
index 98c5716708d6..d089113fe41f 100644
--- a/arch/csky/include/asm/pgalloc.h
+++ b/arch/csky/include/asm/pgalloc.h
@@ -75,8 +75,6 @@ do { \
tlb_remove_page(tlb, pte); \
} while (0)
-#define check_pgt_cache() do {} while (0)
-
extern void pagetable_init(void);
extern void pre_mmu_init(void);
extern void pre_trap_init(void);
diff --git a/arch/csky/include/asm/pgtable.h b/arch/csky/include/asm/pgtable.h
index c429a6f347de..0040b3a05b61 100644
--- a/arch/csky/include/asm/pgtable.h
+++ b/arch/csky/include/asm/pgtable.h
@@ -296,11 +296,6 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
/* Needs to be defined here and not in linux/mm.h, as it is arch dependent */
#define kern_addr_valid(addr) (1)
-/*
- * No page table caches to initialise
- */
-#define pgtable_cache_init() do {} while (0)
-
#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \
remap_pfn_range(vma, vaddr, pfn, size, prot)
diff --git a/arch/h8300/include/asm/pgtable.h b/arch/h8300/include/asm/pgtable.h
index a99caa49d265..4d00152fab58 100644
--- a/arch/h8300/include/asm/pgtable.h
+++ b/arch/h8300/include/asm/pgtable.h
@@ -4,7 +4,6 @@
#define __ARCH_USE_5LEVEL_HACK
#include <asm-generic/pgtable-nopud.h>
#include <asm-generic/pgtable.h>
-#define pgtable_cache_init() do { } while (0)
extern void paging_init(void);
#define PAGE_NONE __pgprot(0) /* these mean nothing to NO_MM */
#define PAGE_SHARED __pgprot(0) /* these mean nothing to NO_MM */
@@ -35,11 +34,6 @@ extern unsigned int kobjsize(const void *objp);
extern int is_in_rom(unsigned long);
/*
- * No page table caches to initialise
- */
-#define pgtable_cache_init() do { } while (0)
-
-/*
* All 32bit addresses are effectively valid for vmalloc...
* Sort of meaningless for non-VM targets.
*/
diff --git a/arch/hexagon/include/asm/pgalloc.h b/arch/hexagon/include/asm/pgalloc.h
index d6544dc71258..5a6e79e7926d 100644
--- a/arch/hexagon/include/asm/pgalloc.h
+++ b/arch/hexagon/include/asm/pgalloc.h
@@ -13,8 +13,6 @@
#include <asm-generic/pgalloc.h> /* for pte_{alloc,free}_one */
-#define check_pgt_cache() do {} while (0)
-
extern unsigned long long kmap_generation;
/*
diff --git a/arch/hexagon/include/asm/pgtable.h b/arch/hexagon/include/asm/pgtable.h
index a3ff6d24c09e..2fec20ad939e 100644
--- a/arch/hexagon/include/asm/pgtable.h
+++ b/arch/hexagon/include/asm/pgtable.h
@@ -431,9 +431,6 @@ static inline int pte_exec(pte_t pte)
#define __pte_offset(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
-/* I think this is in case we have page table caches; needed by init/main.c */
-#define pgtable_cache_init() do { } while (0)
-
/*
* Swap/file PTE definitions. If _PAGE_PRESENT is zero, the rest of the PTE is
* interpreted as swap information. The remaining free bits are interpreted as
diff --git a/arch/hexagon/mm/Makefile b/arch/hexagon/mm/Makefile
index 1894263ae5bc..893838499591 100644
--- a/arch/hexagon/mm/Makefile
+++ b/arch/hexagon/mm/Makefile
@@ -3,5 +3,5 @@
# Makefile for Hexagon memory management subsystem
#
-obj-y := init.o pgalloc.o ioremap.o uaccess.o vm_fault.o cache.o
+obj-y := init.o ioremap.o uaccess.o vm_fault.o cache.o
obj-y += copy_to_user.o copy_from_user.o strnlen_user.o vm_tlb.o
diff --git a/arch/hexagon/mm/pgalloc.c b/arch/hexagon/mm/pgalloc.c
deleted file mode 100644
index 4d4316140237..000000000000
--- a/arch/hexagon/mm/pgalloc.c
+++ /dev/null
@@ -1,10 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved.
- */
-
-#include <linux/init.h>
-
-void __init pgtable_cache_init(void)
-{
-}
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 685a3df126ca..16714477eef4 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -72,10 +72,6 @@ config 64BIT
config ZONE_DMA32
def_bool y
-config QUICKLIST
- bool
- default y
-
config MMU
bool
default y
diff --git a/arch/ia64/include/asm/pgalloc.h b/arch/ia64/include/asm/pgalloc.h
index c9e481023c25..f4c491044882 100644
--- a/arch/ia64/include/asm/pgalloc.h
+++ b/arch/ia64/include/asm/pgalloc.h
@@ -19,18 +19,19 @@
#include <linux/mm.h>
#include <linux/page-flags.h>
#include <linux/threads.h>
-#include <linux/quicklist.h>
+
+#include <asm-generic/pgalloc.h>
#include <asm/mmu_context.h>
static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{
- return quicklist_alloc(0, GFP_KERNEL, NULL);
+ return (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
}
static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
- quicklist_free(0, NULL, pgd);
+ free_page((unsigned long)pgd);
}
#if CONFIG_PGTABLE_LEVELS == 4
@@ -42,12 +43,12 @@ pgd_populate(struct mm_struct *mm, pgd_t * pgd_entry, pud_t * pud)
static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
{
- return quicklist_alloc(0, GFP_KERNEL, NULL);
+ return (pud_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
}
static inline void pud_free(struct mm_struct *mm, pud_t *pud)
{
- quicklist_free(0, NULL, pud);
+ free_page((unsigned long)pud);
}
#define __pud_free_tlb(tlb, pud, address) pud_free((tlb)->mm, pud)
#endif /* CONFIG_PGTABLE_LEVELS == 4 */
@@ -60,12 +61,12 @@ pud_populate(struct mm_struct *mm, pud_t * pud_entry, pmd_t * pmd)
static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
{
- return quicklist_alloc(0, GFP_KERNEL, NULL);
+ return (pmd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
}
static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
{
- quicklist_free(0, NULL, pmd);
+ free_page((unsigned long)pmd);
}
#define __pmd_free_tlb(tlb, pmd, address) pmd_free((tlb)->mm, pmd)
@@ -83,43 +84,6 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t * pmd_entry, pte_t * pte)
pmd_val(*pmd_entry) = __pa(pte);
}
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
-{
- struct page *page;
- void *pg;
-
- pg = quicklist_alloc(0, GFP_KERNEL, NULL);
- if (!pg)
- return NULL;
- page = virt_to_page(pg);
- if (!pgtable_page_ctor(page)) {
- quicklist_free(0, NULL, pg);
- return NULL;
- }
- return page;
-}
-
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
-{
- return quicklist_alloc(0, GFP_KERNEL, NULL);
-}
-
-static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
-{
- pgtable_page_dtor(pte);
- quicklist_free_page(0, NULL, pte);
-}
-
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
- quicklist_free(0, NULL, pte);
-}
-
-static inline void check_pgt_cache(void)
-{
- quicklist_trim(0, NULL, 25, 16);
-}
-
#define __pte_free_tlb(tlb, pte, address) pte_free((tlb)->mm, pte)
#endif /* _ASM_IA64_PGALLOC_H */
diff --git a/arch/ia64/include/asm/pgtable.h b/arch/ia64/include/asm/pgtable.h
index b1e7468eb65a..d602e7c622db 100644
--- a/arch/ia64/include/asm/pgtable.h
+++ b/arch/ia64/include/asm/pgtable.h
@@ -566,11 +566,6 @@ extern struct page *zero_page_memmap_ptr;
#define KERNEL_TR_PAGE_SHIFT _PAGE_SIZE_64M
#define KERNEL_TR_PAGE_SIZE (1 << KERNEL_TR_PAGE_SHIFT)
-/*
- * No page table caches to initialise
- */
-#define pgtable_cache_init() do { } while (0)
-
/* These tell get_user_pages() that the first gate page is accessible from user-level. */
#define FIXADDR_USER_START GATE_ADDR
#ifdef HAVE_BUGGY_SEGREL
diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c
index f10208478131..8e91c86e8072 100644
--- a/arch/ia64/kernel/irq_ia64.c
+++ b/arch/ia64/kernel/irq_ia64.c
@@ -583,6 +583,7 @@ void ia64_process_pending_intr(void)
static irqreturn_t dummy_handler (int irq, void *dev_id)
{
BUG();
+ return IRQ_NONE;
}
static struct irqaction ipi_irqaction = {
diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c
index db09a693f094..5b00dc3898e1 100644
--- a/arch/ia64/mm/contig.c
+++ b/arch/ia64/mm/contig.c
@@ -108,7 +108,6 @@ setup_per_cpu_areas(void)
struct pcpu_group_info *gi;
unsigned int cpu;
ssize_t static_size, reserved_size, dyn_size;
- int rc;
ai = pcpu_alloc_alloc_info(1, num_possible_cpus());
if (!ai)
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index 219fc640414b..4f33f6e7e206 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -186,7 +186,7 @@ void __init setup_per_cpu_areas(void)
unsigned long base_offset;
unsigned int cpu;
ssize_t static_size, reserved_size, dyn_size;
- int node, prev_node, unit, nr_units, rc;
+ int node, prev_node, unit, nr_units;
ai = pcpu_alloc_alloc_info(MAX_NUMNODES, nr_cpu_ids);
if (!ai)
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 678b98a09c85..bf9df2625bc8 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -64,7 +64,7 @@ __ia64_sync_icache_dcache (pte_t pte)
if (test_bit(PG_arch_1, &page->flags))
return; /* i-cache is already coherent with d-cache */
- flush_icache_range(addr, addr + (PAGE_SIZE << compound_order(page)));
+ flush_icache_range(addr, addr + page_size(page));
set_bit(PG_arch_1, &page->flags); /* mark page as clean */
}
diff --git a/arch/m68k/include/asm/pgtable_mm.h b/arch/m68k/include/asm/pgtable_mm.h
index fde4534b974f..646c174fff99 100644
--- a/arch/m68k/include/asm/pgtable_mm.h
+++ b/arch/m68k/include/asm/pgtable_mm.h
@@ -176,11 +176,4 @@ pgprot_t pgprot_dmacoherent(pgprot_t prot);
#include <asm-generic/pgtable.h>
#endif /* !__ASSEMBLY__ */
-/*
- * No page table caches to initialise
- */
-#define pgtable_cache_init() do { } while (0)
-
-#define check_pgt_cache() do { } while (0)
-
#endif /* _M68K_PGTABLE_H */
diff --git a/arch/m68k/include/asm/pgtable_no.h b/arch/m68k/include/asm/pgtable_no.h
index fc3a96c77bd8..c18165b0d904 100644
--- a/arch/m68k/include/asm/pgtable_no.h
+++ b/arch/m68k/include/asm/pgtable_no.h
@@ -45,11 +45,6 @@ extern void paging_init(void);
#define ZERO_PAGE(vaddr) (virt_to_page(0))
/*
- * No page table caches to initialise.
- */
-#define pgtable_cache_init() do { } while (0)
-
-/*
* All 32bit addresses are effectively valid for vmalloc...
* Sort of meaningless for non-VM targets.
*/
@@ -60,6 +55,4 @@ extern void paging_init(void);
#include <asm-generic/pgtable.h>
-#define check_pgt_cache() do { } while (0)
-
#endif /* _M68KNOMMU_PGTABLE_H */
diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig
index 632c9477a0f6..c9c4be822456 100644
--- a/arch/microblaze/Kconfig
+++ b/arch/microblaze/Kconfig
@@ -5,15 +5,18 @@ config MICROBLAZE
select ARCH_NO_SWAP
select ARCH_HAS_BINFMT_FLAT if !MMU
select ARCH_HAS_DMA_COHERENT_TO_PFN if MMU
+ select ARCH_HAS_DMA_PREP_COHERENT
select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_HAS_SYNC_DMA_FOR_CPU
select ARCH_HAS_SYNC_DMA_FOR_DEVICE
+ select ARCH_HAS_UNCACHED_SEGMENT if !MMU
select ARCH_MIGHT_HAVE_PC_PARPORT
select ARCH_WANT_IPC_PARSE_VERSION
select BUILDTIME_EXTABLE_SORT
select TIMER_OF
select CLONE_BACKWARDS3
select COMMON_CLK
+ select DMA_DIRECT_REMAP if MMU
select GENERIC_ATOMIC64
select GENERIC_CLOCKEVENTS
select GENERIC_CPU_DEVICES
diff --git a/arch/microblaze/boot/dts/system.dts b/arch/microblaze/boot/dts/system.dts
index 5a8a9d090c37..5b236527176e 100644
--- a/arch/microblaze/boot/dts/system.dts
+++ b/arch/microblaze/boot/dts/system.dts
@@ -18,7 +18,6 @@
#address-cells = <1>;
#size-cells = <1>;
compatible = "xlnx,microblaze";
- hard-reset-gpios = <&LEDs_8Bit 2 1>;
model = "testing";
DDR2_SDRAM: memory@90000000 {
device_type = "memory";
@@ -281,6 +280,21 @@
gpios = <&LEDs_8Bit 7 1>;
};
} ;
+
+ gpio-restart {
+ compatible = "gpio-restart";
+ /*
+ * FIXME: is this active low or active high?
+ * the current flag (1) indicates active low.
+ * delay measures are templates, should be adjusted
+ * to datasheet or trial-and-error with real hardware.
+ */
+ gpios = <&LEDs_8Bit 2 1>;
+ active-delay = <100>;
+ inactive-delay = <10>;
+ wait-delay = <100>;
+ };
+
RS232_Uart_1: serial@84000000 {
clock-frequency = <125000000>;
compatible = "xlnx,xps-uartlite-1.00.a";
diff --git a/arch/microblaze/configs/mmu_defconfig b/arch/microblaze/configs/mmu_defconfig
index 92fd4e95b488..654edfdc7867 100644
--- a/arch/microblaze/configs/mmu_defconfig
+++ b/arch/microblaze/configs/mmu_defconfig
@@ -5,15 +5,10 @@ CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_SYSFS_DEPRECATED=y
CONFIG_SYSFS_DEPRECATED_V2=y
-CONFIG_KALLSYMS_ALL=y
# CONFIG_BASE_FULL is not set
+CONFIG_KALLSYMS_ALL=y
CONFIG_EMBEDDED=y
CONFIG_SLAB=y
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-# CONFIG_BLK_DEV_BSG is not set
-CONFIG_PARTITION_ADVANCED=y
-# CONFIG_EFI_PARTITION is not set
CONFIG_XILINX_MICROBLAZE0_USE_MSR_INSTR=1
CONFIG_XILINX_MICROBLAZE0_USE_PCMP_INSTR=1
CONFIG_XILINX_MICROBLAZE0_USE_BARREL=1
@@ -25,14 +20,19 @@ CONFIG_MMU=y
CONFIG_CMDLINE_BOOL=y
CONFIG_CMDLINE_FORCE=y
CONFIG_HIGHMEM=y
-CONFIG_PCI=y
CONFIG_PCI_XILINX=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_EFI_PARTITION is not set
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
CONFIG_INET=y
# CONFIG_IPV6 is not set
CONFIG_BRIDGE=m
+CONFIG_PCI=y
CONFIG_MTD=y
CONFIG_MTD_CFI=y
CONFIG_MTD_CFI_INTELEXT=y
@@ -41,6 +41,7 @@ CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=8192
CONFIG_NETDEVICES=y
CONFIG_XILINX_EMACLITE=y
+CONFIG_XILINX_AXI_EMAC=y
CONFIG_XILINX_LL_TEMAC=y
# CONFIG_INPUT is not set
# CONFIG_SERIO is not set
@@ -59,6 +60,8 @@ CONFIG_SPI_XILINX=y
CONFIG_GPIOLIB=y
CONFIG_GPIO_SYSFS=y
CONFIG_GPIO_XILINX=y
+CONFIG_POWER_RESET=y
+CONFIG_POWER_RESET_GPIO_RESTART=y
# CONFIG_HWMON is not set
CONFIG_WATCHDOG=y
CONFIG_XILINX_WATCHDOG=y
@@ -74,8 +77,8 @@ CONFIG_CRAMFS=y
CONFIG_ROMFS_FS=y
CONFIG_NFS_FS=y
CONFIG_CIFS=y
-CONFIG_CIFS_STATS=y
CONFIG_CIFS_STATS2=y
+CONFIG_ENCRYPTED_KEYS=y
CONFIG_DEBUG_INFO=y
CONFIG_DEBUG_SLAB=y
CONFIG_DETECT_HUNG_TASK=y
@@ -83,6 +86,3 @@ CONFIG_DEBUG_SPINLOCK=y
CONFIG_KGDB=y
CONFIG_KGDB_TESTS=y
CONFIG_KGDB_KDB=y
-CONFIG_EARLY_PRINTK=y
-CONFIG_KEYS=y
-CONFIG_ENCRYPTED_KEYS=y
diff --git a/arch/microblaze/configs/nommu_defconfig b/arch/microblaze/configs/nommu_defconfig
index 06d69a6e192d..377de39ccb8c 100644
--- a/arch/microblaze/configs/nommu_defconfig
+++ b/arch/microblaze/configs/nommu_defconfig
@@ -7,15 +7,10 @@ CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_SYSFS_DEPRECATED=y
CONFIG_SYSFS_DEPRECATED_V2=y
-CONFIG_KALLSYMS_ALL=y
# CONFIG_BASE_FULL is not set
+CONFIG_KALLSYMS_ALL=y
CONFIG_EMBEDDED=y
CONFIG_SLAB=y
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-# CONFIG_BLK_DEV_BSG is not set
-CONFIG_PARTITION_ADVANCED=y
-# CONFIG_EFI_PARTITION is not set
CONFIG_XILINX_MICROBLAZE0_USE_MSR_INSTR=1
CONFIG_XILINX_MICROBLAZE0_USE_PCMP_INSTR=1
CONFIG_XILINX_MICROBLAZE0_USE_BARREL=1
@@ -25,13 +20,18 @@ CONFIG_XILINX_MICROBLAZE0_USE_FPU=2
CONFIG_HZ_100=y
CONFIG_CMDLINE_BOOL=y
CONFIG_CMDLINE_FORCE=y
-CONFIG_PCI=y
CONFIG_PCI_XILINX=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_EFI_PARTITION is not set
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
CONFIG_INET=y
# CONFIG_IPV6 is not set
+CONFIG_PCI=y
CONFIG_MTD=y
CONFIG_MTD_CMDLINE_PARTS=y
CONFIG_MTD_BLOCK=y
@@ -62,6 +62,8 @@ CONFIG_SPI_XILINX=y
CONFIG_GPIOLIB=y
CONFIG_GPIO_SYSFS=y
CONFIG_GPIO_XILINX=y
+CONFIG_POWER_RESET=y
+CONFIG_POWER_RESET_GPIO_RESTART=y
# CONFIG_HWMON is not set
CONFIG_WATCHDOG=y
CONFIG_XILINX_WATCHDOG=y
@@ -75,11 +77,6 @@ CONFIG_ROMFS_FS=y
CONFIG_NFS_FS=y
CONFIG_NFS_V3_ACL=y
CONFIG_NLS=y
-CONFIG_DEBUG_INFO=y
-CONFIG_DEBUG_SLAB=y
-CONFIG_DETECT_HUNG_TASK=y
-CONFIG_DEBUG_SPINLOCK=y
-CONFIG_EARLY_PRINTK=y
CONFIG_KEYS=y
CONFIG_ENCRYPTED_KEYS=y
CONFIG_CRYPTO_ECB=y
@@ -87,3 +84,7 @@ CONFIG_CRYPTO_MD4=y
CONFIG_CRYPTO_MD5=y
CONFIG_CRYPTO_ARC4=y
CONFIG_CRYPTO_DES=y
+CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_SLAB=y
+CONFIG_DETECT_HUNG_TASK=y
+CONFIG_DEBUG_SPINLOCK=y
diff --git a/arch/microblaze/include/asm/io.h b/arch/microblaze/include/asm/io.h
index c7968139486f..86c95b2a1ce1 100644
--- a/arch/microblaze/include/asm/io.h
+++ b/arch/microblaze/include/asm/io.h
@@ -40,7 +40,6 @@ extern void iounmap(volatile void __iomem *addr);
extern void __iomem *ioremap(phys_addr_t address, unsigned long size);
#define ioremap_nocache(addr, size) ioremap((addr), (size))
-#define ioremap_fullcache(addr, size) ioremap((addr), (size))
#define ioremap_wc(addr, size) ioremap((addr), (size))
#define ioremap_wt(addr, size) ioremap((addr), (size))
diff --git a/arch/microblaze/include/asm/pgalloc.h b/arch/microblaze/include/asm/pgalloc.h
index f4cc9ffc449e..7ecb05baa601 100644
--- a/arch/microblaze/include/asm/pgalloc.h
+++ b/arch/microblaze/include/asm/pgalloc.h
@@ -21,83 +21,23 @@
#include <asm/cache.h>
#include <asm/pgtable.h>
-#define PGDIR_ORDER 0
-
-/*
- * This is handled very differently on MicroBlaze since out page tables
- * are all 0's and I want to be able to use these zero'd pages elsewhere
- * as well - it gives us quite a speedup.
- * -- Cort
- */
-extern struct pgtable_cache_struct {
- unsigned long *pgd_cache;
- unsigned long *pte_cache;
- unsigned long pgtable_cache_sz;
-} quicklists;
-
-#define pgd_quicklist (quicklists.pgd_cache)
-#define pmd_quicklist ((unsigned long *)0)
-#define pte_quicklist (quicklists.pte_cache)
-#define pgtable_cache_size (quicklists.pgtable_cache_sz)
-
-extern unsigned long *zero_cache; /* head linked list of pre-zero'd pages */
-extern atomic_t zero_sz; /* # currently pre-zero'd pages */
-extern atomic_t zeropage_hits; /* # zero'd pages request that we've done */
-extern atomic_t zeropage_calls; /* # zero'd pages request that've been made */
-extern atomic_t zerototal; /* # pages zero'd over time */
-
-#define zero_quicklist (zero_cache)
-#define zero_cache_sz (zero_sz)
-#define zero_cache_calls (zeropage_calls)
-#define zero_cache_hits (zeropage_hits)
-#define zero_cache_total (zerototal)
-
-/*
- * return a pre-zero'd page from the list,
- * return NULL if none available -- Cort
- */
-extern unsigned long get_zero_page_fast(void);
+#define __HAVE_ARCH_PTE_ALLOC_ONE_KERNEL
+#include <asm-generic/pgalloc.h>
extern void __bad_pte(pmd_t *pmd);
-static inline pgd_t *get_pgd_slow(void)
+static inline pgd_t *get_pgd(void)
{
- pgd_t *ret;
-
- ret = (pgd_t *)__get_free_pages(GFP_KERNEL, PGDIR_ORDER);
- if (ret != NULL)
- clear_page(ret);
- return ret;
+ return (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, 0);
}
-static inline pgd_t *get_pgd_fast(void)
-{
- unsigned long *ret;
-
- ret = pgd_quicklist;
- if (ret != NULL) {
- pgd_quicklist = (unsigned long *)(*ret);
- ret[0] = 0;
- pgtable_cache_size--;
- } else
- ret = (unsigned long *)get_pgd_slow();
- return (pgd_t *)ret;
-}
-
-static inline void free_pgd_fast(pgd_t *pgd)
-{
- *(unsigned long **)pgd = pgd_quicklist;
- pgd_quicklist = (unsigned long *) pgd;
- pgtable_cache_size++;
-}
-
-static inline void free_pgd_slow(pgd_t *pgd)
+static inline void free_pgd(pgd_t *pgd)
{
free_page((unsigned long)pgd);
}
-#define pgd_free(mm, pgd) free_pgd_fast(pgd)
-#define pgd_alloc(mm) get_pgd_fast()
+#define pgd_free(mm, pgd) free_pgd(pgd)
+#define pgd_alloc(mm) get_pgd()
#define pmd_pgtable(pmd) pmd_page(pmd)
@@ -110,50 +50,6 @@ static inline void free_pgd_slow(pgd_t *pgd)
extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
-static inline struct page *pte_alloc_one(struct mm_struct *mm)
-{
- struct page *ptepage;
-
-#ifdef CONFIG_HIGHPTE
- int flags = GFP_KERNEL | __GFP_HIGHMEM;
-#else
- int flags = GFP_KERNEL;
-#endif
-
- ptepage = alloc_pages(flags, 0);
- if (!ptepage)
- return NULL;
- clear_highpage(ptepage);
- if (!pgtable_page_ctor(ptepage)) {
- __free_page(ptepage);
- return NULL;
- }
- return ptepage;
-}
-
-static inline void pte_free_fast(pte_t *pte)
-{
- *(unsigned long **)pte = pte_quicklist;
- pte_quicklist = (unsigned long *) pte;
- pgtable_cache_size++;
-}
-
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
- free_page((unsigned long)pte);
-}
-
-static inline void pte_free_slow(struct page *ptepage)
-{
- __free_page(ptepage);
-}
-
-static inline void pte_free(struct mm_struct *mm, struct page *ptepage)
-{
- pgtable_page_dtor(ptepage);
- __free_page(ptepage);
-}
-
#define __pte_free_tlb(tlb, pte, addr) pte_free((tlb)->mm, (pte))
#define pmd_populate(mm, pmd, pte) \
@@ -171,10 +67,6 @@ static inline void pte_free(struct mm_struct *mm, struct page *ptepage)
#define __pmd_free_tlb(tlb, x, addr) pmd_free((tlb)->mm, x)
#define pgd_populate(mm, pmd, pte) BUG()
-extern int do_check_pgt_cache(int, int);
-
#endif /* CONFIG_MMU */
-#define check_pgt_cache() do { } while (0)
-
#endif /* _ASM_MICROBLAZE_PGALLOC_H */
diff --git a/arch/microblaze/include/asm/pgtable.h b/arch/microblaze/include/asm/pgtable.h
index 142d3f004848..954b69af451f 100644
--- a/arch/microblaze/include/asm/pgtable.h
+++ b/arch/microblaze/include/asm/pgtable.h
@@ -46,8 +46,6 @@ extern int mem_init_done;
#define swapper_pg_dir ((pgd_t *) NULL)
-#define pgtable_cache_init() do {} while (0)
-
#define arch_enter_lazy_cpu_mode() do {} while (0)
#define pgprot_noncached_wc(prot) prot
@@ -526,11 +524,6 @@ extern unsigned long iopa(unsigned long addr);
/* Needs to be defined here and not in linux/mm.h, as it is arch dependent */
#define kern_addr_valid(addr) (1)
-/*
- * No page table caches to initialise
- */
-#define pgtable_cache_init() do { } while (0)
-
void do_page_fault(struct pt_regs *regs, unsigned long address,
unsigned long error_code);
diff --git a/arch/microblaze/include/asm/uaccess.h b/arch/microblaze/include/asm/uaccess.h
index bff2a71c828a..a1f206b90753 100644
--- a/arch/microblaze/include/asm/uaccess.h
+++ b/arch/microblaze/include/asm/uaccess.h
@@ -163,44 +163,15 @@ extern long __user_bad(void);
* Returns zero on success, or -EFAULT on error.
* On error, the variable @x is set to zero.
*/
-#define get_user(x, ptr) \
- __get_user_check((x), (ptr), sizeof(*(ptr)))
-
-#define __get_user_check(x, ptr, size) \
-({ \
- unsigned long __gu_val = 0; \
- const typeof(*(ptr)) __user *__gu_addr = (ptr); \
- int __gu_err = 0; \
- \
- if (access_ok(__gu_addr, size)) { \
- switch (size) { \
- case 1: \
- __get_user_asm("lbu", __gu_addr, __gu_val, \
- __gu_err); \
- break; \
- case 2: \
- __get_user_asm("lhu", __gu_addr, __gu_val, \
- __gu_err); \
- break; \
- case 4: \
- __get_user_asm("lw", __gu_addr, __gu_val, \
- __gu_err); \
- break; \
- default: \
- __gu_err = __user_bad(); \
- break; \
- } \
- } else { \
- __gu_err = -EFAULT; \
- } \
- x = (__force typeof(*(ptr)))__gu_val; \
- __gu_err; \
+#define get_user(x, ptr) ({ \
+ const typeof(*(ptr)) __user *__gu_ptr = (ptr); \
+ access_ok(__gu_ptr, sizeof(*__gu_ptr)) ? \
+ __get_user(x, __gu_ptr) : -EFAULT; \
})
#define __get_user(x, ptr) \
({ \
unsigned long __gu_val = 0; \
- /*unsigned long __gu_ptr = (unsigned long)(ptr);*/ \
long __gu_err; \
switch (sizeof(*(ptr))) { \
case 1: \
@@ -212,6 +183,11 @@ extern long __user_bad(void);
case 4: \
__get_user_asm("lw", (ptr), __gu_val, __gu_err); \
break; \
+ case 8: \
+ __gu_err = __copy_from_user(&__gu_val, ptr, 8); \
+ if (__gu_err) \
+ __gu_err = -EFAULT; \
+ break; \
default: \
/* __gu_val = 0; __gu_err = -EINVAL;*/ __gu_err = __user_bad();\
} \
diff --git a/arch/microblaze/kernel/reset.c b/arch/microblaze/kernel/reset.c
index fcbe1daf6316..5f4722908164 100644
--- a/arch/microblaze/kernel/reset.c
+++ b/arch/microblaze/kernel/reset.c
@@ -8,83 +8,9 @@
*/
#include <linux/init.h>
+#include <linux/delay.h>
#include <linux/of_platform.h>
-
-/* Trigger specific functions */
-#ifdef CONFIG_GPIOLIB
-
-#include <linux/of_gpio.h>
-
-static int handle; /* reset pin handle */
-static unsigned int reset_val;
-
-static int of_platform_reset_gpio_probe(void)
-{
- int ret;
- handle = of_get_named_gpio(of_find_node_by_path("/"),
- "hard-reset-gpios", 0);
-
- if (!gpio_is_valid(handle)) {
- pr_info("Skipping unavailable RESET gpio %d (%s)\n",
- handle, "reset");
- return -ENODEV;
- }
-
- ret = gpio_request(handle, "reset");
- if (ret < 0) {
- pr_info("GPIO pin is already allocated\n");
- return ret;
- }
-
- /* get current setup value */
- reset_val = gpio_get_value(handle);
- /* FIXME maybe worth to perform any action */
- pr_debug("Reset: Gpio output state: 0x%x\n", reset_val);
-
- /* Setup GPIO as output */
- ret = gpio_direction_output(handle, 0);
- if (ret < 0)
- goto err;
-
- /* Setup output direction */
- gpio_set_value(handle, 0);
-
- pr_info("RESET: Registered gpio device: %d, current val: %d\n",
- handle, reset_val);
- return 0;
-err:
- gpio_free(handle);
- return ret;
-}
-device_initcall(of_platform_reset_gpio_probe);
-
-
-static void gpio_system_reset(void)
-{
- if (gpio_is_valid(handle))
- gpio_set_value(handle, 1 - reset_val);
- else
- pr_notice("Reset GPIO unavailable - halting!\n");
-}
-#else
-static void gpio_system_reset(void)
-{
- pr_notice("No reset GPIO present - halting!\n");
-}
-
-void of_platform_reset_gpio_probe(void)
-{
- return;
-}
-#endif
-
-void machine_restart(char *cmd)
-{
- pr_notice("Machine restart...\n");
- gpio_system_reset();
- while (1)
- ;
-}
+#include <linux/reboot.h>
void machine_shutdown(void)
{
@@ -106,3 +32,12 @@ void machine_power_off(void)
while (1)
;
}
+
+void machine_restart(char *cmd)
+{
+ do_kernel_restart(cmd);
+ /* Give the restart hook 1 s to take us down */
+ mdelay(1000);
+ pr_emerg("Reboot failed -- System halted\n");
+ while (1);
+}
diff --git a/arch/microblaze/mm/consistent.c b/arch/microblaze/mm/consistent.c
index bc7042209c57..8c5f0c332d8b 100644
--- a/arch/microblaze/mm/consistent.c
+++ b/arch/microblaze/mm/consistent.c
@@ -4,217 +4,56 @@
* Copyright (C) 2010 Michal Simek <monstr@monstr.eu>
* Copyright (C) 2010 PetaLogix
* Copyright (C) 2005 John Williams <jwilliams@itee.uq.edu.au>
- *
- * Based on PowerPC version derived from arch/arm/mm/consistent.c
- * Copyright (C) 2001 Dan Malek (dmalek@jlc.net)
- * Copyright (C) 2000 Russell King
*/
-#include <linux/export.h>
-#include <linux/signal.h>
-#include <linux/sched.h>
#include <linux/kernel.h>
-#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
-#include <linux/ptrace.h>
-#include <linux/mman.h>
#include <linux/mm.h>
-#include <linux/swap.h>
-#include <linux/stddef.h>
-#include <linux/vmalloc.h>
#include <linux/init.h>
-#include <linux/delay.h>
-#include <linux/memblock.h>
-#include <linux/highmem.h>
-#include <linux/pci.h>
-#include <linux/interrupt.h>
-#include <linux/gfp.h>
#include <linux/dma-noncoherent.h>
-
-#include <asm/pgalloc.h>
-#include <linux/io.h>
-#include <linux/hardirq.h>
-#include <linux/mmu_context.h>
-#include <asm/mmu.h>
-#include <linux/uaccess.h>
-#include <asm/pgtable.h>
#include <asm/cpuinfo.h>
-#include <asm/tlbflush.h>
+#include <asm/cacheflush.h>
-#ifndef CONFIG_MMU
-/* I have to use dcache values because I can't relate on ram size */
-# define UNCACHED_SHADOW_MASK (cpuinfo.dcache_high - cpuinfo.dcache_base + 1)
-#endif
-
-/*
- * Consistent memory allocators. Used for DMA devices that want to
- * share uncached memory with the processor core.
- * My crufty no-MMU approach is simple. In the HW platform we can optionally
- * mirror the DDR up above the processor cacheable region. So, memory accessed
- * in this mirror region will not be cached. It's alloced from the same
- * pool as normal memory, but the handle we return is shifted up into the
- * uncached region. This will no doubt cause big problems if memory allocated
- * here is not also freed properly. -- JW
- */
-void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
- gfp_t gfp, unsigned long attrs)
+void arch_dma_prep_coherent(struct page *page, size_t size)
{
- unsigned long order, vaddr;
- void *ret;
- unsigned int i, err = 0;
- struct page *page, *end;
-
-#ifdef CONFIG_MMU
- phys_addr_t pa;
- struct vm_struct *area;
- unsigned long va;
-#endif
-
- if (in_interrupt())
- BUG();
-
- /* Only allocate page size areas. */
- size = PAGE_ALIGN(size);
- order = get_order(size);
-
- vaddr = __get_free_pages(gfp | __GFP_ZERO, order);
- if (!vaddr)
- return NULL;
+ phys_addr_t paddr = page_to_phys(page);
- /*
- * we need to ensure that there are no cachelines in use,
- * or worse dirty in this area.
- */
- flush_dcache_range(virt_to_phys((void *)vaddr),
- virt_to_phys((void *)vaddr) + size);
+ flush_dcache_range(paddr, paddr + size);
+}
#ifndef CONFIG_MMU
- ret = (void *)vaddr;
- /*
- * Here's the magic! Note if the uncached shadow is not implemented,
- * it's up to the calling code to also test that condition and make
- * other arranegments, such as manually flushing the cache and so on.
- */
-# ifdef CONFIG_XILINX_UNCACHED_SHADOW
- ret = (void *)((unsigned) ret | UNCACHED_SHADOW_MASK);
-# endif
- if ((unsigned int)ret > cpuinfo.dcache_base &&
- (unsigned int)ret < cpuinfo.dcache_high)
- pr_warn("ERROR: Your cache coherent area is CACHED!!!\n");
-
- /* dma_handle is same as physical (shadowed) address */
- *dma_handle = (dma_addr_t)ret;
+/*
+ * Consistent memory allocators. Used for DMA devices that want to share
+ * uncached memory with the processor core. My crufty no-MMU approach is
+ * simple. In the HW platform we can optionally mirror the DDR up above the
+ * processor cacheable region. So, memory accessed in this mirror region will
+ * not be cached. It's alloced from the same pool as normal memory, but the
+ * handle we return is shifted up into the uncached region. This will no doubt
+ * cause big problems if memory allocated here is not also freed properly. -- JW
+ *
+ * I have to use dcache values because I can't relate on ram size:
+ */
+#ifdef CONFIG_XILINX_UNCACHED_SHADOW
+#define UNCACHED_SHADOW_MASK (cpuinfo.dcache_high - cpuinfo.dcache_base + 1)
#else
- /* Allocate some common virtual space to map the new pages. */
- area = get_vm_area(size, VM_ALLOC);
- if (!area) {
- free_pages(vaddr, order);
- return NULL;
- }
- va = (unsigned long) area->addr;
- ret = (void *)va;
-
- /* This gives us the real physical address of the first page. */
- *dma_handle = pa = __virt_to_phys(vaddr);
-#endif
-
- /*
- * free wasted pages. We skip the first page since we know
- * that it will have count = 1 and won't require freeing.
- * We also mark the pages in use as reserved so that
- * remap_page_range works.
- */
- page = virt_to_page(vaddr);
- end = page + (1 << order);
-
- split_page(page, order);
-
- for (i = 0; i < size && err == 0; i += PAGE_SIZE) {
-#ifdef CONFIG_MMU
- /* MS: This is the whole magic - use cache inhibit pages */
- err = map_page(va + i, pa + i, _PAGE_KERNEL | _PAGE_NO_CACHE);
-#endif
+#define UNCACHED_SHADOW_MASK 0
+#endif /* CONFIG_XILINX_UNCACHED_SHADOW */
- SetPageReserved(page);
- page++;
- }
-
- /* Free the otherwise unused pages. */
- while (page < end) {
- __free_page(page);
- page++;
- }
-
- if (err) {
- free_pages(vaddr, order);
- return NULL;
- }
-
- return ret;
-}
-
-#ifdef CONFIG_MMU
-static pte_t *consistent_virt_to_pte(void *vaddr)
+void *uncached_kernel_address(void *ptr)
{
- unsigned long addr = (unsigned long)vaddr;
-
- return pte_offset_kernel(pmd_offset(pgd_offset_k(addr), addr), addr);
-}
-
-long arch_dma_coherent_to_pfn(struct device *dev, void *vaddr,
- dma_addr_t dma_addr)
-{
- pte_t *ptep = consistent_virt_to_pte(vaddr);
-
- if (pte_none(*ptep) || !pte_present(*ptep))
- return 0;
+ unsigned long addr = (unsigned long)ptr;
- return pte_pfn(*ptep);
+ addr |= UNCACHED_SHADOW_MASK;
+ if (addr > cpuinfo.dcache_base && addr < cpuinfo.dcache_high)
+ pr_warn("ERROR: Your cache coherent area is CACHED!!!\n");
+ return (void *)addr;
}
-#endif
-/*
- * free page(s) as defined by the above mapping.
- */
-void arch_dma_free(struct device *dev, size_t size, void *vaddr,
- dma_addr_t dma_addr, unsigned long attrs)
+void *cached_kernel_address(void *ptr)
{
- struct page *page;
-
- if (in_interrupt())
- BUG();
-
- size = PAGE_ALIGN(size);
-
-#ifndef CONFIG_MMU
- /* Clear SHADOW_MASK bit in address, and free as per usual */
-# ifdef CONFIG_XILINX_UNCACHED_SHADOW
- vaddr = (void *)((unsigned)vaddr & ~UNCACHED_SHADOW_MASK);
-# endif
- page = virt_to_page(vaddr);
-
- do {
- __free_reserved_page(page);
- page++;
- } while (size -= PAGE_SIZE);
-#else
- do {
- pte_t *ptep = consistent_virt_to_pte(vaddr);
- unsigned long pfn;
-
- if (!pte_none(*ptep) && pte_present(*ptep)) {
- pfn = pte_pfn(*ptep);
- pte_clear(&init_mm, (unsigned int)vaddr, ptep);
- if (pfn_valid(pfn)) {
- page = pfn_to_page(pfn);
- __free_reserved_page(page);
- }
- }
- vaddr += PAGE_SIZE;
- } while (size -= PAGE_SIZE);
+ unsigned long addr = (unsigned long)ptr;
- /* flush tlb */
- flush_tlb_all();
-#endif
+ return (void *)(addr & ~UNCACHED_SHADOW_MASK);
}
+#endif /* CONFIG_MMU */
diff --git a/arch/microblaze/mm/pgtable.c b/arch/microblaze/mm/pgtable.c
index 8fe54fda31dc..010bb9cee2e4 100644
--- a/arch/microblaze/mm/pgtable.c
+++ b/arch/microblaze/mm/pgtable.c
@@ -44,10 +44,6 @@ unsigned long ioremap_base;
unsigned long ioremap_bot;
EXPORT_SYMBOL(ioremap_bot);
-#ifndef CONFIG_SMP
-struct pgtable_cache_struct quicklists;
-#endif
-
static void __iomem *__ioremap(phys_addr_t addr, unsigned long size,
unsigned long flags)
{
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index cc8e2b1032a5..a0bd9bdb5f83 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -5,7 +5,6 @@ config MIPS
select ARCH_32BIT_OFF_T if !64BIT
select ARCH_BINFMT_ELF_STATE if MIPS_FP_SUPPORT
select ARCH_CLOCKSOURCE_DATA
- select ARCH_HAS_ELF_RANDOMIZE
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
select ARCH_HAS_UBSAN_SANITIZE_ALL
select ARCH_SUPPORTS_UPROBES
@@ -13,6 +12,7 @@ config MIPS
select ARCH_USE_CMPXCHG_LOCKREF if 64BIT
select ARCH_USE_QUEUED_RWLOCKS
select ARCH_USE_QUEUED_SPINLOCKS
+ select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
select ARCH_WANT_IPC_PARSE_VERSION
select BUILDTIME_EXTABLE_SORT
select CLONE_BACKWARDS
diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h
index aa16b85ddffc..aa73cb187a07 100644
--- a/arch/mips/include/asm/pgalloc.h
+++ b/arch/mips/include/asm/pgalloc.h
@@ -105,8 +105,6 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
#endif /* __PAGETABLE_PUD_FOLDED */
-#define check_pgt_cache() do { } while (0)
-
extern void pagetable_init(void);
#endif /* _ASM_PGALLOC_H */
diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index 4dca733d5076..f85bd5b15f51 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -661,9 +661,4 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
#define HAVE_ARCH_UNMAPPED_AREA
#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
-/*
- * No page table caches to initialise
- */
-#define pgtable_cache_init() do { } while (0)
-
#endif /* _ASM_PGTABLE_H */
diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h
index aca909bd7841..fba18d4a9190 100644
--- a/arch/mips/include/asm/processor.h
+++ b/arch/mips/include/asm/processor.h
@@ -29,11 +29,6 @@
extern unsigned int vced_count, vcei_count;
-/*
- * MIPS does have an arch_pick_mmap_layout()
- */
-#define HAVE_ARCH_PICK_MMAP_LAYOUT 1
-
#ifdef CONFIG_32BIT
#ifdef CONFIG_KVM_GUEST
/* User space process size is limited to 1GB in KVM Guest Mode */
diff --git a/arch/mips/mm/mmap.c b/arch/mips/mm/mmap.c
index d79f2b432318..00fe90c6db3e 100644
--- a/arch/mips/mm/mmap.c
+++ b/arch/mips/mm/mmap.c
@@ -20,33 +20,6 @@
unsigned long shm_align_mask = PAGE_SIZE - 1; /* Sane caches */
EXPORT_SYMBOL(shm_align_mask);
-/* gap between mmap and stack */
-#define MIN_GAP (128*1024*1024UL)
-#define MAX_GAP ((TASK_SIZE)/6*5)
-
-static int mmap_is_legacy(struct rlimit *rlim_stack)
-{
- if (current->personality & ADDR_COMPAT_LAYOUT)
- return 1;
-
- if (rlim_stack->rlim_cur == RLIM_INFINITY)
- return 1;
-
- return sysctl_legacy_va_layout;
-}
-
-static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack)
-{
- unsigned long gap = rlim_stack->rlim_cur;
-
- if (gap < MIN_GAP)
- gap = MIN_GAP;
- else if (gap > MAX_GAP)
- gap = MAX_GAP;
-
- return PAGE_ALIGN(TASK_SIZE - gap - rnd);
-}
-
#define COLOUR_ALIGN(addr, pgoff) \
((((addr) + shm_align_mask) & ~shm_align_mask) + \
(((pgoff) << PAGE_SHIFT) & shm_align_mask))
@@ -144,63 +117,6 @@ unsigned long arch_get_unmapped_area_topdown(struct file *filp,
addr0, len, pgoff, flags, DOWN);
}
-unsigned long arch_mmap_rnd(void)
-{
- unsigned long rnd;
-
-#ifdef CONFIG_COMPAT
- if (TASK_IS_32BIT_ADDR)
- rnd = get_random_long() & ((1UL << mmap_rnd_compat_bits) - 1);
- else
-#endif /* CONFIG_COMPAT */
- rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1);
-
- return rnd << PAGE_SHIFT;
-}
-
-void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
-{
- unsigned long random_factor = 0UL;
-
- if (current->flags & PF_RANDOMIZE)
- random_factor = arch_mmap_rnd();
-
- if (mmap_is_legacy(rlim_stack)) {
- mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
- mm->get_unmapped_area = arch_get_unmapped_area;
- } else {
- mm->mmap_base = mmap_base(random_factor, rlim_stack);
- mm->get_unmapped_area = arch_get_unmapped_area_topdown;
- }
-}
-
-static inline unsigned long brk_rnd(void)
-{
- unsigned long rnd = get_random_long();
-
- rnd = rnd << PAGE_SHIFT;
- /* 8MB for 32bit, 256MB for 64bit */
- if (TASK_IS_32BIT_ADDR)
- rnd = rnd & 0x7ffffful;
- else
- rnd = rnd & 0xffffffful;
-
- return rnd;
-}
-
-unsigned long arch_randomize_brk(struct mm_struct *mm)
-{
- unsigned long base = mm->brk;
- unsigned long ret;
-
- ret = PAGE_ALIGN(base + brk_rnd());
-
- if (ret < mm->brk)
- return mm->brk;
-
- return ret;
-}
-
bool __virt_addr_valid(const volatile void *kaddr)
{
unsigned long vaddr = (unsigned long)kaddr;
diff --git a/arch/nds32/include/asm/pgalloc.h b/arch/nds32/include/asm/pgalloc.h
index e78b43d8389f..37125e6884d7 100644
--- a/arch/nds32/include/asm/pgalloc.h
+++ b/arch/nds32/include/asm/pgalloc.h
@@ -23,8 +23,6 @@
extern pgd_t *pgd_alloc(struct mm_struct *mm);
extern void pgd_free(struct mm_struct *mm, pgd_t * pgd);
-#define check_pgt_cache() do { } while (0)
-
static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
{
pgtable_t pte;
diff --git a/arch/nds32/include/asm/pgtable.h b/arch/nds32/include/asm/pgtable.h
index c70cc56bec09..0588ec99725c 100644
--- a/arch/nds32/include/asm/pgtable.h
+++ b/arch/nds32/include/asm/pgtable.h
@@ -403,8 +403,6 @@ extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
* into virtual address `from'
*/
-#define pgtable_cache_init() do { } while (0)
-
#endif /* !__ASSEMBLY__ */
#endif /* _ASMNDS32_PGTABLE_H */
diff --git a/arch/nios2/include/asm/pgalloc.h b/arch/nios2/include/asm/pgalloc.h
index 4bc8cf72067e..750d18d5980b 100644
--- a/arch/nios2/include/asm/pgalloc.h
+++ b/arch/nios2/include/asm/pgalloc.h
@@ -45,6 +45,4 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
tlb_remove_page((tlb), (pte)); \
} while (0)
-#define check_pgt_cache() do { } while (0)
-
#endif /* _ASM_NIOS2_PGALLOC_H */
diff --git a/arch/nios2/include/asm/pgtable.h b/arch/nios2/include/asm/pgtable.h
index 95237b7f6fc1..99985d8b7166 100644
--- a/arch/nios2/include/asm/pgtable.h
+++ b/arch/nios2/include/asm/pgtable.h
@@ -291,8 +291,6 @@ static inline void pte_clear(struct mm_struct *mm,
#include <asm-generic/pgtable.h>
-#define pgtable_cache_init() do { } while (0)
-
extern void __init paging_init(void);
extern void __init mmu_init(void);
diff --git a/arch/openrisc/include/asm/pgalloc.h b/arch/openrisc/include/asm/pgalloc.h
index 3d4b397c2d06..787c1b9d2f6d 100644
--- a/arch/openrisc/include/asm/pgalloc.h
+++ b/arch/openrisc/include/asm/pgalloc.h
@@ -101,6 +101,4 @@ do { \
#define pmd_pgtable(pmd) pmd_page(pmd)
-#define check_pgt_cache() do { } while (0)
-
#endif
diff --git a/arch/openrisc/include/asm/pgtable.h b/arch/openrisc/include/asm/pgtable.h
index 2fe9ff5b5d6f..248d22d8faa7 100644
--- a/arch/openrisc/include/asm/pgtable.h
+++ b/arch/openrisc/include/asm/pgtable.h
@@ -443,11 +443,6 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
#include <asm-generic/pgtable.h>
-/*
- * No page table caches to initialise
- */
-#define pgtable_cache_init() do { } while (0)
-
typedef pte_t *pte_addr_t;
#endif /* __ASSEMBLY__ */
diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h
index 4f2059a50fae..d98647c29b74 100644
--- a/arch/parisc/include/asm/pgalloc.h
+++ b/arch/parisc/include/asm/pgalloc.h
@@ -124,6 +124,4 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
pmd_populate_kernel(mm, pmd, page_address(pte_page))
#define pmd_pgtable(pmd) pmd_page(pmd)
-#define check_pgt_cache() do { } while (0)
-
#endif
diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h
index 6d58c1739b42..4ac374b3a99f 100644
--- a/arch/parisc/include/asm/pgtable.h
+++ b/arch/parisc/include/asm/pgtable.h
@@ -132,8 +132,6 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
#define PTRS_PER_PTE (1UL << BITS_PER_PTE)
/* Definitions for 2nd level */
-#define pgtable_cache_init() do { } while (0)
-
#define PMD_SHIFT (PLD_SHIFT + BITS_PER_PTE)
#define PMD_SIZE (1UL << PMD_SHIFT)
#define PMD_MASK (~(PMD_SIZE-1))
diff --git a/arch/powerpc/include/asm/pgalloc.h b/arch/powerpc/include/asm/pgalloc.h
index 2b2c60a1a66d..6dd78a2dc03a 100644
--- a/arch/powerpc/include/asm/pgalloc.h
+++ b/arch/powerpc/include/asm/pgalloc.h
@@ -64,8 +64,6 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
extern struct kmem_cache *pgtable_cache[];
#define PGT_CACHE(shift) pgtable_cache[shift]
-static inline void check_pgt_cache(void) { }
-
#ifdef CONFIG_PPC_BOOK3S
#include <asm/book3s/pgalloc.h>
#else
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index 8b7865a2d576..4053b2ab427c 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -87,7 +87,6 @@ extern unsigned long ioremap_bot;
unsigned long vmalloc_to_phys(void *vmalloc_addr);
void pgtable_cache_add(unsigned int shift);
-void pgtable_cache_init(void);
#if defined(CONFIG_STRICT_KERNEL_RWX) || defined(CONFIG_PPC32)
void mark_initmem_nx(void);
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
index 3410ea9f4de1..6c123760164e 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -1748,7 +1748,7 @@ void flush_hash_hugepage(unsigned long vsid, unsigned long addr,
/*
* IF we try to do a HUGE PTE update after a withdraw is done.
* we will find the below NULL. This happens when we do
- * split_huge_page_pmd
+ * split_huge_pmd
*/
if (!hpte_slot_array)
return;
diff --git a/arch/powerpc/mm/book3s64/iommu_api.c b/arch/powerpc/mm/book3s64/iommu_api.c
index b056cae3388b..56cc84520577 100644
--- a/arch/powerpc/mm/book3s64/iommu_api.c
+++ b/arch/powerpc/mm/book3s64/iommu_api.c
@@ -129,11 +129,8 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua,
* Allow to use larger than 64k IOMMU pages. Only do that
* if we are backed by hugetlb.
*/
- if ((mem->pageshift > PAGE_SHIFT) && PageHuge(page)) {
- struct page *head = compound_head(page);
-
- pageshift = compound_order(head) + PAGE_SHIFT;
- }
+ if ((mem->pageshift > PAGE_SHIFT) && PageHuge(page))
+ pageshift = page_shift(compound_head(page));
mem->pageshift = min(mem->pageshift, pageshift);
/*
* We don't need struct page reference any more, switch
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index a8953f108808..73d4873fc7f8 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -667,7 +667,7 @@ void flush_dcache_icache_hugepage(struct page *page)
BUG_ON(!PageCompound(page));
- for (i = 0; i < (1UL << compound_order(page)); i++) {
+ for (i = 0; i < compound_nr(page); i++) {
if (!PageHighMem(page)) {
__flush_dcache_icache(page_address(page+i));
} else {
diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
index 065ff14b76e1..1d93e55a2de1 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -10,6 +10,8 @@
#include <linux/file.h>
#include <linux/fs.h>
+#include <linux/fs_context.h>
+#include <linux/fs_parser.h>
#include <linux/fsnotify.h>
#include <linux/backing-dev.h>
#include <linux/init.h>
@@ -20,7 +22,6 @@
#include <linux/pagemap.h>
#include <linux/poll.h>
#include <linux/slab.h>
-#include <linux/parser.h>
#include <asm/prom.h>
#include <asm/spu.h>
@@ -30,7 +31,7 @@
#include "spufs.h"
struct spufs_sb_info {
- int debug;
+ bool debug;
};
static struct kmem_cache *spufs_inode_cache;
@@ -574,16 +575,27 @@ long spufs_create(struct path *path, struct dentry *dentry,
}
/* File system initialization */
+struct spufs_fs_context {
+ kuid_t uid;
+ kgid_t gid;
+ umode_t mode;
+};
+
enum {
- Opt_uid, Opt_gid, Opt_mode, Opt_debug, Opt_err,
+ Opt_uid, Opt_gid, Opt_mode, Opt_debug,
+};
+
+static const struct fs_parameter_spec spufs_param_specs[] = {
+ fsparam_u32 ("gid", Opt_gid),
+ fsparam_u32oct ("mode", Opt_mode),
+ fsparam_u32 ("uid", Opt_uid),
+ fsparam_flag ("debug", Opt_debug),
+ {}
};
-static const match_table_t spufs_tokens = {
- { Opt_uid, "uid=%d" },
- { Opt_gid, "gid=%d" },
- { Opt_mode, "mode=%o" },
- { Opt_debug, "debug" },
- { Opt_err, NULL },
+static const struct fs_parameter_description spufs_fs_parameters = {
+ .name = "spufs",
+ .specs = spufs_param_specs,
};
static int spufs_show_options(struct seq_file *m, struct dentry *root)
@@ -604,47 +616,41 @@ static int spufs_show_options(struct seq_file *m, struct dentry *root)
return 0;
}
-static int
-spufs_parse_options(struct super_block *sb, char *options, struct inode *root)
-{
- char *p;
- substring_t args[MAX_OPT_ARGS];
-
- while ((p = strsep(&options, ",")) != NULL) {
- int token, option;
-
- if (!*p)
- continue;
-
- token = match_token(p, spufs_tokens, args);
- switch (token) {
- case Opt_uid:
- if (match_int(&args[0], &option))
- return 0;
- root->i_uid = make_kuid(current_user_ns(), option);
- if (!uid_valid(root->i_uid))
- return 0;
- break;
- case Opt_gid:
- if (match_int(&args[0], &option))
- return 0;
- root->i_gid = make_kgid(current_user_ns(), option);
- if (!gid_valid(root->i_gid))
- return 0;
- break;
- case Opt_mode:
- if (match_octal(&args[0], &option))
- return 0;
- root->i_mode = option | S_IFDIR;
- break;
- case Opt_debug:
- spufs_get_sb_info(sb)->debug = 1;
- break;
- default:
- return 0;
- }
+static int spufs_parse_param(struct fs_context *fc, struct fs_parameter *param)
+{
+ struct spufs_fs_context *ctx = fc->fs_private;
+ struct spufs_sb_info *sbi = fc->s_fs_info;
+ struct fs_parse_result result;
+ kuid_t uid;
+ kgid_t gid;
+ int opt;
+
+ opt = fs_parse(fc, &spufs_fs_parameters, param, &result);
+ if (opt < 0)
+ return opt;
+
+ switch (opt) {
+ case Opt_uid:
+ uid = make_kuid(current_user_ns(), result.uint_32);
+ if (!uid_valid(uid))
+ return invalf(fc, "Unknown uid");
+ ctx->uid = uid;
+ break;
+ case Opt_gid:
+ gid = make_kgid(current_user_ns(), result.uint_32);
+ if (!gid_valid(gid))
+ return invalf(fc, "Unknown gid");
+ ctx->gid = gid;
+ break;
+ case Opt_mode:
+ ctx->mode = result.uint_32 & S_IALLUGO;
+ break;
+ case Opt_debug:
+ sbi->debug = true;
+ break;
}
- return 1;
+
+ return 0;
}
static void spufs_exit_isolated_loader(void)
@@ -678,79 +684,98 @@ spufs_init_isolated_loader(void)
printk(KERN_INFO "spufs: SPU isolation mode enabled\n");
}
-static int
-spufs_create_root(struct super_block *sb, void *data)
+static int spufs_create_root(struct super_block *sb, struct fs_context *fc)
{
+ struct spufs_fs_context *ctx = fc->fs_private;
struct inode *inode;
- int ret;
- ret = -ENODEV;
if (!spu_management_ops)
- goto out;
+ return -ENODEV;
- ret = -ENOMEM;
- inode = spufs_new_inode(sb, S_IFDIR | 0775);
+ inode = spufs_new_inode(sb, S_IFDIR | ctx->mode);
if (!inode)
- goto out;
+ return -ENOMEM;
+ inode->i_uid = ctx->uid;
+ inode->i_gid = ctx->gid;
inode->i_op = &simple_dir_inode_operations;
inode->i_fop = &simple_dir_operations;
SPUFS_I(inode)->i_ctx = NULL;
inc_nlink(inode);
- ret = -EINVAL;
- if (!spufs_parse_options(sb, data, inode))
- goto out_iput;
-
- ret = -ENOMEM;
sb->s_root = d_make_root(inode);
if (!sb->s_root)
- goto out;
-
+ return -ENOMEM;
return 0;
-out_iput:
- iput(inode);
-out:
- return ret;
}
-static int
-spufs_fill_super(struct super_block *sb, void *data, int silent)
-{
- struct spufs_sb_info *info;
- static const struct super_operations s_ops = {
- .alloc_inode = spufs_alloc_inode,
- .free_inode = spufs_free_inode,
- .statfs = simple_statfs,
- .evict_inode = spufs_evict_inode,
- .show_options = spufs_show_options,
- };
-
- info = kzalloc(sizeof(*info), GFP_KERNEL);
- if (!info)
- return -ENOMEM;
+static const struct super_operations spufs_ops = {
+ .alloc_inode = spufs_alloc_inode,
+ .free_inode = spufs_free_inode,
+ .statfs = simple_statfs,
+ .evict_inode = spufs_evict_inode,
+ .show_options = spufs_show_options,
+};
+static int spufs_fill_super(struct super_block *sb, struct fs_context *fc)
+{
sb->s_maxbytes = MAX_LFS_FILESIZE;
sb->s_blocksize = PAGE_SIZE;
sb->s_blocksize_bits = PAGE_SHIFT;
sb->s_magic = SPUFS_MAGIC;
- sb->s_op = &s_ops;
- sb->s_fs_info = info;
+ sb->s_op = &spufs_ops;
- return spufs_create_root(sb, data);
+ return spufs_create_root(sb, fc);
+}
+
+static int spufs_get_tree(struct fs_context *fc)
+{
+ return get_tree_single(fc, spufs_fill_super);
}
-static struct dentry *
-spufs_mount(struct file_system_type *fstype, int flags,
- const char *name, void *data)
+static void spufs_free_fc(struct fs_context *fc)
{
- return mount_single(fstype, flags, data, spufs_fill_super);
+ kfree(fc->s_fs_info);
+}
+
+static const struct fs_context_operations spufs_context_ops = {
+ .free = spufs_free_fc,
+ .parse_param = spufs_parse_param,
+ .get_tree = spufs_get_tree,
+};
+
+static int spufs_init_fs_context(struct fs_context *fc)
+{
+ struct spufs_fs_context *ctx;
+ struct spufs_sb_info *sbi;
+
+ ctx = kzalloc(sizeof(struct spufs_fs_context), GFP_KERNEL);
+ if (!ctx)
+ goto nomem;
+
+ sbi = kzalloc(sizeof(struct spufs_sb_info), GFP_KERNEL);
+ if (!sbi)
+ goto nomem_ctx;
+
+ ctx->uid = current_uid();
+ ctx->gid = current_gid();
+ ctx->mode = 0755;
+
+ fc->s_fs_info = sbi;
+ fc->ops = &spufs_context_ops;
+ return 0;
+
+nomem_ctx:
+ kfree(ctx);
+nomem:
+ return -ENOMEM;
}
static struct file_system_type spufs_type = {
.owner = THIS_MODULE,
.name = "spufs",
- .mount = spufs_mount,
+ .init_fs_context = spufs_init_fs_context,
+ .parameters = &spufs_fs_parameters,
.kill_sb = kill_litter_super,
};
MODULE_ALIAS_FS("spufs");
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 71d29fb4008a..8eebbc8860bb 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -59,6 +59,18 @@ config RISCV
select ARCH_HAS_GIGANTIC_PAGE
select ARCH_WANT_HUGE_PMD_SHARE if 64BIT
select SPARSEMEM_STATIC if 32BIT
+ select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
+ select HAVE_ARCH_MMAP_RND_BITS
+
+config ARCH_MMAP_RND_BITS_MIN
+ default 18 if 64BIT
+ default 8
+
+# max bits determined by the following formula:
+# VA_BITS - PAGE_SHIFT - 3
+config ARCH_MMAP_RND_BITS_MAX
+ default 24 if 64BIT # SV39 based
+ default 17
config MMU
def_bool y
diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h
index 56a67d66f72f..f66a00d8cb19 100644
--- a/arch/riscv/include/asm/pgalloc.h
+++ b/arch/riscv/include/asm/pgalloc.h
@@ -82,8 +82,4 @@ do { \
tlb_remove_page((tlb), pte); \
} while (0)
-static inline void check_pgt_cache(void)
-{
-}
-
#endif /* _ASM_RISCV_PGALLOC_H */
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 80905b27ee98..c60123f018f5 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -424,11 +424,6 @@ extern void *dtb_early_va;
extern void setup_bootmem(void);
extern void paging_init(void);
-static inline void pgtable_cache_init(void)
-{
- /* No page table caches to initialize */
-}
-
#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1)
#define VMALLOC_END (PAGE_OFFSET - 1)
#define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE)
diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c
index a4418fc425b8..70139d0791b6 100644
--- a/arch/s390/hypfs/inode.c
+++ b/arch/s390/hypfs/inode.c
@@ -12,17 +12,17 @@
#include <linux/types.h>
#include <linux/errno.h>
#include <linux/fs.h>
+#include <linux/fs_context.h>
+#include <linux/fs_parser.h>
#include <linux/namei.h>
#include <linux/vfs.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/time.h>
-#include <linux/parser.h>
#include <linux/sysfs.h>
#include <linux/init.h>
#include <linux/kobject.h>
#include <linux/seq_file.h>
-#include <linux/mount.h>
#include <linux/uio.h>
#include <asm/ebcdic.h>
#include "hypfs.h"
@@ -207,52 +207,44 @@ static int hypfs_release(struct inode *inode, struct file *filp)
return 0;
}
-enum { opt_uid, opt_gid, opt_err };
+enum { Opt_uid, Opt_gid, };
-static const match_table_t hypfs_tokens = {
- {opt_uid, "uid=%u"},
- {opt_gid, "gid=%u"},
- {opt_err, NULL}
+static const struct fs_parameter_spec hypfs_param_specs[] = {
+ fsparam_u32("gid", Opt_gid),
+ fsparam_u32("uid", Opt_uid),
+ {}
};
-static int hypfs_parse_options(char *options, struct super_block *sb)
+static const struct fs_parameter_description hypfs_fs_parameters = {
+ .name = "hypfs",
+ .specs = hypfs_param_specs,
+};
+
+static int hypfs_parse_param(struct fs_context *fc, struct fs_parameter *param)
{
- char *str;
- substring_t args[MAX_OPT_ARGS];
+ struct hypfs_sb_info *hypfs_info = fc->s_fs_info;
+ struct fs_parse_result result;
kuid_t uid;
kgid_t gid;
-
- if (!options)
- return 0;
- while ((str = strsep(&options, ",")) != NULL) {
- int token, option;
- struct hypfs_sb_info *hypfs_info = sb->s_fs_info;
-
- if (!*str)
- continue;
- token = match_token(str, hypfs_tokens, args);
- switch (token) {
- case opt_uid:
- if (match_int(&args[0], &option))
- return -EINVAL;
- uid = make_kuid(current_user_ns(), option);
- if (!uid_valid(uid))
- return -EINVAL;
- hypfs_info->uid = uid;
- break;
- case opt_gid:
- if (match_int(&args[0], &option))
- return -EINVAL;
- gid = make_kgid(current_user_ns(), option);
- if (!gid_valid(gid))
- return -EINVAL;
- hypfs_info->gid = gid;
- break;
- case opt_err:
- default:
- pr_err("%s is not a valid mount option\n", str);
- return -EINVAL;
- }
+ int opt;
+
+ opt = fs_parse(fc, &hypfs_fs_parameters, param, &result);
+ if (opt < 0)
+ return opt;
+
+ switch (opt) {
+ case Opt_uid:
+ uid = make_kuid(current_user_ns(), result.uint_32);
+ if (!uid_valid(uid))
+ return invalf(fc, "Unknown uid");
+ hypfs_info->uid = uid;
+ break;
+ case Opt_gid:
+ gid = make_kgid(current_user_ns(), result.uint_32);
+ if (!gid_valid(gid))
+ return invalf(fc, "Unknown gid");
+ hypfs_info->gid = gid;
+ break;
}
return 0;
}
@@ -266,26 +258,18 @@ static int hypfs_show_options(struct seq_file *s, struct dentry *root)
return 0;
}
-static int hypfs_fill_super(struct super_block *sb, void *data, int silent)
+static int hypfs_fill_super(struct super_block *sb, struct fs_context *fc)
{
+ struct hypfs_sb_info *sbi = sb->s_fs_info;
struct inode *root_inode;
- struct dentry *root_dentry;
- int rc = 0;
- struct hypfs_sb_info *sbi;
+ struct dentry *root_dentry, *update_file;
+ int rc;
- sbi = kzalloc(sizeof(struct hypfs_sb_info), GFP_KERNEL);
- if (!sbi)
- return -ENOMEM;
- mutex_init(&sbi->lock);
- sbi->uid = current_uid();
- sbi->gid = current_gid();
- sb->s_fs_info = sbi;
sb->s_blocksize = PAGE_SIZE;
sb->s_blocksize_bits = PAGE_SHIFT;
sb->s_magic = HYPFS_MAGIC;
sb->s_op = &hypfs_s_ops;
- if (hypfs_parse_options(data, sb))
- return -EINVAL;
+
root_inode = hypfs_make_inode(sb, S_IFDIR | 0755);
if (!root_inode)
return -ENOMEM;
@@ -300,18 +284,46 @@ static int hypfs_fill_super(struct super_block *sb, void *data, int silent)
rc = hypfs_diag_create_files(root_dentry);
if (rc)
return rc;
- sbi->update_file = hypfs_create_update_file(root_dentry);
- if (IS_ERR(sbi->update_file))
- return PTR_ERR(sbi->update_file);
+ update_file = hypfs_create_update_file(root_dentry);
+ if (IS_ERR(update_file))
+ return PTR_ERR(update_file);
+ sbi->update_file = update_file;
hypfs_update_update(sb);
pr_info("Hypervisor filesystem mounted\n");
return 0;
}
-static struct dentry *hypfs_mount(struct file_system_type *fst, int flags,
- const char *devname, void *data)
+static int hypfs_get_tree(struct fs_context *fc)
+{
+ return get_tree_single(fc, hypfs_fill_super);
+}
+
+static void hypfs_free_fc(struct fs_context *fc)
{
- return mount_single(fst, flags, data, hypfs_fill_super);
+ kfree(fc->s_fs_info);
+}
+
+static const struct fs_context_operations hypfs_context_ops = {
+ .free = hypfs_free_fc,
+ .parse_param = hypfs_parse_param,
+ .get_tree = hypfs_get_tree,
+};
+
+static int hypfs_init_fs_context(struct fs_context *fc)
+{
+ struct hypfs_sb_info *sbi;
+
+ sbi = kzalloc(sizeof(struct hypfs_sb_info), GFP_KERNEL);
+ if (!sbi)
+ return -ENOMEM;
+
+ mutex_init(&sbi->lock);
+ sbi->uid = current_uid();
+ sbi->gid = current_gid();
+
+ fc->s_fs_info = sbi;
+ fc->ops = &hypfs_context_ops;
+ return 0;
}
static void hypfs_kill_super(struct super_block *sb)
@@ -442,7 +454,8 @@ static const struct file_operations hypfs_file_ops = {
static struct file_system_type hypfs_type = {
.owner = THIS_MODULE,
.name = "s390_hypfs",
- .mount = hypfs_mount,
+ .init_fs_context = hypfs_init_fs_context,
+ .parameters = &hypfs_fs_parameters,
.kill_sb = hypfs_kill_super
};
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 0c4600725fc2..36c578c0ff96 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -1682,12 +1682,6 @@ extern void s390_reset_cmma(struct mm_struct *mm);
#define HAVE_ARCH_UNMAPPED_AREA
#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
-/*
- * No page table caches to initialise
- */
-static inline void pgtable_cache_init(void) { }
-static inline void check_pgt_cache(void) { }
-
#include <asm-generic/pgtable.h>
#endif /* _S390_PAGE_H */
diff --git a/arch/sh/include/asm/pgalloc.h b/arch/sh/include/asm/pgalloc.h
index b56f908b1395..8c6341a4d807 100644
--- a/arch/sh/include/asm/pgalloc.h
+++ b/arch/sh/include/asm/pgalloc.h
@@ -2,10 +2,8 @@
#ifndef __ASM_SH_PGALLOC_H
#define __ASM_SH_PGALLOC_H
-#include <linux/quicklist.h>
#include <asm/page.h>
-
-#define QUICK_PT 0 /* Other page table pages that are zero on free */
+#include <asm-generic/pgalloc.h>
extern pgd_t *pgd_alloc(struct mm_struct *);
extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
@@ -29,41 +27,6 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
}
#define pmd_pgtable(pmd) pmd_page(pmd)
-/*
- * Allocate and free page tables.
- */
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
-{
- return quicklist_alloc(QUICK_PT, GFP_KERNEL, NULL);
-}
-
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
-{
- struct page *page;
- void *pg;
-
- pg = quicklist_alloc(QUICK_PT, GFP_KERNEL, NULL);
- if (!pg)
- return NULL;
- page = virt_to_page(pg);
- if (!pgtable_page_ctor(page)) {
- quicklist_free(QUICK_PT, NULL, pg);
- return NULL;
- }
- return page;
-}
-
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
- quicklist_free(QUICK_PT, NULL, pte);
-}
-
-static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
-{
- pgtable_page_dtor(pte);
- quicklist_free_page(QUICK_PT, NULL, pte);
-}
-
#define __pte_free_tlb(tlb,pte,addr) \
do { \
pgtable_page_dtor(pte); \
@@ -79,9 +42,4 @@ do { \
} while (0);
#endif
-static inline void check_pgt_cache(void)
-{
- quicklist_trim(QUICK_PT, NULL, 25, 16);
-}
-
#endif /* __ASM_SH_PGALLOC_H */
diff --git a/arch/sh/include/asm/pgtable.h b/arch/sh/include/asm/pgtable.h
index 9085d1142fa3..cbd0f3c55a0c 100644
--- a/arch/sh/include/asm/pgtable.h
+++ b/arch/sh/include/asm/pgtable.h
@@ -123,11 +123,6 @@ typedef pte_t *pte_addr_t;
#define pte_pfn(x) ((unsigned long)(((x).pte_low >> PAGE_SHIFT)))
-/*
- * Initialise the page table caches
- */
-extern void pgtable_cache_init(void);
-
struct vm_area_struct;
struct mm_struct;
diff --git a/arch/sh/mm/Kconfig b/arch/sh/mm/Kconfig
index 02ed2df25a54..5c8a2ebfc720 100644
--- a/arch/sh/mm/Kconfig
+++ b/arch/sh/mm/Kconfig
@@ -1,9 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
menu "Memory management options"
-config QUICKLIST
- def_bool y
-
config MMU
bool "Support for memory management hardware"
depends on !CPU_SH2
diff --git a/arch/sh/mm/nommu.c b/arch/sh/mm/nommu.c
index cc779a90d917..dca946f426c6 100644
--- a/arch/sh/mm/nommu.c
+++ b/arch/sh/mm/nommu.c
@@ -97,7 +97,3 @@ void __init page_table_range_init(unsigned long start, unsigned long end,
void __set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
{
}
-
-void pgtable_cache_init(void)
-{
-}
diff --git a/arch/sparc/include/asm/pgalloc_32.h b/arch/sparc/include/asm/pgalloc_32.h
index 282be50a4adf..10538a4d1a1e 100644
--- a/arch/sparc/include/asm/pgalloc_32.h
+++ b/arch/sparc/include/asm/pgalloc_32.h
@@ -17,8 +17,6 @@ void srmmu_free_nocache(void *addr, int size);
extern struct resource sparc_iomap;
-#define check_pgt_cache() do { } while (0)
-
pgd_t *get_pgd_fast(void);
static inline void free_pgd_fast(pgd_t *pgd)
{
diff --git a/arch/sparc/include/asm/pgalloc_64.h b/arch/sparc/include/asm/pgalloc_64.h
index 48abccba4991..9d3e5cc95bbb 100644
--- a/arch/sparc/include/asm/pgalloc_64.h
+++ b/arch/sparc/include/asm/pgalloc_64.h
@@ -69,8 +69,6 @@ void pte_free(struct mm_struct *mm, pgtable_t ptepage);
#define pmd_populate(MM, PMD, PTE) pmd_set(MM, PMD, PTE)
#define pmd_pgtable(PMD) ((pte_t *)__pmd_page(PMD))
-#define check_pgt_cache() do { } while (0)
-
void pgtable_free(void *table, bool is_page);
#ifdef CONFIG_SMP
diff --git a/arch/sparc/include/asm/pgtable_32.h b/arch/sparc/include/asm/pgtable_32.h
index 4eebed6c6781..31da44826645 100644
--- a/arch/sparc/include/asm/pgtable_32.h
+++ b/arch/sparc/include/asm/pgtable_32.h
@@ -445,9 +445,4 @@ static inline int io_remap_pfn_range(struct vm_area_struct *vma,
/* We provide our own get_unmapped_area to cope with VA holes for userland */
#define HAVE_ARCH_UNMAPPED_AREA
-/*
- * No page table caches to initialise
- */
-#define pgtable_cache_init() do { } while (0)
-
#endif /* !(_SPARC_PGTABLE_H) */
diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
index 1599de730532..b57f9c631eca 100644
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h
@@ -1135,7 +1135,6 @@ unsigned long get_fb_unmapped_area(struct file *filp, unsigned long,
unsigned long);
#define HAVE_ARCH_FB_UNMAPPED_AREA
-void pgtable_cache_init(void);
void sun4v_register_fault_status(void);
void sun4v_ktsb_register(void);
void __init cheetah_ecache_flush_init(void);
diff --git a/arch/sparc/mm/init_32.c b/arch/sparc/mm/init_32.c
index 046ab116cc8c..906eda1158b4 100644
--- a/arch/sparc/mm/init_32.c
+++ b/arch/sparc/mm/init_32.c
@@ -31,7 +31,6 @@
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/vaddrs.h>
-#include <asm/pgalloc.h> /* bug in asm-generic/tlb.h: check_pgt_cache */
#include <asm/setup.h>
#include <asm/tlb.h>
#include <asm/prom.h>
diff --git a/arch/um/include/asm/pgalloc.h b/arch/um/include/asm/pgalloc.h
index 023599c3fa51..446e0c0f4018 100644
--- a/arch/um/include/asm/pgalloc.h
+++ b/arch/um/include/asm/pgalloc.h
@@ -43,7 +43,5 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
#define __pmd_free_tlb(tlb,x, address) tlb_remove_page((tlb),virt_to_page(x))
#endif
-#define check_pgt_cache() do { } while (0)
-
#endif
diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h
index e4d3ed980d82..36a44d58f373 100644
--- a/arch/um/include/asm/pgtable.h
+++ b/arch/um/include/asm/pgtable.h
@@ -32,8 +32,6 @@ extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
/* zero page used for uninitialized stuff */
extern unsigned long *empty_zero_page;
-#define pgtable_cache_init() do ; while (0)
-
/* Just any arbitrary offset to the start of the vmalloc VM area: the
* current 8MB value just means that there will be a 8MB "hole" after the
* physical memory until the kernel virtual memory starts. That means that
diff --git a/arch/unicore32/include/asm/pgalloc.h b/arch/unicore32/include/asm/pgalloc.h
index 3f0903bd98e9..ba1c9a79993b 100644
--- a/arch/unicore32/include/asm/pgalloc.h
+++ b/arch/unicore32/include/asm/pgalloc.h
@@ -18,8 +18,6 @@
#define __HAVE_ARCH_PTE_ALLOC_ONE
#include <asm-generic/pgalloc.h>
-#define check_pgt_cache() do { } while (0)
-
#define _PAGE_USER_TABLE (PMD_TYPE_TABLE | PMD_PRESENT)
#define _PAGE_KERNEL_TABLE (PMD_TYPE_TABLE | PMD_PRESENT)
diff --git a/arch/unicore32/include/asm/pgtable.h b/arch/unicore32/include/asm/pgtable.h
index 126e961a8cb0..c8f7ba12f309 100644
--- a/arch/unicore32/include/asm/pgtable.h
+++ b/arch/unicore32/include/asm/pgtable.h
@@ -285,8 +285,6 @@ extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
#include <asm-generic/pgtable.h>
-#define pgtable_cache_init() do { } while (0)
-
#endif /* !__ASSEMBLY__ */
#endif /* __UNICORE_PGTABLE_H__ */
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h
index c78da8eda8f2..0dca7f7aeff2 100644
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -29,8 +29,6 @@ extern pgd_t swapper_pg_dir[1024];
extern pgd_t initial_page_table[1024];
extern pmd_t initial_pg_pmd[];
-static inline void pgtable_cache_init(void) { }
-static inline void check_pgt_cache(void) { }
void paging_init(void);
void sync_initial_page_table(void);
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 4990d26dfc73..0b6c4042942a 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -241,9 +241,6 @@ extern void cleanup_highmap(void);
#define HAVE_ARCH_UNMAPPED_AREA
#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
-#define pgtable_cache_init() do { } while (0)
-#define check_pgt_cache() do { } while (0)
-
#define PAGE_AGP PAGE_KERNEL_NOCACHE
#define HAVE_PAGE_AGP 1
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 44816ff6411f..463940faf52f 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -357,7 +357,7 @@ static void pgd_prepopulate_user_pmd(struct mm_struct *mm,
static struct kmem_cache *pgd_cache;
-void __init pgd_cache_init(void)
+void __init pgtable_cache_init(void)
{
/*
* When PAE kernel is running as a Xen domain, it does not use
@@ -402,10 +402,6 @@ static inline void _pgd_free(pgd_t *pgd)
}
#else
-void __init pgd_cache_init(void)
-{
-}
-
static inline pgd_t *_pgd_alloc(void)
{
return (pgd_t *)__get_free_pages(GFP_PGTABLE_USER,
diff --git a/arch/xtensa/include/asm/pgtable.h b/arch/xtensa/include/asm/pgtable.h
index ce3ff5e591b9..3f7fe5a8c286 100644
--- a/arch/xtensa/include/asm/pgtable.h
+++ b/arch/xtensa/include/asm/pgtable.h
@@ -238,7 +238,6 @@ extern void paging_init(void);
# define swapper_pg_dir NULL
static inline void paging_init(void) { }
#endif
-static inline void pgtable_cache_init(void) { }
/*
* The pmd contains the kernel virtual address of the pte page.
diff --git a/arch/xtensa/include/asm/tlbflush.h b/arch/xtensa/include/asm/tlbflush.h
index 06875feb27c2..856e2da2e397 100644
--- a/arch/xtensa/include/asm/tlbflush.h
+++ b/arch/xtensa/include/asm/tlbflush.h
@@ -160,9 +160,6 @@ static inline void invalidate_dtlb_mapping (unsigned address)
invalidate_dtlb_entry(tlb_entry);
}
-#define check_pgt_cache() do { } while (0)
-
-
/*
* DO NOT USE THESE FUNCTIONS. These instructions aren't part of the Xtensa
* ISA and exist only for test purposes..
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index b33be928d164..0319d6339822 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -2016,7 +2016,7 @@ static void bfq_add_request(struct request *rq)
(bfqq->last_serv_time_ns > 0 &&
bfqd->rqs_injected && bfqd->rq_in_driver > 0)) &&
time_is_before_eq_jiffies(bfqq->decrease_time_jif +
- msecs_to_jiffies(100))) {
+ msecs_to_jiffies(10))) {
bfqd->last_empty_occupied_ns = ktime_get_ns();
/*
* Start the state machine for measuring the
@@ -2025,7 +2025,21 @@ static void bfq_add_request(struct request *rq)
* be set when rq will be dispatched.
*/
bfqd->wait_dispatch = true;
- bfqd->rqs_injected = false;
+ /*
+ * If there is no I/O in service in the drive,
+ * then possible injection occurred before the
+ * arrival of rq will not affect the total
+ * service time of rq. So the injection limit
+ * must not be updated as a function of such
+ * total service time, unless new injection
+ * occurs before rq is completed. To have the
+ * injection limit updated only in the latter
+ * case, reset rqs_injected here (rqs_injected
+ * will be set in case injection is performed
+ * on bfqq before rq is completed).
+ */
+ if (bfqd->rq_in_driver == 0)
+ bfqd->rqs_injected = false;
}
}
@@ -5784,14 +5798,14 @@ static void bfq_update_inject_limit(struct bfq_data *bfqd,
u64 tot_time_ns = ktime_get_ns() - bfqd->last_empty_occupied_ns;
unsigned int old_limit = bfqq->inject_limit;
- if (bfqq->last_serv_time_ns > 0) {
+ if (bfqq->last_serv_time_ns > 0 && bfqd->rqs_injected) {
u64 threshold = (bfqq->last_serv_time_ns * 3)>>1;
if (tot_time_ns >= threshold && old_limit > 0) {
bfqq->inject_limit--;
bfqq->decrease_time_jif = jiffies;
} else if (tot_time_ns < threshold &&
- old_limit < bfqd->max_rq_in_driver<<1)
+ old_limit <= bfqd->max_rq_in_driver)
bfqq->inject_limit++;
}
@@ -5809,12 +5823,14 @@ static void bfq_update_inject_limit(struct bfq_data *bfqd,
*/
if ((bfqq->last_serv_time_ns == 0 && bfqd->rq_in_driver == 1) ||
tot_time_ns < bfqq->last_serv_time_ns) {
+ if (bfqq->last_serv_time_ns == 0) {
+ /*
+ * Now we certainly have a base value: make sure we
+ * start trying injection.
+ */
+ bfqq->inject_limit = max_t(unsigned int, 1, old_limit);
+ }
bfqq->last_serv_time_ns = tot_time_ns;
- /*
- * Now we certainly have a base value: make sure we
- * start trying injection.
- */
- bfqq->inject_limit = max_t(unsigned int, 1, old_limit);
} else if (!bfqd->rqs_injected && bfqd->rq_in_driver == 1)
/*
* No I/O injected and no request still in service in
@@ -5830,6 +5846,7 @@ static void bfq_update_inject_limit(struct bfq_data *bfqd,
/* update complete, not waiting for any request completion any longer */
bfqd->waited_rq = NULL;
+ bfqd->rqs_injected = false;
}
/*
diff --git a/block/blk-core.c b/block/blk-core.c
index 875e8d105067..d5e668ec751b 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -34,6 +34,7 @@
#include <linux/ratelimit.h>
#include <linux/pm_runtime.h>
#include <linux/blk-cgroup.h>
+#include <linux/t10-pi.h>
#include <linux/debugfs.h>
#include <linux/bpf.h>
#include <linux/psi.h>
@@ -1436,6 +1437,12 @@ bool blk_update_request(struct request *req, blk_status_t error,
if (!req->bio)
return false;
+#ifdef CONFIG_BLK_DEV_INTEGRITY
+ if (blk_integrity_rq(req) && req_op(req) == REQ_OP_READ &&
+ error == BLK_STS_OK)
+ req->q->integrity.profile->complete_fn(req, nr_bytes);
+#endif
+
if (unlikely(error && !blk_rq_is_passthrough(req) &&
!(req->rq_flags & RQF_QUIET)))
print_req_error(req, error, __func__);
diff --git a/block/blk-integrity.c b/block/blk-integrity.c
index ca39b4624cf8..ff1070edbb40 100644
--- a/block/blk-integrity.c
+++ b/block/blk-integrity.c
@@ -368,10 +368,21 @@ static blk_status_t blk_integrity_nop_fn(struct blk_integrity_iter *iter)
return BLK_STS_OK;
}
+static void blk_integrity_nop_prepare(struct request *rq)
+{
+}
+
+static void blk_integrity_nop_complete(struct request *rq,
+ unsigned int nr_bytes)
+{
+}
+
static const struct blk_integrity_profile nop_profile = {
.name = "nop",
.generate_fn = blk_integrity_nop_fn,
.verify_fn = blk_integrity_nop_fn,
+ .prepare_fn = blk_integrity_nop_prepare,
+ .complete_fn = blk_integrity_nop_complete,
};
/**
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 20a49be536b5..29275f5a996f 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -30,6 +30,7 @@
#include <trace/events/block.h>
#include <linux/blk-mq.h>
+#include <linux/t10-pi.h>
#include "blk.h"
#include "blk-mq.h"
#include "blk-mq-debugfs.h"
@@ -700,6 +701,11 @@ void blk_mq_start_request(struct request *rq)
*/
rq->nr_phys_segments++;
}
+
+#ifdef CONFIG_BLK_DEV_INTEGRITY
+ if (blk_integrity_rq(rq) && req_op(rq) == REQ_OP_WRITE)
+ q->integrity.profile->prepare_fn(rq);
+#endif
}
EXPORT_SYMBOL(blk_mq_start_request);
diff --git a/block/bsg-lib.c b/block/bsg-lib.c
index 785dd58947f1..347dda16c2f4 100644
--- a/block/bsg-lib.c
+++ b/block/bsg-lib.c
@@ -266,6 +266,7 @@ static blk_status_t bsg_queue_rq(struct blk_mq_hw_ctx *hctx,
struct request *req = bd->rq;
struct bsg_set *bset =
container_of(q->tag_set, struct bsg_set, tag_set);
+ int sts = BLK_STS_IOERR;
int ret;
blk_mq_start_request(req);
@@ -274,14 +275,15 @@ static blk_status_t bsg_queue_rq(struct blk_mq_hw_ctx *hctx,
return BLK_STS_IOERR;
if (!bsg_prepare_job(dev, req))
- return BLK_STS_IOERR;
+ goto out;
ret = bset->job_fn(blk_mq_rq_to_pdu(req));
- if (ret)
- return BLK_STS_IOERR;
+ if (!ret)
+ sts = BLK_STS_OK;
+out:
put_device(dev);
- return BLK_STS_OK;
+ return sts;
}
/* called right after the request is allocated for the request_queue */
diff --git a/block/t10-pi.c b/block/t10-pi.c
index 0c0094609dd6..9803c7e0376e 100644
--- a/block/t10-pi.c
+++ b/block/t10-pi.c
@@ -27,7 +27,7 @@ static __be16 t10_pi_ip_fn(void *data, unsigned int len)
* tag.
*/
static blk_status_t t10_pi_generate(struct blk_integrity_iter *iter,
- csum_fn *fn, unsigned int type)
+ csum_fn *fn, enum t10_dif_type type)
{
unsigned int i;
@@ -37,7 +37,7 @@ static blk_status_t t10_pi_generate(struct blk_integrity_iter *iter,
pi->guard_tag = fn(iter->data_buf, iter->interval);
pi->app_tag = 0;
- if (type == 1)
+ if (type == T10_PI_TYPE1_PROTECTION)
pi->ref_tag = cpu_to_be32(lower_32_bits(iter->seed));
else
pi->ref_tag = 0;
@@ -51,17 +51,18 @@ static blk_status_t t10_pi_generate(struct blk_integrity_iter *iter,
}
static blk_status_t t10_pi_verify(struct blk_integrity_iter *iter,
- csum_fn *fn, unsigned int type)
+ csum_fn *fn, enum t10_dif_type type)
{
unsigned int i;
+ BUG_ON(type == T10_PI_TYPE0_PROTECTION);
+
for (i = 0 ; i < iter->data_size ; i += iter->interval) {
struct t10_pi_tuple *pi = iter->prot_buf;
__be16 csum;
- switch (type) {
- case 1:
- case 2:
+ if (type == T10_PI_TYPE1_PROTECTION ||
+ type == T10_PI_TYPE2_PROTECTION) {
if (pi->app_tag == T10_PI_APP_ESCAPE)
goto next;
@@ -73,12 +74,10 @@ static blk_status_t t10_pi_verify(struct blk_integrity_iter *iter,
iter->seed, be32_to_cpu(pi->ref_tag));
return BLK_STS_PROTECTION;
}
- break;
- case 3:
+ } else if (type == T10_PI_TYPE3_PROTECTION) {
if (pi->app_tag == T10_PI_APP_ESCAPE &&
pi->ref_tag == T10_PI_REF_ESCAPE)
goto next;
- break;
}
csum = fn(iter->data_buf, iter->interval);
@@ -102,94 +101,40 @@ next:
static blk_status_t t10_pi_type1_generate_crc(struct blk_integrity_iter *iter)
{
- return t10_pi_generate(iter, t10_pi_crc_fn, 1);
+ return t10_pi_generate(iter, t10_pi_crc_fn, T10_PI_TYPE1_PROTECTION);
}
static blk_status_t t10_pi_type1_generate_ip(struct blk_integrity_iter *iter)
{
- return t10_pi_generate(iter, t10_pi_ip_fn, 1);
+ return t10_pi_generate(iter, t10_pi_ip_fn, T10_PI_TYPE1_PROTECTION);
}
static blk_status_t t10_pi_type1_verify_crc(struct blk_integrity_iter *iter)
{
- return t10_pi_verify(iter, t10_pi_crc_fn, 1);
+ return t10_pi_verify(iter, t10_pi_crc_fn, T10_PI_TYPE1_PROTECTION);
}
static blk_status_t t10_pi_type1_verify_ip(struct blk_integrity_iter *iter)
{
- return t10_pi_verify(iter, t10_pi_ip_fn, 1);
-}
-
-static blk_status_t t10_pi_type3_generate_crc(struct blk_integrity_iter *iter)
-{
- return t10_pi_generate(iter, t10_pi_crc_fn, 3);
-}
-
-static blk_status_t t10_pi_type3_generate_ip(struct blk_integrity_iter *iter)
-{
- return t10_pi_generate(iter, t10_pi_ip_fn, 3);
-}
-
-static blk_status_t t10_pi_type3_verify_crc(struct blk_integrity_iter *iter)
-{
- return t10_pi_verify(iter, t10_pi_crc_fn, 3);
+ return t10_pi_verify(iter, t10_pi_ip_fn, T10_PI_TYPE1_PROTECTION);
}
-static blk_status_t t10_pi_type3_verify_ip(struct blk_integrity_iter *iter)
-{
- return t10_pi_verify(iter, t10_pi_ip_fn, 3);
-}
-
-const struct blk_integrity_profile t10_pi_type1_crc = {
- .name = "T10-DIF-TYPE1-CRC",
- .generate_fn = t10_pi_type1_generate_crc,
- .verify_fn = t10_pi_type1_verify_crc,
-};
-EXPORT_SYMBOL(t10_pi_type1_crc);
-
-const struct blk_integrity_profile t10_pi_type1_ip = {
- .name = "T10-DIF-TYPE1-IP",
- .generate_fn = t10_pi_type1_generate_ip,
- .verify_fn = t10_pi_type1_verify_ip,
-};
-EXPORT_SYMBOL(t10_pi_type1_ip);
-
-const struct blk_integrity_profile t10_pi_type3_crc = {
- .name = "T10-DIF-TYPE3-CRC",
- .generate_fn = t10_pi_type3_generate_crc,
- .verify_fn = t10_pi_type3_verify_crc,
-};
-EXPORT_SYMBOL(t10_pi_type3_crc);
-
-const struct blk_integrity_profile t10_pi_type3_ip = {
- .name = "T10-DIF-TYPE3-IP",
- .generate_fn = t10_pi_type3_generate_ip,
- .verify_fn = t10_pi_type3_verify_ip,
-};
-EXPORT_SYMBOL(t10_pi_type3_ip);
-
/**
- * t10_pi_prepare - prepare PI prior submitting request to device
+ * t10_pi_type1_prepare - prepare PI prior submitting request to device
* @rq: request with PI that should be prepared
- * @protection_type: PI type (Type 1/Type 2/Type 3)
*
* For Type 1/Type 2, the virtual start sector is the one that was
* originally submitted by the block layer for the ref_tag usage. Due to
* partitioning, MD/DM cloning, etc. the actual physical start sector is
* likely to be different. Remap protection information to match the
* physical LBA.
- *
- * Type 3 does not have a reference tag so no remapping is required.
*/
-void t10_pi_prepare(struct request *rq, u8 protection_type)
+static void t10_pi_type1_prepare(struct request *rq)
{
const int tuple_sz = rq->q->integrity.tuple_size;
u32 ref_tag = t10_pi_ref_tag(rq);
struct bio *bio;
- if (protection_type == T10_PI_TYPE3_PROTECTION)
- return;
-
__rq_for_each_bio(bio, rq) {
struct bio_integrity_payload *bip = bio_integrity(bio);
u32 virt = bip_get_seed(bip) & 0xffffffff;
@@ -222,13 +167,11 @@ void t10_pi_prepare(struct request *rq, u8 protection_type)
bip->bip_flags |= BIP_MAPPED_INTEGRITY;
}
}
-EXPORT_SYMBOL(t10_pi_prepare);
/**
- * t10_pi_complete - prepare PI prior returning request to the block layer
+ * t10_pi_type1_complete - prepare PI prior returning request to the blk layer
* @rq: request with PI that should be prepared
- * @protection_type: PI type (Type 1/Type 2/Type 3)
- * @intervals: total elements to prepare
+ * @nr_bytes: total bytes to prepare
*
* For Type 1/Type 2, the virtual start sector is the one that was
* originally submitted by the block layer for the ref_tag usage. Due to
@@ -236,19 +179,14 @@ EXPORT_SYMBOL(t10_pi_prepare);
* likely to be different. Since the physical start sector was submitted
* to the device, we should remap it back to virtual values expected by the
* block layer.
- *
- * Type 3 does not have a reference tag so no remapping is required.
*/
-void t10_pi_complete(struct request *rq, u8 protection_type,
- unsigned int intervals)
+static void t10_pi_type1_complete(struct request *rq, unsigned int nr_bytes)
{
+ unsigned intervals = nr_bytes >> rq->q->integrity.interval_exp;
const int tuple_sz = rq->q->integrity.tuple_size;
u32 ref_tag = t10_pi_ref_tag(rq);
struct bio *bio;
- if (protection_type == T10_PI_TYPE3_PROTECTION)
- return;
-
__rq_for_each_bio(bio, rq) {
struct bio_integrity_payload *bip = bio_integrity(bio);
u32 virt = bip_get_seed(bip) & 0xffffffff;
@@ -276,4 +214,73 @@ void t10_pi_complete(struct request *rq, u8 protection_type,
}
}
}
-EXPORT_SYMBOL(t10_pi_complete);
+
+static blk_status_t t10_pi_type3_generate_crc(struct blk_integrity_iter *iter)
+{
+ return t10_pi_generate(iter, t10_pi_crc_fn, T10_PI_TYPE3_PROTECTION);
+}
+
+static blk_status_t t10_pi_type3_generate_ip(struct blk_integrity_iter *iter)
+{
+ return t10_pi_generate(iter, t10_pi_ip_fn, T10_PI_TYPE3_PROTECTION);
+}
+
+static blk_status_t t10_pi_type3_verify_crc(struct blk_integrity_iter *iter)
+{
+ return t10_pi_verify(iter, t10_pi_crc_fn, T10_PI_TYPE3_PROTECTION);
+}
+
+static blk_status_t t10_pi_type3_verify_ip(struct blk_integrity_iter *iter)
+{
+ return t10_pi_verify(iter, t10_pi_ip_fn, T10_PI_TYPE3_PROTECTION);
+}
+
+/**
+ * Type 3 does not have a reference tag so no remapping is required.
+ */
+static void t10_pi_type3_prepare(struct request *rq)
+{
+}
+
+/**
+ * Type 3 does not have a reference tag so no remapping is required.
+ */
+static void t10_pi_type3_complete(struct request *rq, unsigned int nr_bytes)
+{
+}
+
+const struct blk_integrity_profile t10_pi_type1_crc = {
+ .name = "T10-DIF-TYPE1-CRC",
+ .generate_fn = t10_pi_type1_generate_crc,
+ .verify_fn = t10_pi_type1_verify_crc,
+ .prepare_fn = t10_pi_type1_prepare,
+ .complete_fn = t10_pi_type1_complete,
+};
+EXPORT_SYMBOL(t10_pi_type1_crc);
+
+const struct blk_integrity_profile t10_pi_type1_ip = {
+ .name = "T10-DIF-TYPE1-IP",
+ .generate_fn = t10_pi_type1_generate_ip,
+ .verify_fn = t10_pi_type1_verify_ip,
+ .prepare_fn = t10_pi_type1_prepare,
+ .complete_fn = t10_pi_type1_complete,
+};
+EXPORT_SYMBOL(t10_pi_type1_ip);
+
+const struct blk_integrity_profile t10_pi_type3_crc = {
+ .name = "T10-DIF-TYPE3-CRC",
+ .generate_fn = t10_pi_type3_generate_crc,
+ .verify_fn = t10_pi_type3_verify_crc,
+ .prepare_fn = t10_pi_type3_prepare,
+ .complete_fn = t10_pi_type3_complete,
+};
+EXPORT_SYMBOL(t10_pi_type3_crc);
+
+const struct blk_integrity_profile t10_pi_type3_ip = {
+ .name = "T10-DIF-TYPE3-IP",
+ .generate_fn = t10_pi_type3_generate_ip,
+ .verify_fn = t10_pi_type3_verify_ip,
+ .prepare_fn = t10_pi_type3_prepare,
+ .complete_fn = t10_pi_type3_complete,
+};
+EXPORT_SYMBOL(t10_pi_type3_ip);
diff --git a/drivers/acpi/acpi_apd.c b/drivers/acpi/acpi_apd.c
index 7cd0c9ac71ea..71511ae2dfcd 100644
--- a/drivers/acpi/acpi_apd.c
+++ b/drivers/acpi/acpi_apd.c
@@ -160,11 +160,17 @@ static const struct apd_device_desc hip08_i2c_desc = {
.setup = acpi_apd_setup,
.fixed_clk_rate = 250000000,
};
+
static const struct apd_device_desc thunderx2_i2c_desc = {
.setup = acpi_apd_setup,
.fixed_clk_rate = 125000000,
};
+static const struct apd_device_desc nxp_i2c_desc = {
+ .setup = acpi_apd_setup,
+ .fixed_clk_rate = 350000000,
+};
+
static const struct apd_device_desc hip08_spi_desc = {
.setup = acpi_apd_setup,
.fixed_clk_rate = 250000000,
@@ -238,6 +244,7 @@ static const struct acpi_device_id acpi_apd_device_ids[] = {
{ "HISI02A1", APD_ADDR(hip07_i2c_desc) },
{ "HISI02A2", APD_ADDR(hip08_i2c_desc) },
{ "HISI0173", APD_ADDR(hip08_spi_desc) },
+ { "NXP0001", APD_ADDR(nxp_i2c_desc) },
#endif
{ }
};
diff --git a/drivers/ata/libahci_platform.c b/drivers/ata/libahci_platform.c
index 9e9583a6bba9..e742780950de 100644
--- a/drivers/ata/libahci_platform.c
+++ b/drivers/ata/libahci_platform.c
@@ -497,6 +497,7 @@ struct ahci_host_priv *ahci_platform_get_resources(struct platform_device *pdev,
if (of_property_read_u32(child, "reg", &port)) {
rc = -EINVAL;
+ of_node_put(child);
goto err_out;
}
@@ -514,14 +515,18 @@ struct ahci_host_priv *ahci_platform_get_resources(struct platform_device *pdev,
if (port_dev) {
rc = ahci_platform_get_regulator(hpriv, port,
&port_dev->dev);
- if (rc == -EPROBE_DEFER)
+ if (rc == -EPROBE_DEFER) {
+ of_node_put(child);
goto err_out;
+ }
}
#endif
rc = ahci_platform_get_phy(hpriv, port, dev, child);
- if (rc)
+ if (rc) {
+ of_node_put(child);
goto err_out;
+ }
enabled_ports++;
}
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 20c39d1bcef8..6bea4f3f8040 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -100,26 +100,9 @@ unsigned long __weak memory_block_size_bytes(void)
}
EXPORT_SYMBOL_GPL(memory_block_size_bytes);
-static unsigned long get_memory_block_size(void)
-{
- unsigned long block_sz;
-
- block_sz = memory_block_size_bytes();
-
- /* Validate blk_sz is a power of 2 and not less than section size */
- if ((block_sz & (block_sz - 1)) || (block_sz < MIN_MEMORY_BLOCK_SIZE)) {
- WARN_ON(1);
- block_sz = MIN_MEMORY_BLOCK_SIZE;
- }
-
- return block_sz;
-}
-
/*
- * use this as the physical section index that this memsection
- * uses.
+ * Show the first physical section index (number) of this memory block.
*/
-
static ssize_t phys_index_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -131,7 +114,10 @@ static ssize_t phys_index_show(struct device *dev,
}
/*
- * Show whether the section of memory is likely to be hot-removable
+ * Show whether the memory block is likely to be offlineable (or is already
+ * offline). Once offline, the memory block could be removed. The return
+ * value does, however, not indicate that there is a way to remove the
+ * memory block.
*/
static ssize_t removable_show(struct device *dev, struct device_attribute *attr,
char *buf)
@@ -455,12 +441,12 @@ static DEVICE_ATTR_RO(phys_device);
static DEVICE_ATTR_RO(removable);
/*
- * Block size attribute stuff
+ * Show the memory block size (shared by all memory blocks).
*/
static ssize_t block_size_bytes_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
- return sprintf(buf, "%lx\n", get_memory_block_size());
+ return sprintf(buf, "%lx\n", memory_block_size_bytes());
}
static DEVICE_ATTR_RO(block_size_bytes);
@@ -670,10 +656,10 @@ static int init_memory_block(struct memory_block **memory,
return -ENOMEM;
mem->start_section_nr = block_id * sections_per_block;
- mem->end_section_nr = mem->start_section_nr + sections_per_block - 1;
mem->state = state;
start_pfn = section_nr_to_pfn(mem->start_section_nr);
mem->phys_device = arch_get_memory_phys_device(start_pfn);
+ mem->nid = NUMA_NO_NODE;
ret = register_memory(mem);
@@ -810,19 +796,22 @@ static const struct attribute_group *memory_root_attr_groups[] = {
/*
* Initialize the sysfs support for memory devices...
*/
-int __init memory_dev_init(void)
+void __init memory_dev_init(void)
{
int ret;
int err;
unsigned long block_sz, nr;
+ /* Validate the configured memory block size */
+ block_sz = memory_block_size_bytes();
+ if (!is_power_of_2(block_sz) || block_sz < MIN_MEMORY_BLOCK_SIZE)
+ panic("Memory block size not suitable: 0x%lx\n", block_sz);
+ sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE;
+
ret = subsys_system_register(&memory_subsys, memory_root_attr_groups);
if (ret)
goto out;
- block_sz = get_memory_block_size();
- sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE;
-
/*
* Create entries for memory sections that were found
* during boot and have been initialized
@@ -838,8 +827,7 @@ int __init memory_dev_init(void)
out:
if (ret)
- printk(KERN_ERR "%s() failed: %d\n", __func__, ret);
- return ret;
+ panic("%s() failed: %d\n", __func__, ret);
}
/**
diff --git a/drivers/base/node.c b/drivers/base/node.c
index 75b7e6f6535b..296546ffed6c 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -427,6 +427,8 @@ static ssize_t node_read_meminfo(struct device *dev,
"Node %d AnonHugePages: %8lu kB\n"
"Node %d ShmemHugePages: %8lu kB\n"
"Node %d ShmemPmdMapped: %8lu kB\n"
+ "Node %d FileHugePages: %8lu kB\n"
+ "Node %d FilePmdMapped: %8lu kB\n"
#endif
,
nid, K(node_page_state(pgdat, NR_FILE_DIRTY)),
@@ -452,6 +454,10 @@ static ssize_t node_read_meminfo(struct device *dev,
nid, K(node_page_state(pgdat, NR_SHMEM_THPS) *
HPAGE_PMD_NR),
nid, K(node_page_state(pgdat, NR_SHMEM_PMDMAPPED) *
+ HPAGE_PMD_NR),
+ nid, K(node_page_state(pgdat, NR_FILE_THPS) *
+ HPAGE_PMD_NR),
+ nid, K(node_page_state(pgdat, NR_FILE_PMDMAPPED) *
HPAGE_PMD_NR)
#endif
);
@@ -756,15 +762,13 @@ static int __ref get_nid_for_pfn(unsigned long pfn)
static int register_mem_sect_under_node(struct memory_block *mem_blk,
void *arg)
{
+ unsigned long memory_block_pfns = memory_block_size_bytes() / PAGE_SIZE;
+ unsigned long start_pfn = section_nr_to_pfn(mem_blk->start_section_nr);
+ unsigned long end_pfn = start_pfn + memory_block_pfns - 1;
int ret, nid = *(int *)arg;
- unsigned long pfn, sect_start_pfn, sect_end_pfn;
+ unsigned long pfn;
- mem_blk->nid = nid;
-
- sect_start_pfn = section_nr_to_pfn(mem_blk->start_section_nr);
- sect_end_pfn = section_nr_to_pfn(mem_blk->end_section_nr);
- sect_end_pfn += PAGES_PER_SECTION - 1;
- for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) {
+ for (pfn = start_pfn; pfn <= end_pfn; pfn++) {
int page_nid;
/*
@@ -789,6 +793,13 @@ static int register_mem_sect_under_node(struct memory_block *mem_blk,
if (page_nid != nid)
continue;
}
+
+ /*
+ * If this memory block spans multiple nodes, we only indicate
+ * the last processed node.
+ */
+ mem_blk->nid = nid;
+
ret = sysfs_create_link_nowarn(&node_devices[nid]->dev.kobj,
&mem_blk->dev.kobj,
kobject_name(&mem_blk->dev.kobj));
@@ -804,32 +815,18 @@ static int register_mem_sect_under_node(struct memory_block *mem_blk,
}
/*
- * Unregister memory block device under all nodes that it spans.
- * Has to be called with mem_sysfs_mutex held (due to unlinked_nodes).
+ * Unregister a memory block device under the node it spans. Memory blocks
+ * with multiple nodes cannot be offlined and therefore also never be removed.
*/
void unregister_memory_block_under_nodes(struct memory_block *mem_blk)
{
- unsigned long pfn, sect_start_pfn, sect_end_pfn;
- static nodemask_t unlinked_nodes;
-
- nodes_clear(unlinked_nodes);
- sect_start_pfn = section_nr_to_pfn(mem_blk->start_section_nr);
- sect_end_pfn = section_nr_to_pfn(mem_blk->end_section_nr);
- for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) {
- int nid;
+ if (mem_blk->nid == NUMA_NO_NODE)
+ return;
- nid = get_nid_for_pfn(pfn);
- if (nid < 0)
- continue;
- if (!node_online(nid))
- continue;
- if (node_test_and_set(nid, unlinked_nodes))
- continue;
- sysfs_remove_link(&node_devices[nid]->dev.kobj,
- kobject_name(&mem_blk->dev.kobj));
- sysfs_remove_link(&mem_blk->dev.kobj,
- kobject_name(&node_devices[nid]->dev.kobj));
- }
+ sysfs_remove_link(&node_devices[mem_blk->nid]->dev.kobj,
+ kobject_name(&mem_blk->dev.kobj));
+ sysfs_remove_link(&mem_blk->dev.kobj,
+ kobject_name(&node_devices[mem_blk->nid]->dev.kobj));
}
int link_mem_sections(int nid, unsigned long start_pfn, unsigned long end_pfn)
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index a8e3815295fe..ac07e8c94c79 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -26,6 +26,7 @@
#include <linux/ioctl.h>
#include <linux/mutex.h>
#include <linux/compiler.h>
+#include <linux/completion.h>
#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/slab.h>
@@ -71,14 +72,17 @@ struct link_dead_args {
int index;
};
-#define NBD_TIMEDOUT 0
+#define NBD_RT_TIMEDOUT 0
+#define NBD_RT_DISCONNECT_REQUESTED 1
+#define NBD_RT_DISCONNECTED 2
+#define NBD_RT_HAS_PID_FILE 3
+#define NBD_RT_HAS_CONFIG_REF 4
+#define NBD_RT_BOUND 5
+#define NBD_RT_DESTROY_ON_DISCONNECT 6
+#define NBD_RT_DISCONNECT_ON_CLOSE 7
+
+#define NBD_DESTROY_ON_DISCONNECT 0
#define NBD_DISCONNECT_REQUESTED 1
-#define NBD_DISCONNECTED 2
-#define NBD_HAS_PID_FILE 3
-#define NBD_HAS_CONFIG_REF 4
-#define NBD_BOUND 5
-#define NBD_DESTROY_ON_DISCONNECT 6
-#define NBD_DISCONNECT_ON_CLOSE 7
struct nbd_config {
u32 flags;
@@ -113,6 +117,9 @@ struct nbd_device {
struct list_head list;
struct task_struct *task_recv;
struct task_struct *task_setup;
+
+ struct completion *destroy_complete;
+ unsigned long flags;
};
#define NBD_CMD_REQUEUED 1
@@ -223,6 +230,16 @@ static void nbd_dev_remove(struct nbd_device *nbd)
disk->private_data = NULL;
put_disk(disk);
}
+
+ /*
+ * Place this in the last just before the nbd is freed to
+ * make sure that the disk and the related kobject are also
+ * totally removed to avoid duplicate creation of the same
+ * one.
+ */
+ if (test_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags) && nbd->destroy_complete)
+ complete(nbd->destroy_complete);
+
kfree(nbd);
}
@@ -238,8 +255,8 @@ static void nbd_put(struct nbd_device *nbd)
static int nbd_disconnected(struct nbd_config *config)
{
- return test_bit(NBD_DISCONNECTED, &config->runtime_flags) ||
- test_bit(NBD_DISCONNECT_REQUESTED, &config->runtime_flags);
+ return test_bit(NBD_RT_DISCONNECTED, &config->runtime_flags) ||
+ test_bit(NBD_RT_DISCONNECT_REQUESTED, &config->runtime_flags);
}
static void nbd_mark_nsock_dead(struct nbd_device *nbd, struct nbd_sock *nsock,
@@ -257,9 +274,9 @@ static void nbd_mark_nsock_dead(struct nbd_device *nbd, struct nbd_sock *nsock,
if (!nsock->dead) {
kernel_sock_shutdown(nsock->sock, SHUT_RDWR);
if (atomic_dec_return(&nbd->config->live_connections) == 0) {
- if (test_and_clear_bit(NBD_DISCONNECT_REQUESTED,
+ if (test_and_clear_bit(NBD_RT_DISCONNECT_REQUESTED,
&nbd->config->runtime_flags)) {
- set_bit(NBD_DISCONNECTED,
+ set_bit(NBD_RT_DISCONNECTED,
&nbd->config->runtime_flags);
dev_info(nbd_to_dev(nbd),
"Disconnected due to user request.\n");
@@ -333,7 +350,7 @@ static void sock_shutdown(struct nbd_device *nbd)
if (config->num_connections == 0)
return;
- if (test_and_set_bit(NBD_DISCONNECTED, &config->runtime_flags))
+ if (test_and_set_bit(NBD_RT_DISCONNECTED, &config->runtime_flags))
return;
for (i = 0; i < config->num_connections; i++) {
@@ -427,7 +444,7 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
}
dev_err_ratelimited(nbd_to_dev(nbd), "Connection timed out\n");
- set_bit(NBD_TIMEDOUT, &config->runtime_flags);
+ set_bit(NBD_RT_TIMEDOUT, &config->runtime_flags);
cmd->status = BLK_STS_IOERR;
mutex_unlock(&cmd->lock);
sock_shutdown(nbd);
@@ -795,7 +812,7 @@ static int find_fallback(struct nbd_device *nbd, int index)
struct nbd_sock *nsock = config->socks[index];
int fallback = nsock->fallback_index;
- if (test_bit(NBD_DISCONNECTED, &config->runtime_flags))
+ if (test_bit(NBD_RT_DISCONNECTED, &config->runtime_flags))
return new_index;
if (config->num_connections <= 1) {
@@ -836,7 +853,7 @@ static int wait_for_reconnect(struct nbd_device *nbd)
struct nbd_config *config = nbd->config;
if (!config->dead_conn_timeout)
return 0;
- if (test_bit(NBD_DISCONNECTED, &config->runtime_flags))
+ if (test_bit(NBD_RT_DISCONNECTED, &config->runtime_flags))
return 0;
return wait_event_timeout(config->conn_wait,
atomic_read(&config->live_connections) > 0,
@@ -969,12 +986,12 @@ static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg,
return err;
if (!netlink && !nbd->task_setup &&
- !test_bit(NBD_BOUND, &config->runtime_flags))
+ !test_bit(NBD_RT_BOUND, &config->runtime_flags))
nbd->task_setup = current;
if (!netlink &&
(nbd->task_setup != current ||
- test_bit(NBD_BOUND, &config->runtime_flags))) {
+ test_bit(NBD_RT_BOUND, &config->runtime_flags))) {
dev_err(disk_to_dev(nbd->disk),
"Device being setup by another task");
sockfd_put(sock);
@@ -1053,7 +1070,7 @@ static int nbd_reconnect_socket(struct nbd_device *nbd, unsigned long arg)
mutex_unlock(&nsock->tx_lock);
sockfd_put(old);
- clear_bit(NBD_DISCONNECTED, &config->runtime_flags);
+ clear_bit(NBD_RT_DISCONNECTED, &config->runtime_flags);
/* We take the tx_mutex in an error path in the recv_work, so we
* need to queue_work outside of the tx_mutex.
@@ -1124,7 +1141,8 @@ static int nbd_disconnect(struct nbd_device *nbd)
struct nbd_config *config = nbd->config;
dev_info(disk_to_dev(nbd->disk), "NBD_DISCONNECT\n");
- set_bit(NBD_DISCONNECT_REQUESTED, &config->runtime_flags);
+ set_bit(NBD_RT_DISCONNECT_REQUESTED, &config->runtime_flags);
+ set_bit(NBD_DISCONNECT_REQUESTED, &nbd->flags);
send_disconnects(nbd);
return 0;
}
@@ -1143,7 +1161,7 @@ static void nbd_config_put(struct nbd_device *nbd)
struct nbd_config *config = nbd->config;
nbd_dev_dbg_close(nbd);
nbd_size_clear(nbd);
- if (test_and_clear_bit(NBD_HAS_PID_FILE,
+ if (test_and_clear_bit(NBD_RT_HAS_PID_FILE,
&config->runtime_flags))
device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
nbd->task_recv = NULL;
@@ -1209,7 +1227,7 @@ static int nbd_start_device(struct nbd_device *nbd)
dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n");
return error;
}
- set_bit(NBD_HAS_PID_FILE, &config->runtime_flags);
+ set_bit(NBD_RT_HAS_PID_FILE, &config->runtime_flags);
nbd_dev_dbg_init(nbd);
for (i = 0; i < num_connections; i++) {
@@ -1256,9 +1274,9 @@ static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *b
mutex_lock(&nbd->config_lock);
nbd_bdev_reset(bdev);
/* user requested, ignore socket errors */
- if (test_bit(NBD_DISCONNECT_REQUESTED, &config->runtime_flags))
+ if (test_bit(NBD_RT_DISCONNECT_REQUESTED, &config->runtime_flags))
ret = 0;
- if (test_bit(NBD_TIMEDOUT, &config->runtime_flags))
+ if (test_bit(NBD_RT_TIMEDOUT, &config->runtime_flags))
ret = -ETIMEDOUT;
return ret;
}
@@ -1269,7 +1287,7 @@ static void nbd_clear_sock_ioctl(struct nbd_device *nbd,
sock_shutdown(nbd);
__invalidate_device(bdev, true);
nbd_bdev_reset(bdev);
- if (test_and_clear_bit(NBD_HAS_CONFIG_REF,
+ if (test_and_clear_bit(NBD_RT_HAS_CONFIG_REF,
&nbd->config->runtime_flags))
nbd_config_put(nbd);
}
@@ -1364,7 +1382,7 @@ static int nbd_ioctl(struct block_device *bdev, fmode_t mode,
/* Don't allow ioctl operations on a nbd device that was created with
* netlink, unless it's DISCONNECT or CLEAR_SOCK, which are fine.
*/
- if (!test_bit(NBD_BOUND, &config->runtime_flags) ||
+ if (!test_bit(NBD_RT_BOUND, &config->runtime_flags) ||
(cmd == NBD_DISCONNECT || cmd == NBD_CLEAR_SOCK))
error = __nbd_ioctl(bdev, nbd, cmd, arg);
else
@@ -1435,7 +1453,7 @@ static void nbd_release(struct gendisk *disk, fmode_t mode)
struct nbd_device *nbd = disk->private_data;
struct block_device *bdev = bdget_disk(disk, 0);
- if (test_bit(NBD_DISCONNECT_ON_CLOSE, &nbd->config->runtime_flags) &&
+ if (test_bit(NBD_RT_DISCONNECT_ON_CLOSE, &nbd->config->runtime_flags) &&
bdev->bd_openers == 0)
nbd_disconnect_and_put(nbd);
@@ -1636,6 +1654,7 @@ static int nbd_dev_add(int index)
nbd->tag_set.flags = BLK_MQ_F_SHOULD_MERGE |
BLK_MQ_F_BLOCKING;
nbd->tag_set.driver_data = nbd;
+ nbd->destroy_complete = NULL;
err = blk_mq_alloc_tag_set(&nbd->tag_set);
if (err)
@@ -1750,6 +1769,7 @@ static int nbd_genl_size_set(struct genl_info *info, struct nbd_device *nbd)
static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info)
{
+ DECLARE_COMPLETION_ONSTACK(destroy_complete);
struct nbd_device *nbd = NULL;
struct nbd_config *config;
int index = -1;
@@ -1801,6 +1821,17 @@ again:
mutex_unlock(&nbd_index_mutex);
return -EINVAL;
}
+
+ if (test_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags) &&
+ test_bit(NBD_DISCONNECT_REQUESTED, &nbd->flags)) {
+ nbd->destroy_complete = &destroy_complete;
+ mutex_unlock(&nbd_index_mutex);
+
+ /* Wait untill the the nbd stuff is totally destroyed */
+ wait_for_completion(&destroy_complete);
+ goto again;
+ }
+
if (!refcount_inc_not_zero(&nbd->refs)) {
mutex_unlock(&nbd_index_mutex);
if (index == -1)
@@ -1833,7 +1864,7 @@ again:
return -ENOMEM;
}
refcount_set(&nbd->config_refs, 1);
- set_bit(NBD_BOUND, &config->runtime_flags);
+ set_bit(NBD_RT_BOUND, &config->runtime_flags);
ret = nbd_genl_size_set(info, nbd);
if (ret)
@@ -1853,12 +1884,15 @@ again:
if (info->attrs[NBD_ATTR_CLIENT_FLAGS]) {
u64 flags = nla_get_u64(info->attrs[NBD_ATTR_CLIENT_FLAGS]);
if (flags & NBD_CFLAG_DESTROY_ON_DISCONNECT) {
- set_bit(NBD_DESTROY_ON_DISCONNECT,
+ set_bit(NBD_RT_DESTROY_ON_DISCONNECT,
&config->runtime_flags);
+ set_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags);
put_dev = true;
+ } else {
+ clear_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags);
}
if (flags & NBD_CFLAG_DISCONNECT_ON_CLOSE) {
- set_bit(NBD_DISCONNECT_ON_CLOSE,
+ set_bit(NBD_RT_DISCONNECT_ON_CLOSE,
&config->runtime_flags);
}
}
@@ -1897,7 +1931,7 @@ again:
out:
mutex_unlock(&nbd->config_lock);
if (!ret) {
- set_bit(NBD_HAS_CONFIG_REF, &config->runtime_flags);
+ set_bit(NBD_RT_HAS_CONFIG_REF, &config->runtime_flags);
refcount_inc(&nbd->config_refs);
nbd_connect_reply(info, nbd->index);
}
@@ -1919,7 +1953,7 @@ static void nbd_disconnect_and_put(struct nbd_device *nbd)
* queue.
*/
flush_workqueue(nbd->recv_workq);
- if (test_and_clear_bit(NBD_HAS_CONFIG_REF,
+ if (test_and_clear_bit(NBD_RT_HAS_CONFIG_REF,
&nbd->config->runtime_flags))
nbd_config_put(nbd);
}
@@ -2003,7 +2037,7 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info)
mutex_lock(&nbd->config_lock);
config = nbd->config;
- if (!test_bit(NBD_BOUND, &config->runtime_flags) ||
+ if (!test_bit(NBD_RT_BOUND, &config->runtime_flags) ||
!nbd->task_recv) {
dev_err(nbd_to_dev(nbd),
"not configured, cannot reconfigure\n");
@@ -2026,20 +2060,22 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info)
if (info->attrs[NBD_ATTR_CLIENT_FLAGS]) {
u64 flags = nla_get_u64(info->attrs[NBD_ATTR_CLIENT_FLAGS]);
if (flags & NBD_CFLAG_DESTROY_ON_DISCONNECT) {
- if (!test_and_set_bit(NBD_DESTROY_ON_DISCONNECT,
+ if (!test_and_set_bit(NBD_RT_DESTROY_ON_DISCONNECT,
&config->runtime_flags))
put_dev = true;
+ set_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags);
} else {
- if (test_and_clear_bit(NBD_DESTROY_ON_DISCONNECT,
+ if (test_and_clear_bit(NBD_RT_DESTROY_ON_DISCONNECT,
&config->runtime_flags))
refcount_inc(&nbd->refs);
+ clear_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags);
}
if (flags & NBD_CFLAG_DISCONNECT_ON_CLOSE) {
- set_bit(NBD_DISCONNECT_ON_CLOSE,
+ set_bit(NBD_RT_DISCONNECT_ON_CLOSE,
&config->runtime_flags);
} else {
- clear_bit(NBD_DISCONNECT_ON_CLOSE,
+ clear_bit(NBD_RT_DISCONNECT_ON_CLOSE,
&config->runtime_flags);
}
}
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 024060165afa..76457003f140 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -2594,7 +2594,6 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev)
if (ret)
return ret;
if (!blk_queue_scsi_passthrough(bdev_get_queue(bdev))) {
- WARN_ONCE(true, "Attempt to register a non-SCSI queue\n");
blkdev_put(bdev, FMODE_READ | FMODE_NDELAY);
return -EINVAL;
}
diff --git a/drivers/crypto/chelsio/chtls/chtls_io.c b/drivers/crypto/chelsio/chtls/chtls_io.c
index c70cb5f272cf..0891ab829b1b 100644
--- a/drivers/crypto/chelsio/chtls/chtls_io.c
+++ b/drivers/crypto/chelsio/chtls/chtls_io.c
@@ -1078,7 +1078,7 @@ new_buf:
bool merge;
if (page)
- pg_size <<= compound_order(page);
+ pg_size = page_size(page);
if (off < pg_size &&
skb_can_coalesce(skb, i, page, off)) {
merge = 1;
@@ -1105,8 +1105,7 @@ new_buf:
__GFP_NORETRY,
order);
if (page)
- pg_size <<=
- compound_order(page);
+ pg_size <<= order;
}
if (!page) {
page = alloc_page(gfp);
diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c
index 98bccace8c1c..3d61c4fb4dec 100644
--- a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c
+++ b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c
@@ -978,10 +978,10 @@ static int adv7511_init_cec_regmap(struct adv7511 *adv)
{
int ret;
- adv->i2c_cec = i2c_new_secondary_device(adv->i2c_main, "cec",
+ adv->i2c_cec = i2c_new_ancillary_device(adv->i2c_main, "cec",
ADV7511_CEC_I2C_ADDR_DEFAULT);
- if (!adv->i2c_cec)
- return -EINVAL;
+ if (IS_ERR(adv->i2c_cec))
+ return PTR_ERR(adv->i2c_cec);
i2c_set_clientdata(adv->i2c_cec, adv);
adv->regmap_cec = devm_regmap_init_i2c(adv->i2c_cec,
@@ -1162,20 +1162,20 @@ static int adv7511_probe(struct i2c_client *i2c, const struct i2c_device_id *id)
adv7511_packet_disable(adv7511, 0xffff);
- adv7511->i2c_edid = i2c_new_secondary_device(i2c, "edid",
+ adv7511->i2c_edid = i2c_new_ancillary_device(i2c, "edid",
ADV7511_EDID_I2C_ADDR_DEFAULT);
- if (!adv7511->i2c_edid) {
- ret = -EINVAL;
+ if (IS_ERR(adv7511->i2c_edid)) {
+ ret = PTR_ERR(adv7511->i2c_edid);
goto uninit_regulators;
}
regmap_write(adv7511->regmap, ADV7511_REG_EDID_I2C_ADDR,
adv7511->i2c_edid->addr << 1);
- adv7511->i2c_packet = i2c_new_secondary_device(i2c, "packet",
+ adv7511->i2c_packet = i2c_new_ancillary_device(i2c, "packet",
ADV7511_PACKET_I2C_ADDR_DEFAULT);
- if (!adv7511->i2c_packet) {
- ret = -EINVAL;
+ if (IS_ERR(adv7511->i2c_packet)) {
+ ret = PTR_ERR(adv7511->i2c_packet);
goto err_i2c_unregister_edid;
}
diff --git a/drivers/gpu/drm/via/via_dmablit.c b/drivers/gpu/drm/via/via_dmablit.c
index feaa538026a0..3db000aacd26 100644
--- a/drivers/gpu/drm/via/via_dmablit.c
+++ b/drivers/gpu/drm/via/via_dmablit.c
@@ -174,7 +174,6 @@ via_map_blit_for_device(struct pci_dev *pdev,
static void
via_free_sg_info(struct pci_dev *pdev, drm_via_sg_info_t *vsg)
{
- struct page *page;
int i;
switch (vsg->state) {
@@ -189,13 +188,8 @@ via_free_sg_info(struct pci_dev *pdev, drm_via_sg_info_t *vsg)
kfree(vsg->desc_pages);
/* fall through */
case dr_via_pages_locked:
- for (i = 0; i < vsg->num_pages; ++i) {
- if (NULL != (page = vsg->pages[i])) {
- if (!PageReserved(page) && (DMA_FROM_DEVICE == vsg->direction))
- SetPageDirty(page);
- put_page(page);
- }
- }
+ put_user_pages_dirty_lock(vsg->pages, vsg->num_pages,
+ (vsg->direction == DMA_FROM_DEVICE));
/* fall through */
case dr_via_pages_alloc:
vfree(vsg->pages);
diff --git a/drivers/hid/hid-hyperv.c b/drivers/hid/hid-hyperv.c
index 7795831d37c2..cc5b09b87ab0 100644
--- a/drivers/hid/hid-hyperv.c
+++ b/drivers/hid/hid-hyperv.c
@@ -104,8 +104,8 @@ struct synthhid_input_report {
#pragma pack(pop)
-#define INPUTVSC_SEND_RING_BUFFER_SIZE (10*PAGE_SIZE)
-#define INPUTVSC_RECV_RING_BUFFER_SIZE (10*PAGE_SIZE)
+#define INPUTVSC_SEND_RING_BUFFER_SIZE (40 * 1024)
+#define INPUTVSC_RECV_RING_BUFFER_SIZE (40 * 1024)
enum pipe_prot_msg_type {
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index addcef50df7a..8eb167540b4f 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -407,7 +407,15 @@ void hv_process_channel_removal(struct vmbus_channel *channel)
cpumask_clear_cpu(channel->target_cpu,
&primary_channel->alloced_cpus_in_node);
- vmbus_release_relid(channel->offermsg.child_relid);
+ /*
+ * Upon suspend, an in-use hv_sock channel is marked as "rescinded" and
+ * the relid is invalidated; after hibernation, when the user-space app
+ * destroys the channel, the relid is INVALID_RELID, and in this case
+ * it's unnecessary and unsafe to release the old relid, since the same
+ * relid can refer to a completely different channel now.
+ */
+ if (channel->offermsg.child_relid != INVALID_RELID)
+ vmbus_release_relid(channel->offermsg.child_relid);
free_channel(channel);
}
@@ -545,6 +553,10 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
mutex_lock(&vmbus_connection.channel_mutex);
+ /* Remember the channels that should be cleaned up upon suspend. */
+ if (is_hvsock_channel(newchannel) || is_sub_channel(newchannel))
+ atomic_inc(&vmbus_connection.nr_chan_close_on_suspend);
+
/*
* Now that we have acquired the channel_mutex,
* we can release the potentially racing rescind thread.
@@ -847,6 +859,67 @@ void vmbus_initiate_unload(bool crash)
vmbus_wait_for_unload();
}
+static void check_ready_for_resume_event(void)
+{
+ /*
+ * If all the old primary channels have been fixed up, then it's safe
+ * to resume.
+ */
+ if (atomic_dec_and_test(&vmbus_connection.nr_chan_fixup_on_resume))
+ complete(&vmbus_connection.ready_for_resume_event);
+}
+
+static void vmbus_setup_channel_state(struct vmbus_channel *channel,
+ struct vmbus_channel_offer_channel *offer)
+{
+ /*
+ * Setup state for signalling the host.
+ */
+ channel->sig_event = VMBUS_EVENT_CONNECTION_ID;
+
+ if (vmbus_proto_version != VERSION_WS2008) {
+ channel->is_dedicated_interrupt =
+ (offer->is_dedicated_interrupt != 0);
+ channel->sig_event = offer->connection_id;
+ }
+
+ memcpy(&channel->offermsg, offer,
+ sizeof(struct vmbus_channel_offer_channel));
+ channel->monitor_grp = (u8)offer->monitorid / 32;
+ channel->monitor_bit = (u8)offer->monitorid % 32;
+}
+
+/*
+ * find_primary_channel_by_offer - Get the channel object given the new offer.
+ * This is only used in the resume path of hibernation.
+ */
+static struct vmbus_channel *
+find_primary_channel_by_offer(const struct vmbus_channel_offer_channel *offer)
+{
+ struct vmbus_channel *channel = NULL, *iter;
+ const guid_t *inst1, *inst2;
+
+ /* Ignore sub-channel offers. */
+ if (offer->offer.sub_channel_index != 0)
+ return NULL;
+
+ mutex_lock(&vmbus_connection.channel_mutex);
+
+ list_for_each_entry(iter, &vmbus_connection.chn_list, listentry) {
+ inst1 = &iter->offermsg.offer.if_instance;
+ inst2 = &offer->offer.if_instance;
+
+ if (guid_equal(inst1, inst2)) {
+ channel = iter;
+ break;
+ }
+ }
+
+ mutex_unlock(&vmbus_connection.channel_mutex);
+
+ return channel;
+}
+
/*
* vmbus_onoffer - Handler for channel offers from vmbus in parent partition.
*
@@ -854,12 +927,58 @@ void vmbus_initiate_unload(bool crash)
static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
{
struct vmbus_channel_offer_channel *offer;
- struct vmbus_channel *newchannel;
+ struct vmbus_channel *oldchannel, *newchannel;
+ size_t offer_sz;
offer = (struct vmbus_channel_offer_channel *)hdr;
trace_vmbus_onoffer(offer);
+ oldchannel = find_primary_channel_by_offer(offer);
+
+ if (oldchannel != NULL) {
+ atomic_dec(&vmbus_connection.offer_in_progress);
+
+ /*
+ * We're resuming from hibernation: all the sub-channel and
+ * hv_sock channels we had before the hibernation should have
+ * been cleaned up, and now we must be seeing a re-offered
+ * primary channel that we had before the hibernation.
+ */
+
+ WARN_ON(oldchannel->offermsg.child_relid != INVALID_RELID);
+ /* Fix up the relid. */
+ oldchannel->offermsg.child_relid = offer->child_relid;
+
+ offer_sz = sizeof(*offer);
+ if (memcmp(offer, &oldchannel->offermsg, offer_sz) == 0) {
+ check_ready_for_resume_event();
+ return;
+ }
+
+ /*
+ * This is not an error, since the host can also change the
+ * other field(s) of the offer, e.g. on WS RS5 (Build 17763),
+ * the offer->connection_id of the Mellanox VF vmbus device
+ * can change when the host reoffers the device upon resume.
+ */
+ pr_debug("vmbus offer changed: relid=%d\n",
+ offer->child_relid);
+
+ print_hex_dump_debug("Old vmbus offer: ", DUMP_PREFIX_OFFSET,
+ 16, 4, &oldchannel->offermsg, offer_sz,
+ false);
+ print_hex_dump_debug("New vmbus offer: ", DUMP_PREFIX_OFFSET,
+ 16, 4, offer, offer_sz, false);
+
+ /* Fix up the old channel. */
+ vmbus_setup_channel_state(oldchannel, offer);
+
+ check_ready_for_resume_event();
+
+ return;
+ }
+
/* Allocate the channel object and save this offer. */
newchannel = alloc_channel();
if (!newchannel) {
@@ -869,25 +988,21 @@ static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
return;
}
- /*
- * Setup state for signalling the host.
- */
- newchannel->sig_event = VMBUS_EVENT_CONNECTION_ID;
-
- if (vmbus_proto_version != VERSION_WS2008) {
- newchannel->is_dedicated_interrupt =
- (offer->is_dedicated_interrupt != 0);
- newchannel->sig_event = offer->connection_id;
- }
-
- memcpy(&newchannel->offermsg, offer,
- sizeof(struct vmbus_channel_offer_channel));
- newchannel->monitor_grp = (u8)offer->monitorid / 32;
- newchannel->monitor_bit = (u8)offer->monitorid % 32;
+ vmbus_setup_channel_state(newchannel, offer);
vmbus_process_offer(newchannel);
}
+static void check_ready_for_suspend_event(void)
+{
+ /*
+ * If all the sub-channels or hv_sock channels have been cleaned up,
+ * then it's safe to suspend.
+ */
+ if (atomic_dec_and_test(&vmbus_connection.nr_chan_close_on_suspend))
+ complete(&vmbus_connection.ready_for_suspend_event);
+}
+
/*
* vmbus_onoffer_rescind - Rescind offer handler.
*
@@ -898,6 +1013,7 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
struct vmbus_channel_rescind_offer *rescind;
struct vmbus_channel *channel;
struct device *dev;
+ bool clean_up_chan_for_suspend;
rescind = (struct vmbus_channel_rescind_offer *)hdr;
@@ -937,6 +1053,8 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
return;
}
+ clean_up_chan_for_suspend = is_hvsock_channel(channel) ||
+ is_sub_channel(channel);
/*
* Before setting channel->rescind in vmbus_rescind_cleanup(), we
* should make sure the channel callback is not running any more.
@@ -962,6 +1080,10 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
if (channel->device_obj) {
if (channel->chn_rescind_callback) {
channel->chn_rescind_callback(channel);
+
+ if (clean_up_chan_for_suspend)
+ check_ready_for_suspend_event();
+
return;
}
/*
@@ -994,6 +1116,11 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
}
mutex_unlock(&vmbus_connection.channel_mutex);
}
+
+ /* The "channel" may have been freed. Do not access it any longer. */
+
+ if (clean_up_chan_for_suspend)
+ check_ready_for_suspend_event();
}
void vmbus_hvsock_device_unregister(struct vmbus_channel *channel)
diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index 09829e15d4a0..6e4c015783ff 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -26,6 +26,11 @@
struct vmbus_connection vmbus_connection = {
.conn_state = DISCONNECTED,
.next_gpadl_handle = ATOMIC_INIT(0xE1E10),
+
+ .ready_for_suspend_event= COMPLETION_INITIALIZER(
+ vmbus_connection.ready_for_suspend_event),
+ .ready_for_resume_event = COMPLETION_INITIALIZER(
+ vmbus_connection.ready_for_resume_event),
};
EXPORT_SYMBOL_GPL(vmbus_connection);
@@ -59,8 +64,7 @@ static __u32 vmbus_get_next_version(__u32 current_version)
}
}
-static int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo,
- __u32 version)
+int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, u32 version)
{
int ret = 0;
unsigned int cur_cpu;
diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
index 6188fb7dda42..fcc52797c169 100644
--- a/drivers/hv/hv.c
+++ b/drivers/hv/hv.c
@@ -154,7 +154,7 @@ void hv_synic_free(void)
* retrieve the initialized message and event pages. Otherwise, we create and
* initialize the message and event pages.
*/
-int hv_synic_init(unsigned int cpu)
+void hv_synic_enable_regs(unsigned int cpu)
{
struct hv_per_cpu_context *hv_cpu
= per_cpu_ptr(hv_context.cpu_context, cpu);
@@ -196,6 +196,11 @@ int hv_synic_init(unsigned int cpu)
sctrl.enable = 1;
hv_set_synic_state(sctrl.as_uint64);
+}
+
+int hv_synic_init(unsigned int cpu)
+{
+ hv_synic_enable_regs(cpu);
hv_stimer_init(cpu);
@@ -205,20 +210,45 @@ int hv_synic_init(unsigned int cpu)
/*
* hv_synic_cleanup - Cleanup routine for hv_synic_init().
*/
-int hv_synic_cleanup(unsigned int cpu)
+void hv_synic_disable_regs(unsigned int cpu)
{
union hv_synic_sint shared_sint;
union hv_synic_simp simp;
union hv_synic_siefp siefp;
union hv_synic_scontrol sctrl;
+
+ hv_get_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
+
+ shared_sint.masked = 1;
+
+ /* Need to correctly cleanup in the case of SMP!!! */
+ /* Disable the interrupt */
+ hv_set_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
+
+ hv_get_simp(simp.as_uint64);
+ simp.simp_enabled = 0;
+ simp.base_simp_gpa = 0;
+
+ hv_set_simp(simp.as_uint64);
+
+ hv_get_siefp(siefp.as_uint64);
+ siefp.siefp_enabled = 0;
+ siefp.base_siefp_gpa = 0;
+
+ hv_set_siefp(siefp.as_uint64);
+
+ /* Disable the global synic bit */
+ hv_get_synic_state(sctrl.as_uint64);
+ sctrl.enable = 0;
+ hv_set_synic_state(sctrl.as_uint64);
+}
+
+int hv_synic_cleanup(unsigned int cpu)
+{
struct vmbus_channel *channel, *sc;
bool channel_found = false;
unsigned long flags;
- hv_get_synic_state(sctrl.as_uint64);
- if (sctrl.enable != 1)
- return -EFAULT;
-
/*
* Search for channels which are bound to the CPU we're about to
* cleanup. In case we find one and vmbus is still connected we need to
@@ -249,29 +279,7 @@ int hv_synic_cleanup(unsigned int cpu)
hv_stimer_cleanup(cpu);
- hv_get_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
-
- shared_sint.masked = 1;
-
- /* Need to correctly cleanup in the case of SMP!!! */
- /* Disable the interrupt */
- hv_set_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
-
- hv_get_simp(simp.as_uint64);
- simp.simp_enabled = 0;
- simp.base_simp_gpa = 0;
-
- hv_set_simp(simp.as_uint64);
-
- hv_get_siefp(siefp.as_uint64);
- siefp.siefp_enabled = 0;
- siefp.base_siefp_gpa = 0;
-
- hv_set_siefp(siefp.as_uint64);
-
- /* Disable the global synic bit */
- sctrl.enable = 0;
- hv_set_synic_state(sctrl.as_uint64);
+ hv_synic_disable_regs(cpu);
return 0;
}
diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
index 6fb4ea5f0304..34bd73526afd 100644
--- a/drivers/hv/hv_balloon.c
+++ b/drivers/hv/hv_balloon.c
@@ -494,7 +494,7 @@ enum hv_dm_state {
static __u8 recv_buffer[PAGE_SIZE];
-static __u8 *send_buffer;
+static __u8 balloon_up_send_buffer[PAGE_SIZE];
#define PAGES_IN_2M 512
#define HA_CHUNK (32 * 1024)
@@ -1292,8 +1292,8 @@ static void balloon_up(struct work_struct *dummy)
}
while (!done) {
- bl_resp = (struct dm_balloon_response *)send_buffer;
- memset(send_buffer, 0, PAGE_SIZE);
+ memset(balloon_up_send_buffer, 0, PAGE_SIZE);
+ bl_resp = (struct dm_balloon_response *)balloon_up_send_buffer;
bl_resp->hdr.type = DM_BALLOON_RESPONSE;
bl_resp->hdr.size = sizeof(struct dm_balloon_response);
bl_resp->more_pages = 1;
@@ -1564,58 +1564,18 @@ static void balloon_onchannelcallback(void *context)
}
-static int balloon_probe(struct hv_device *dev,
- const struct hv_vmbus_device_id *dev_id)
+static int balloon_connect_vsp(struct hv_device *dev)
{
- int ret;
- unsigned long t;
struct dm_version_request version_req;
struct dm_capabilities cap_msg;
-
-#ifdef CONFIG_MEMORY_HOTPLUG
- do_hot_add = hot_add;
-#else
- do_hot_add = false;
-#endif
-
- /*
- * First allocate a send buffer.
- */
-
- send_buffer = kmalloc(PAGE_SIZE, GFP_KERNEL);
- if (!send_buffer)
- return -ENOMEM;
+ unsigned long t;
+ int ret;
ret = vmbus_open(dev->channel, dm_ring_size, dm_ring_size, NULL, 0,
- balloon_onchannelcallback, dev);
-
+ balloon_onchannelcallback, dev);
if (ret)
- goto probe_error0;
+ return ret;
- dm_device.dev = dev;
- dm_device.state = DM_INITIALIZING;
- dm_device.next_version = DYNMEM_PROTOCOL_VERSION_WIN8;
- init_completion(&dm_device.host_event);
- init_completion(&dm_device.config_event);
- INIT_LIST_HEAD(&dm_device.ha_region_list);
- spin_lock_init(&dm_device.ha_lock);
- INIT_WORK(&dm_device.balloon_wrk.wrk, balloon_up);
- INIT_WORK(&dm_device.ha_wrk.wrk, hot_add_req);
- dm_device.host_specified_ha_region = false;
-
- dm_device.thread =
- kthread_run(dm_thread_func, &dm_device, "hv_balloon");
- if (IS_ERR(dm_device.thread)) {
- ret = PTR_ERR(dm_device.thread);
- goto probe_error1;
- }
-
-#ifdef CONFIG_MEMORY_HOTPLUG
- set_online_page_callback(&hv_online_page);
- register_memory_notifier(&hv_memory_nb);
-#endif
-
- hv_set_drvdata(dev, &dm_device);
/*
* Initiate the hand shake with the host and negotiate
* a version that the host can support. We start with the
@@ -1631,16 +1591,15 @@ static int balloon_probe(struct hv_device *dev,
dm_device.version = version_req.version.version;
ret = vmbus_sendpacket(dev->channel, &version_req,
- sizeof(struct dm_version_request),
- (unsigned long)NULL,
- VM_PKT_DATA_INBAND, 0);
+ sizeof(struct dm_version_request),
+ (unsigned long)NULL, VM_PKT_DATA_INBAND, 0);
if (ret)
- goto probe_error2;
+ goto out;
t = wait_for_completion_timeout(&dm_device.host_event, 5*HZ);
if (t == 0) {
ret = -ETIMEDOUT;
- goto probe_error2;
+ goto out;
}
/*
@@ -1648,8 +1607,8 @@ static int balloon_probe(struct hv_device *dev,
* fail the probe function.
*/
if (dm_device.state == DM_INIT_ERROR) {
- ret = -ETIMEDOUT;
- goto probe_error2;
+ ret = -EPROTO;
+ goto out;
}
pr_info("Using Dynamic Memory protocol version %u.%u\n",
@@ -1682,16 +1641,15 @@ static int balloon_probe(struct hv_device *dev,
cap_msg.max_page_number = -1;
ret = vmbus_sendpacket(dev->channel, &cap_msg,
- sizeof(struct dm_capabilities),
- (unsigned long)NULL,
- VM_PKT_DATA_INBAND, 0);
+ sizeof(struct dm_capabilities),
+ (unsigned long)NULL, VM_PKT_DATA_INBAND, 0);
if (ret)
- goto probe_error2;
+ goto out;
t = wait_for_completion_timeout(&dm_device.host_event, 5*HZ);
if (t == 0) {
ret = -ETIMEDOUT;
- goto probe_error2;
+ goto out;
}
/*
@@ -1699,25 +1657,65 @@ static int balloon_probe(struct hv_device *dev,
* fail the probe function.
*/
if (dm_device.state == DM_INIT_ERROR) {
- ret = -ETIMEDOUT;
- goto probe_error2;
+ ret = -EPROTO;
+ goto out;
}
+ return 0;
+out:
+ vmbus_close(dev->channel);
+ return ret;
+}
+
+static int balloon_probe(struct hv_device *dev,
+ const struct hv_vmbus_device_id *dev_id)
+{
+ int ret;
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+ do_hot_add = hot_add;
+#else
+ do_hot_add = false;
+#endif
+ dm_device.dev = dev;
+ dm_device.state = DM_INITIALIZING;
+ dm_device.next_version = DYNMEM_PROTOCOL_VERSION_WIN8;
+ init_completion(&dm_device.host_event);
+ init_completion(&dm_device.config_event);
+ INIT_LIST_HEAD(&dm_device.ha_region_list);
+ spin_lock_init(&dm_device.ha_lock);
+ INIT_WORK(&dm_device.balloon_wrk.wrk, balloon_up);
+ INIT_WORK(&dm_device.ha_wrk.wrk, hot_add_req);
+ dm_device.host_specified_ha_region = false;
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+ set_online_page_callback(&hv_online_page);
+ register_memory_notifier(&hv_memory_nb);
+#endif
+
+ hv_set_drvdata(dev, &dm_device);
+
+ ret = balloon_connect_vsp(dev);
+ if (ret != 0)
+ return ret;
+
dm_device.state = DM_INITIALIZED;
- last_post_time = jiffies;
+
+ dm_device.thread =
+ kthread_run(dm_thread_func, &dm_device, "hv_balloon");
+ if (IS_ERR(dm_device.thread)) {
+ ret = PTR_ERR(dm_device.thread);
+ goto probe_error;
+ }
return 0;
-probe_error2:
+probe_error:
+ vmbus_close(dev->channel);
#ifdef CONFIG_MEMORY_HOTPLUG
+ unregister_memory_notifier(&hv_memory_nb);
restore_online_page_callback(&hv_online_page);
#endif
- kthread_stop(dm_device.thread);
-
-probe_error1:
- vmbus_close(dev->channel);
-probe_error0:
- kfree(send_buffer);
return ret;
}
@@ -1734,12 +1732,11 @@ static int balloon_remove(struct hv_device *dev)
cancel_work_sync(&dm->balloon_wrk.wrk);
cancel_work_sync(&dm->ha_wrk.wrk);
- vmbus_close(dev->channel);
kthread_stop(dm->thread);
- kfree(send_buffer);
+ vmbus_close(dev->channel);
#ifdef CONFIG_MEMORY_HOTPLUG
- restore_online_page_callback(&hv_online_page);
unregister_memory_notifier(&hv_memory_nb);
+ restore_online_page_callback(&hv_online_page);
#endif
spin_lock_irqsave(&dm_device.ha_lock, flags);
list_for_each_entry_safe(has, tmp, &dm->ha_region_list, list) {
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index 50eaa1fd6e45..af9379a3bf89 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -169,8 +169,10 @@ extern int hv_synic_alloc(void);
extern void hv_synic_free(void);
+extern void hv_synic_enable_regs(unsigned int cpu);
extern int hv_synic_init(unsigned int cpu);
+extern void hv_synic_disable_regs(unsigned int cpu);
extern int hv_synic_cleanup(unsigned int cpu);
/* Interface */
@@ -256,6 +258,32 @@ struct vmbus_connection {
struct workqueue_struct *work_queue;
struct workqueue_struct *handle_primary_chan_wq;
struct workqueue_struct *handle_sub_chan_wq;
+
+ /*
+ * The number of sub-channels and hv_sock channels that should be
+ * cleaned up upon suspend: sub-channels will be re-created upon
+ * resume, and hv_sock channels should not survive suspend.
+ */
+ atomic_t nr_chan_close_on_suspend;
+ /*
+ * vmbus_bus_suspend() waits for "nr_chan_close_on_suspend" to
+ * drop to zero.
+ */
+ struct completion ready_for_suspend_event;
+
+ /*
+ * The number of primary channels that should be "fixed up"
+ * upon resume: these channels are re-offered upon resume, and some
+ * fields of the channel offers (i.e. child_relid and connection_id)
+ * can change, so the old offermsg must be fixed up, before the resume
+ * callbacks of the VSC drivers start to further touch the channels.
+ */
+ atomic_t nr_chan_fixup_on_resume;
+ /*
+ * vmbus_bus_resume() waits for "nr_chan_fixup_on_resume" to
+ * drop to zero.
+ */
+ struct completion ready_for_resume_event;
};
@@ -270,6 +298,8 @@ struct vmbus_msginfo {
extern struct vmbus_connection vmbus_connection;
+int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, u32 version);
+
static inline void vmbus_send_interrupt(u32 relid)
{
sync_set_bit(relid, vmbus_connection.send_int_page);
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index ebd35fc35290..391f0b225c9a 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -24,12 +24,14 @@
#include <linux/sched/task_stack.h>
#include <asm/mshyperv.h>
+#include <linux/delay.h>
#include <linux/notifier.h>
#include <linux/ptrace.h>
#include <linux/screen_info.h>
#include <linux/kdebug.h>
#include <linux/efi.h>
#include <linux/random.h>
+#include <linux/syscore_ops.h>
#include <clocksource/hyperv_timer.h>
#include "hyperv_vmbus.h"
@@ -910,6 +912,43 @@ static void vmbus_shutdown(struct device *child_device)
drv->shutdown(dev);
}
+/*
+ * vmbus_suspend - Suspend a vmbus device
+ */
+static int vmbus_suspend(struct device *child_device)
+{
+ struct hv_driver *drv;
+ struct hv_device *dev = device_to_hv_device(child_device);
+
+ /* The device may not be attached yet */
+ if (!child_device->driver)
+ return 0;
+
+ drv = drv_to_hv_drv(child_device->driver);
+ if (!drv->suspend)
+ return -EOPNOTSUPP;
+
+ return drv->suspend(dev);
+}
+
+/*
+ * vmbus_resume - Resume a vmbus device
+ */
+static int vmbus_resume(struct device *child_device)
+{
+ struct hv_driver *drv;
+ struct hv_device *dev = device_to_hv_device(child_device);
+
+ /* The device may not be attached yet */
+ if (!child_device->driver)
+ return 0;
+
+ drv = drv_to_hv_drv(child_device->driver);
+ if (!drv->resume)
+ return -EOPNOTSUPP;
+
+ return drv->resume(dev);
+}
/*
* vmbus_device_release - Final callback release of the vmbus child device
@@ -925,6 +964,14 @@ static void vmbus_device_release(struct device *device)
kfree(hv_dev);
}
+/*
+ * Note: we must use SET_NOIRQ_SYSTEM_SLEEP_PM_OPS rather than
+ * SET_SYSTEM_SLEEP_PM_OPS: see the comment before vmbus_bus_pm.
+ */
+static const struct dev_pm_ops vmbus_pm = {
+ SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(vmbus_suspend, vmbus_resume)
+};
+
/* The one and only one */
static struct bus_type hv_bus = {
.name = "vmbus",
@@ -935,6 +982,7 @@ static struct bus_type hv_bus = {
.uevent = vmbus_uevent,
.dev_groups = vmbus_dev_groups,
.drv_groups = vmbus_drv_groups,
+ .pm = &vmbus_pm,
};
struct onmessage_work_context {
@@ -1022,6 +1070,41 @@ msg_handled:
vmbus_signal_eom(msg, message_type);
}
+/*
+ * Fake RESCIND_CHANNEL messages to clean up hv_sock channels by force for
+ * hibernation, because hv_sock connections can not persist across hibernation.
+ */
+static void vmbus_force_channel_rescinded(struct vmbus_channel *channel)
+{
+ struct onmessage_work_context *ctx;
+ struct vmbus_channel_rescind_offer *rescind;
+
+ WARN_ON(!is_hvsock_channel(channel));
+
+ /*
+ * sizeof(*ctx) is small and the allocation should really not fail,
+ * otherwise the state of the hv_sock connections ends up in limbo.
+ */
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL | __GFP_NOFAIL);
+
+ /*
+ * So far, these are not really used by Linux. Just set them to the
+ * reasonable values conforming to the definitions of the fields.
+ */
+ ctx->msg.header.message_type = 1;
+ ctx->msg.header.payload_size = sizeof(*rescind);
+
+ /* These values are actually used by Linux. */
+ rescind = (struct vmbus_channel_rescind_offer *)ctx->msg.u.payload;
+ rescind->header.msgtype = CHANNELMSG_RESCIND_CHANNELOFFER;
+ rescind->child_relid = channel->offermsg.child_relid;
+
+ INIT_WORK(&ctx->work, vmbus_onmessage_work);
+
+ queue_work_on(vmbus_connection.connect_cpu,
+ vmbus_connection.work_queue,
+ &ctx->work);
+}
/*
* Direct callback for channels using other deferred processing
@@ -2042,6 +2125,129 @@ acpi_walk_err:
return ret_val;
}
+static int vmbus_bus_suspend(struct device *dev)
+{
+ struct vmbus_channel *channel, *sc;
+ unsigned long flags;
+
+ while (atomic_read(&vmbus_connection.offer_in_progress) != 0) {
+ /*
+ * We wait here until the completion of any channel
+ * offers that are currently in progress.
+ */
+ msleep(1);
+ }
+
+ mutex_lock(&vmbus_connection.channel_mutex);
+ list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
+ if (!is_hvsock_channel(channel))
+ continue;
+
+ vmbus_force_channel_rescinded(channel);
+ }
+ mutex_unlock(&vmbus_connection.channel_mutex);
+
+ /*
+ * Wait until all the sub-channels and hv_sock channels have been
+ * cleaned up. Sub-channels should be destroyed upon suspend, otherwise
+ * they would conflict with the new sub-channels that will be created
+ * in the resume path. hv_sock channels should also be destroyed, but
+ * a hv_sock channel of an established hv_sock connection can not be
+ * really destroyed since it may still be referenced by the userspace
+ * application, so we just force the hv_sock channel to be rescinded
+ * by vmbus_force_channel_rescinded(), and the userspace application
+ * will thoroughly destroy the channel after hibernation.
+ *
+ * Note: the counter nr_chan_close_on_suspend may never go above 0 if
+ * the VM has no sub-channel and hv_sock channel, e.g. a 1-vCPU VM.
+ */
+ if (atomic_read(&vmbus_connection.nr_chan_close_on_suspend) > 0)
+ wait_for_completion(&vmbus_connection.ready_for_suspend_event);
+
+ WARN_ON(atomic_read(&vmbus_connection.nr_chan_fixup_on_resume) != 0);
+
+ mutex_lock(&vmbus_connection.channel_mutex);
+
+ list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
+ /*
+ * Invalidate the field. Upon resume, vmbus_onoffer() will fix
+ * up the field, and the other fields (if necessary).
+ */
+ channel->offermsg.child_relid = INVALID_RELID;
+
+ if (is_hvsock_channel(channel)) {
+ if (!channel->rescind) {
+ pr_err("hv_sock channel not rescinded!\n");
+ WARN_ON_ONCE(1);
+ }
+ continue;
+ }
+
+ spin_lock_irqsave(&channel->lock, flags);
+ list_for_each_entry(sc, &channel->sc_list, sc_list) {
+ pr_err("Sub-channel not deleted!\n");
+ WARN_ON_ONCE(1);
+ }
+ spin_unlock_irqrestore(&channel->lock, flags);
+
+ atomic_inc(&vmbus_connection.nr_chan_fixup_on_resume);
+ }
+
+ mutex_unlock(&vmbus_connection.channel_mutex);
+
+ vmbus_initiate_unload(false);
+
+ vmbus_connection.conn_state = DISCONNECTED;
+
+ /* Reset the event for the next resume. */
+ reinit_completion(&vmbus_connection.ready_for_resume_event);
+
+ return 0;
+}
+
+static int vmbus_bus_resume(struct device *dev)
+{
+ struct vmbus_channel_msginfo *msginfo;
+ size_t msgsize;
+ int ret;
+
+ /*
+ * We only use the 'vmbus_proto_version', which was in use before
+ * hibernation, to re-negotiate with the host.
+ */
+ if (vmbus_proto_version == VERSION_INVAL ||
+ vmbus_proto_version == 0) {
+ pr_err("Invalid proto version = 0x%x\n", vmbus_proto_version);
+ return -EINVAL;
+ }
+
+ msgsize = sizeof(*msginfo) +
+ sizeof(struct vmbus_channel_initiate_contact);
+
+ msginfo = kzalloc(msgsize, GFP_KERNEL);
+
+ if (msginfo == NULL)
+ return -ENOMEM;
+
+ ret = vmbus_negotiate_version(msginfo, vmbus_proto_version);
+
+ kfree(msginfo);
+
+ if (ret != 0)
+ return ret;
+
+ WARN_ON(atomic_read(&vmbus_connection.nr_chan_fixup_on_resume) == 0);
+
+ vmbus_request_offers();
+
+ wait_for_completion(&vmbus_connection.ready_for_resume_event);
+
+ /* Reset the event for the next suspend. */
+ reinit_completion(&vmbus_connection.ready_for_suspend_event);
+
+ return 0;
+}
+
static const struct acpi_device_id vmbus_acpi_device_ids[] = {
{"VMBUS", 0},
{"VMBus", 0},
@@ -2049,6 +2255,19 @@ static const struct acpi_device_id vmbus_acpi_device_ids[] = {
};
MODULE_DEVICE_TABLE(acpi, vmbus_acpi_device_ids);
+/*
+ * Note: we must use SET_NOIRQ_SYSTEM_SLEEP_PM_OPS rather than
+ * SET_SYSTEM_SLEEP_PM_OPS, otherwise NIC SR-IOV can not work, because the
+ * "pci_dev_pm_ops" uses the "noirq" callbacks: in the resume path, the
+ * pci "noirq" restore callback runs before "non-noirq" callbacks (see
+ * resume_target_kernel() -> dpm_resume_start(), and hibernation_restore() ->
+ * dpm_resume_end()). This means vmbus_bus_resume() and the pci-hyperv's
+ * resume callback must also run via the "noirq" callbacks.
+ */
+static const struct dev_pm_ops vmbus_bus_pm = {
+ SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(vmbus_bus_suspend, vmbus_bus_resume)
+};
+
static struct acpi_driver vmbus_acpi_driver = {
.name = "vmbus",
.ids = vmbus_acpi_device_ids,
@@ -2056,6 +2275,7 @@ static struct acpi_driver vmbus_acpi_driver = {
.add = vmbus_acpi_add,
.remove = vmbus_acpi_remove,
},
+ .drv.pm = &vmbus_bus_pm,
};
static void hv_kexec_handler(void)
@@ -2086,6 +2306,47 @@ static void hv_crash_handler(struct pt_regs *regs)
hyperv_cleanup();
};
+static int hv_synic_suspend(void)
+{
+ /*
+ * When we reach here, all the non-boot CPUs have been offlined, and
+ * the stimers on them have been unbound in hv_synic_cleanup() ->
+ * hv_stimer_cleanup() -> clockevents_unbind_device().
+ *
+ * hv_synic_suspend() only runs on CPU0 with interrupts disabled. Here
+ * we do not unbind the stimer on CPU0 because: 1) it's unnecessary
+ * because the interrupts remain disabled between syscore_suspend()
+ * and syscore_resume(): see create_image() and resume_target_kernel();
+ * 2) the stimer on CPU0 is automatically disabled later by
+ * syscore_suspend() -> timekeeping_suspend() -> tick_suspend() -> ...
+ * -> clockevents_shutdown() -> ... -> hv_ce_shutdown(); 3) a warning
+ * would be triggered if we call clockevents_unbind_device(), which
+ * may sleep, in an interrupts-disabled context. So, we intentionally
+ * don't call hv_stimer_cleanup(0) here.
+ */
+
+ hv_synic_disable_regs(0);
+
+ return 0;
+}
+
+static void hv_synic_resume(void)
+{
+ hv_synic_enable_regs(0);
+
+ /*
+ * Note: we don't need to call hv_stimer_init(0), because the timer
+ * on CPU0 is not unbound in hv_synic_suspend(), and the timer is
+ * automatically re-enabled in timekeeping_resume().
+ */
+}
+
+/* The callbacks run only on CPU0, with irqs_disabled. */
+static struct syscore_ops hv_synic_syscore_ops = {
+ .suspend = hv_synic_suspend,
+ .resume = hv_synic_resume,
+};
+
static int __init hv_acpi_init(void)
{
int ret, t;
@@ -2116,6 +2377,8 @@ static int __init hv_acpi_init(void)
hv_setup_kexec_handler(hv_kexec_handler);
hv_setup_crash_handler(hv_crash_handler);
+ register_syscore_ops(&hv_synic_syscore_ops);
+
return 0;
cleanup:
@@ -2128,6 +2391,8 @@ static void __exit vmbus_exit(void)
{
int cpu;
+ unregister_syscore_ops(&hv_synic_syscore_ops);
+
hv_remove_kexec_handler();
hv_remove_crash_handler();
vmbus_connection.conn_state = DISCONNECTED;
diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig
index 5587215b8ddb..146ce40d8e0a 100644
--- a/drivers/i2c/busses/Kconfig
+++ b/drivers/i2c/busses/Kconfig
@@ -429,6 +429,7 @@ config I2C_AXXIA
tristate "Axxia I2C controller"
depends on ARCH_AXXIA || COMPILE_TEST
default ARCH_AXXIA
+ select I2C_SLAVE
help
Say yes if you want to support the I2C bus on Axxia platforms.
@@ -977,7 +978,7 @@ config I2C_SIRF
will be called i2c-sirf.
config I2C_SPRD
- bool "Spreadtrum I2C interface"
+ tristate "Spreadtrum I2C interface"
depends on I2C=y && ARCH_SPRD
help
If you say yes to this option, support will be included for the
@@ -1309,6 +1310,20 @@ config I2C_ELEKTOR
This support is also available as a module. If so, the module
will be called i2c-elektor.
+config I2C_ICY
+ tristate "ICY Zorro card"
+ depends on ZORRO
+ select I2C_ALGOPCF
+ help
+ This supports the PCF8584 Zorro bus I2C adapter, known as ICY.
+ Say Y if you own such an adapter.
+
+ This support is also available as a module. If so, the module
+ will be called i2c-icy.
+
+ If you have a 2019 edition board with an LTC2990 sensor at address
+ 0x4c, loading the module 'ltc2990' is sufficient to enable it.
+
config I2C_MLXCPLD
tristate "Mellanox I2C driver"
depends on X86_64
diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile
index 80c23895eaaf..3ab8aebc39c9 100644
--- a/drivers/i2c/busses/Makefile
+++ b/drivers/i2c/busses/Makefile
@@ -140,6 +140,7 @@ obj-$(CONFIG_I2C_BCM_KONA) += i2c-bcm-kona.o
obj-$(CONFIG_I2C_BRCMSTB) += i2c-brcmstb.o
obj-$(CONFIG_I2C_CROS_EC_TUNNEL) += i2c-cros-ec-tunnel.o
obj-$(CONFIG_I2C_ELEKTOR) += i2c-elektor.o
+obj-$(CONFIG_I2C_ICY) += i2c-icy.o
obj-$(CONFIG_I2C_MLXCPLD) += i2c-mlxcpld.o
obj-$(CONFIG_I2C_OPAL) += i2c-opal.o
obj-$(CONFIG_I2C_PCA_ISA) += i2c-pca-isa.o
diff --git a/drivers/i2c/busses/i2c-axxia.c b/drivers/i2c/busses/i2c-axxia.c
index ff3142b15cab..0214daa913ff 100644
--- a/drivers/i2c/busses/i2c-axxia.c
+++ b/drivers/i2c/busses/i2c-axxia.c
@@ -77,6 +77,40 @@
MST_STATUS_IP)
#define MST_TX_BYTES_XFRD 0x50
#define MST_RX_BYTES_XFRD 0x54
+#define SLV_ADDR_DEC_CTL 0x58
+#define SLV_ADDR_DEC_GCE BIT(0) /* ACK to General Call Address from own master (loopback) */
+#define SLV_ADDR_DEC_OGCE BIT(1) /* ACK to General Call Address from external masters */
+#define SLV_ADDR_DEC_SA1E BIT(2) /* ACK to addr_1 enabled */
+#define SLV_ADDR_DEC_SA1M BIT(3) /* 10-bit addressing for addr_1 enabled */
+#define SLV_ADDR_DEC_SA2E BIT(4) /* ACK to addr_2 enabled */
+#define SLV_ADDR_DEC_SA2M BIT(5) /* 10-bit addressing for addr_2 enabled */
+#define SLV_ADDR_1 0x5c
+#define SLV_ADDR_2 0x60
+#define SLV_RX_CTL 0x64
+#define SLV_RX_ACSA1 BIT(0) /* Generate ACK for writes to addr_1 */
+#define SLV_RX_ACSA2 BIT(1) /* Generate ACK for writes to addr_2 */
+#define SLV_RX_ACGCA BIT(2) /* ACK data phase transfers to General Call Address */
+#define SLV_DATA 0x68
+#define SLV_RX_FIFO 0x6c
+#define SLV_FIFO_DV1 BIT(0) /* Data Valid for addr_1 */
+#define SLV_FIFO_DV2 BIT(1) /* Data Valid for addr_2 */
+#define SLV_FIFO_AS BIT(2) /* (N)ACK Sent */
+#define SLV_FIFO_TNAK BIT(3) /* Timeout NACK */
+#define SLV_FIFO_STRC BIT(4) /* First byte after start condition received */
+#define SLV_FIFO_RSC BIT(5) /* Repeated Start Condition */
+#define SLV_FIFO_STPC BIT(6) /* Stop Condition */
+#define SLV_FIFO_DV (SLV_FIFO_DV1 | SLV_FIFO_DV2)
+#define SLV_INT_ENABLE 0x70
+#define SLV_INT_STATUS 0x74
+#define SLV_STATUS_RFH BIT(0) /* FIFO service */
+#define SLV_STATUS_WTC BIT(1) /* Write transfer complete */
+#define SLV_STATUS_SRS1 BIT(2) /* Slave read from addr 1 */
+#define SLV_STATUS_SRRS1 BIT(3) /* Repeated start from addr 1 */
+#define SLV_STATUS_SRND1 BIT(4) /* Read request not following start condition */
+#define SLV_STATUS_SRC1 BIT(5) /* Read canceled */
+#define SLV_STATUS_SRAT1 BIT(6) /* Slave Read timed out */
+#define SLV_STATUS_SRDRE1 BIT(7) /* Data written after timed out */
+#define SLV_READ_DUMMY 0x78
#define SCL_HIGH_PERIOD 0x80
#define SCL_LOW_PERIOD 0x84
#define SPIKE_FLTR_LEN 0x88
@@ -111,6 +145,8 @@ struct axxia_i2c_dev {
struct clk *i2c_clk;
u32 bus_clk_rate;
bool last;
+ struct i2c_client *slave;
+ int irq;
};
static void i2c_int_disable(struct axxia_i2c_dev *idev, u32 mask)
@@ -276,13 +312,65 @@ static int axxia_i2c_fill_tx_fifo(struct axxia_i2c_dev *idev)
return ret;
}
+static void axxia_i2c_slv_fifo_event(struct axxia_i2c_dev *idev)
+{
+ u32 fifo_status = readl(idev->base + SLV_RX_FIFO);
+ u8 val;
+
+ dev_dbg(idev->dev, "slave irq fifo_status=0x%x\n", fifo_status);
+
+ if (fifo_status & SLV_FIFO_DV1) {
+ if (fifo_status & SLV_FIFO_STRC)
+ i2c_slave_event(idev->slave,
+ I2C_SLAVE_WRITE_REQUESTED, &val);
+
+ val = readl(idev->base + SLV_DATA);
+ i2c_slave_event(idev->slave, I2C_SLAVE_WRITE_RECEIVED, &val);
+ }
+ if (fifo_status & SLV_FIFO_STPC) {
+ readl(idev->base + SLV_DATA); /* dummy read */
+ i2c_slave_event(idev->slave, I2C_SLAVE_STOP, &val);
+ }
+ if (fifo_status & SLV_FIFO_RSC)
+ readl(idev->base + SLV_DATA); /* dummy read */
+}
+
+static irqreturn_t axxia_i2c_slv_isr(struct axxia_i2c_dev *idev)
+{
+ u32 status = readl(idev->base + SLV_INT_STATUS);
+ u8 val;
+
+ dev_dbg(idev->dev, "slave irq status=0x%x\n", status);
+
+ if (status & SLV_STATUS_RFH)
+ axxia_i2c_slv_fifo_event(idev);
+ if (status & SLV_STATUS_SRS1) {
+ i2c_slave_event(idev->slave, I2C_SLAVE_READ_REQUESTED, &val);
+ writel(val, idev->base + SLV_DATA);
+ }
+ if (status & SLV_STATUS_SRND1) {
+ i2c_slave_event(idev->slave, I2C_SLAVE_READ_PROCESSED, &val);
+ writel(val, idev->base + SLV_DATA);
+ }
+ if (status & SLV_STATUS_SRC1)
+ i2c_slave_event(idev->slave, I2C_SLAVE_STOP, &val);
+
+ writel(INT_SLV, idev->base + INTERRUPT_STATUS);
+ return IRQ_HANDLED;
+}
+
static irqreturn_t axxia_i2c_isr(int irq, void *_dev)
{
struct axxia_i2c_dev *idev = _dev;
+ irqreturn_t ret = IRQ_NONE;
u32 status;
- if (!(readl(idev->base + INTERRUPT_STATUS) & INT_MST))
- return IRQ_NONE;
+ status = readl(idev->base + INTERRUPT_STATUS);
+
+ if (status & INT_SLV)
+ ret = axxia_i2c_slv_isr(idev);
+ if (!(status & INT_MST))
+ return ret;
/* Read interrupt status bits */
status = readl(idev->base + MST_INT_STATUS);
@@ -583,9 +671,58 @@ static u32 axxia_i2c_func(struct i2c_adapter *adap)
return caps;
}
+static int axxia_i2c_reg_slave(struct i2c_client *slave)
+{
+ struct axxia_i2c_dev *idev = i2c_get_adapdata(slave->adapter);
+ u32 slv_int_mask = SLV_STATUS_RFH;
+ u32 dec_ctl;
+
+ if (idev->slave)
+ return -EBUSY;
+
+ idev->slave = slave;
+
+ /* Enable slave mode as well */
+ writel(GLOBAL_MST_EN | GLOBAL_SLV_EN, idev->base + GLOBAL_CONTROL);
+ writel(INT_MST | INT_SLV, idev->base + INTERRUPT_ENABLE);
+
+ /* Set slave address */
+ dec_ctl = SLV_ADDR_DEC_SA1E;
+ if (slave->flags & I2C_CLIENT_TEN)
+ dec_ctl |= SLV_ADDR_DEC_SA1M;
+
+ writel(SLV_RX_ACSA1, idev->base + SLV_RX_CTL);
+ writel(dec_ctl, idev->base + SLV_ADDR_DEC_CTL);
+ writel(slave->addr, idev->base + SLV_ADDR_1);
+
+ /* Enable interrupts */
+ slv_int_mask |= SLV_STATUS_SRS1 | SLV_STATUS_SRRS1 | SLV_STATUS_SRND1;
+ slv_int_mask |= SLV_STATUS_SRC1;
+ writel(slv_int_mask, idev->base + SLV_INT_ENABLE);
+
+ return 0;
+}
+
+static int axxia_i2c_unreg_slave(struct i2c_client *slave)
+{
+ struct axxia_i2c_dev *idev = i2c_get_adapdata(slave->adapter);
+
+ /* Disable slave mode */
+ writel(GLOBAL_MST_EN, idev->base + GLOBAL_CONTROL);
+ writel(INT_MST, idev->base + INTERRUPT_ENABLE);
+
+ synchronize_irq(idev->irq);
+
+ idev->slave = NULL;
+
+ return 0;
+}
+
static const struct i2c_algorithm axxia_i2c_algo = {
.master_xfer = axxia_i2c_xfer,
.functionality = axxia_i2c_func,
+ .reg_slave = axxia_i2c_reg_slave,
+ .unreg_slave = axxia_i2c_unreg_slave,
};
static const struct i2c_adapter_quirks axxia_i2c_quirks = {
@@ -599,7 +736,6 @@ static int axxia_i2c_probe(struct platform_device *pdev)
struct axxia_i2c_dev *idev = NULL;
struct resource *res;
void __iomem *base;
- int irq;
int ret = 0;
idev = devm_kzalloc(&pdev->dev, sizeof(*idev), GFP_KERNEL);
@@ -611,10 +747,10 @@ static int axxia_i2c_probe(struct platform_device *pdev)
if (IS_ERR(base))
return PTR_ERR(base);
- irq = platform_get_irq(pdev, 0);
- if (irq < 0) {
+ idev->irq = platform_get_irq(pdev, 0);
+ if (idev->irq < 0) {
dev_err(&pdev->dev, "missing interrupt resource\n");
- return irq;
+ return idev->irq;
}
idev->i2c_clk = devm_clk_get(&pdev->dev, "i2c");
@@ -643,10 +779,10 @@ static int axxia_i2c_probe(struct platform_device *pdev)
goto error_disable_clk;
}
- ret = devm_request_irq(&pdev->dev, irq, axxia_i2c_isr, 0,
+ ret = devm_request_irq(&pdev->dev, idev->irq, axxia_i2c_isr, 0,
pdev->name, idev);
if (ret) {
- dev_err(&pdev->dev, "failed to claim IRQ%d\n", irq);
+ dev_err(&pdev->dev, "failed to claim IRQ%d\n", idev->irq);
goto error_disable_clk;
}
diff --git a/drivers/i2c/busses/i2c-bcm-iproc.c b/drivers/i2c/busses/i2c-bcm-iproc.c
index 19ef2b0c682a..9ffdffaf6141 100644
--- a/drivers/i2c/busses/i2c-bcm-iproc.c
+++ b/drivers/i2c/busses/i2c-bcm-iproc.c
@@ -808,7 +808,7 @@ static struct i2c_algorithm bcm_iproc_algo = {
.unreg_slave = bcm_iproc_i2c_unreg_slave,
};
-static struct i2c_adapter_quirks bcm_iproc_i2c_quirks = {
+static const struct i2c_adapter_quirks bcm_iproc_i2c_quirks = {
.max_read_len = M_RX_MAX_READ_LEN,
};
@@ -922,7 +922,9 @@ static int bcm_iproc_i2c_probe(struct platform_device *pdev)
adap = &iproc_i2c->adapter;
i2c_set_adapdata(adap, iproc_i2c);
- strlcpy(adap->name, "Broadcom iProc I2C adapter", sizeof(adap->name));
+ snprintf(adap->name, sizeof(adap->name),
+ "Broadcom iProc (%s)",
+ of_node_full_name(iproc_i2c->device->of_node));
adap->algo = &bcm_iproc_algo;
adap->quirks = &bcm_iproc_i2c_quirks;
adap->dev.parent = &pdev->dev;
diff --git a/drivers/i2c/busses/i2c-bcm2835.c b/drivers/i2c/busses/i2c-bcm2835.c
index 67752f7b0371..e01b2b57e724 100644
--- a/drivers/i2c/busses/i2c-bcm2835.c
+++ b/drivers/i2c/busses/i2c-bcm2835.c
@@ -12,6 +12,7 @@
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/module.h>
+#include <linux/of_device.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
@@ -389,7 +390,7 @@ static const struct i2c_algorithm bcm2835_i2c_algo = {
};
/*
- * This HW was reported to have problems with clock stretching:
+ * The BCM2835 was reported to have problems with clock stretching:
* http://www.advamation.com/knowhow/raspberrypi/rpi-i2c-bug.html
* https://www.raspberrypi.org/forums/viewtopic.php?p=146272
*/
@@ -471,11 +472,12 @@ static int bcm2835_i2c_probe(struct platform_device *pdev)
i2c_set_adapdata(adap, i2c_dev);
adap->owner = THIS_MODULE;
adap->class = I2C_CLASS_DEPRECATED;
- strlcpy(adap->name, "bcm2835 I2C adapter", sizeof(adap->name));
+ snprintf(adap->name, sizeof(adap->name), "bcm2835 (%s)",
+ of_node_full_name(pdev->dev.of_node));
adap->algo = &bcm2835_i2c_algo;
adap->dev.parent = &pdev->dev;
adap->dev.of_node = pdev->dev.of_node;
- adap->quirks = &bcm2835_i2c_quirks;
+ adap->quirks = of_device_get_match_data(&pdev->dev);
bcm2835_i2c_writel(i2c_dev, BCM2835_I2C_C, 0);
@@ -501,7 +503,8 @@ static int bcm2835_i2c_remove(struct platform_device *pdev)
}
static const struct of_device_id bcm2835_i2c_of_match[] = {
- { .compatible = "brcm,bcm2835-i2c" },
+ { .compatible = "brcm,bcm2711-i2c" },
+ { .compatible = "brcm,bcm2835-i2c", .data = &bcm2835_i2c_quirks },
{},
};
MODULE_DEVICE_TABLE(of, bcm2835_i2c_of_match);
diff --git a/drivers/i2c/busses/i2c-cht-wc.c b/drivers/i2c/busses/i2c-cht-wc.c
index 66af44bfa67d..b8fde61bb5d8 100644
--- a/drivers/i2c/busses/i2c-cht-wc.c
+++ b/drivers/i2c/busses/i2c-cht-wc.c
@@ -178,6 +178,51 @@ static const struct i2c_algorithm cht_wc_i2c_adap_algo = {
.smbus_xfer = cht_wc_i2c_adap_smbus_xfer,
};
+/*
+ * We are an i2c-adapter which itself is part of an i2c-client. This means that
+ * transfers done through us take adapter->bus_lock twice, once for our parent
+ * i2c-adapter and once to take our own bus_lock. Lockdep does not like this
+ * nested locking, to make lockdep happy in the case of busses with muxes, the
+ * i2c-core's i2c_adapter_lock_bus function calls:
+ * rt_mutex_lock_nested(&adapter->bus_lock, i2c_adapter_depth(adapter));
+ *
+ * But i2c_adapter_depth only works when the direct parent of the adapter is
+ * another adapter, as it is only meant for muxes. In our case there is an
+ * i2c-client and MFD instantiated platform_device in the parent->child chain
+ * between the 2 devices.
+ *
+ * So we override the default i2c_lock_operations and pass a hardcoded
+ * depth of 1 to rt_mutex_lock_nested, to make lockdep happy.
+ *
+ * Note that if there were to be a mux attached to our adapter, this would
+ * break things again since the i2c-mux code expects the root-adapter to have
+ * a locking depth of 0. But we always have only 1 client directly attached
+ * in the form of the Charger IC paired with the CHT Whiskey Cove PMIC.
+ */
+static void cht_wc_i2c_adap_lock_bus(struct i2c_adapter *adapter,
+ unsigned int flags)
+{
+ rt_mutex_lock_nested(&adapter->bus_lock, 1);
+}
+
+static int cht_wc_i2c_adap_trylock_bus(struct i2c_adapter *adapter,
+ unsigned int flags)
+{
+ return rt_mutex_trylock(&adapter->bus_lock);
+}
+
+static void cht_wc_i2c_adap_unlock_bus(struct i2c_adapter *adapter,
+ unsigned int flags)
+{
+ rt_mutex_unlock(&adapter->bus_lock);
+}
+
+static const struct i2c_lock_operations cht_wc_i2c_adap_lock_ops = {
+ .lock_bus = cht_wc_i2c_adap_lock_bus,
+ .trylock_bus = cht_wc_i2c_adap_trylock_bus,
+ .unlock_bus = cht_wc_i2c_adap_unlock_bus,
+};
+
/**** irqchip for the client connected to the extchgr i2c adapter ****/
static void cht_wc_i2c_irq_lock(struct irq_data *data)
{
@@ -286,6 +331,7 @@ static int cht_wc_i2c_adap_i2c_probe(struct platform_device *pdev)
adap->adapter.owner = THIS_MODULE;
adap->adapter.class = I2C_CLASS_HWMON;
adap->adapter.algo = &cht_wc_i2c_adap_algo;
+ adap->adapter.lock_ops = &cht_wc_i2c_adap_lock_ops;
strlcpy(adap->adapter.name, "PMIC I2C Adapter",
sizeof(adap->adapter.name));
adap->adapter.dev.parent = &pdev->dev;
@@ -363,8 +409,7 @@ static int cht_wc_i2c_adap_i2c_remove(struct platform_device *pdev)
{
struct cht_wc_i2c_adap *adap = platform_get_drvdata(pdev);
- if (adap->client)
- i2c_unregister_device(adap->client);
+ i2c_unregister_device(adap->client);
i2c_del_adapter(&adap->adapter);
irq_domain_remove(adap->irq_domain);
diff --git a/drivers/i2c/busses/i2c-designware-master.c b/drivers/i2c/busses/i2c-designware-master.c
index d464799e40a3..e8b328242256 100644
--- a/drivers/i2c/busses/i2c-designware-master.c
+++ b/drivers/i2c/busses/i2c-designware-master.c
@@ -655,15 +655,11 @@ static int i2c_dw_init_recovery_info(struct dw_i2c_dev *dev)
struct i2c_bus_recovery_info *rinfo = &dev->rinfo;
struct i2c_adapter *adap = &dev->adapter;
struct gpio_desc *gpio;
- int r;
-
- gpio = devm_gpiod_get(dev->dev, "scl", GPIOD_OUT_HIGH);
- if (IS_ERR(gpio)) {
- r = PTR_ERR(gpio);
- if (r == -ENOENT || r == -ENOSYS)
- return 0;
- return r;
- }
+
+ gpio = devm_gpiod_get_optional(dev->dev, "scl", GPIOD_OUT_HIGH);
+ if (IS_ERR_OR_NULL(gpio))
+ return PTR_ERR_OR_ZERO(gpio);
+
rinfo->scl_gpiod = gpio;
gpio = devm_gpiod_get_optional(dev->dev, "sda", GPIOD_IN);
diff --git a/drivers/i2c/busses/i2c-designware-pcidrv.c b/drivers/i2c/busses/i2c-designware-pcidrv.c
index 76810deb2de6..050adda7c1bd 100644
--- a/drivers/i2c/busses/i2c-designware-pcidrv.c
+++ b/drivers/i2c/busses/i2c-designware-pcidrv.c
@@ -33,6 +33,7 @@ enum dw_pci_ctl_id_t {
baytrail,
cherrytrail,
haswell,
+ elkhartlake,
};
struct dw_scl_sda_cfg {
@@ -168,13 +169,20 @@ static struct dw_pci_controller dw_pci_controllers[] = {
.flags = MODEL_CHERRYTRAIL,
.scl_sda_cfg = &byt_config,
},
+ [elkhartlake] = {
+ .bus_num = -1,
+ .bus_cfg = INTEL_MID_STD_CFG | DW_IC_CON_SPEED_FAST,
+ .tx_fifo_depth = 32,
+ .rx_fifo_depth = 32,
+ .functionality = I2C_FUNC_10BIT_ADDR,
+ .clk_khz = 100000,
+ },
};
#ifdef CONFIG_PM
static int i2c_dw_pci_suspend(struct device *dev)
{
- struct pci_dev *pdev = to_pci_dev(dev);
- struct dw_i2c_dev *i_dev = pci_get_drvdata(pdev);
+ struct dw_i2c_dev *i_dev = dev_get_drvdata(dev);
i_dev->suspended = true;
i_dev->disable(i_dev);
@@ -184,8 +192,7 @@ static int i2c_dw_pci_suspend(struct device *dev)
static int i2c_dw_pci_resume(struct device *dev)
{
- struct pci_dev *pdev = to_pci_dev(dev);
- struct dw_i2c_dev *i_dev = pci_get_drvdata(pdev);
+ struct dw_i2c_dev *i_dev = dev_get_drvdata(dev);
int ret;
ret = i_dev->init(i_dev);
@@ -227,6 +234,8 @@ static int i2c_dw_pci_probe(struct pci_dev *pdev,
return r;
}
+ pci_set_master(pdev);
+
r = pcim_iomap_regions(pdev, 1 << 0, pci_name(pdev));
if (r) {
dev_err(&pdev->dev, "I/O memory remapping failed\n");
@@ -237,18 +246,24 @@ static int i2c_dw_pci_probe(struct pci_dev *pdev,
if (!dev)
return -ENOMEM;
+ r = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_ALL_TYPES);
+ if (r < 0)
+ return r;
+
dev->clk = NULL;
dev->controller = controller;
dev->get_clk_rate_khz = i2c_dw_get_clk_rate_khz;
dev->base = pcim_iomap_table(pdev)[0];
dev->dev = &pdev->dev;
- dev->irq = pdev->irq;
+ dev->irq = pci_irq_vector(pdev, 0);
dev->flags |= controller->flags;
if (controller->setup) {
r = controller->setup(pdev, controller);
- if (r)
+ if (r) {
+ pci_free_irq_vectors(pdev);
return r;
+ }
}
dev->functionality = controller->functionality |
@@ -276,8 +291,10 @@ static int i2c_dw_pci_probe(struct pci_dev *pdev,
adap->nr = controller->bus_num;
r = i2c_dw_probe(dev);
- if (r)
+ if (r) {
+ pci_free_irq_vectors(pdev);
return r;
+ }
pm_runtime_set_autosuspend_delay(&pdev->dev, 1000);
pm_runtime_use_autosuspend(&pdev->dev);
@@ -296,6 +313,7 @@ static void i2c_dw_pci_remove(struct pci_dev *pdev)
pm_runtime_get_noresume(&pdev->dev);
i2c_del_adapter(&dev->adapter);
+ pci_free_irq_vectors(pdev);
}
/* work with hotplug and coldplug */
@@ -331,6 +349,15 @@ static const struct pci_device_id i2_designware_pci_ids[] = {
{ PCI_VDEVICE(INTEL, 0x22C5), cherrytrail },
{ PCI_VDEVICE(INTEL, 0x22C6), cherrytrail },
{ PCI_VDEVICE(INTEL, 0x22C7), cherrytrail },
+ /* Elkhart Lake (PSE I2C) */
+ { PCI_VDEVICE(INTEL, 0x4bb9), elkhartlake },
+ { PCI_VDEVICE(INTEL, 0x4bba), elkhartlake },
+ { PCI_VDEVICE(INTEL, 0x4bbb), elkhartlake },
+ { PCI_VDEVICE(INTEL, 0x4bbc), elkhartlake },
+ { PCI_VDEVICE(INTEL, 0x4bbd), elkhartlake },
+ { PCI_VDEVICE(INTEL, 0x4bbe), elkhartlake },
+ { PCI_VDEVICE(INTEL, 0x4bbf), elkhartlake },
+ { PCI_VDEVICE(INTEL, 0x4bc0), elkhartlake },
{ 0,}
};
MODULE_DEVICE_TABLE(pci, i2_designware_pci_ids);
diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c
index ddfb81872906..16dd338877d0 100644
--- a/drivers/i2c/busses/i2c-designware-platdrv.c
+++ b/drivers/i2c/busses/i2c-designware-platdrv.c
@@ -279,12 +279,10 @@ static int dw_i2c_plat_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, dev);
dev->rst = devm_reset_control_get_optional_exclusive(&pdev->dev, NULL);
- if (IS_ERR(dev->rst)) {
- if (PTR_ERR(dev->rst) == -EPROBE_DEFER)
- return -EPROBE_DEFER;
- } else {
- reset_control_deassert(dev->rst);
- }
+ if (IS_ERR(dev->rst))
+ return PTR_ERR(dev->rst);
+
+ reset_control_deassert(dev->rst);
t = &dev->timings;
if (pdata)
@@ -346,8 +344,10 @@ static int dw_i2c_plat_probe(struct platform_device *pdev)
/* Optional interface clock */
dev->pclk = devm_clk_get_optional(&pdev->dev, "pclk");
- if (IS_ERR(dev->pclk))
- return PTR_ERR(dev->pclk);
+ if (IS_ERR(dev->pclk)) {
+ ret = PTR_ERR(dev->pclk);
+ goto exit_reset;
+ }
dev->clk = devm_clk_get(&pdev->dev, NULL);
if (!i2c_dw_prepare_clk(dev, true)) {
@@ -400,8 +400,7 @@ static int dw_i2c_plat_probe(struct platform_device *pdev)
exit_probe:
dw_i2c_plat_pm_cleanup(dev);
exit_reset:
- if (!IS_ERR_OR_NULL(dev->rst))
- reset_control_assert(dev->rst);
+ reset_control_assert(dev->rst);
return ret;
}
@@ -419,8 +418,7 @@ static int dw_i2c_plat_remove(struct platform_device *pdev)
pm_runtime_put_sync(&pdev->dev);
dw_i2c_plat_pm_cleanup(dev);
- if (!IS_ERR_OR_NULL(dev->rst))
- reset_control_assert(dev->rst);
+ reset_control_assert(dev->rst);
return 0;
}
diff --git a/drivers/i2c/busses/i2c-exynos5.c b/drivers/i2c/busses/i2c-exynos5.c
index e4e7932f7800..e7514c16b756 100644
--- a/drivers/i2c/busses/i2c-exynos5.c
+++ b/drivers/i2c/busses/i2c-exynos5.c
@@ -791,9 +791,7 @@ static int exynos5_i2c_probe(struct platform_device *pdev)
}
ret = devm_request_irq(&pdev->dev, i2c->irq, exynos5_i2c_irq,
- IRQF_NO_SUSPEND | IRQF_ONESHOT,
- dev_name(&pdev->dev), i2c);
-
+ IRQF_NO_SUSPEND, dev_name(&pdev->dev), i2c);
if (ret != 0) {
dev_err(&pdev->dev, "cannot request HS-I2C IRQ %d\n", i2c->irq);
goto err_clk;
diff --git a/drivers/i2c/busses/i2c-fsi.c b/drivers/i2c/busses/i2c-fsi.c
index da5eb3960def..e0c256922d4f 100644
--- a/drivers/i2c/busses/i2c-fsi.c
+++ b/drivers/i2c/busses/i2c-fsi.c
@@ -707,8 +707,10 @@ static int fsi_i2c_probe(struct device *dev)
continue;
port = kzalloc(sizeof(*port), GFP_KERNEL);
- if (!port)
+ if (!port) {
+ of_node_put(np);
break;
+ }
port->master = i2c;
port->port = port_no;
diff --git a/drivers/i2c/busses/i2c-hix5hd2.c b/drivers/i2c/busses/i2c-hix5hd2.c
index 4df1434b3597..8497c7a95dd4 100644
--- a/drivers/i2c/busses/i2c-hix5hd2.c
+++ b/drivers/i2c/busses/i2c-hix5hd2.c
@@ -445,8 +445,7 @@ static int hix5hd2_i2c_probe(struct platform_device *pdev)
hix5hd2_i2c_init(priv);
ret = devm_request_irq(&pdev->dev, irq, hix5hd2_i2c_irq,
- IRQF_NO_SUSPEND | IRQF_ONESHOT,
- dev_name(&pdev->dev), priv);
+ IRQF_NO_SUSPEND, dev_name(&pdev->dev), priv);
if (ret != 0) {
dev_err(&pdev->dev, "cannot request HS-I2C IRQ %d\n", irq);
goto err_clk;
diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c
index 36e9559f880c..c09791fb4929 100644
--- a/drivers/i2c/busses/i2c-i801.c
+++ b/drivers/i2c/busses/i2c-i801.c
@@ -292,7 +292,8 @@ struct i801_priv {
#define FEATURE_HOST_NOTIFY BIT(5)
/* Not really a feature, but it's convenient to handle it as such */
#define FEATURE_IDF BIT(15)
-#define FEATURE_TCO BIT(16)
+#define FEATURE_TCO_SPT BIT(16)
+#define FEATURE_TCO_CNL BIT(17)
static const char *i801_feature_names[] = {
"SMBus PEC",
@@ -1500,57 +1501,23 @@ static inline unsigned int i801_get_adapter_class(struct i801_priv *priv)
}
#endif
-static const struct itco_wdt_platform_data tco_platform_data = {
+static const struct itco_wdt_platform_data spt_tco_platform_data = {
.name = "Intel PCH",
.version = 4,
};
static DEFINE_SPINLOCK(p2sb_spinlock);
-static void i801_add_tco(struct i801_priv *priv)
+static struct platform_device *
+i801_add_tco_spt(struct i801_priv *priv, struct pci_dev *pci_dev,
+ struct resource *tco_res)
{
- struct pci_dev *pci_dev = priv->pci_dev;
- struct resource tco_res[3], *res;
- struct platform_device *pdev;
+ struct resource *res;
unsigned int devfn;
- u32 tco_base, tco_ctl;
- u32 base_addr, ctrl_val;
u64 base64_addr;
+ u32 base_addr;
u8 hidden;
- if (!(priv->features & FEATURE_TCO))
- return;
-
- pci_read_config_dword(pci_dev, TCOBASE, &tco_base);
- pci_read_config_dword(pci_dev, TCOCTL, &tco_ctl);
- if (!(tco_ctl & TCOCTL_EN))
- return;
-
- memset(tco_res, 0, sizeof(tco_res));
-
- res = &tco_res[ICH_RES_IO_TCO];
- res->start = tco_base & ~1;
- res->end = res->start + 32 - 1;
- res->flags = IORESOURCE_IO;
-
- /*
- * Power Management registers.
- */
- devfn = PCI_DEVFN(PCI_SLOT(pci_dev->devfn), 2);
- pci_bus_read_config_dword(pci_dev->bus, devfn, ACPIBASE, &base_addr);
-
- res = &tco_res[ICH_RES_IO_SMI];
- res->start = (base_addr & ~1) + ACPIBASE_SMI_OFF;
- res->end = res->start + 3;
- res->flags = IORESOURCE_IO;
-
- /*
- * Enable the ACPI I/O space.
- */
- pci_bus_read_config_dword(pci_dev->bus, devfn, ACPICTRL, &ctrl_val);
- ctrl_val |= ACPICTRL_EN;
- pci_bus_write_config_dword(pci_dev->bus, devfn, ACPICTRL, ctrl_val);
-
/*
* We must access the NO_REBOOT bit over the Primary to Sideband
* bridge (P2SB). The BIOS prevents the P2SB device from being
@@ -1586,15 +1553,76 @@ static void i801_add_tco(struct i801_priv *priv)
res->end = res->start + 3;
res->flags = IORESOURCE_MEM;
- pdev = platform_device_register_resndata(&pci_dev->dev, "iTCO_wdt", -1,
- tco_res, 3, &tco_platform_data,
- sizeof(tco_platform_data));
- if (IS_ERR(pdev)) {
- dev_warn(&pci_dev->dev, "failed to create iTCO device\n");
+ return platform_device_register_resndata(&pci_dev->dev, "iTCO_wdt", -1,
+ tco_res, 3, &spt_tco_platform_data,
+ sizeof(spt_tco_platform_data));
+}
+
+static const struct itco_wdt_platform_data cnl_tco_platform_data = {
+ .name = "Intel PCH",
+ .version = 6,
+};
+
+static struct platform_device *
+i801_add_tco_cnl(struct i801_priv *priv, struct pci_dev *pci_dev,
+ struct resource *tco_res)
+{
+ return platform_device_register_resndata(&pci_dev->dev, "iTCO_wdt", -1,
+ tco_res, 2, &cnl_tco_platform_data,
+ sizeof(cnl_tco_platform_data));
+}
+
+static void i801_add_tco(struct i801_priv *priv)
+{
+ u32 base_addr, tco_base, tco_ctl, ctrl_val;
+ struct pci_dev *pci_dev = priv->pci_dev;
+ struct resource tco_res[3], *res;
+ unsigned int devfn;
+
+ /* If we have ACPI based watchdog use that instead */
+ if (acpi_has_watchdog())
return;
- }
- priv->tco_pdev = pdev;
+ if (!(priv->features & (FEATURE_TCO_SPT | FEATURE_TCO_CNL)))
+ return;
+
+ pci_read_config_dword(pci_dev, TCOBASE, &tco_base);
+ pci_read_config_dword(pci_dev, TCOCTL, &tco_ctl);
+ if (!(tco_ctl & TCOCTL_EN))
+ return;
+
+ memset(tco_res, 0, sizeof(tco_res));
+
+ res = &tco_res[ICH_RES_IO_TCO];
+ res->start = tco_base & ~1;
+ res->end = res->start + 32 - 1;
+ res->flags = IORESOURCE_IO;
+
+ /*
+ * Power Management registers.
+ */
+ devfn = PCI_DEVFN(PCI_SLOT(pci_dev->devfn), 2);
+ pci_bus_read_config_dword(pci_dev->bus, devfn, ACPIBASE, &base_addr);
+
+ res = &tco_res[ICH_RES_IO_SMI];
+ res->start = (base_addr & ~1) + ACPIBASE_SMI_OFF;
+ res->end = res->start + 3;
+ res->flags = IORESOURCE_IO;
+
+ /*
+ * Enable the ACPI I/O space.
+ */
+ pci_bus_read_config_dword(pci_dev->bus, devfn, ACPICTRL, &ctrl_val);
+ ctrl_val |= ACPICTRL_EN;
+ pci_bus_write_config_dword(pci_dev->bus, devfn, ACPICTRL, ctrl_val);
+
+ if (priv->features & FEATURE_TCO_CNL)
+ priv->tco_pdev = i801_add_tco_cnl(priv, pci_dev, tco_res);
+ else
+ priv->tco_pdev = i801_add_tco_spt(priv, pci_dev, tco_res);
+
+ if (IS_ERR(priv->tco_pdev))
+ dev_warn(&pci_dev->dev, "failed to create iTCO device\n");
}
#ifdef CONFIG_ACPI
@@ -1704,13 +1732,21 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id)
switch (dev->device) {
case PCI_DEVICE_ID_INTEL_SUNRISEPOINT_H_SMBUS:
case PCI_DEVICE_ID_INTEL_SUNRISEPOINT_LP_SMBUS:
- case PCI_DEVICE_ID_INTEL_CANNONLAKE_H_SMBUS:
- case PCI_DEVICE_ID_INTEL_CANNONLAKE_LP_SMBUS:
case PCI_DEVICE_ID_INTEL_LEWISBURG_SMBUS:
case PCI_DEVICE_ID_INTEL_LEWISBURG_SSKU_SMBUS:
- case PCI_DEVICE_ID_INTEL_CDF_SMBUS:
case PCI_DEVICE_ID_INTEL_DNV_SMBUS:
case PCI_DEVICE_ID_INTEL_KABYLAKE_PCH_H_SMBUS:
+ priv->features |= FEATURE_I2C_BLOCK_READ;
+ priv->features |= FEATURE_IRQ;
+ priv->features |= FEATURE_SMBUS_PEC;
+ priv->features |= FEATURE_BLOCK_BUFFER;
+ priv->features |= FEATURE_TCO_SPT;
+ priv->features |= FEATURE_HOST_NOTIFY;
+ break;
+
+ case PCI_DEVICE_ID_INTEL_CANNONLAKE_H_SMBUS:
+ case PCI_DEVICE_ID_INTEL_CANNONLAKE_LP_SMBUS:
+ case PCI_DEVICE_ID_INTEL_CDF_SMBUS:
case PCI_DEVICE_ID_INTEL_ICELAKE_LP_SMBUS:
case PCI_DEVICE_ID_INTEL_COMETLAKE_SMBUS:
case PCI_DEVICE_ID_INTEL_ELKHART_LAKE_SMBUS:
@@ -1720,9 +1756,7 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id)
priv->features |= FEATURE_IRQ;
priv->features |= FEATURE_SMBUS_PEC;
priv->features |= FEATURE_BLOCK_BUFFER;
- /* If we have ACPI based watchdog use that instead */
- if (!acpi_has_watchdog())
- priv->features |= FEATURE_TCO;
+ priv->features |= FEATURE_TCO_CNL;
priv->features |= FEATURE_HOST_NOTIFY;
break;
@@ -1921,8 +1955,7 @@ static int i801_suspend(struct device *dev)
static int i801_resume(struct device *dev)
{
- struct pci_dev *pci_dev = to_pci_dev(dev);
- struct i801_priv *priv = pci_get_drvdata(pci_dev);
+ struct i801_priv *priv = dev_get_drvdata(dev);
i801_enable_host_notify(&priv->adapter);
diff --git a/drivers/i2c/busses/i2c-icy.c b/drivers/i2c/busses/i2c-icy.c
new file mode 100644
index 000000000000..8382eb64b424
--- /dev/null
+++ b/drivers/i2c/busses/i2c-icy.c
@@ -0,0 +1,230 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * I2C driver for stand-alone PCF8584 style adapters on Zorro cards
+ *
+ * Original ICY documentation can be found on Aminet:
+ * https://aminet.net/package/docs/hard/icy
+ *
+ * There has been a modern community re-print of this design in 2019:
+ * https://www.a1k.org/forum/index.php?threads/70106/
+ *
+ * The card is basically a Philips PCF8584 connected straight to the
+ * beginning of the AutoConfig'd address space (register S1 on base+2),
+ * with /INT on /INT2 on the Zorro bus.
+ *
+ * Copyright (c) 2019 Max Staudt <max@enpas.org>
+ *
+ * This started as a fork of i2c-elektor.c and has evolved since.
+ * Thanks go to its authors for providing a base to grow on.
+ *
+ *
+ * IRQ support is currently not implemented.
+ *
+ * As it turns out, i2c-algo-pcf is really written with i2c-elektor's
+ * edge-triggered ISA interrupts in mind, while the Amiga's Zorro bus has
+ * level-triggered interrupts. This means that once an interrupt occurs, we
+ * have to tell the PCF8584 to shut up immediately, or it will keep the
+ * interrupt line busy and cause an IRQ storm.
+
+ * However, because of the PCF8584's host-side protocol, there is no good
+ * way to just quieten it without side effects. Rather, we have to perform
+ * the next read/write operation straight away, which will reset the /INT
+ * pin. This entails re-designing the core of i2c-algo-pcf in the future.
+ * For now, we never request an IRQ from the PCF8584, and poll it instead.
+ */
+
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/ioport.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#include <linux/i2c.h>
+#include <linux/i2c-algo-pcf.h>
+
+#include <asm/amigaints.h>
+#include <linux/zorro.h>
+
+#include "../algos/i2c-algo-pcf.h"
+
+struct icy_i2c {
+ struct i2c_adapter adapter;
+
+ void __iomem *reg_s0;
+ void __iomem *reg_s1;
+ struct fwnode_handle *ltc2990_fwnode;
+ struct i2c_client *ltc2990_client;
+};
+
+/*
+ * Functions called by i2c-algo-pcf
+ */
+static void icy_pcf_setpcf(void *data, int ctl, int val)
+{
+ struct icy_i2c *i2c = (struct icy_i2c *)data;
+
+ u8 __iomem *address = ctl ? i2c->reg_s1 : i2c->reg_s0;
+
+ z_writeb(val, address);
+}
+
+static int icy_pcf_getpcf(void *data, int ctl)
+{
+ struct icy_i2c *i2c = (struct icy_i2c *)data;
+
+ u8 __iomem *address = ctl ? i2c->reg_s1 : i2c->reg_s0;
+
+ return z_readb(address);
+}
+
+static int icy_pcf_getown(void *data)
+{
+ return 0x55;
+}
+
+static int icy_pcf_getclock(void *data)
+{
+ return 0x1c;
+}
+
+static void icy_pcf_waitforpin(void *data)
+{
+ usleep_range(50, 150);
+}
+
+/*
+ * Main i2c-icy part
+ */
+static unsigned short const icy_ltc2990_addresses[] = {
+ 0x4c, 0x4d, 0x4e, 0x4f, I2C_CLIENT_END
+};
+
+/*
+ * Additional sensors exposed once this property is applied:
+ *
+ * in1 will be the voltage of the 5V rail, divided by 2.
+ * in2 will be the voltage of the 12V rail, divided by 4.
+ * temp3 will be measured using a PCB loop next the chip.
+ */
+static const u32 icy_ltc2990_meas_mode[] = {0, 3};
+
+static const struct property_entry icy_ltc2990_props[] = {
+ PROPERTY_ENTRY_U32_ARRAY("lltc,meas-mode", icy_ltc2990_meas_mode),
+ { }
+};
+
+static int icy_probe(struct zorro_dev *z,
+ const struct zorro_device_id *ent)
+{
+ struct icy_i2c *i2c;
+ struct i2c_algo_pcf_data *algo_data;
+ struct fwnode_handle *new_fwnode;
+ struct i2c_board_info ltc2990_info = {
+ .type = "ltc2990",
+ .addr = 0x4c,
+ };
+
+ i2c = devm_kzalloc(&z->dev, sizeof(*i2c), GFP_KERNEL);
+ if (!i2c)
+ return -ENOMEM;
+
+ algo_data = devm_kzalloc(&z->dev, sizeof(*algo_data), GFP_KERNEL);
+ if (!algo_data)
+ return -ENOMEM;
+
+ dev_set_drvdata(&z->dev, i2c);
+ i2c->adapter.dev.parent = &z->dev;
+ i2c->adapter.owner = THIS_MODULE;
+ /* i2c->adapter.algo assigned by i2c_pcf_add_bus() */
+ i2c->adapter.algo_data = algo_data;
+ strlcpy(i2c->adapter.name, "ICY I2C Zorro adapter",
+ sizeof(i2c->adapter.name));
+
+ if (!devm_request_mem_region(&z->dev,
+ z->resource.start,
+ 4, i2c->adapter.name))
+ return -ENXIO;
+
+ /* Driver private data */
+ i2c->reg_s0 = ZTWO_VADDR(z->resource.start);
+ i2c->reg_s1 = ZTWO_VADDR(z->resource.start + 2);
+
+ algo_data->data = i2c;
+ algo_data->setpcf = icy_pcf_setpcf;
+ algo_data->getpcf = icy_pcf_getpcf;
+ algo_data->getown = icy_pcf_getown;
+ algo_data->getclock = icy_pcf_getclock;
+ algo_data->waitforpin = icy_pcf_waitforpin;
+
+ if (i2c_pcf_add_bus(&i2c->adapter)) {
+ dev_err(&z->dev, "i2c_pcf_add_bus() failed\n");
+ return -ENXIO;
+ }
+
+ dev_info(&z->dev, "ICY I2C controller at %pa, IRQ not implemented\n",
+ &z->resource.start);
+
+ /*
+ * The 2019 a1k.org PCBs have an LTC2990 at 0x4c, so start
+ * it automatically once ltc2990 is modprobed.
+ *
+ * in0 is the voltage of the internal 5V power supply.
+ * temp1 is the temperature inside the chip.
+ *
+ * See property_entry above for in1, in2, temp3.
+ */
+ new_fwnode = fwnode_create_software_node(icy_ltc2990_props, NULL);
+ if (IS_ERR(new_fwnode)) {
+ dev_info(&z->dev, "Failed to create fwnode for LTC2990, error: %ld\n",
+ PTR_ERR(new_fwnode));
+ } else {
+ /*
+ * Store the fwnode so we can destroy it on .remove().
+ * Only store it on success, as fwnode_remove_software_node()
+ * is NULL safe, but not PTR_ERR safe.
+ */
+ i2c->ltc2990_fwnode = new_fwnode;
+ ltc2990_info.fwnode = new_fwnode;
+
+ i2c->ltc2990_client =
+ i2c_new_probed_device(&i2c->adapter,
+ &ltc2990_info,
+ icy_ltc2990_addresses,
+ NULL);
+ }
+
+ return 0;
+}
+
+static void icy_remove(struct zorro_dev *z)
+{
+ struct icy_i2c *i2c = dev_get_drvdata(&z->dev);
+
+ i2c_unregister_device(i2c->ltc2990_client);
+ fwnode_remove_software_node(i2c->ltc2990_fwnode);
+
+ i2c_del_adapter(&i2c->adapter);
+}
+
+static const struct zorro_device_id icy_zorro_tbl[] = {
+ { ZORRO_ID(VMC, 15, 0), },
+ { 0 }
+};
+
+MODULE_DEVICE_TABLE(zorro, icy_zorro_tbl);
+
+static struct zorro_driver icy_driver = {
+ .name = "i2c-icy",
+ .id_table = icy_zorro_tbl,
+ .probe = icy_probe,
+ .remove = icy_remove,
+};
+
+module_driver(icy_driver,
+ zorro_register_driver,
+ zorro_unregister_driver);
+
+MODULE_AUTHOR("Max Staudt <max@enpas.org>");
+MODULE_DESCRIPTION("I2C bus via PCF8584 on ICY Zorro card");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/i2c/busses/i2c-imx-lpi2c.c b/drivers/i2c/busses/i2c-imx-lpi2c.c
index dc00fabc919a..c92b56485fa6 100644
--- a/drivers/i2c/busses/i2c-imx-lpi2c.c
+++ b/drivers/i2c/busses/i2c-imx-lpi2c.c
@@ -545,7 +545,6 @@ MODULE_DEVICE_TABLE(of, lpi2c_imx_of_match);
static int lpi2c_imx_probe(struct platform_device *pdev)
{
struct lpi2c_imx_struct *lpi2c_imx;
- struct resource *res;
unsigned int temp;
int irq, ret;
@@ -553,8 +552,7 @@ static int lpi2c_imx_probe(struct platform_device *pdev)
if (!lpi2c_imx)
return -ENOMEM;
- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- lpi2c_imx->base = devm_ioremap_resource(&pdev->dev, res);
+ lpi2c_imx->base = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(lpi2c_imx->base))
return PTR_ERR(lpi2c_imx->base);
diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c
index 15f6cde6452f..a3b61336fe55 100644
--- a/drivers/i2c/busses/i2c-imx.c
+++ b/drivers/i2c/busses/i2c-imx.c
@@ -20,6 +20,7 @@
*
*/
+#include <linux/acpi.h>
#include <linux/clk.h>
#include <linux/completion.h>
#include <linux/delay.h>
@@ -255,6 +256,12 @@ static const struct of_device_id i2c_imx_dt_ids[] = {
};
MODULE_DEVICE_TABLE(of, i2c_imx_dt_ids);
+static const struct acpi_device_id i2c_imx_acpi_ids[] = {
+ {"NXP0001", .driver_data = (kernel_ulong_t)&vf610_i2c_hwdata},
+ { }
+};
+MODULE_DEVICE_TABLE(acpi, i2c_imx_acpi_ids);
+
static inline int is_imx1_i2c(struct imx_i2c_struct *i2c_imx)
{
return i2c_imx->hwdata->devtype == IMX1_I2C;
@@ -1048,14 +1055,13 @@ static const struct i2c_algorithm i2c_imx_algo = {
static int i2c_imx_probe(struct platform_device *pdev)
{
- const struct of_device_id *of_id = of_match_device(i2c_imx_dt_ids,
- &pdev->dev);
struct imx_i2c_struct *i2c_imx;
struct resource *res;
struct imxi2c_platform_data *pdata = dev_get_platdata(&pdev->dev);
void __iomem *base;
int irq, ret;
dma_addr_t phy_addr;
+ const struct imx_i2c_hwdata *match;
dev_dbg(&pdev->dev, "<%s>\n", __func__);
@@ -1075,8 +1081,9 @@ static int i2c_imx_probe(struct platform_device *pdev)
if (!i2c_imx)
return -ENOMEM;
- if (of_id)
- i2c_imx->hwdata = of_id->data;
+ match = device_get_match_data(&pdev->dev);
+ if (match)
+ i2c_imx->hwdata = match;
else
i2c_imx->hwdata = (struct imx_i2c_hwdata *)
platform_get_device_id(pdev)->driver_data;
@@ -1089,6 +1096,7 @@ static int i2c_imx_probe(struct platform_device *pdev)
i2c_imx->adapter.nr = pdev->id;
i2c_imx->adapter.dev.of_node = pdev->dev.of_node;
i2c_imx->base = base;
+ ACPI_COMPANION_SET(&i2c_imx->adapter.dev, ACPI_COMPANION(&pdev->dev));
/* Get I2C clock */
i2c_imx->clk = devm_clk_get(&pdev->dev, NULL);
@@ -1247,6 +1255,7 @@ static struct platform_driver i2c_imx_driver = {
.name = DRIVER_NAME,
.pm = &i2c_imx_pm_ops,
.of_match_table = i2c_imx_dt_ids,
+ .acpi_match_table = i2c_imx_acpi_ids,
},
.id_table = imx_i2c_devtype,
};
diff --git a/drivers/i2c/busses/i2c-ismt.c b/drivers/i2c/busses/i2c-ismt.c
index 02d23edb2fb1..2f95e25a10f7 100644
--- a/drivers/i2c/busses/i2c-ismt.c
+++ b/drivers/i2c/busses/i2c-ismt.c
@@ -781,8 +781,6 @@ static int ismt_dev_init(struct ismt_priv *priv)
if (!priv->hw)
return -ENOMEM;
- memset(priv->hw, 0, (ISMT_DESC_ENTRIES * sizeof(struct ismt_desc)));
-
priv->head = 0;
init_completion(&priv->cmp);
diff --git a/drivers/i2c/busses/i2c-mxs.c b/drivers/i2c/busses/i2c-mxs.c
index 7d79317a1046..89224913f578 100644
--- a/drivers/i2c/busses/i2c-mxs.c
+++ b/drivers/i2c/busses/i2c-mxs.c
@@ -802,7 +802,6 @@ static int mxs_i2c_probe(struct platform_device *pdev)
struct device *dev = &pdev->dev;
struct mxs_i2c_dev *i2c;
struct i2c_adapter *adap;
- struct resource *res;
int err, irq;
i2c = devm_kzalloc(dev, sizeof(*i2c), GFP_KERNEL);
@@ -814,8 +813,7 @@ static int mxs_i2c_probe(struct platform_device *pdev)
i2c->dev_type = device_id->driver_data;
}
- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- i2c->regs = devm_ioremap_resource(&pdev->dev, res);
+ i2c->regs = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(i2c->regs))
return PTR_ERR(i2c->regs);
diff --git a/drivers/i2c/busses/i2c-ocores.c b/drivers/i2c/busses/i2c-ocores.c
index 4117f1abc7c6..ca8b3ecfa93d 100644
--- a/drivers/i2c/busses/i2c-ocores.c
+++ b/drivers/i2c/busses/i2c-ocores.c
@@ -703,8 +703,9 @@ static int ocores_i2c_probe(struct platform_device *pdev)
}
if (ocores_algorithm.master_xfer != ocores_xfer_polling) {
- ret = devm_request_irq(&pdev->dev, irq, ocores_isr, 0,
- pdev->name, i2c);
+ ret = devm_request_any_context_irq(&pdev->dev, irq,
+ ocores_isr, 0,
+ pdev->name, i2c);
if (ret) {
dev_err(&pdev->dev, "Cannot claim IRQ\n");
goto err_clk;
diff --git a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c
index cba325eb852f..30ded6422e7b 100644
--- a/drivers/i2c/busses/i2c-piix4.c
+++ b/drivers/i2c/busses/i2c-piix4.c
@@ -72,7 +72,8 @@
#define PIIX4_BLOCK_DATA 0x14
/* Multi-port constants */
-#define PIIX4_MAX_ADAPTERS 4
+#define PIIX4_MAX_ADAPTERS 4
+#define HUDSON2_MAIN_PORTS 2 /* HUDSON2, KERNCZ reserves ports 3, 4 */
/* SB800 constants */
#define SB800_PIIX4_SMB_IDX 0xcd6
@@ -806,10 +807,12 @@ MODULE_DEVICE_TABLE (pci, piix4_ids);
static struct i2c_adapter *piix4_main_adapters[PIIX4_MAX_ADAPTERS];
static struct i2c_adapter *piix4_aux_adapter;
+static int piix4_adapter_count;
static int piix4_add_adapter(struct pci_dev *dev, unsigned short smba,
bool sb800_main, u8 port, bool notify_imc,
- const char *name, struct i2c_adapter **padap)
+ u8 hw_port_nr, const char *name,
+ struct i2c_adapter **padap)
{
struct i2c_adapter *adap;
struct i2c_piix4_adapdata *adapdata;
@@ -841,6 +844,12 @@ static int piix4_add_adapter(struct pci_dev *dev, unsigned short smba,
/* set up the sysfs linkage to our parent device */
adap->dev.parent = &dev->dev;
+ if (has_acpi_companion(&dev->dev)) {
+ acpi_preset_companion(&adap->dev,
+ ACPI_COMPANION(&dev->dev),
+ hw_port_nr);
+ }
+
snprintf(adap->name, sizeof(adap->name),
"SMBus PIIX4 adapter%s at %04x", name, smba);
@@ -865,8 +874,19 @@ static int piix4_add_adapters_sb800(struct pci_dev *dev, unsigned short smba,
int port;
int retval;
- for (port = 0; port < PIIX4_MAX_ADAPTERS; port++) {
+ if (dev->device == PCI_DEVICE_ID_AMD_KERNCZ_SMBUS ||
+ (dev->device == PCI_DEVICE_ID_AMD_HUDSON2_SMBUS &&
+ dev->revision >= 0x1F)) {
+ piix4_adapter_count = HUDSON2_MAIN_PORTS;
+ } else {
+ piix4_adapter_count = PIIX4_MAX_ADAPTERS;
+ }
+
+ for (port = 0; port < piix4_adapter_count; port++) {
+ u8 hw_port_nr = port == 0 ? 0 : port + 1;
+
retval = piix4_add_adapter(dev, smba, true, port, notify_imc,
+ hw_port_nr,
piix4_main_port_names_sb800[port],
&piix4_main_adapters[port]);
if (retval < 0)
@@ -937,8 +957,8 @@ static int piix4_probe(struct pci_dev *dev, const struct pci_device_id *id)
return retval;
/* Try to register main SMBus adapter, give up if we can't */
- retval = piix4_add_adapter(dev, retval, false, 0, false, "",
- &piix4_main_adapters[0]);
+ retval = piix4_add_adapter(dev, retval, false, 0, false, 0,
+ "", &piix4_main_adapters[0]);
if (retval < 0)
return retval;
}
@@ -964,7 +984,7 @@ static int piix4_probe(struct pci_dev *dev, const struct pci_device_id *id)
if (retval > 0) {
/* Try to add the aux adapter if it exists,
* piix4_add_adapter will clean up if this fails */
- piix4_add_adapter(dev, retval, false, 0, false,
+ piix4_add_adapter(dev, retval, false, 0, false, 1,
is_sb800 ? piix4_aux_port_name_sb800 : "",
&piix4_aux_adapter);
}
@@ -987,7 +1007,7 @@ static void piix4_adap_remove(struct i2c_adapter *adap)
static void piix4_remove(struct pci_dev *dev)
{
- int port = PIIX4_MAX_ADAPTERS;
+ int port = piix4_adapter_count;
while (--port >= 0) {
if (piix4_main_adapters[port]) {
diff --git a/drivers/i2c/busses/i2c-sprd.c b/drivers/i2c/busses/i2c-sprd.c
index 961123529678..b432e7580458 100644
--- a/drivers/i2c/busses/i2c-sprd.c
+++ b/drivers/i2c/busses/i2c-sprd.c
@@ -12,6 +12,7 @@
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/kernel.h>
+#include <linux/module.h>
#include <linux/of.h>
#include <linux/of_device.h>
#include <linux/platform_device.h>
@@ -465,9 +466,9 @@ static int sprd_i2c_clk_init(struct sprd_i2c *i2c_dev)
i2c_dev->clk = devm_clk_get(i2c_dev->dev, "enable");
if (IS_ERR(i2c_dev->clk)) {
- dev_warn(i2c_dev->dev, "i2c%d can't get the enable clock\n",
- i2c_dev->adap.nr);
- i2c_dev->clk = NULL;
+ dev_err(i2c_dev->dev, "i2c%d can't get the enable clock\n",
+ i2c_dev->adap.nr);
+ return PTR_ERR(i2c_dev->clk);
}
return 0;
@@ -477,7 +478,6 @@ static int sprd_i2c_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
struct sprd_i2c *i2c_dev;
- struct resource *res;
u32 prop;
int ret;
@@ -487,8 +487,7 @@ static int sprd_i2c_probe(struct platform_device *pdev)
if (!i2c_dev)
return -ENOMEM;
- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- i2c_dev->base = devm_ioremap_resource(dev, res);
+ i2c_dev->base = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(i2c_dev->base))
return PTR_ERR(i2c_dev->base);
@@ -520,7 +519,10 @@ static int sprd_i2c_probe(struct platform_device *pdev)
if (i2c_dev->bus_freq != 100000 && i2c_dev->bus_freq != 400000)
return -EINVAL;
- sprd_i2c_clk_init(i2c_dev);
+ ret = sprd_i2c_clk_init(i2c_dev);
+ if (ret)
+ return ret;
+
platform_set_drvdata(pdev, i2c_dev);
ret = clk_prepare_enable(i2c_dev->clk);
@@ -644,8 +646,7 @@ static struct platform_driver sprd_i2c_driver = {
},
};
-static int sprd_i2c_init(void)
-{
- return platform_driver_register(&sprd_i2c_driver);
-}
-arch_initcall_sync(sprd_i2c_init);
+module_platform_driver(sprd_i2c_driver);
+
+MODULE_DESCRIPTION("Spreadtrum I2C master controller driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/i2c/busses/i2c-stm32f7.c b/drivers/i2c/busses/i2c-stm32f7.c
index 266d1c269b83..d36cf08461f7 100644
--- a/drivers/i2c/busses/i2c-stm32f7.c
+++ b/drivers/i2c/busses/i2c-stm32f7.c
@@ -1809,7 +1809,7 @@ static u32 stm32f7_i2c_func(struct i2c_adapter *adap)
I2C_FUNC_SMBUS_I2C_BLOCK;
}
-static struct i2c_algorithm stm32f7_i2c_algo = {
+static const struct i2c_algorithm stm32f7_i2c_algo = {
.master_xfer = stm32f7_i2c_xfer,
.smbus_xfer = stm32f7_i2c_smbus_xfer,
.functionality = stm32f7_i2c_func,
diff --git a/drivers/i2c/busses/i2c-synquacer.c b/drivers/i2c/busses/i2c-synquacer.c
index f724c8e6b360..39762f0611b1 100644
--- a/drivers/i2c/busses/i2c-synquacer.c
+++ b/drivers/i2c/busses/i2c-synquacer.c
@@ -526,7 +526,7 @@ static const struct i2c_algorithm synquacer_i2c_algo = {
.functionality = synquacer_i2c_functionality,
};
-static struct i2c_adapter synquacer_i2c_ops = {
+static const struct i2c_adapter synquacer_i2c_ops = {
.owner = THIS_MODULE,
.name = "synquacer_i2c-adapter",
.algo = &synquacer_i2c_algo,
diff --git a/drivers/i2c/busses/i2c-taos-evm.c b/drivers/i2c/busses/i2c-taos-evm.c
index 37347c93e8e0..0bff3f3a8779 100644
--- a/drivers/i2c/busses/i2c-taos-evm.c
+++ b/drivers/i2c/busses/i2c-taos-evm.c
@@ -39,7 +39,7 @@ struct taos_data {
};
/* TAOS TSL2550 EVM */
-static struct i2c_board_info tsl2550_info = {
+static const struct i2c_board_info tsl2550_info = {
I2C_BOARD_INFO("tsl2550", 0x39),
};
diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c
index 9fcb13beeb8f..c1683f9338b4 100644
--- a/drivers/i2c/busses/i2c-tegra.c
+++ b/drivers/i2c/busses/i2c-tegra.c
@@ -636,7 +636,7 @@ static void tegra_dvc_init(struct tegra_i2c_dev *i2c_dev)
dvc_writel(i2c_dev, val, DVC_CTRL_REG1);
}
-static int tegra_i2c_runtime_resume(struct device *dev)
+static int __maybe_unused tegra_i2c_runtime_resume(struct device *dev)
{
struct tegra_i2c_dev *i2c_dev = dev_get_drvdata(dev);
int ret;
@@ -665,7 +665,7 @@ static int tegra_i2c_runtime_resume(struct device *dev)
return 0;
}
-static int tegra_i2c_runtime_suspend(struct device *dev)
+static int __maybe_unused tegra_i2c_runtime_suspend(struct device *dev)
{
struct tegra_i2c_dev *i2c_dev = dev_get_drvdata(dev);
@@ -713,12 +713,6 @@ static int tegra_i2c_init(struct tegra_i2c_dev *i2c_dev, bool clk_reinit)
u32 tsu_thd;
u8 tlow, thigh;
- err = pm_runtime_get_sync(i2c_dev->dev);
- if (err < 0) {
- dev_err(i2c_dev->dev, "runtime resume failed %d\n", err);
- return err;
- }
-
reset_control_assert(i2c_dev->rst);
udelay(2);
reset_control_deassert(i2c_dev->rst);
@@ -772,7 +766,7 @@ static int tegra_i2c_init(struct tegra_i2c_dev *i2c_dev, bool clk_reinit)
if (err) {
dev_err(i2c_dev->dev,
"failed changing clock rate: %d\n", err);
- goto err;
+ return err;
}
}
@@ -787,23 +781,21 @@ static int tegra_i2c_init(struct tegra_i2c_dev *i2c_dev, bool clk_reinit)
err = tegra_i2c_flush_fifos(i2c_dev);
if (err)
- goto err;
+ return err;
if (i2c_dev->is_multimaster_mode && i2c_dev->hw->has_slcg_override_reg)
i2c_writel(i2c_dev, I2C_MST_CORE_CLKEN_OVR, I2C_CLKEN_OVERRIDE);
err = tegra_i2c_wait_for_config_load(i2c_dev);
if (err)
- goto err;
+ return err;
if (i2c_dev->irq_disabled) {
i2c_dev->irq_disabled = false;
enable_irq(i2c_dev->irq);
}
-err:
- pm_runtime_put(i2c_dev->dev);
- return err;
+ return 0;
}
static int tegra_i2c_disable_packet_mode(struct tegra_i2c_dev *i2c_dev)
@@ -1616,12 +1608,14 @@ static int tegra_i2c_probe(struct platform_device *pdev)
}
pm_runtime_enable(&pdev->dev);
- if (!pm_runtime_enabled(&pdev->dev)) {
+ if (!pm_runtime_enabled(&pdev->dev))
ret = tegra_i2c_runtime_resume(&pdev->dev);
- if (ret < 0) {
- dev_err(&pdev->dev, "runtime resume failed\n");
- goto unprepare_div_clk;
- }
+ else
+ ret = pm_runtime_get_sync(i2c_dev->dev);
+
+ if (ret < 0) {
+ dev_err(&pdev->dev, "runtime resume failed\n");
+ goto unprepare_div_clk;
}
if (i2c_dev->is_multimaster_mode) {
@@ -1666,6 +1660,8 @@ static int tegra_i2c_probe(struct platform_device *pdev)
if (ret)
goto release_dma;
+ pm_runtime_put(&pdev->dev);
+
return 0;
release_dma:
@@ -1711,8 +1707,7 @@ static int tegra_i2c_remove(struct platform_device *pdev)
return 0;
}
-#ifdef CONFIG_PM_SLEEP
-static int tegra_i2c_suspend(struct device *dev)
+static int __maybe_unused tegra_i2c_suspend(struct device *dev)
{
struct tegra_i2c_dev *i2c_dev = dev_get_drvdata(dev);
@@ -1721,38 +1716,41 @@ static int tegra_i2c_suspend(struct device *dev)
return 0;
}
-static int tegra_i2c_resume(struct device *dev)
+static int __maybe_unused tegra_i2c_resume(struct device *dev)
{
struct tegra_i2c_dev *i2c_dev = dev_get_drvdata(dev);
int err;
+ err = tegra_i2c_runtime_resume(dev);
+ if (err)
+ return err;
+
err = tegra_i2c_init(i2c_dev, false);
if (err)
return err;
+ err = tegra_i2c_runtime_suspend(dev);
+ if (err)
+ return err;
+
i2c_mark_adapter_resumed(&i2c_dev->adapter);
return 0;
}
static const struct dev_pm_ops tegra_i2c_pm = {
- SET_SYSTEM_SLEEP_PM_OPS(tegra_i2c_suspend, tegra_i2c_resume)
+ SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(tegra_i2c_suspend, tegra_i2c_resume)
SET_RUNTIME_PM_OPS(tegra_i2c_runtime_suspend, tegra_i2c_runtime_resume,
NULL)
};
-#define TEGRA_I2C_PM (&tegra_i2c_pm)
-#else
-#define TEGRA_I2C_PM NULL
-#endif
-
static struct platform_driver tegra_i2c_driver = {
.probe = tegra_i2c_probe,
.remove = tegra_i2c_remove,
.driver = {
.name = "tegra-i2c",
.of_match_table = tegra_i2c_of_match,
- .pm = TEGRA_I2C_PM,
+ .pm = &tegra_i2c_pm,
},
};
diff --git a/drivers/i2c/busses/i2c-uniphier-f.c b/drivers/i2c/busses/i2c-uniphier-f.c
index 7acca2599f04..4241aac79e7e 100644
--- a/drivers/i2c/busses/i2c-uniphier-f.c
+++ b/drivers/i2c/busses/i2c-uniphier-f.c
@@ -108,7 +108,6 @@ static void uniphier_fi2c_fill_txfifo(struct uniphier_fi2c_priv *priv,
if (fifo_space-- <= 0)
break;
- dev_dbg(&priv->adap.dev, "write data: %02x\n", *priv->buf);
writel(*priv->buf++, priv->membase + UNIPHIER_FI2C_DTTX);
priv->len--;
}
@@ -124,7 +123,6 @@ static void uniphier_fi2c_drain_rxfifo(struct uniphier_fi2c_priv *priv)
break;
*priv->buf++ = readl(priv->membase + UNIPHIER_FI2C_DTRX);
- dev_dbg(&priv->adap.dev, "read data: %02x\n", priv->buf[-1]);
priv->len--;
}
}
@@ -142,8 +140,6 @@ static void uniphier_fi2c_clear_irqs(struct uniphier_fi2c_priv *priv,
static void uniphier_fi2c_stop(struct uniphier_fi2c_priv *priv)
{
- dev_dbg(&priv->adap.dev, "stop condition\n");
-
priv->enabled_irqs |= UNIPHIER_FI2C_INT_STOP;
uniphier_fi2c_set_irqs(priv);
writel(UNIPHIER_FI2C_CR_MST | UNIPHIER_FI2C_CR_STO,
@@ -160,21 +156,15 @@ static irqreturn_t uniphier_fi2c_interrupt(int irq, void *dev_id)
irq_status = readl(priv->membase + UNIPHIER_FI2C_INT);
irq_status &= priv->enabled_irqs;
- dev_dbg(&priv->adap.dev,
- "interrupt: enabled_irqs=%04x, irq_status=%04x\n",
- priv->enabled_irqs, irq_status);
-
if (irq_status & UNIPHIER_FI2C_INT_STOP)
goto complete;
if (unlikely(irq_status & UNIPHIER_FI2C_INT_AL)) {
- dev_dbg(&priv->adap.dev, "arbitration lost\n");
priv->error = -EAGAIN;
goto complete;
}
if (unlikely(irq_status & UNIPHIER_FI2C_INT_NA)) {
- dev_dbg(&priv->adap.dev, "could not get ACK\n");
priv->error = -ENXIO;
if (priv->flags & UNIPHIER_FI2C_RD) {
/*
@@ -215,18 +205,14 @@ static irqreturn_t uniphier_fi2c_interrupt(int irq, void *dev_id)
if (unlikely(priv->flags & UNIPHIER_FI2C_MANUAL_NACK)) {
if (priv->len <= UNIPHIER_FI2C_FIFO_SIZE &&
!(priv->flags & UNIPHIER_FI2C_BYTE_WISE)) {
- dev_dbg(&priv->adap.dev,
- "enable read byte count IRQ\n");
priv->enabled_irqs |= UNIPHIER_FI2C_INT_RB;
uniphier_fi2c_set_irqs(priv);
priv->flags |= UNIPHIER_FI2C_BYTE_WISE;
}
- if (priv->len <= 1) {
- dev_dbg(&priv->adap.dev, "set NACK\n");
+ if (priv->len <= 1)
writel(UNIPHIER_FI2C_CR_MST |
UNIPHIER_FI2C_CR_NACK,
priv->membase + UNIPHIER_FI2C_CR);
- }
}
goto handled;
@@ -334,10 +320,6 @@ static int uniphier_fi2c_master_xfer_one(struct i2c_adapter *adap,
bool is_read = msg->flags & I2C_M_RD;
unsigned long time_left, flags;
- dev_dbg(&adap->dev, "%s: addr=0x%02x, len=%d, repeat=%d, stop=%d\n",
- is_read ? "receive" : "transmit", msg->addr, msg->len,
- repeat, stop);
-
priv->len = msg->len;
priv->buf = msg->buf;
priv->enabled_irqs = UNIPHIER_FI2C_INT_FAULTS;
@@ -359,7 +341,6 @@ static int uniphier_fi2c_master_xfer_one(struct i2c_adapter *adap,
else
uniphier_fi2c_tx_init(priv, msg->addr, repeat);
- dev_dbg(&adap->dev, "start condition\n");
/*
* For a repeated START condition, writing a slave address to the FIFO
* kicks the controller. So, the UNIPHIER_FI2C_CR register should be
@@ -383,7 +364,6 @@ static int uniphier_fi2c_master_xfer_one(struct i2c_adapter *adap,
uniphier_fi2c_recover(priv);
return -ETIMEDOUT;
}
- dev_dbg(&adap->dev, "complete\n");
if (unlikely(priv->flags & UNIPHIER_FI2C_DEFER_STOP_COMP)) {
u32 status;
@@ -538,7 +518,6 @@ static int uniphier_fi2c_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
struct uniphier_fi2c_priv *priv;
- struct resource *regs;
u32 bus_speed;
unsigned long clk_rate;
int irq, ret;
@@ -547,8 +526,7 @@ static int uniphier_fi2c_probe(struct platform_device *pdev)
if (!priv)
return -ENOMEM;
- regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- priv->membase = devm_ioremap_resource(dev, regs);
+ priv->membase = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(priv->membase))
return PTR_ERR(priv->membase);
diff --git a/drivers/i2c/busses/i2c-uniphier.c b/drivers/i2c/busses/i2c-uniphier.c
index 0173840c32af..0270090c0360 100644
--- a/drivers/i2c/busses/i2c-uniphier.c
+++ b/drivers/i2c/busses/i2c-uniphier.c
@@ -71,7 +71,6 @@ static int uniphier_i2c_xfer_byte(struct i2c_adapter *adap, u32 txdata,
reinit_completion(&priv->comp);
txdata |= UNIPHIER_I2C_DTRM_IRQEN;
- dev_dbg(&adap->dev, "write data: 0x%04x\n", txdata);
writel(txdata, priv->membase + UNIPHIER_I2C_DTRM);
time_left = wait_for_completion_timeout(&priv->comp, adap->timeout);
@@ -81,8 +80,6 @@ static int uniphier_i2c_xfer_byte(struct i2c_adapter *adap, u32 txdata,
}
rxdata = readl(priv->membase + UNIPHIER_I2C_DREC);
- dev_dbg(&adap->dev, "read data: 0x%04x\n", rxdata);
-
if (rxdatap)
*rxdatap = rxdata;
@@ -98,14 +95,11 @@ static int uniphier_i2c_send_byte(struct i2c_adapter *adap, u32 txdata)
if (ret)
return ret;
- if (unlikely(rxdata & UNIPHIER_I2C_DREC_LAB)) {
- dev_dbg(&adap->dev, "arbitration lost\n");
+ if (unlikely(rxdata & UNIPHIER_I2C_DREC_LAB))
return -EAGAIN;
- }
- if (unlikely(rxdata & UNIPHIER_I2C_DREC_LRB)) {
- dev_dbg(&adap->dev, "could not get ACK\n");
+
+ if (unlikely(rxdata & UNIPHIER_I2C_DREC_LRB))
return -ENXIO;
- }
return 0;
}
@@ -115,7 +109,6 @@ static int uniphier_i2c_tx(struct i2c_adapter *adap, u16 addr, u16 len,
{
int ret;
- dev_dbg(&adap->dev, "start condition\n");
ret = uniphier_i2c_send_byte(adap, addr << 1 |
UNIPHIER_I2C_DTRM_STA |
UNIPHIER_I2C_DTRM_NACK);
@@ -137,7 +130,6 @@ static int uniphier_i2c_rx(struct i2c_adapter *adap, u16 addr, u16 len,
{
int ret;
- dev_dbg(&adap->dev, "start condition\n");
ret = uniphier_i2c_send_byte(adap, addr << 1 |
UNIPHIER_I2C_DTRM_STA |
UNIPHIER_I2C_DTRM_NACK |
@@ -161,7 +153,6 @@ static int uniphier_i2c_rx(struct i2c_adapter *adap, u16 addr, u16 len,
static int uniphier_i2c_stop(struct i2c_adapter *adap)
{
- dev_dbg(&adap->dev, "stop condition\n");
return uniphier_i2c_send_byte(adap, UNIPHIER_I2C_DTRM_STO |
UNIPHIER_I2C_DTRM_NACK);
}
@@ -173,9 +164,6 @@ static int uniphier_i2c_master_xfer_one(struct i2c_adapter *adap,
bool recovery = false;
int ret;
- dev_dbg(&adap->dev, "%s: addr=0x%02x, len=%d, stop=%d\n",
- is_read ? "receive" : "transmit", msg->addr, msg->len, stop);
-
if (is_read)
ret = uniphier_i2c_rx(adap, msg->addr, msg->len, msg->buf);
else
@@ -326,7 +314,6 @@ static int uniphier_i2c_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
struct uniphier_i2c_priv *priv;
- struct resource *regs;
u32 bus_speed;
unsigned long clk_rate;
int irq, ret;
@@ -335,8 +322,7 @@ static int uniphier_i2c_probe(struct platform_device *pdev)
if (!priv)
return -ENOMEM;
- regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- priv->membase = devm_ioremap_resource(dev, regs);
+ priv->membase = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(priv->membase))
return PTR_ERR(priv->membase);
diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c
index 72b300174cb8..5f6a4985f2bc 100644
--- a/drivers/i2c/i2c-core-base.c
+++ b/drivers/i2c/i2c-core-base.c
@@ -350,13 +350,11 @@ static int i2c_device_probe(struct device *dev)
return -ENODEV;
if (client->flags & I2C_CLIENT_WAKE) {
- int wakeirq = -ENOENT;
+ int wakeirq;
- if (dev->of_node) {
- wakeirq = of_irq_get_byname(dev->of_node, "wakeup");
- if (wakeirq == -EPROBE_DEFER)
- return wakeirq;
- }
+ wakeirq = of_irq_get_byname(dev->of_node, "wakeup");
+ if (wakeirq == -EPROBE_DEFER)
+ return wakeirq;
device_init_wakeup(&client->dev, true);
@@ -966,7 +964,7 @@ struct i2c_client *devm_i2c_new_dummy_device(struct device *dev,
EXPORT_SYMBOL_GPL(devm_i2c_new_dummy_device);
/**
- * i2c_new_secondary_device - Helper to get the instantiated secondary address
+ * i2c_new_ancillary_device - Helper to get the instantiated secondary address
* and create the associated device
* @client: Handle to the primary client
* @name: Handle to specify which secondary address to get
@@ -985,9 +983,9 @@ EXPORT_SYMBOL_GPL(devm_i2c_new_dummy_device);
* cell whose "reg-names" value matches the slave name.
*
* This returns the new i2c client, which should be saved for later use with
- * i2c_unregister_device(); or NULL to indicate an error.
+ * i2c_unregister_device(); or an ERR_PTR to describe the error.
*/
-struct i2c_client *i2c_new_secondary_device(struct i2c_client *client,
+struct i2c_client *i2c_new_ancillary_device(struct i2c_client *client,
const char *name,
u16 default_addr)
{
@@ -1002,9 +1000,9 @@ struct i2c_client *i2c_new_secondary_device(struct i2c_client *client,
}
dev_dbg(&client->adapter->dev, "Address for %s : 0x%x\n", name, addr);
- return i2c_new_dummy(client->adapter, addr);
+ return i2c_new_dummy_device(client->adapter, addr);
}
-EXPORT_SYMBOL_GPL(i2c_new_secondary_device);
+EXPORT_SYMBOL_GPL(i2c_new_ancillary_device);
/* ------------------------------------------------------------------------- */
diff --git a/drivers/i2c/i2c-slave-eeprom.c b/drivers/i2c/i2c-slave-eeprom.c
index be65d3842878..92ff9991bae8 100644
--- a/drivers/i2c/i2c-slave-eeprom.c
+++ b/drivers/i2c/i2c-slave-eeprom.c
@@ -11,6 +11,13 @@
* pointer, yet implementation is deferred until the need actually arises.
*/
+/*
+ * FIXME: What to do if only 8 bits of a 16 bit address are sent?
+ * The ST-M24C64 sends only 0xff then. Needs verification with other
+ * EEPROMs, though. We currently use the 8 bit as a valid address.
+ */
+
+#include <linux/bitfield.h>
#include <linux/i2c.h>
#include <linux/init.h>
#include <linux/module.h>
@@ -21,12 +28,18 @@
struct eeprom_data {
struct bin_attribute bin;
- bool first_write;
spinlock_t buffer_lock;
- u8 buffer_idx;
+ u16 buffer_idx;
+ u16 address_mask;
+ u8 num_address_bytes;
+ u8 idx_write_cnt;
u8 buffer[];
};
+#define I2C_SLAVE_BYTELEN GENMASK(15, 0)
+#define I2C_SLAVE_FLAG_ADDR16 BIT(16)
+#define I2C_SLAVE_DEVICE_MAGIC(_len, _flags) ((_flags) | (_len))
+
static int i2c_slave_eeprom_slave_cb(struct i2c_client *client,
enum i2c_slave_event event, u8 *val)
{
@@ -34,12 +47,14 @@ static int i2c_slave_eeprom_slave_cb(struct i2c_client *client,
switch (event) {
case I2C_SLAVE_WRITE_RECEIVED:
- if (eeprom->first_write) {
- eeprom->buffer_idx = *val;
- eeprom->first_write = false;
+ if (eeprom->idx_write_cnt < eeprom->num_address_bytes) {
+ if (eeprom->idx_write_cnt == 0)
+ eeprom->buffer_idx = 0;
+ eeprom->buffer_idx = *val | (eeprom->buffer_idx << 8);
+ eeprom->idx_write_cnt++;
} else {
spin_lock(&eeprom->buffer_lock);
- eeprom->buffer[eeprom->buffer_idx++] = *val;
+ eeprom->buffer[eeprom->buffer_idx++ & eeprom->address_mask] = *val;
spin_unlock(&eeprom->buffer_lock);
}
break;
@@ -50,7 +65,7 @@ static int i2c_slave_eeprom_slave_cb(struct i2c_client *client,
/* fallthrough */
case I2C_SLAVE_READ_REQUESTED:
spin_lock(&eeprom->buffer_lock);
- *val = eeprom->buffer[eeprom->buffer_idx];
+ *val = eeprom->buffer[eeprom->buffer_idx & eeprom->address_mask];
spin_unlock(&eeprom->buffer_lock);
/*
* Do not increment buffer_idx here, because we don't know if
@@ -61,7 +76,7 @@ static int i2c_slave_eeprom_slave_cb(struct i2c_client *client,
case I2C_SLAVE_STOP:
case I2C_SLAVE_WRITE_REQUESTED:
- eeprom->first_write = true;
+ eeprom->idx_write_cnt = 0;
break;
default:
@@ -105,13 +120,16 @@ static int i2c_slave_eeprom_probe(struct i2c_client *client, const struct i2c_de
{
struct eeprom_data *eeprom;
int ret;
- unsigned size = id->driver_data;
+ unsigned int size = FIELD_GET(I2C_SLAVE_BYTELEN, id->driver_data);
+ unsigned int flag_addr16 = FIELD_GET(I2C_SLAVE_FLAG_ADDR16, id->driver_data);
eeprom = devm_kzalloc(&client->dev, sizeof(struct eeprom_data) + size, GFP_KERNEL);
if (!eeprom)
return -ENOMEM;
- eeprom->first_write = true;
+ eeprom->idx_write_cnt = 0;
+ eeprom->num_address_bytes = flag_addr16 ? 2 : 1;
+ eeprom->address_mask = size - 1;
spin_lock_init(&eeprom->buffer_lock);
i2c_set_clientdata(client, eeprom);
@@ -146,7 +164,9 @@ static int i2c_slave_eeprom_remove(struct i2c_client *client)
}
static const struct i2c_device_id i2c_slave_eeprom_id[] = {
- { "slave-24c02", 2048 / 8 },
+ { "slave-24c02", I2C_SLAVE_DEVICE_MAGIC(2048 / 8, 0) },
+ { "slave-24c32", I2C_SLAVE_DEVICE_MAGIC(32768 / 8, I2C_SLAVE_FLAG_ADDR16) },
+ { "slave-24c64", I2C_SLAVE_DEVICE_MAGIC(65536 / 8, I2C_SLAVE_FLAG_ADDR16) },
{ }
};
MODULE_DEVICE_TABLE(i2c, i2c_slave_eeprom_id);
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 41f9e268e3fb..24244a2f68cc 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -54,10 +54,7 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d
for_each_sg_page(umem->sg_head.sgl, &sg_iter, umem->sg_nents, 0) {
page = sg_page_iter_page(&sg_iter);
- if (umem->writable && dirty)
- put_user_pages_dirty_lock(&page, 1);
- else
- put_user_page(page);
+ put_user_pages_dirty_lock(&page, 1, umem->writable && dirty);
}
sg_free_table(&umem->sg_head);
diff --git a/drivers/infiniband/hw/hfi1/user_pages.c b/drivers/infiniband/hw/hfi1/user_pages.c
index b89a9b9aef7a..469acb961fbd 100644
--- a/drivers/infiniband/hw/hfi1/user_pages.c
+++ b/drivers/infiniband/hw/hfi1/user_pages.c
@@ -118,10 +118,7 @@ int hfi1_acquire_user_pages(struct mm_struct *mm, unsigned long vaddr, size_t np
void hfi1_release_user_pages(struct mm_struct *mm, struct page **p,
size_t npages, bool dirty)
{
- if (dirty)
- put_user_pages_dirty_lock(p, npages);
- else
- put_user_pages(p, npages);
+ put_user_pages_dirty_lock(p, npages, dirty);
if (mm) { /* during close after signal, mm can be NULL */
atomic64_sub(npages, &mm->pinned_vm);
diff --git a/drivers/infiniband/hw/qib/qib_user_pages.c b/drivers/infiniband/hw/qib/qib_user_pages.c
index bfbfbb7e0ff4..6bf764e41891 100644
--- a/drivers/infiniband/hw/qib/qib_user_pages.c
+++ b/drivers/infiniband/hw/qib/qib_user_pages.c
@@ -40,10 +40,7 @@
static void __qib_release_user_pages(struct page **p, size_t num_pages,
int dirty)
{
- if (dirty)
- put_user_pages_dirty_lock(p, num_pages);
- else
- put_user_pages(p, num_pages);
+ put_user_pages_dirty_lock(p, num_pages, dirty);
}
/**
diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c
index 0b0237d41613..62e6ffa9ad78 100644
--- a/drivers/infiniband/hw/usnic/usnic_uiom.c
+++ b/drivers/infiniband/hw/usnic/usnic_uiom.c
@@ -75,10 +75,7 @@ static void usnic_uiom_put_pages(struct list_head *chunk_list, int dirty)
for_each_sg(chunk->page_list, sg, chunk->nents, i) {
page = sg_page(sg);
pa = sg_phys(sg);
- if (dirty)
- put_user_pages_dirty_lock(&page, 1);
- else
- put_user_page(page);
+ put_user_pages_dirty_lock(&page, 1, dirty);
usnic_dbg("pa: %pa\n", &pa);
}
kfree(chunk);
diff --git a/drivers/infiniband/sw/siw/siw_mem.c b/drivers/infiniband/sw/siw/siw_mem.c
index 87a56039f0ef..e99983f07663 100644
--- a/drivers/infiniband/sw/siw/siw_mem.c
+++ b/drivers/infiniband/sw/siw/siw_mem.c
@@ -63,15 +63,7 @@ struct siw_mem *siw_mem_id2obj(struct siw_device *sdev, int stag_index)
static void siw_free_plist(struct siw_page_chunk *chunk, int num_pages,
bool dirty)
{
- struct page **p = chunk->plist;
-
- while (num_pages--) {
- if (!PageDirty(*p) && dirty)
- put_user_pages_dirty_lock(p, 1);
- else
- put_user_page(*p);
- p++;
- }
+ put_user_pages_dirty_lock(chunk->plist, num_pages, dirty);
}
void siw_umem_release(struct siw_umem *umem, bool dirty)
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index 9118ab85cb3a..dab4446fe7d8 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -345,6 +345,14 @@ static void __DEBUG_bytes(__u8 *bytes, size_t len, const char *msg, ...)
#define DEBUG_bytes(bytes, len, msg, ...) do { } while (0)
#endif
+static void dm_integrity_prepare(struct request *rq)
+{
+}
+
+static void dm_integrity_complete(struct request *rq, unsigned int nr_bytes)
+{
+}
+
/*
* DM Integrity profile, protection is performed layer above (dm-crypt)
*/
@@ -352,6 +360,8 @@ static const struct blk_integrity_profile dm_integrity_profile = {
.name = "DM-DIF-EXT-TAG",
.generate_fn = NULL,
.verify_fn = NULL,
+ .prepare_fn = dm_integrity_prepare,
+ .complete_fn = dm_integrity_complete,
};
static void dm_integrity_map_continue(struct dm_integrity_io *dio, bool from_map);
diff --git a/drivers/media/i2c/adv748x/adv748x-core.c b/drivers/media/i2c/adv748x/adv748x-core.c
index 0a47d474e97a..23e02ff27b17 100644
--- a/drivers/media/i2c/adv748x/adv748x-core.c
+++ b/drivers/media/i2c/adv748x/adv748x-core.c
@@ -183,14 +183,14 @@ static int adv748x_initialise_clients(struct adv748x_state *state)
int ret;
for (i = ADV748X_PAGE_DPLL; i < ADV748X_PAGE_MAX; ++i) {
- state->i2c_clients[i] = i2c_new_secondary_device(
+ state->i2c_clients[i] = i2c_new_ancillary_device(
state->client,
adv748x_default_addresses[i].name,
adv748x_default_addresses[i].default_addr);
- if (state->i2c_clients[i] == NULL) {
+ if (IS_ERR(state->i2c_clients[i])) {
adv_err(state, "failed to create i2c client %u\n", i);
- return -ENOMEM;
+ return PTR_ERR(state->i2c_clients[i]);
}
ret = adv748x_configure_regmap(state, i);
diff --git a/drivers/media/i2c/adv7604.c b/drivers/media/i2c/adv7604.c
index 28a84bf9f8a9..2dedd6ebb236 100644
--- a/drivers/media/i2c/adv7604.c
+++ b/drivers/media/i2c/adv7604.c
@@ -2862,10 +2862,8 @@ static void adv76xx_unregister_clients(struct adv76xx_state *state)
{
unsigned int i;
- for (i = 1; i < ARRAY_SIZE(state->i2c_clients); ++i) {
- if (state->i2c_clients[i])
- i2c_unregister_device(state->i2c_clients[i]);
- }
+ for (i = 1; i < ARRAY_SIZE(state->i2c_clients); ++i)
+ i2c_unregister_device(state->i2c_clients[i]);
}
static struct i2c_client *adv76xx_dummy_client(struct v4l2_subdev *sd,
@@ -2878,14 +2876,14 @@ static struct i2c_client *adv76xx_dummy_client(struct v4l2_subdev *sd,
struct i2c_client *new_client;
if (pdata && pdata->i2c_addresses[page])
- new_client = i2c_new_dummy(client->adapter,
+ new_client = i2c_new_dummy_device(client->adapter,
pdata->i2c_addresses[page]);
else
- new_client = i2c_new_secondary_device(client,
+ new_client = i2c_new_ancillary_device(client,
adv76xx_default_addresses[page].name,
adv76xx_default_addresses[page].default_addr);
- if (new_client)
+ if (!IS_ERR(new_client))
io_write(sd, io_reg, new_client->addr << 1);
return new_client;
@@ -3516,15 +3514,19 @@ static int adv76xx_probe(struct i2c_client *client,
}
for (i = 1; i < ADV76XX_PAGE_MAX; ++i) {
+ struct i2c_client *dummy_client;
+
if (!(BIT(i) & state->info->page_mask))
continue;
- state->i2c_clients[i] = adv76xx_dummy_client(sd, i);
- if (!state->i2c_clients[i]) {
- err = -EINVAL;
+ dummy_client = adv76xx_dummy_client(sd, i);
+ if (IS_ERR(dummy_client)) {
+ err = PTR_ERR(dummy_client);
v4l2_err(sd, "failed to create i2c client %u\n", i);
goto err_i2c;
}
+
+ state->i2c_clients[i] = dummy_client;
}
INIT_DELAYED_WORK(&state->delayed_work_enable_hotplug,
diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c
index 518945b2f737..2cccd82a3106 100644
--- a/drivers/misc/eeprom/at24.c
+++ b/drivers/misc/eeprom/at24.c
@@ -14,7 +14,6 @@
#include <linux/delay.h>
#include <linux/mutex.h>
#include <linux/mod_devicetable.h>
-#include <linux/log2.h>
#include <linux/bitops.h>
#include <linux/jiffies.h>
#include <linux/property.h>
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 1ede1763a5ee..108f60b46804 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -666,8 +666,6 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
if (WARN_ON_ONCE(!nvme_ns_has_pi(ns)))
return BLK_STS_NOTSUPP;
control |= NVME_RW_PRINFO_PRACT;
- } else if (req_op(req) == REQ_OP_WRITE) {
- t10_pi_prepare(req, ns->pi_type);
}
switch (ns->pi_type) {
@@ -690,13 +688,6 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
void nvme_cleanup_cmd(struct request *req)
{
- if (blk_integrity_rq(req) && req_op(req) == REQ_OP_READ &&
- nvme_req(req)->status == 0) {
- struct nvme_ns *ns = req->rq_disk->private_data;
-
- t10_pi_complete(req, ns->pi_type,
- blk_rq_bytes(req) >> ns->lba_shift);
- }
if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
struct nvme_ns *ns = req->rq_disk->private_data;
struct page *page = req->special_vec.bv_page;
diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
index 1b67bb578f9f..ae21d08c65e8 100644
--- a/drivers/platform/x86/Kconfig
+++ b/drivers/platform/x86/Kconfig
@@ -674,6 +674,7 @@ config EEEPC_LAPTOP
config ASUS_WMI
tristate "ASUS WMI Driver"
depends on ACPI_WMI
+ depends on ACPI_BATTERY
depends on INPUT
depends on HWMON
depends on BACKLIGHT_CLASS_DEVICE
diff --git a/drivers/platform/x86/i2c-multi-instantiate.c b/drivers/platform/x86/i2c-multi-instantiate.c
index 61fe341a85aa..ea68f6ed66ae 100644
--- a/drivers/platform/x86/i2c-multi-instantiate.c
+++ b/drivers/platform/x86/i2c-multi-instantiate.c
@@ -90,7 +90,7 @@ static int i2c_multi_inst_probe(struct platform_device *pdev)
for (i = 0; i < multi->num_clients && inst_data[i].type; i++) {
memset(&board_info, 0, sizeof(board_info));
strlcpy(board_info.type, inst_data[i].type, I2C_NAME_SIZE);
- snprintf(name, sizeof(name), "%s-%s.%d", match->id,
+ snprintf(name, sizeof(name), "%s-%s.%d", dev_name(dev),
inst_data[i].type, i);
board_info.dev_name = name;
switch (inst_data[i].flags & IRQ_RESOURCE_TYPE) {
diff --git a/drivers/platform/x86/pmc_atom.c b/drivers/platform/x86/pmc_atom.c
index 9aca5e7ce6d0..07d1b911e72f 100644
--- a/drivers/platform/x86/pmc_atom.c
+++ b/drivers/platform/x86/pmc_atom.c
@@ -422,6 +422,13 @@ static const struct dmi_system_id critclk_systems[] = {
DMI_MATCH(DMI_PRODUCT_VERSION, "6ES7647-8B"),
},
},
+ {
+ .ident = "SIMATIC IPC277E",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "SIEMENS AG"),
+ DMI_MATCH(DMI_PRODUCT_VERSION, "6AV7882-0"),
+ },
+ },
{ /*sentinel*/ }
};
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 7623196de9e3..50928bc266eb 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -1211,9 +1211,6 @@ static blk_status_t sd_setup_read_write_cmnd(struct scsi_cmnd *cmd)
dix = scsi_prot_sg_count(cmd);
dif = scsi_host_dif_capable(cmd->device->host, sdkp->protection_type);
- if (write && dix)
- t10_pi_prepare(cmd->request, sdkp->protection_type);
-
if (dif || dix)
protect = sd_setup_protect_cmnd(cmd, dix, dif);
else
@@ -2055,11 +2052,6 @@ static int sd_done(struct scsi_cmnd *SCpnt)
"sd_done: completed %d of %d bytes\n",
good_bytes, scsi_bufflen(SCpnt)));
- if (rq_data_dir(SCpnt->request) == READ && scsi_prot_sg_count(SCpnt) &&
- good_bytes)
- t10_pi_complete(SCpnt->request, sdkp->protection_type,
- good_bytes / scsi_prot_interval(SCpnt));
-
return good_bytes;
}
diff --git a/drivers/staging/android/ion/ion_system_heap.c b/drivers/staging/android/ion/ion_system_heap.c
index aa8d8425be25..b83a1d16bd89 100644
--- a/drivers/staging/android/ion/ion_system_heap.c
+++ b/drivers/staging/android/ion/ion_system_heap.c
@@ -120,7 +120,7 @@ static int ion_system_heap_allocate(struct ion_heap *heap,
if (!page)
goto free_pages;
list_add_tail(&page->lru, &pages);
- size_remaining -= PAGE_SIZE << compound_order(page);
+ size_remaining -= page_size(page);
max_order = compound_order(page);
i++;
}
@@ -133,7 +133,7 @@ static int ion_system_heap_allocate(struct ion_heap *heap,
sg = table->sgl;
list_for_each_entry_safe(page, tmp_page, &pages, lru) {
- sg_set_page(sg, page, PAGE_SIZE << compound_order(page), 0);
+ sg_set_page(sg, page, page_size(page), 0);
sg = sg_next(sg);
list_del(&page->lru);
}
diff --git a/drivers/target/tcm_fc/tfc_io.c b/drivers/target/tcm_fc/tfc_io.c
index a254792d882c..1354a157e9af 100644
--- a/drivers/target/tcm_fc/tfc_io.c
+++ b/drivers/target/tcm_fc/tfc_io.c
@@ -136,8 +136,7 @@ int ft_queue_data_in(struct se_cmd *se_cmd)
page, off_in_page, tlen);
fr_len(fp) += tlen;
fp_skb(fp)->data_len += tlen;
- fp_skb(fp)->truesize +=
- PAGE_SIZE << compound_order(page);
+ fp_skb(fp)->truesize += page_size(page);
} else {
BUG_ON(!page);
from = kmap_atomic(page + (mem_off >> PAGE_SHIFT));
diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
index 213ff03c8a9f..59d9d512dcda 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -17,6 +17,7 @@
#include <linux/blkdev.h>
#include <linux/pagemap.h>
#include <linux/export.h>
+#include <linux/fs_parser.h>
#include <linux/hid.h>
#include <linux/mm.h>
#include <linux/module.h>
@@ -1451,9 +1452,9 @@ struct ffs_sb_fill_data {
struct ffs_data *ffs_data;
};
-static int ffs_sb_fill(struct super_block *sb, void *_data, int silent)
+static int ffs_sb_fill(struct super_block *sb, struct fs_context *fc)
{
- struct ffs_sb_fill_data *data = _data;
+ struct ffs_sb_fill_data *data = fc->fs_private;
struct inode *inode;
struct ffs_data *ffs = data->ffs_data;
@@ -1486,147 +1487,152 @@ static int ffs_sb_fill(struct super_block *sb, void *_data, int silent)
return 0;
}
-static int ffs_fs_parse_opts(struct ffs_sb_fill_data *data, char *opts)
-{
- ENTER();
+enum {
+ Opt_no_disconnect,
+ Opt_rmode,
+ Opt_fmode,
+ Opt_mode,
+ Opt_uid,
+ Opt_gid,
+};
- if (!opts || !*opts)
- return 0;
+static const struct fs_parameter_spec ffs_fs_param_specs[] = {
+ fsparam_bool ("no_disconnect", Opt_no_disconnect),
+ fsparam_u32 ("rmode", Opt_rmode),
+ fsparam_u32 ("fmode", Opt_fmode),
+ fsparam_u32 ("mode", Opt_mode),
+ fsparam_u32 ("uid", Opt_uid),
+ fsparam_u32 ("gid", Opt_gid),
+ {}
+};
- for (;;) {
- unsigned long value;
- char *eq, *comma;
-
- /* Option limit */
- comma = strchr(opts, ',');
- if (comma)
- *comma = 0;
-
- /* Value limit */
- eq = strchr(opts, '=');
- if (unlikely(!eq)) {
- pr_err("'=' missing in %s\n", opts);
- return -EINVAL;
- }
- *eq = 0;
+static const struct fs_parameter_description ffs_fs_fs_parameters = {
+ .name = "kAFS",
+ .specs = ffs_fs_param_specs,
+};
- /* Parse value */
- if (kstrtoul(eq + 1, 0, &value)) {
- pr_err("%s: invalid value: %s\n", opts, eq + 1);
- return -EINVAL;
- }
+static int ffs_fs_parse_param(struct fs_context *fc, struct fs_parameter *param)
+{
+ struct ffs_sb_fill_data *data = fc->fs_private;
+ struct fs_parse_result result;
+ int opt;
- /* Interpret option */
- switch (eq - opts) {
- case 13:
- if (!memcmp(opts, "no_disconnect", 13))
- data->no_disconnect = !!value;
- else
- goto invalid;
- break;
- case 5:
- if (!memcmp(opts, "rmode", 5))
- data->root_mode = (value & 0555) | S_IFDIR;
- else if (!memcmp(opts, "fmode", 5))
- data->perms.mode = (value & 0666) | S_IFREG;
- else
- goto invalid;
- break;
+ ENTER();
- case 4:
- if (!memcmp(opts, "mode", 4)) {
- data->root_mode = (value & 0555) | S_IFDIR;
- data->perms.mode = (value & 0666) | S_IFREG;
- } else {
- goto invalid;
- }
- break;
+ opt = fs_parse(fc, &ffs_fs_fs_parameters, param, &result);
+ if (opt < 0)
+ return opt;
- case 3:
- if (!memcmp(opts, "uid", 3)) {
- data->perms.uid = make_kuid(current_user_ns(), value);
- if (!uid_valid(data->perms.uid)) {
- pr_err("%s: unmapped value: %lu\n", opts, value);
- return -EINVAL;
- }
- } else if (!memcmp(opts, "gid", 3)) {
- data->perms.gid = make_kgid(current_user_ns(), value);
- if (!gid_valid(data->perms.gid)) {
- pr_err("%s: unmapped value: %lu\n", opts, value);
- return -EINVAL;
- }
- } else {
- goto invalid;
- }
- break;
+ switch (opt) {
+ case Opt_no_disconnect:
+ data->no_disconnect = result.boolean;
+ break;
+ case Opt_rmode:
+ data->root_mode = (result.uint_32 & 0555) | S_IFDIR;
+ break;
+ case Opt_fmode:
+ data->perms.mode = (result.uint_32 & 0666) | S_IFREG;
+ break;
+ case Opt_mode:
+ data->root_mode = (result.uint_32 & 0555) | S_IFDIR;
+ data->perms.mode = (result.uint_32 & 0666) | S_IFREG;
+ break;
- default:
-invalid:
- pr_err("%s: invalid option\n", opts);
- return -EINVAL;
- }
+ case Opt_uid:
+ data->perms.uid = make_kuid(current_user_ns(), result.uint_32);
+ if (!uid_valid(data->perms.uid))
+ goto unmapped_value;
+ break;
+ case Opt_gid:
+ data->perms.gid = make_kgid(current_user_ns(), result.uint_32);
+ if (!gid_valid(data->perms.gid))
+ goto unmapped_value;
+ break;
- /* Next iteration */
- if (!comma)
- break;
- opts = comma + 1;
+ default:
+ return -ENOPARAM;
}
return 0;
-}
-/* "mount -t functionfs dev_name /dev/function" ends up here */
+unmapped_value:
+ return invalf(fc, "%s: unmapped value: %u", param->key, result.uint_32);
+}
-static struct dentry *
-ffs_fs_mount(struct file_system_type *t, int flags,
- const char *dev_name, void *opts)
-{
- struct ffs_sb_fill_data data = {
- .perms = {
- .mode = S_IFREG | 0600,
- .uid = GLOBAL_ROOT_UID,
- .gid = GLOBAL_ROOT_GID,
- },
- .root_mode = S_IFDIR | 0500,
- .no_disconnect = false,
- };
- struct dentry *rv;
- int ret;
+/*
+ * Set up the superblock for a mount.
+ */
+static int ffs_fs_get_tree(struct fs_context *fc)
+{
+ struct ffs_sb_fill_data *ctx = fc->fs_private;
void *ffs_dev;
struct ffs_data *ffs;
ENTER();
- ret = ffs_fs_parse_opts(&data, opts);
- if (unlikely(ret < 0))
- return ERR_PTR(ret);
+ if (!fc->source)
+ return invalf(fc, "No source specified");
- ffs = ffs_data_new(dev_name);
+ ffs = ffs_data_new(fc->source);
if (unlikely(!ffs))
- return ERR_PTR(-ENOMEM);
- ffs->file_perms = data.perms;
- ffs->no_disconnect = data.no_disconnect;
+ return -ENOMEM;
+ ffs->file_perms = ctx->perms;
+ ffs->no_disconnect = ctx->no_disconnect;
- ffs->dev_name = kstrdup(dev_name, GFP_KERNEL);
+ ffs->dev_name = kstrdup(fc->source, GFP_KERNEL);
if (unlikely(!ffs->dev_name)) {
ffs_data_put(ffs);
- return ERR_PTR(-ENOMEM);
+ return -ENOMEM;
}
- ffs_dev = ffs_acquire_dev(dev_name);
+ ffs_dev = ffs_acquire_dev(ffs->dev_name);
if (IS_ERR(ffs_dev)) {
ffs_data_put(ffs);
- return ERR_CAST(ffs_dev);
+ return PTR_ERR(ffs_dev);
}
+
ffs->private_data = ffs_dev;
- data.ffs_data = ffs;
+ ctx->ffs_data = ffs;
+ return get_tree_nodev(fc, ffs_sb_fill);
+}
+
+static void ffs_fs_free_fc(struct fs_context *fc)
+{
+ struct ffs_sb_fill_data *ctx = fc->fs_private;
+
+ if (ctx) {
+ if (ctx->ffs_data) {
+ ffs_release_dev(ctx->ffs_data);
+ ffs_data_put(ctx->ffs_data);
+ }
- rv = mount_nodev(t, flags, &data, ffs_sb_fill);
- if (IS_ERR(rv) && data.ffs_data) {
- ffs_release_dev(data.ffs_data);
- ffs_data_put(data.ffs_data);
+ kfree(ctx);
}
- return rv;
+}
+
+static const struct fs_context_operations ffs_fs_context_ops = {
+ .free = ffs_fs_free_fc,
+ .parse_param = ffs_fs_parse_param,
+ .get_tree = ffs_fs_get_tree,
+};
+
+static int ffs_fs_init_fs_context(struct fs_context *fc)
+{
+ struct ffs_sb_fill_data *ctx;
+
+ ctx = kzalloc(sizeof(struct ffs_sb_fill_data), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ ctx->perms.mode = S_IFREG | 0600;
+ ctx->perms.uid = GLOBAL_ROOT_UID;
+ ctx->perms.gid = GLOBAL_ROOT_GID;
+ ctx->root_mode = S_IFDIR | 0500;
+ ctx->no_disconnect = false;
+
+ fc->fs_private = ctx;
+ fc->ops = &ffs_fs_context_ops;
+ return 0;
}
static void
@@ -1644,7 +1650,8 @@ ffs_fs_kill_sb(struct super_block *sb)
static struct file_system_type ffs_fs_type = {
.owner = THIS_MODULE,
.name = "functionfs",
- .mount = ffs_fs_mount,
+ .init_fs_context = ffs_fs_init_fs_context,
+ .parameters = &ffs_fs_fs_parameters,
.kill_sb = ffs_fs_kill_sb,
};
MODULE_ALIAS_FS("functionfs");
diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
index 3b18fa4d090a..26cef65b41e7 100644
--- a/drivers/vfio/vfio_iommu_spapr_tce.c
+++ b/drivers/vfio/vfio_iommu_spapr_tce.c
@@ -176,13 +176,13 @@ put_exit:
}
static bool tce_page_is_contained(struct mm_struct *mm, unsigned long hpa,
- unsigned int page_shift)
+ unsigned int it_page_shift)
{
struct page *page;
unsigned long size = 0;
- if (mm_iommu_is_devmem(mm, hpa, page_shift, &size))
- return size == (1UL << page_shift);
+ if (mm_iommu_is_devmem(mm, hpa, it_page_shift, &size))
+ return size == (1UL << it_page_shift);
page = pfn_to_page(hpa >> PAGE_SHIFT);
/*
@@ -190,7 +190,7 @@ static bool tce_page_is_contained(struct mm_struct *mm, unsigned long hpa,
* a page we just found. Otherwise the hardware can get access to
* a bigger memory chunk that it should.
*/
- return (PAGE_SHIFT + compound_order(compound_head(page))) >= page_shift;
+ return page_shift(compound_head(page)) >= it_page_shift;
}
static inline bool tce_groups_attached(struct tce_container *container)
diff --git a/drivers/watchdog/iTCO_wdt.c b/drivers/watchdog/iTCO_wdt.c
index c559f706ae7e..156360e37714 100644
--- a/drivers/watchdog/iTCO_wdt.c
+++ b/drivers/watchdog/iTCO_wdt.c
@@ -48,6 +48,7 @@
/* Includes */
#include <linux/acpi.h> /* For ACPI support */
+#include <linux/bits.h> /* For BIT() */
#include <linux/module.h> /* For module specific items */
#include <linux/moduleparam.h> /* For new moduleparam's */
#include <linux/types.h> /* For standard types (like size_t) */
@@ -215,6 +216,23 @@ static int update_no_reboot_bit_mem(void *priv, bool set)
return 0;
}
+static int update_no_reboot_bit_cnt(void *priv, bool set)
+{
+ struct iTCO_wdt_private *p = priv;
+ u16 val, newval;
+
+ val = inw(TCO1_CNT(p));
+ if (set)
+ val |= BIT(0);
+ else
+ val &= ~BIT(0);
+ outw(val, TCO1_CNT(p));
+ newval = inw(TCO1_CNT(p));
+
+ /* make sure the update is successful */
+ return val != newval ? -EIO : 0;
+}
+
static void iTCO_wdt_no_reboot_bit_setup(struct iTCO_wdt_private *p,
struct itco_wdt_platform_data *pdata)
{
@@ -224,7 +242,9 @@ static void iTCO_wdt_no_reboot_bit_setup(struct iTCO_wdt_private *p,
return;
}
- if (p->iTCO_version >= 2)
+ if (p->iTCO_version >= 6)
+ p->update_no_reboot_bit = update_no_reboot_bit_cnt;
+ else if (p->iTCO_version >= 2)
p->update_no_reboot_bit = update_no_reboot_bit_mem;
else if (p->iTCO_version == 1)
p->update_no_reboot_bit = update_no_reboot_bit_pci;
@@ -452,7 +472,8 @@ static int iTCO_wdt_probe(struct platform_device *pdev)
* Get the Memory-Mapped GCS or PMC register, we need it for the
* NO_REBOOT flag (TCO v2 and v3).
*/
- if (p->iTCO_version >= 2 && !pdata->update_no_reboot_bit) {
+ if (p->iTCO_version >= 2 && p->iTCO_version < 6 &&
+ !pdata->update_no_reboot_bit) {
p->gcs_pmc_res = platform_get_resource(pdev,
IORESOURCE_MEM,
ICH_RES_MEM_GCS_PMC);
@@ -502,6 +523,7 @@ static int iTCO_wdt_probe(struct platform_device *pdev)
/* Clear out the (probably old) status */
switch (p->iTCO_version) {
+ case 6:
case 5:
case 4:
outw(0x0008, TCO1_STS(p)); /* Clear the Time Out Status bit */
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index d4e11b2e04f6..cec3b4146440 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -670,26 +670,6 @@ out:
* libraries. There is no binary dependent code anywhere else.
*/
-#ifndef STACK_RND_MASK
-#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12)) /* 8MB of VA */
-#endif
-
-static unsigned long randomize_stack_top(unsigned long stack_top)
-{
- unsigned long random_variable = 0;
-
- if (current->flags & PF_RANDOMIZE) {
- random_variable = get_random_long();
- random_variable &= STACK_RND_MASK;
- random_variable <<= PAGE_SHIFT;
- }
-#ifdef CONFIG_STACK_GROWSUP
- return PAGE_ALIGN(stack_top) + random_variable;
-#else
- return PAGE_ALIGN(stack_top) - random_variable;
-#endif
-}
-
static int load_elf_binary(struct linux_binprm *bprm)
{
struct file *interpreter = NULL; /* to shut gcc up */
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index 1bda2ab6745b..814ad2c2ba80 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -1100,8 +1100,11 @@ static int fat_zeroed_cluster(struct inode *dir, sector_t blknr, int nr_used,
err = -ENOMEM;
goto error;
}
+ /* Avoid race with userspace read via bdev */
+ lock_buffer(bhs[n]);
memset(bhs[n]->b_data, 0, sb->s_blocksize);
set_buffer_uptodate(bhs[n]);
+ unlock_buffer(bhs[n]);
mark_buffer_dirty_inode(bhs[n], dir);
n++;
@@ -1158,6 +1161,8 @@ int fat_alloc_new_dir(struct inode *dir, struct timespec64 *ts)
fat_time_unix2fat(sbi, ts, &time, &date, &time_cs);
de = (struct msdos_dir_entry *)bhs[0]->b_data;
+ /* Avoid race with userspace read via bdev */
+ lock_buffer(bhs[0]);
/* filling the new directory slots ("." and ".." entries) */
memcpy(de[0].name, MSDOS_DOT, MSDOS_NAME);
memcpy(de[1].name, MSDOS_DOTDOT, MSDOS_NAME);
@@ -1180,6 +1185,7 @@ int fat_alloc_new_dir(struct inode *dir, struct timespec64 *ts)
de[0].size = de[1].size = 0;
memset(de + 2, 0, sb->s_blocksize - 2 * sizeof(*de));
set_buffer_uptodate(bhs[0]);
+ unlock_buffer(bhs[0]);
mark_buffer_dirty_inode(bhs[0], dir);
err = fat_zeroed_cluster(dir, blknr, 1, bhs, MAX_BUF_PER_PAGE);
@@ -1237,11 +1243,14 @@ static int fat_add_new_entries(struct inode *dir, void *slots, int nr_slots,
/* fill the directory entry */
copy = min(size, sb->s_blocksize);
+ /* Avoid race with userspace read via bdev */
+ lock_buffer(bhs[n]);
memcpy(bhs[n]->b_data, slots, copy);
- slots += copy;
- size -= copy;
set_buffer_uptodate(bhs[n]);
+ unlock_buffer(bhs[n]);
mark_buffer_dirty_inode(bhs[n], dir);
+ slots += copy;
+ size -= copy;
if (!size)
break;
n++;
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c
index 265983635f2b..3647c65a0f48 100644
--- a/fs/fat/fatent.c
+++ b/fs/fat/fatent.c
@@ -388,8 +388,11 @@ static int fat_mirror_bhs(struct super_block *sb, struct buffer_head **bhs,
err = -ENOMEM;
goto error;
}
+ /* Avoid race with userspace read via bdev */
+ lock_buffer(c_bh);
memcpy(c_bh->b_data, bhs[n]->b_data, sb->s_blocksize);
set_buffer_uptodate(c_bh);
+ unlock_buffer(c_bh);
mark_buffer_dirty_inode(c_bh, sbi->fat_inode);
if (sb->s_flags & SB_SYNCHRONOUS)
err = sync_dirty_buffer(c_bh);
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 6b450065b9d5..5f89c515f5bb 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -584,10 +584,10 @@ struct gfs2_args {
unsigned int ar_rgrplvb:1; /* use lvbs for rgrp info */
unsigned int ar_loccookie:1; /* use location based readdir
cookies */
- int ar_commit; /* Commit interval */
- int ar_statfs_quantum; /* The fast statfs interval */
- int ar_quota_quantum; /* The quota interval */
- int ar_statfs_percent; /* The % change to force sync */
+ s32 ar_commit; /* Commit interval */
+ s32 ar_statfs_quantum; /* The fast statfs interval */
+ s32 ar_quota_quantum; /* The quota interval */
+ s32 ar_statfs_percent; /* The % change to force sync */
};
struct gfs2_tune {
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index f3fd5cd9d43f..681b44682b0d 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -21,6 +21,7 @@
#include <linux/lockdep.h>
#include <linux/module.h>
#include <linux/backing-dev.h>
+#include <linux/fs_parser.h>
#include "gfs2.h"
#include "incore.h"
@@ -1031,16 +1032,17 @@ void gfs2_online_uevent(struct gfs2_sbd *sdp)
}
/**
- * fill_super - Read in superblock
+ * gfs2_fill_super - Read in superblock
* @sb: The VFS superblock
- * @data: Mount options
+ * @args: Mount options
* @silent: Don't complain if it's not a GFS2 filesystem
*
- * Returns: errno
+ * Returns: -errno
*/
-
-static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent)
+static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc)
{
+ struct gfs2_args *args = fc->fs_private;
+ int silent = fc->sb_flags & SB_SILENT;
struct gfs2_sbd *sdp;
struct gfs2_holder mount_gh;
int error;
@@ -1205,161 +1207,411 @@ fail_debug:
return error;
}
-static int set_gfs2_super(struct super_block *s, void *data)
+/**
+ * gfs2_get_tree - Get the GFS2 superblock and root directory
+ * @fc: The filesystem context
+ *
+ * Returns: 0 or -errno on error
+ */
+static int gfs2_get_tree(struct fs_context *fc)
{
- s->s_bdev = data;
- s->s_dev = s->s_bdev->bd_dev;
- s->s_bdi = bdi_get(s->s_bdev->bd_bdi);
+ struct gfs2_args *args = fc->fs_private;
+ struct gfs2_sbd *sdp;
+ int error;
+
+ error = get_tree_bdev(fc, gfs2_fill_super);
+ if (error)
+ return error;
+
+ sdp = fc->root->d_sb->s_fs_info;
+ dput(fc->root);
+ if (args->ar_meta)
+ fc->root = dget(sdp->sd_master_dir);
+ else
+ fc->root = dget(sdp->sd_root_dir);
return 0;
}
-static int test_gfs2_super(struct super_block *s, void *ptr)
+static void gfs2_fc_free(struct fs_context *fc)
{
- struct block_device *bdev = ptr;
- return (bdev == s->s_bdev);
+ struct gfs2_args *args = fc->fs_private;
+
+ kfree(args);
}
-/**
- * gfs2_mount - Get the GFS2 superblock
- * @fs_type: The GFS2 filesystem type
- * @flags: Mount flags
- * @dev_name: The name of the device
- * @data: The mount arguments
- *
- * Q. Why not use get_sb_bdev() ?
- * A. We need to select one of two root directories to mount, independent
- * of whether this is the initial, or subsequent, mount of this sb
- *
- * Returns: 0 or -ve on error
- */
+enum gfs2_param {
+ Opt_lockproto,
+ Opt_locktable,
+ Opt_hostdata,
+ Opt_spectator,
+ Opt_ignore_local_fs,
+ Opt_localflocks,
+ Opt_localcaching,
+ Opt_debug,
+ Opt_upgrade,
+ Opt_acl,
+ Opt_quota,
+ Opt_suiddir,
+ Opt_data,
+ Opt_meta,
+ Opt_discard,
+ Opt_commit,
+ Opt_errors,
+ Opt_statfs_quantum,
+ Opt_statfs_percent,
+ Opt_quota_quantum,
+ Opt_barrier,
+ Opt_rgrplvb,
+ Opt_loccookie,
+};
-static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags,
- const char *dev_name, void *data)
-{
- struct block_device *bdev;
- struct super_block *s;
- fmode_t mode = FMODE_READ | FMODE_EXCL;
- int error;
- struct gfs2_args args;
- struct gfs2_sbd *sdp;
+enum opt_quota {
+ Opt_quota_unset = 0,
+ Opt_quota_off,
+ Opt_quota_account,
+ Opt_quota_on,
+};
+
+static const unsigned int opt_quota_values[] = {
+ [Opt_quota_off] = GFS2_QUOTA_OFF,
+ [Opt_quota_account] = GFS2_QUOTA_ACCOUNT,
+ [Opt_quota_on] = GFS2_QUOTA_ON,
+};
- if (!(flags & SB_RDONLY))
- mode |= FMODE_WRITE;
+enum opt_data {
+ Opt_data_writeback = GFS2_DATA_WRITEBACK,
+ Opt_data_ordered = GFS2_DATA_ORDERED,
+};
- bdev = blkdev_get_by_path(dev_name, mode, fs_type);
- if (IS_ERR(bdev))
- return ERR_CAST(bdev);
+enum opt_errors {
+ Opt_errors_withdraw = GFS2_ERRORS_WITHDRAW,
+ Opt_errors_panic = GFS2_ERRORS_PANIC,
+};
- /*
- * once the super is inserted into the list by sget, s_umount
- * will protect the lockfs code from trying to start a snapshot
- * while we are mounting
- */
- mutex_lock(&bdev->bd_fsfreeze_mutex);
- if (bdev->bd_fsfreeze_count > 0) {
- mutex_unlock(&bdev->bd_fsfreeze_mutex);
- error = -EBUSY;
- goto error_bdev;
- }
- s = sget(fs_type, test_gfs2_super, set_gfs2_super, flags, bdev);
- mutex_unlock(&bdev->bd_fsfreeze_mutex);
- error = PTR_ERR(s);
- if (IS_ERR(s))
- goto error_bdev;
-
- if (s->s_root) {
- /*
- * s_umount nests inside bd_mutex during
- * __invalidate_device(). blkdev_put() acquires
- * bd_mutex and can't be called under s_umount. Drop
- * s_umount temporarily. This is safe as we're
- * holding an active reference.
- */
- up_write(&s->s_umount);
- blkdev_put(bdev, mode);
- down_write(&s->s_umount);
- } else {
- /* s_mode must be set before deactivate_locked_super calls */
- s->s_mode = mode;
- }
+static const struct fs_parameter_spec gfs2_param_specs[] = {
+ fsparam_string ("lockproto", Opt_lockproto),
+ fsparam_string ("locktable", Opt_locktable),
+ fsparam_string ("hostdata", Opt_hostdata),
+ fsparam_flag ("spectator", Opt_spectator),
+ fsparam_flag ("norecovery", Opt_spectator),
+ fsparam_flag ("ignore_local_fs", Opt_ignore_local_fs),
+ fsparam_flag ("localflocks", Opt_localflocks),
+ fsparam_flag ("localcaching", Opt_localcaching),
+ fsparam_flag_no("debug", Opt_debug),
+ fsparam_flag ("upgrade", Opt_upgrade),
+ fsparam_flag_no("acl", Opt_acl),
+ fsparam_flag_no("suiddir", Opt_suiddir),
+ fsparam_enum ("data", Opt_data),
+ fsparam_flag ("meta", Opt_meta),
+ fsparam_flag_no("discard", Opt_discard),
+ fsparam_s32 ("commit", Opt_commit),
+ fsparam_enum ("errors", Opt_errors),
+ fsparam_s32 ("statfs_quantum", Opt_statfs_quantum),
+ fsparam_s32 ("statfs_percent", Opt_statfs_percent),
+ fsparam_s32 ("quota_quantum", Opt_quota_quantum),
+ fsparam_flag_no("barrier", Opt_barrier),
+ fsparam_flag_no("rgrplvb", Opt_rgrplvb),
+ fsparam_flag_no("loccookie", Opt_loccookie),
+ /* quota can be a flag or an enum so it gets special treatment */
+ __fsparam(fs_param_is_enum, "quota", Opt_quota, fs_param_neg_with_no|fs_param_v_optional),
+ {}
+};
- memset(&args, 0, sizeof(args));
- args.ar_quota = GFS2_QUOTA_DEFAULT;
- args.ar_data = GFS2_DATA_DEFAULT;
- args.ar_commit = 30;
- args.ar_statfs_quantum = 30;
- args.ar_quota_quantum = 60;
- args.ar_errors = GFS2_ERRORS_DEFAULT;
+static const struct fs_parameter_enum gfs2_param_enums[] = {
+ { Opt_quota, "off", Opt_quota_off },
+ { Opt_quota, "account", Opt_quota_account },
+ { Opt_quota, "on", Opt_quota_on },
+ { Opt_data, "writeback", Opt_data_writeback },
+ { Opt_data, "ordered", Opt_data_ordered },
+ { Opt_errors, "withdraw", Opt_errors_withdraw },
+ { Opt_errors, "panic", Opt_errors_panic },
+ {}
+};
- error = gfs2_mount_args(&args, data);
- if (error) {
- pr_warn("can't parse mount arguments\n");
- goto error_super;
+const struct fs_parameter_description gfs2_fs_parameters = {
+ .name = "gfs2",
+ .specs = gfs2_param_specs,
+ .enums = gfs2_param_enums,
+};
+
+/* Parse a single mount parameter */
+static int gfs2_parse_param(struct fs_context *fc, struct fs_parameter *param)
+{
+ struct gfs2_args *args = fc->fs_private;
+ struct fs_parse_result result;
+ int o;
+
+ o = fs_parse(fc, &gfs2_fs_parameters, param, &result);
+ if (o < 0)
+ return o;
+
+ switch (o) {
+ case Opt_lockproto:
+ strlcpy(args->ar_lockproto, param->string, GFS2_LOCKNAME_LEN);
+ break;
+ case Opt_locktable:
+ strlcpy(args->ar_locktable, param->string, GFS2_LOCKNAME_LEN);
+ break;
+ case Opt_hostdata:
+ strlcpy(args->ar_hostdata, param->string, GFS2_LOCKNAME_LEN);
+ break;
+ case Opt_spectator:
+ args->ar_spectator = 1;
+ break;
+ case Opt_ignore_local_fs:
+ /* Retained for backwards compat only */
+ break;
+ case Opt_localflocks:
+ args->ar_localflocks = 1;
+ break;
+ case Opt_localcaching:
+ /* Retained for backwards compat only */
+ break;
+ case Opt_debug:
+ if (result.boolean && args->ar_errors == GFS2_ERRORS_PANIC)
+ return invalf(fc, "gfs2: -o debug and -o errors=panic are mutually exclusive");
+ args->ar_debug = result.boolean;
+ break;
+ case Opt_upgrade:
+ /* Retained for backwards compat only */
+ break;
+ case Opt_acl:
+ args->ar_posix_acl = result.boolean;
+ break;
+ case Opt_quota:
+ /* The quota option can be a flag or an enum. A non-zero int_32
+ result means that we have an enum index. Otherwise we have
+ to rely on the 'negated' flag to tell us whether 'quota' or
+ 'noquota' was specified. */
+ if (result.negated)
+ args->ar_quota = GFS2_QUOTA_OFF;
+ else if (result.int_32 > 0)
+ args->ar_quota = opt_quota_values[result.int_32];
+ else
+ args->ar_quota = GFS2_QUOTA_ON;
+ break;
+ case Opt_suiddir:
+ args->ar_suiddir = result.boolean;
+ break;
+ case Opt_data:
+ /* The uint_32 result maps directly to GFS2_DATA_* */
+ args->ar_data = result.uint_32;
+ break;
+ case Opt_meta:
+ args->ar_meta = 1;
+ break;
+ case Opt_discard:
+ args->ar_discard = result.boolean;
+ break;
+ case Opt_commit:
+ if (result.int_32 <= 0)
+ return invalf(fc, "gfs2: commit mount option requires a positive numeric argument");
+ args->ar_commit = result.int_32;
+ break;
+ case Opt_statfs_quantum:
+ if (result.int_32 < 0)
+ return invalf(fc, "gfs2: statfs_quantum mount option requires a non-negative numeric argument");
+ args->ar_statfs_quantum = result.int_32;
+ break;
+ case Opt_quota_quantum:
+ if (result.int_32 <= 0)
+ return invalf(fc, "gfs2: quota_quantum mount option requires a positive numeric argument");
+ args->ar_quota_quantum = result.int_32;
+ break;
+ case Opt_statfs_percent:
+ if (result.int_32 < 0 || result.int_32 > 100)
+ return invalf(fc, "gfs2: statfs_percent mount option requires a numeric argument between 0 and 100");
+ args->ar_statfs_percent = result.int_32;
+ break;
+ case Opt_errors:
+ if (args->ar_debug && result.uint_32 == GFS2_ERRORS_PANIC)
+ return invalf(fc, "gfs2: -o debug and -o errors=panic are mutually exclusive");
+ args->ar_errors = result.uint_32;
+ break;
+ case Opt_barrier:
+ args->ar_nobarrier = result.boolean;
+ break;
+ case Opt_rgrplvb:
+ args->ar_rgrplvb = result.boolean;
+ break;
+ case Opt_loccookie:
+ args->ar_loccookie = result.boolean;
+ break;
+ default:
+ return invalf(fc, "gfs2: invalid mount option: %s", param->key);
}
+ return 0;
+}
- if (s->s_root) {
- error = -EBUSY;
- if ((flags ^ s->s_flags) & SB_RDONLY)
- goto error_super;
- } else {
- snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev);
- sb_set_blocksize(s, block_size(bdev));
- error = fill_super(s, &args, flags & SB_SILENT ? 1 : 0);
- if (error)
- goto error_super;
- s->s_flags |= SB_ACTIVE;
- bdev->bd_super = s;
+static int gfs2_reconfigure(struct fs_context *fc)
+{
+ struct super_block *sb = fc->root->d_sb;
+ struct gfs2_sbd *sdp = sb->s_fs_info;
+ struct gfs2_args *oldargs = &sdp->sd_args;
+ struct gfs2_args *newargs = fc->fs_private;
+ struct gfs2_tune *gt = &sdp->sd_tune;
+ int error = 0;
+
+ sync_filesystem(sb);
+
+ spin_lock(&gt->gt_spin);
+ oldargs->ar_commit = gt->gt_logd_secs;
+ oldargs->ar_quota_quantum = gt->gt_quota_quantum;
+ if (gt->gt_statfs_slow)
+ oldargs->ar_statfs_quantum = 0;
+ else
+ oldargs->ar_statfs_quantum = gt->gt_statfs_quantum;
+ spin_unlock(&gt->gt_spin);
+
+ if (strcmp(newargs->ar_lockproto, oldargs->ar_lockproto)) {
+ errorf(fc, "gfs2: reconfiguration of locking protocol not allowed");
+ return -EINVAL;
+ }
+ if (strcmp(newargs->ar_locktable, oldargs->ar_locktable)) {
+ errorf(fc, "gfs2: reconfiguration of lock table not allowed");
+ return -EINVAL;
+ }
+ if (strcmp(newargs->ar_hostdata, oldargs->ar_hostdata)) {
+ errorf(fc, "gfs2: reconfiguration of host data not allowed");
+ return -EINVAL;
+ }
+ if (newargs->ar_spectator != oldargs->ar_spectator) {
+ errorf(fc, "gfs2: reconfiguration of spectator mode not allowed");
+ return -EINVAL;
+ }
+ if (newargs->ar_localflocks != oldargs->ar_localflocks) {
+ errorf(fc, "gfs2: reconfiguration of localflocks not allowed");
+ return -EINVAL;
+ }
+ if (newargs->ar_meta != oldargs->ar_meta) {
+ errorf(fc, "gfs2: switching between gfs2 and gfs2meta not allowed");
+ return -EINVAL;
+ }
+ if (oldargs->ar_spectator)
+ fc->sb_flags |= SB_RDONLY;
+
+ if ((sb->s_flags ^ fc->sb_flags) & SB_RDONLY) {
+ if (fc->sb_flags & SB_RDONLY) {
+ error = gfs2_make_fs_ro(sdp);
+ if (error)
+ errorf(fc, "gfs2: unable to remount read-only");
+ } else {
+ error = gfs2_make_fs_rw(sdp);
+ if (error)
+ errorf(fc, "gfs2: unable to remount read-write");
+ }
}
+ sdp->sd_args = *newargs;
- sdp = s->s_fs_info;
- if (args.ar_meta)
- return dget(sdp->sd_master_dir);
+ if (sdp->sd_args.ar_posix_acl)
+ sb->s_flags |= SB_POSIXACL;
+ else
+ sb->s_flags &= ~SB_POSIXACL;
+ if (sdp->sd_args.ar_nobarrier)
+ set_bit(SDF_NOBARRIERS, &sdp->sd_flags);
else
- return dget(sdp->sd_root_dir);
-
-error_super:
- deactivate_locked_super(s);
- return ERR_PTR(error);
-error_bdev:
- blkdev_put(bdev, mode);
- return ERR_PTR(error);
+ clear_bit(SDF_NOBARRIERS, &sdp->sd_flags);
+ spin_lock(&gt->gt_spin);
+ gt->gt_logd_secs = newargs->ar_commit;
+ gt->gt_quota_quantum = newargs->ar_quota_quantum;
+ if (newargs->ar_statfs_quantum) {
+ gt->gt_statfs_slow = 0;
+ gt->gt_statfs_quantum = newargs->ar_statfs_quantum;
+ }
+ else {
+ gt->gt_statfs_slow = 1;
+ gt->gt_statfs_quantum = 30;
+ }
+ spin_unlock(&gt->gt_spin);
+
+ gfs2_online_uevent(sdp);
+ return error;
+}
+
+static const struct fs_context_operations gfs2_context_ops = {
+ .free = gfs2_fc_free,
+ .parse_param = gfs2_parse_param,
+ .get_tree = gfs2_get_tree,
+ .reconfigure = gfs2_reconfigure,
+};
+
+/* Set up the filesystem mount context */
+static int gfs2_init_fs_context(struct fs_context *fc)
+{
+ struct gfs2_args *args;
+
+ args = kzalloc(sizeof(*args), GFP_KERNEL);
+ if (args == NULL)
+ return -ENOMEM;
+
+ args->ar_quota = GFS2_QUOTA_DEFAULT;
+ args->ar_data = GFS2_DATA_DEFAULT;
+ args->ar_commit = 30;
+ args->ar_statfs_quantum = 30;
+ args->ar_quota_quantum = 60;
+ args->ar_errors = GFS2_ERRORS_DEFAULT;
+
+ fc->fs_private = args;
+ fc->ops = &gfs2_context_ops;
+ return 0;
}
-static int set_meta_super(struct super_block *s, void *ptr)
+static int set_meta_super(struct super_block *s, struct fs_context *fc)
{
return -EINVAL;
}
-static struct dentry *gfs2_mount_meta(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data)
+static int test_meta_super(struct super_block *s, struct fs_context *fc)
+{
+ return (fc->sget_key == s->s_bdev);
+}
+
+static int gfs2_meta_get_tree(struct fs_context *fc)
{
struct super_block *s;
struct gfs2_sbd *sdp;
struct path path;
int error;
- if (!dev_name || !*dev_name)
- return ERR_PTR(-EINVAL);
+ if (!fc->source || !*fc->source)
+ return -EINVAL;
- error = kern_path(dev_name, LOOKUP_FOLLOW, &path);
+ error = kern_path(fc->source, LOOKUP_FOLLOW, &path);
if (error) {
pr_warn("path_lookup on %s returned error %d\n",
- dev_name, error);
- return ERR_PTR(error);
+ fc->source, error);
+ return error;
}
- s = sget(&gfs2_fs_type, test_gfs2_super, set_meta_super, flags,
- path.dentry->d_sb->s_bdev);
+ fc->fs_type = &gfs2_fs_type;
+ fc->sget_key = path.dentry->d_sb->s_bdev;
+ s = sget_fc(fc, test_meta_super, set_meta_super);
path_put(&path);
if (IS_ERR(s)) {
pr_warn("gfs2 mount does not exist\n");
- return ERR_CAST(s);
+ return PTR_ERR(s);
}
- if ((flags ^ s->s_flags) & SB_RDONLY) {
+ if ((fc->sb_flags ^ s->s_flags) & SB_RDONLY) {
deactivate_locked_super(s);
- return ERR_PTR(-EBUSY);
+ return -EBUSY;
}
sdp = s->s_fs_info;
- return dget(sdp->sd_master_dir);
+ fc->root = dget(sdp->sd_master_dir);
+ return 0;
+}
+
+static const struct fs_context_operations gfs2_meta_context_ops = {
+ .get_tree = gfs2_meta_get_tree,
+};
+
+static int gfs2_meta_init_fs_context(struct fs_context *fc)
+{
+ int ret = gfs2_init_fs_context(fc);
+
+ if (ret)
+ return ret;
+
+ fc->ops = &gfs2_meta_context_ops;
+ return 0;
}
static void gfs2_kill_sb(struct super_block *sb)
@@ -1383,7 +1635,8 @@ static void gfs2_kill_sb(struct super_block *sb)
struct file_system_type gfs2_fs_type = {
.name = "gfs2",
.fs_flags = FS_REQUIRES_DEV,
- .mount = gfs2_mount,
+ .init_fs_context = gfs2_init_fs_context,
+ .parameters = &gfs2_fs_parameters,
.kill_sb = gfs2_kill_sb,
.owner = THIS_MODULE,
};
@@ -1392,7 +1645,7 @@ MODULE_ALIAS_FS("gfs2");
struct file_system_type gfs2meta_fs_type = {
.name = "gfs2meta",
.fs_flags = FS_REQUIRES_DEV,
- .mount = gfs2_mount_meta,
+ .init_fs_context = gfs2_meta_init_fs_context,
.owner = THIS_MODULE,
};
MODULE_ALIAS_FS("gfs2meta");
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 644c70ae09f7..5fa1eec4fb4f 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -44,258 +44,6 @@
#include "xattr.h"
#include "lops.h"
-#define args_neq(a1, a2, x) ((a1)->ar_##x != (a2)->ar_##x)
-
-enum {
- Opt_lockproto,
- Opt_locktable,
- Opt_hostdata,
- Opt_spectator,
- Opt_ignore_local_fs,
- Opt_localflocks,
- Opt_localcaching,
- Opt_debug,
- Opt_nodebug,
- Opt_upgrade,
- Opt_acl,
- Opt_noacl,
- Opt_quota_off,
- Opt_quota_account,
- Opt_quota_on,
- Opt_quota,
- Opt_noquota,
- Opt_suiddir,
- Opt_nosuiddir,
- Opt_data_writeback,
- Opt_data_ordered,
- Opt_meta,
- Opt_discard,
- Opt_nodiscard,
- Opt_commit,
- Opt_err_withdraw,
- Opt_err_panic,
- Opt_statfs_quantum,
- Opt_statfs_percent,
- Opt_quota_quantum,
- Opt_barrier,
- Opt_nobarrier,
- Opt_rgrplvb,
- Opt_norgrplvb,
- Opt_loccookie,
- Opt_noloccookie,
- Opt_error,
-};
-
-static const match_table_t tokens = {
- {Opt_lockproto, "lockproto=%s"},
- {Opt_locktable, "locktable=%s"},
- {Opt_hostdata, "hostdata=%s"},
- {Opt_spectator, "spectator"},
- {Opt_spectator, "norecovery"},
- {Opt_ignore_local_fs, "ignore_local_fs"},
- {Opt_localflocks, "localflocks"},
- {Opt_localcaching, "localcaching"},
- {Opt_debug, "debug"},
- {Opt_nodebug, "nodebug"},
- {Opt_upgrade, "upgrade"},
- {Opt_acl, "acl"},
- {Opt_noacl, "noacl"},
- {Opt_quota_off, "quota=off"},
- {Opt_quota_account, "quota=account"},
- {Opt_quota_on, "quota=on"},
- {Opt_quota, "quota"},
- {Opt_noquota, "noquota"},
- {Opt_suiddir, "suiddir"},
- {Opt_nosuiddir, "nosuiddir"},
- {Opt_data_writeback, "data=writeback"},
- {Opt_data_ordered, "data=ordered"},
- {Opt_meta, "meta"},
- {Opt_discard, "discard"},
- {Opt_nodiscard, "nodiscard"},
- {Opt_commit, "commit=%d"},
- {Opt_err_withdraw, "errors=withdraw"},
- {Opt_err_panic, "errors=panic"},
- {Opt_statfs_quantum, "statfs_quantum=%d"},
- {Opt_statfs_percent, "statfs_percent=%d"},
- {Opt_quota_quantum, "quota_quantum=%d"},
- {Opt_barrier, "barrier"},
- {Opt_nobarrier, "nobarrier"},
- {Opt_rgrplvb, "rgrplvb"},
- {Opt_norgrplvb, "norgrplvb"},
- {Opt_loccookie, "loccookie"},
- {Opt_noloccookie, "noloccookie"},
- {Opt_error, NULL}
-};
-
-/**
- * gfs2_mount_args - Parse mount options
- * @args: The structure into which the parsed options will be written
- * @options: The options to parse
- *
- * Return: errno
- */
-
-int gfs2_mount_args(struct gfs2_args *args, char *options)
-{
- char *o;
- int token;
- substring_t tmp[MAX_OPT_ARGS];
- int rv;
-
- /* Split the options into tokens with the "," character and
- process them */
-
- while (1) {
- o = strsep(&options, ",");
- if (o == NULL)
- break;
- if (*o == '\0')
- continue;
-
- token = match_token(o, tokens, tmp);
- switch (token) {
- case Opt_lockproto:
- match_strlcpy(args->ar_lockproto, &tmp[0],
- GFS2_LOCKNAME_LEN);
- break;
- case Opt_locktable:
- match_strlcpy(args->ar_locktable, &tmp[0],
- GFS2_LOCKNAME_LEN);
- break;
- case Opt_hostdata:
- match_strlcpy(args->ar_hostdata, &tmp[0],
- GFS2_LOCKNAME_LEN);
- break;
- case Opt_spectator:
- args->ar_spectator = 1;
- break;
- case Opt_ignore_local_fs:
- /* Retained for backwards compat only */
- break;
- case Opt_localflocks:
- args->ar_localflocks = 1;
- break;
- case Opt_localcaching:
- /* Retained for backwards compat only */
- break;
- case Opt_debug:
- if (args->ar_errors == GFS2_ERRORS_PANIC) {
- pr_warn("-o debug and -o errors=panic are mutually exclusive\n");
- return -EINVAL;
- }
- args->ar_debug = 1;
- break;
- case Opt_nodebug:
- args->ar_debug = 0;
- break;
- case Opt_upgrade:
- /* Retained for backwards compat only */
- break;
- case Opt_acl:
- args->ar_posix_acl = 1;
- break;
- case Opt_noacl:
- args->ar_posix_acl = 0;
- break;
- case Opt_quota_off:
- case Opt_noquota:
- args->ar_quota = GFS2_QUOTA_OFF;
- break;
- case Opt_quota_account:
- args->ar_quota = GFS2_QUOTA_ACCOUNT;
- break;
- case Opt_quota_on:
- case Opt_quota:
- args->ar_quota = GFS2_QUOTA_ON;
- break;
- case Opt_suiddir:
- args->ar_suiddir = 1;
- break;
- case Opt_nosuiddir:
- args->ar_suiddir = 0;
- break;
- case Opt_data_writeback:
- args->ar_data = GFS2_DATA_WRITEBACK;
- break;
- case Opt_data_ordered:
- args->ar_data = GFS2_DATA_ORDERED;
- break;
- case Opt_meta:
- args->ar_meta = 1;
- break;
- case Opt_discard:
- args->ar_discard = 1;
- break;
- case Opt_nodiscard:
- args->ar_discard = 0;
- break;
- case Opt_commit:
- rv = match_int(&tmp[0], &args->ar_commit);
- if (rv || args->ar_commit <= 0) {
- pr_warn("commit mount option requires a positive numeric argument\n");
- return rv ? rv : -EINVAL;
- }
- break;
- case Opt_statfs_quantum:
- rv = match_int(&tmp[0], &args->ar_statfs_quantum);
- if (rv || args->ar_statfs_quantum < 0) {
- pr_warn("statfs_quantum mount option requires a non-negative numeric argument\n");
- return rv ? rv : -EINVAL;
- }
- break;
- case Opt_quota_quantum:
- rv = match_int(&tmp[0], &args->ar_quota_quantum);
- if (rv || args->ar_quota_quantum <= 0) {
- pr_warn("quota_quantum mount option requires a positive numeric argument\n");
- return rv ? rv : -EINVAL;
- }
- break;
- case Opt_statfs_percent:
- rv = match_int(&tmp[0], &args->ar_statfs_percent);
- if (rv || args->ar_statfs_percent < 0 ||
- args->ar_statfs_percent > 100) {
- pr_warn("statfs_percent mount option requires a numeric argument between 0 and 100\n");
- return rv ? rv : -EINVAL;
- }
- break;
- case Opt_err_withdraw:
- args->ar_errors = GFS2_ERRORS_WITHDRAW;
- break;
- case Opt_err_panic:
- if (args->ar_debug) {
- pr_warn("-o debug and -o errors=panic are mutually exclusive\n");
- return -EINVAL;
- }
- args->ar_errors = GFS2_ERRORS_PANIC;
- break;
- case Opt_barrier:
- args->ar_nobarrier = 0;
- break;
- case Opt_nobarrier:
- args->ar_nobarrier = 1;
- break;
- case Opt_rgrplvb:
- args->ar_rgrplvb = 1;
- break;
- case Opt_norgrplvb:
- args->ar_rgrplvb = 0;
- break;
- case Opt_loccookie:
- args->ar_loccookie = 1;
- break;
- case Opt_noloccookie:
- args->ar_loccookie = 0;
- break;
- case Opt_error:
- default:
- pr_warn("invalid mount option: %s\n", o);
- return -EINVAL;
- }
- }
-
- return 0;
-}
-
/**
* gfs2_jindex_free - Clear all the journal index information
* @sdp: The GFS2 superblock
@@ -847,7 +595,7 @@ out:
* Returns: errno
*/
-static int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
+int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
{
struct gfs2_holder freeze_gh;
int error;
@@ -1227,84 +975,6 @@ static int gfs2_statfs(struct dentry *dentry, struct kstatfs *buf)
}
/**
- * gfs2_remount_fs - called when the FS is remounted
- * @sb: the filesystem
- * @flags: the remount flags
- * @data: extra data passed in (not used right now)
- *
- * Returns: errno
- */
-
-static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data)
-{
- struct gfs2_sbd *sdp = sb->s_fs_info;
- struct gfs2_args args = sdp->sd_args; /* Default to current settings */
- struct gfs2_tune *gt = &sdp->sd_tune;
- int error;
-
- sync_filesystem(sb);
-
- spin_lock(&gt->gt_spin);
- args.ar_commit = gt->gt_logd_secs;
- args.ar_quota_quantum = gt->gt_quota_quantum;
- if (gt->gt_statfs_slow)
- args.ar_statfs_quantum = 0;
- else
- args.ar_statfs_quantum = gt->gt_statfs_quantum;
- spin_unlock(&gt->gt_spin);
- error = gfs2_mount_args(&args, data);
- if (error)
- return error;
-
- /* Not allowed to change locking details */
- if (strcmp(args.ar_lockproto, sdp->sd_args.ar_lockproto) ||
- strcmp(args.ar_locktable, sdp->sd_args.ar_locktable) ||
- strcmp(args.ar_hostdata, sdp->sd_args.ar_hostdata))
- return -EINVAL;
-
- /* Some flags must not be changed */
- if (args_neq(&args, &sdp->sd_args, spectator) ||
- args_neq(&args, &sdp->sd_args, localflocks) ||
- args_neq(&args, &sdp->sd_args, meta))
- return -EINVAL;
-
- if (sdp->sd_args.ar_spectator)
- *flags |= SB_RDONLY;
-
- if ((sb->s_flags ^ *flags) & SB_RDONLY) {
- if (*flags & SB_RDONLY)
- error = gfs2_make_fs_ro(sdp);
- else
- error = gfs2_make_fs_rw(sdp);
- }
-
- sdp->sd_args = args;
- if (sdp->sd_args.ar_posix_acl)
- sb->s_flags |= SB_POSIXACL;
- else
- sb->s_flags &= ~SB_POSIXACL;
- if (sdp->sd_args.ar_nobarrier)
- set_bit(SDF_NOBARRIERS, &sdp->sd_flags);
- else
- clear_bit(SDF_NOBARRIERS, &sdp->sd_flags);
- spin_lock(&gt->gt_spin);
- gt->gt_logd_secs = args.ar_commit;
- gt->gt_quota_quantum = args.ar_quota_quantum;
- if (args.ar_statfs_quantum) {
- gt->gt_statfs_slow = 0;
- gt->gt_statfs_quantum = args.ar_statfs_quantum;
- }
- else {
- gt->gt_statfs_slow = 1;
- gt->gt_statfs_quantum = 30;
- }
- spin_unlock(&gt->gt_spin);
-
- gfs2_online_uevent(sdp);
- return error;
-}
-
-/**
* gfs2_drop_inode - Drop an inode (test for remote unlink)
* @inode: The inode to drop
*
@@ -1748,7 +1418,6 @@ const struct super_operations gfs2_super_ops = {
.freeze_super = gfs2_freeze,
.thaw_super = gfs2_unfreeze,
.statfs = gfs2_statfs,
- .remount_fs = gfs2_remount_fs,
.drop_inode = gfs2_drop_inode,
.show_options = gfs2_show_options,
};
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h
index 9d49eaadb9d9..b8bf811a1305 100644
--- a/fs/gfs2/super.h
+++ b/fs/gfs2/super.h
@@ -24,8 +24,6 @@ static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp)
extern void gfs2_jindex_free(struct gfs2_sbd *sdp);
-extern int gfs2_mount_args(struct gfs2_args *args, char *data);
-
extern struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid);
extern int gfs2_jdesc_check(struct gfs2_jdesc *jd);
@@ -33,6 +31,7 @@ extern int gfs2_lookup_in_master_dir(struct gfs2_sbd *sdp, char *filename,
struct gfs2_inode **ipp);
extern int gfs2_make_fs_rw(struct gfs2_sbd *sdp);
+extern int gfs2_make_fs_ro(struct gfs2_sbd *sdp);
extern void gfs2_online_uevent(struct gfs2_sbd *sdp);
extern int gfs2_statfs_init(struct gfs2_sbd *sdp);
extern void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free,
diff --git a/fs/inode.c b/fs/inode.c
index 64bf28cf05cd..fef457a42882 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -181,6 +181,9 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
mapping->flags = 0;
mapping->wb_err = 0;
atomic_set(&mapping->i_mmap_writable, 0);
+#ifdef CONFIG_READ_ONLY_THP_FOR_FS
+ atomic_set(&mapping->nr_thps, 0);
+#endif
mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE);
mapping->private_data = NULL;
mapping->writeback_index = 0;
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 0dadbdbead0f..dd094b387cab 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -200,6 +200,7 @@ struct io_ring_ctx {
struct io_uring_sqe *sq_sqes;
struct list_head defer_list;
+ struct list_head timeout_list;
} ____cacheline_aligned_in_smp;
/* IO offload */
@@ -216,6 +217,7 @@ struct io_ring_ctx {
struct wait_queue_head cq_wait;
struct fasync_struct *cq_fasync;
struct eventfd_ctx *cq_ev_fd;
+ atomic_t cq_timeouts;
} ____cacheline_aligned_in_smp;
struct io_rings *rings;
@@ -283,6 +285,11 @@ struct io_poll_iocb {
struct wait_queue_entry wait;
};
+struct io_timeout {
+ struct file *file;
+ struct hrtimer timer;
+};
+
/*
* NOTE! Each of the iocb union members has the file pointer
* as the first entry in their struct definition. So you can
@@ -294,6 +301,7 @@ struct io_kiocb {
struct file *file;
struct kiocb rw;
struct io_poll_iocb poll;
+ struct io_timeout timeout;
};
struct sqe_submit submit;
@@ -313,6 +321,7 @@ struct io_kiocb {
#define REQ_F_LINK_DONE 128 /* linked sqes done */
#define REQ_F_FAIL_LINK 256 /* fail rest of links */
#define REQ_F_SHADOW_DRAIN 512 /* link-drain shadow req */
+#define REQ_F_TIMEOUT 1024 /* timeout request */
u64 user_data;
u32 result;
u32 sequence;
@@ -344,6 +353,8 @@ struct io_submit_state {
};
static void io_sq_wq_submit_work(struct work_struct *work);
+static void io_cqring_fill_event(struct io_ring_ctx *ctx, u64 ki_user_data,
+ long res);
static void __io_free_req(struct io_kiocb *req);
static struct kmem_cache *req_cachep;
@@ -400,26 +411,30 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
INIT_LIST_HEAD(&ctx->poll_list);
INIT_LIST_HEAD(&ctx->cancel_list);
INIT_LIST_HEAD(&ctx->defer_list);
+ INIT_LIST_HEAD(&ctx->timeout_list);
return ctx;
}
static inline bool io_sequence_defer(struct io_ring_ctx *ctx,
struct io_kiocb *req)
{
- if ((req->flags & (REQ_F_IO_DRAIN|REQ_F_IO_DRAINED)) != REQ_F_IO_DRAIN)
+ /* timeout requests always honor sequence */
+ if (!(req->flags & REQ_F_TIMEOUT) &&
+ (req->flags & (REQ_F_IO_DRAIN|REQ_F_IO_DRAINED)) != REQ_F_IO_DRAIN)
return false;
return req->sequence != ctx->cached_cq_tail + ctx->rings->sq_dropped;
}
-static struct io_kiocb *io_get_deferred_req(struct io_ring_ctx *ctx)
+static struct io_kiocb *__io_get_deferred_req(struct io_ring_ctx *ctx,
+ struct list_head *list)
{
struct io_kiocb *req;
- if (list_empty(&ctx->defer_list))
+ if (list_empty(list))
return NULL;
- req = list_first_entry(&ctx->defer_list, struct io_kiocb, list);
+ req = list_first_entry(list, struct io_kiocb, list);
if (!io_sequence_defer(ctx, req)) {
list_del_init(&req->list);
return req;
@@ -428,6 +443,16 @@ static struct io_kiocb *io_get_deferred_req(struct io_ring_ctx *ctx)
return NULL;
}
+static struct io_kiocb *io_get_deferred_req(struct io_ring_ctx *ctx)
+{
+ return __io_get_deferred_req(ctx, &ctx->defer_list);
+}
+
+static struct io_kiocb *io_get_timeout_req(struct io_ring_ctx *ctx)
+{
+ return __io_get_deferred_req(ctx, &ctx->timeout_list);
+}
+
static void __io_commit_cqring(struct io_ring_ctx *ctx)
{
struct io_rings *rings = ctx->rings;
@@ -446,25 +471,50 @@ static void __io_commit_cqring(struct io_ring_ctx *ctx)
static inline void io_queue_async_work(struct io_ring_ctx *ctx,
struct io_kiocb *req)
{
- int rw;
+ int rw = 0;
- switch (req->submit.sqe->opcode) {
- case IORING_OP_WRITEV:
- case IORING_OP_WRITE_FIXED:
- rw = !(req->rw.ki_flags & IOCB_DIRECT);
- break;
- default:
- rw = 0;
- break;
+ if (req->submit.sqe) {
+ switch (req->submit.sqe->opcode) {
+ case IORING_OP_WRITEV:
+ case IORING_OP_WRITE_FIXED:
+ rw = !(req->rw.ki_flags & IOCB_DIRECT);
+ break;
+ }
}
queue_work(ctx->sqo_wq[rw], &req->work);
}
+static void io_kill_timeout(struct io_kiocb *req)
+{
+ int ret;
+
+ ret = hrtimer_try_to_cancel(&req->timeout.timer);
+ if (ret != -1) {
+ atomic_inc(&req->ctx->cq_timeouts);
+ list_del(&req->list);
+ io_cqring_fill_event(req->ctx, req->user_data, 0);
+ __io_free_req(req);
+ }
+}
+
+static void io_kill_timeouts(struct io_ring_ctx *ctx)
+{
+ struct io_kiocb *req, *tmp;
+
+ spin_lock_irq(&ctx->completion_lock);
+ list_for_each_entry_safe(req, tmp, &ctx->timeout_list, list)
+ io_kill_timeout(req);
+ spin_unlock_irq(&ctx->completion_lock);
+}
+
static void io_commit_cqring(struct io_ring_ctx *ctx)
{
struct io_kiocb *req;
+ while ((req = io_get_timeout_req(ctx)) != NULL)
+ io_kill_timeout(req);
+
__io_commit_cqring(ctx);
while ((req = io_get_deferred_req(ctx)) != NULL) {
@@ -1248,6 +1298,51 @@ static void io_async_list_note(int rw, struct io_kiocb *req, size_t len)
}
}
+/*
+ * For files that don't have ->read_iter() and ->write_iter(), handle them
+ * by looping over ->read() or ->write() manually.
+ */
+static ssize_t loop_rw_iter(int rw, struct file *file, struct kiocb *kiocb,
+ struct iov_iter *iter)
+{
+ ssize_t ret = 0;
+
+ /*
+ * Don't support polled IO through this interface, and we can't
+ * support non-blocking either. For the latter, this just causes
+ * the kiocb to be handled from an async context.
+ */
+ if (kiocb->ki_flags & IOCB_HIPRI)
+ return -EOPNOTSUPP;
+ if (kiocb->ki_flags & IOCB_NOWAIT)
+ return -EAGAIN;
+
+ while (iov_iter_count(iter)) {
+ struct iovec iovec = iov_iter_iovec(iter);
+ ssize_t nr;
+
+ if (rw == READ) {
+ nr = file->f_op->read(file, iovec.iov_base,
+ iovec.iov_len, &kiocb->ki_pos);
+ } else {
+ nr = file->f_op->write(file, iovec.iov_base,
+ iovec.iov_len, &kiocb->ki_pos);
+ }
+
+ if (nr < 0) {
+ if (!ret)
+ ret = nr;
+ break;
+ }
+ ret += nr;
+ if (nr != iovec.iov_len)
+ break;
+ iov_iter_advance(iter, nr);
+ }
+
+ return ret;
+}
+
static int io_read(struct io_kiocb *req, const struct sqe_submit *s,
bool force_nonblock)
{
@@ -1265,8 +1360,6 @@ static int io_read(struct io_kiocb *req, const struct sqe_submit *s,
if (unlikely(!(file->f_mode & FMODE_READ)))
return -EBADF;
- if (unlikely(!file->f_op->read_iter))
- return -EINVAL;
ret = io_import_iovec(req->ctx, READ, s, &iovec, &iter);
if (ret < 0)
@@ -1281,7 +1374,11 @@ static int io_read(struct io_kiocb *req, const struct sqe_submit *s,
if (!ret) {
ssize_t ret2;
- ret2 = call_read_iter(file, kiocb, &iter);
+ if (file->f_op->read_iter)
+ ret2 = call_read_iter(file, kiocb, &iter);
+ else
+ ret2 = loop_rw_iter(READ, file, kiocb, &iter);
+
/*
* In case of a short read, punt to async. This can happen
* if we have data partially cached. Alternatively we can
@@ -1326,8 +1423,6 @@ static int io_write(struct io_kiocb *req, const struct sqe_submit *s,
file = kiocb->ki_filp;
if (unlikely(!(file->f_mode & FMODE_WRITE)))
return -EBADF;
- if (unlikely(!file->f_op->write_iter))
- return -EINVAL;
ret = io_import_iovec(req->ctx, WRITE, s, &iovec, &iter);
if (ret < 0)
@@ -1365,7 +1460,10 @@ static int io_write(struct io_kiocb *req, const struct sqe_submit *s,
}
kiocb->ki_flags |= IOCB_WRITE;
- ret2 = call_write_iter(file, kiocb, &iter);
+ if (file->f_op->write_iter)
+ ret2 = call_write_iter(file, kiocb, &iter);
+ else
+ ret2 = loop_rw_iter(WRITE, file, kiocb, &iter);
if (!force_nonblock || ret2 != -EAGAIN) {
io_rw_done(kiocb, ret2);
} else {
@@ -1714,6 +1812,7 @@ static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe)
if (!poll->file)
return -EBADF;
+ req->submit.sqe = NULL;
INIT_WORK(&req->work, io_poll_complete_work);
events = READ_ONCE(sqe->poll_events);
poll->events = demangle_poll(events) | EPOLLERR | EPOLLHUP;
@@ -1765,6 +1864,81 @@ static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return ipt.error;
}
+static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer)
+{
+ struct io_ring_ctx *ctx;
+ struct io_kiocb *req;
+ unsigned long flags;
+
+ req = container_of(timer, struct io_kiocb, timeout.timer);
+ ctx = req->ctx;
+ atomic_inc(&ctx->cq_timeouts);
+
+ spin_lock_irqsave(&ctx->completion_lock, flags);
+ list_del(&req->list);
+
+ io_cqring_fill_event(ctx, req->user_data, -ETIME);
+ io_commit_cqring(ctx);
+ spin_unlock_irqrestore(&ctx->completion_lock, flags);
+
+ io_cqring_ev_posted(ctx);
+
+ io_put_req(req);
+ return HRTIMER_NORESTART;
+}
+
+static int io_timeout(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+ unsigned count, req_dist, tail_index;
+ struct io_ring_ctx *ctx = req->ctx;
+ struct list_head *entry;
+ struct timespec ts;
+
+ if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
+ return -EINVAL;
+ if (sqe->flags || sqe->ioprio || sqe->buf_index || sqe->timeout_flags ||
+ sqe->len != 1)
+ return -EINVAL;
+ if (copy_from_user(&ts, (void __user *) (unsigned long) sqe->addr,
+ sizeof(ts)))
+ return -EFAULT;
+
+ /*
+ * sqe->off holds how many events that need to occur for this
+ * timeout event to be satisfied.
+ */
+ count = READ_ONCE(sqe->off);
+ if (!count)
+ count = 1;
+
+ req->sequence = ctx->cached_sq_head + count - 1;
+ req->flags |= REQ_F_TIMEOUT;
+
+ /*
+ * Insertion sort, ensuring the first entry in the list is always
+ * the one we need first.
+ */
+ tail_index = ctx->cached_cq_tail - ctx->rings->sq_dropped;
+ req_dist = req->sequence - tail_index;
+ spin_lock_irq(&ctx->completion_lock);
+ list_for_each_prev(entry, &ctx->timeout_list) {
+ struct io_kiocb *nxt = list_entry(entry, struct io_kiocb, list);
+ unsigned dist;
+
+ dist = nxt->sequence - tail_index;
+ if (req_dist >= dist)
+ break;
+ }
+ list_add(&req->list, entry);
+ spin_unlock_irq(&ctx->completion_lock);
+
+ hrtimer_init(&req->timeout.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ req->timeout.timer.function = io_timeout_fn;
+ hrtimer_start(&req->timeout.timer, timespec_to_ktime(ts),
+ HRTIMER_MODE_REL);
+ return 0;
+}
+
static int io_req_defer(struct io_ring_ctx *ctx, struct io_kiocb *req,
const struct io_uring_sqe *sqe)
{
@@ -1842,6 +2016,9 @@ static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
case IORING_OP_RECVMSG:
ret = io_recvmsg(req, s->sqe, force_nonblock);
break;
+ case IORING_OP_TIMEOUT:
+ ret = io_timeout(req, s->sqe);
+ break;
default:
ret = -EINVAL;
break;
@@ -2098,13 +2275,11 @@ static int __io_queue_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
if (ret == -EAGAIN && !(req->flags & REQ_F_NOWAIT)) {
struct io_uring_sqe *sqe_copy;
- sqe_copy = kmalloc(sizeof(*sqe_copy), GFP_KERNEL);
+ sqe_copy = kmemdup(s->sqe, sizeof(*sqe_copy), GFP_KERNEL);
if (sqe_copy) {
struct async_list *list;
- memcpy(sqe_copy, s->sqe, sizeof(*sqe_copy));
s->sqe = sqe_copy;
-
memcpy(&req->submit, s, sizeof(*s));
list = io_async_list_from_sqe(ctx, s->sqe);
if (!io_add_to_prev_work(list, req)) {
@@ -2359,18 +2534,22 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes,
io_queue_link_head(ctx, link, &link->submit, shadow_req,
true);
link = NULL;
+ shadow_req = NULL;
}
prev_was_link = (sqes[i].sqe->flags & IOSQE_IO_LINK) != 0;
if (link && (sqes[i].sqe->flags & IOSQE_IO_DRAIN)) {
if (!shadow_req) {
shadow_req = io_get_req(ctx, NULL);
+ if (unlikely(!shadow_req))
+ goto out;
shadow_req->flags |= (REQ_F_IO_DRAIN | REQ_F_SHADOW_DRAIN);
refcount_dec(&shadow_req->refs);
}
shadow_req->sequence = sqes[i].sequence;
}
+out:
if (unlikely(mm_fault)) {
io_cqring_add_event(ctx, sqes[i].sqe->user_data,
-EFAULT);
@@ -2436,7 +2615,7 @@ static int io_sq_thread(void *data)
* to sleep.
*/
if (inflight || !time_after(jiffies, timeout)) {
- cpu_relax();
+ cond_resched();
continue;
}
@@ -2545,18 +2724,22 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit,
io_queue_link_head(ctx, link, &link->submit, shadow_req,
force_nonblock);
link = NULL;
+ shadow_req = NULL;
}
prev_was_link = (s.sqe->flags & IOSQE_IO_LINK) != 0;
if (link && (s.sqe->flags & IOSQE_IO_DRAIN)) {
if (!shadow_req) {
shadow_req = io_get_req(ctx, NULL);
+ if (unlikely(!shadow_req))
+ goto out;
shadow_req->flags |= (REQ_F_IO_DRAIN | REQ_F_SHADOW_DRAIN);
refcount_dec(&shadow_req->refs);
}
shadow_req->sequence = s.sequence;
}
+out:
s.has_user = true;
s.needs_lock = false;
s.needs_fixed_file = false;
@@ -2593,6 +2776,7 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
const sigset_t __user *sig, size_t sigsz)
{
struct io_rings *rings = ctx->rings;
+ unsigned nr_timeouts;
int ret;
if (io_cqring_events(rings) >= min_events)
@@ -2611,7 +2795,15 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
return ret;
}
- ret = wait_event_interruptible(ctx->wait, io_cqring_events(rings) >= min_events);
+ nr_timeouts = atomic_read(&ctx->cq_timeouts);
+ /*
+ * Return if we have enough events, or if a timeout occured since
+ * we started waiting. For timeouts, we always want to return to
+ * userspace.
+ */
+ ret = wait_event_interruptible(ctx->wait,
+ io_cqring_events(rings) >= min_events ||
+ atomic_read(&ctx->cq_timeouts) != nr_timeouts);
restore_saved_sigmask_unless(ret == -ERESTARTSYS);
if (ret == -ERESTARTSYS)
ret = -EINTR;
@@ -3282,6 +3474,7 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
percpu_ref_kill(&ctx->refs);
mutex_unlock(&ctx->uring_lock);
+ io_kill_timeouts(ctx);
io_poll_remove_all(ctx);
io_iopoll_reap_events(ctx);
wait_for_completion(&ctx->ctx_done);
@@ -3319,7 +3512,7 @@ static int io_uring_mmap(struct file *file, struct vm_area_struct *vma)
}
page = virt_to_head_page(ptr);
- if (sz > (PAGE_SIZE << compound_order(page)))
+ if (sz > page_size(page))
return -EINVAL;
pfn = virt_to_phys(ptr) >> PAGE_SHIFT;
diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index 10517cea9682..1fc28c2da279 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -24,7 +24,7 @@
struct iomap_dio {
struct kiocb *iocb;
- iomap_dio_end_io_t *end_io;
+ const struct iomap_dio_ops *dops;
loff_t i_size;
loff_t size;
atomic_t ref;
@@ -72,18 +72,14 @@ static void iomap_dio_submit_bio(struct iomap_dio *dio, struct iomap *iomap,
static ssize_t iomap_dio_complete(struct iomap_dio *dio)
{
+ const struct iomap_dio_ops *dops = dio->dops;
struct kiocb *iocb = dio->iocb;
struct inode *inode = file_inode(iocb->ki_filp);
loff_t offset = iocb->ki_pos;
- ssize_t ret;
+ ssize_t ret = dio->error;
- if (dio->end_io) {
- ret = dio->end_io(iocb,
- dio->error ? dio->error : dio->size,
- dio->flags);
- } else {
- ret = dio->error;
- }
+ if (dops && dops->end_io)
+ ret = dops->end_io(iocb, dio->size, ret, dio->flags);
if (likely(!ret)) {
ret = dio->size;
@@ -101,9 +97,9 @@ static ssize_t iomap_dio_complete(struct iomap_dio *dio)
* one is a pretty crazy thing to do, so we don't support it 100%. If
* this invalidation fails, tough, the write still worked...
*
- * And this page cache invalidation has to be after dio->end_io(), as
- * some filesystems convert unwritten extents to real allocations in
- * end_io() when necessary, otherwise a racing buffer read would cache
+ * And this page cache invalidation has to be after ->end_io(), as some
+ * filesystems convert unwritten extents to real allocations in
+ * ->end_io() when necessary, otherwise a racing buffer read would cache
* zeros from unwritten extents.
*/
if (!dio->error &&
@@ -396,7 +392,7 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
*/
ssize_t
iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
- const struct iomap_ops *ops, iomap_dio_end_io_t end_io)
+ const struct iomap_ops *ops, const struct iomap_dio_ops *dops)
{
struct address_space *mapping = iocb->ki_filp->f_mapping;
struct inode *inode = file_inode(iocb->ki_filp);
@@ -421,7 +417,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
atomic_set(&dio->ref, 1);
dio->size = 0;
dio->i_size = i_size_read(inode);
- dio->end_io = end_io;
+ dio->dops = dops;
dio->error = 0;
dio->flags = 0;
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 953990eb70a9..1c58859aa592 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -89,8 +89,6 @@ EXPORT_SYMBOL(jbd2_journal_blocks_per_page);
EXPORT_SYMBOL(jbd2_journal_invalidatepage);
EXPORT_SYMBOL(jbd2_journal_try_to_free_buffers);
EXPORT_SYMBOL(jbd2_journal_force_commit);
-EXPORT_SYMBOL(jbd2_journal_inode_add_write);
-EXPORT_SYMBOL(jbd2_journal_inode_add_wait);
EXPORT_SYMBOL(jbd2_journal_inode_ranged_write);
EXPORT_SYMBOL(jbd2_journal_inode_ranged_wait);
EXPORT_SYMBOL(jbd2_journal_init_jbd_inode);
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index afc06daee5bb..bee8498d7792 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -2622,18 +2622,6 @@ done:
return 0;
}
-int jbd2_journal_inode_add_write(handle_t *handle, struct jbd2_inode *jinode)
-{
- return jbd2_journal_file_inode(handle, jinode,
- JI_WRITE_DATA | JI_WAIT_DATA, 0, LLONG_MAX);
-}
-
-int jbd2_journal_inode_add_wait(handle_t *handle, struct jbd2_inode *jinode)
-{
- return jbd2_journal_file_inode(handle, jinode, JI_WAIT_DATA, 0,
- LLONG_MAX);
-}
-
int jbd2_journal_inode_ranged_write(handle_t *handle,
struct jbd2_inode *jinode, loff_t start_byte, loff_t length)
{
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 0c335b51043d..f9baefc76cf9 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -5993,6 +5993,7 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
struct buffer_head *data_alloc_bh = NULL;
struct ocfs2_dinode *di;
struct ocfs2_truncate_log *tl;
+ struct ocfs2_journal *journal = osb->journal;
BUG_ON(inode_trylock(tl_inode));
@@ -6013,6 +6014,20 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
goto out;
}
+ /* Appending truncate log(TA) and and flushing truncate log(TF) are
+ * two separated transactions. They can be both committed but not
+ * checkpointed. If crash occurs then, both two transaction will be
+ * replayed with several already released to global bitmap clusters.
+ * Then truncate log will be replayed resulting in cluster double free.
+ */
+ jbd2_journal_lock_updates(journal->j_journal);
+ status = jbd2_journal_flush(journal->j_journal);
+ jbd2_journal_unlock_updates(journal->j_journal);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out;
+ }
+
data_alloc_inode = ocfs2_get_system_file_inode(osb,
GLOBAL_BITMAP_SYSTEM_INODE,
OCFS2_INVALID_SLOT);
@@ -6792,6 +6807,8 @@ void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle,
struct page *page, int zero, u64 *phys)
{
int ret, partial = 0;
+ loff_t start_byte = ((loff_t)page->index << PAGE_SHIFT) + from;
+ loff_t length = to - from;
ret = ocfs2_map_page_blocks(page, phys, inode, from, to, 0);
if (ret)
@@ -6811,7 +6828,8 @@ void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle,
if (ret < 0)
mlog_errno(ret);
else if (ocfs2_should_order_data(inode)) {
- ret = ocfs2_jbd2_file_inode(handle, inode);
+ ret = ocfs2_jbd2_inode_add_write(handle, inode,
+ start_byte, length);
if (ret < 0)
mlog_errno(ret);
}
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index a4c905d6b575..8de1c9d644f6 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -942,7 +942,8 @@ static void ocfs2_write_failure(struct inode *inode,
if (tmppage && page_has_buffers(tmppage)) {
if (ocfs2_should_order_data(inode))
- ocfs2_jbd2_file_inode(wc->w_handle, inode);
+ ocfs2_jbd2_inode_add_write(wc->w_handle, inode,
+ user_pos, user_len);
block_commit_write(tmppage, from, to);
}
@@ -2023,8 +2024,14 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
}
if (page_has_buffers(tmppage)) {
- if (handle && ocfs2_should_order_data(inode))
- ocfs2_jbd2_file_inode(handle, inode);
+ if (handle && ocfs2_should_order_data(inode)) {
+ loff_t start_byte =
+ ((loff_t)tmppage->index << PAGE_SHIFT) +
+ from;
+ loff_t length = to - from;
+ ocfs2_jbd2_inode_add_write(handle, inode,
+ start_byte, length);
+ }
block_commit_write(tmppage, from, to);
}
}
diff --git a/fs/ocfs2/blockcheck.c b/fs/ocfs2/blockcheck.c
index 429e6a8359a5..eaf042feaf5e 100644
--- a/fs/ocfs2/blockcheck.c
+++ b/fs/ocfs2/blockcheck.c
@@ -231,14 +231,6 @@ static int blockcheck_u64_get(void *data, u64 *val)
}
DEFINE_SIMPLE_ATTRIBUTE(blockcheck_fops, blockcheck_u64_get, NULL, "%llu\n");
-static struct dentry *blockcheck_debugfs_create(const char *name,
- struct dentry *parent,
- u64 *value)
-{
- return debugfs_create_file(name, S_IFREG | S_IRUSR, parent, value,
- &blockcheck_fops);
-}
-
static void ocfs2_blockcheck_debug_remove(struct ocfs2_blockcheck_stats *stats)
{
if (stats) {
@@ -250,16 +242,20 @@ static void ocfs2_blockcheck_debug_remove(struct ocfs2_blockcheck_stats *stats)
static void ocfs2_blockcheck_debug_install(struct ocfs2_blockcheck_stats *stats,
struct dentry *parent)
{
- stats->b_debug_dir = debugfs_create_dir("blockcheck", parent);
+ struct dentry *dir;
+
+ dir = debugfs_create_dir("blockcheck", parent);
+ stats->b_debug_dir = dir;
+
+ debugfs_create_file("blocks_checked", S_IFREG | S_IRUSR, dir,
+ &stats->b_check_count, &blockcheck_fops);
- blockcheck_debugfs_create("blocks_checked", stats->b_debug_dir,
- &stats->b_check_count);
+ debugfs_create_file("checksums_failed", S_IFREG | S_IRUSR, dir,
+ &stats->b_failure_count, &blockcheck_fops);
- blockcheck_debugfs_create("checksums_failed", stats->b_debug_dir,
- &stats->b_failure_count);
+ debugfs_create_file("ecc_recoveries", S_IFREG | S_IRUSR, dir,
+ &stats->b_recover_count, &blockcheck_fops);
- blockcheck_debugfs_create("ecc_recoveries", stats->b_debug_dir,
- &stats->b_recover_count);
}
#else
static inline void ocfs2_blockcheck_debug_install(struct ocfs2_blockcheck_stats *stats,
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index f1b613327ac8..a368350d4c27 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -225,10 +225,6 @@ struct o2hb_region {
unsigned int hr_region_num;
struct dentry *hr_debug_dir;
- struct dentry *hr_debug_livenodes;
- struct dentry *hr_debug_regnum;
- struct dentry *hr_debug_elapsed_time;
- struct dentry *hr_debug_pinned;
struct o2hb_debug_buf *hr_db_livenodes;
struct o2hb_debug_buf *hr_db_regnum;
struct o2hb_debug_buf *hr_db_elapsed_time;
@@ -1394,21 +1390,20 @@ void o2hb_exit(void)
kfree(o2hb_db_failedregions);
}
-static struct dentry *o2hb_debug_create(const char *name, struct dentry *dir,
- struct o2hb_debug_buf **db, int db_len,
- int type, int size, int len, void *data)
+static void o2hb_debug_create(const char *name, struct dentry *dir,
+ struct o2hb_debug_buf **db, int db_len, int type,
+ int size, int len, void *data)
{
*db = kmalloc(db_len, GFP_KERNEL);
if (!*db)
- return NULL;
+ return;
(*db)->db_type = type;
(*db)->db_size = size;
(*db)->db_len = len;
(*db)->db_data = data;
- return debugfs_create_file(name, S_IFREG|S_IRUSR, dir, *db,
- &o2hb_debug_fops);
+ debugfs_create_file(name, S_IFREG|S_IRUSR, dir, *db, &o2hb_debug_fops);
}
static void o2hb_debug_init(void)
@@ -1525,11 +1520,7 @@ static void o2hb_region_release(struct config_item *item)
kfree(reg->hr_slots);
- debugfs_remove(reg->hr_debug_livenodes);
- debugfs_remove(reg->hr_debug_regnum);
- debugfs_remove(reg->hr_debug_elapsed_time);
- debugfs_remove(reg->hr_debug_pinned);
- debugfs_remove(reg->hr_debug_dir);
+ debugfs_remove_recursive(reg->hr_debug_dir);
kfree(reg->hr_db_livenodes);
kfree(reg->hr_db_regnum);
kfree(reg->hr_db_elapsed_time);
@@ -1988,69 +1979,33 @@ static struct o2hb_heartbeat_group *to_o2hb_heartbeat_group(struct config_group
: NULL;
}
-static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir)
+static void o2hb_debug_region_init(struct o2hb_region *reg,
+ struct dentry *parent)
{
- int ret = -ENOMEM;
+ struct dentry *dir;
- reg->hr_debug_dir =
- debugfs_create_dir(config_item_name(&reg->hr_item), dir);
- if (!reg->hr_debug_dir) {
- mlog_errno(ret);
- goto bail;
- }
+ dir = debugfs_create_dir(config_item_name(&reg->hr_item), parent);
+ reg->hr_debug_dir = dir;
- reg->hr_debug_livenodes =
- o2hb_debug_create(O2HB_DEBUG_LIVENODES,
- reg->hr_debug_dir,
- &(reg->hr_db_livenodes),
- sizeof(*(reg->hr_db_livenodes)),
- O2HB_DB_TYPE_REGION_LIVENODES,
- sizeof(reg->hr_live_node_bitmap),
- O2NM_MAX_NODES, reg);
- if (!reg->hr_debug_livenodes) {
- mlog_errno(ret);
- goto bail;
- }
+ o2hb_debug_create(O2HB_DEBUG_LIVENODES, dir, &(reg->hr_db_livenodes),
+ sizeof(*(reg->hr_db_livenodes)),
+ O2HB_DB_TYPE_REGION_LIVENODES,
+ sizeof(reg->hr_live_node_bitmap), O2NM_MAX_NODES,
+ reg);
- reg->hr_debug_regnum =
- o2hb_debug_create(O2HB_DEBUG_REGION_NUMBER,
- reg->hr_debug_dir,
- &(reg->hr_db_regnum),
- sizeof(*(reg->hr_db_regnum)),
- O2HB_DB_TYPE_REGION_NUMBER,
- 0, O2NM_MAX_NODES, reg);
- if (!reg->hr_debug_regnum) {
- mlog_errno(ret);
- goto bail;
- }
+ o2hb_debug_create(O2HB_DEBUG_REGION_NUMBER, dir, &(reg->hr_db_regnum),
+ sizeof(*(reg->hr_db_regnum)),
+ O2HB_DB_TYPE_REGION_NUMBER, 0, O2NM_MAX_NODES, reg);
- reg->hr_debug_elapsed_time =
- o2hb_debug_create(O2HB_DEBUG_REGION_ELAPSED_TIME,
- reg->hr_debug_dir,
- &(reg->hr_db_elapsed_time),
- sizeof(*(reg->hr_db_elapsed_time)),
- O2HB_DB_TYPE_REGION_ELAPSED_TIME,
- 0, 0, reg);
- if (!reg->hr_debug_elapsed_time) {
- mlog_errno(ret);
- goto bail;
- }
+ o2hb_debug_create(O2HB_DEBUG_REGION_ELAPSED_TIME, dir,
+ &(reg->hr_db_elapsed_time),
+ sizeof(*(reg->hr_db_elapsed_time)),
+ O2HB_DB_TYPE_REGION_ELAPSED_TIME, 0, 0, reg);
- reg->hr_debug_pinned =
- o2hb_debug_create(O2HB_DEBUG_REGION_PINNED,
- reg->hr_debug_dir,
- &(reg->hr_db_pinned),
- sizeof(*(reg->hr_db_pinned)),
- O2HB_DB_TYPE_REGION_PINNED,
- 0, 0, reg);
- if (!reg->hr_debug_pinned) {
- mlog_errno(ret);
- goto bail;
- }
+ o2hb_debug_create(O2HB_DEBUG_REGION_PINNED, dir, &(reg->hr_db_pinned),
+ sizeof(*(reg->hr_db_pinned)),
+ O2HB_DB_TYPE_REGION_PINNED, 0, 0, reg);
- ret = 0;
-bail:
- return ret;
}
static struct config_item *o2hb_heartbeat_group_make_item(struct config_group *group,
@@ -2106,11 +2061,7 @@ static struct config_item *o2hb_heartbeat_group_make_item(struct config_group *g
if (ret)
goto unregister_handler;
- ret = o2hb_debug_region_init(reg, o2hb_debug_dir);
- if (ret) {
- config_item_put(&reg->hr_item);
- goto unregister_handler;
- }
+ o2hb_debug_region_init(reg, o2hb_debug_dir);
return &reg->hr_item;
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 784426dee56c..bdef72c0f099 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -3636,7 +3636,7 @@ static void ocfs2_dx_dir_transfer_leaf(struct inode *dir, u32 split_hash,
int i, j, num_used;
u32 major_hash;
struct ocfs2_dx_leaf *orig_dx_leaf, *new_dx_leaf;
- struct ocfs2_dx_entry_list *orig_list, *new_list, *tmp_list;
+ struct ocfs2_dx_entry_list *orig_list, *tmp_list;
struct ocfs2_dx_entry *dx_entry;
tmp_list = &tmp_dx_leaf->dl_list;
@@ -3645,7 +3645,6 @@ static void ocfs2_dx_dir_transfer_leaf(struct inode *dir, u32 split_hash,
orig_dx_leaf = (struct ocfs2_dx_leaf *) orig_dx_leaves[i]->b_data;
orig_list = &orig_dx_leaf->dl_list;
new_dx_leaf = (struct ocfs2_dx_leaf *) new_dx_leaves[i]->b_data;
- new_list = &new_dx_leaf->dl_list;
num_used = le16_to_cpu(orig_list->de_num_used);
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index 69a429b625cc..aaf24548b02a 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -142,7 +142,6 @@ struct dlm_ctxt
atomic_t res_tot_count;
atomic_t res_cur_count;
- struct dlm_debug_ctxt *dlm_debug_ctxt;
struct dentry *dlm_debugfs_subroot;
/* NOTE: Next three are protected by dlm_domain_lock */
diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c
index a4b58ba99927..4d0b452012b2 100644
--- a/fs/ocfs2/dlm/dlmdebug.c
+++ b/fs/ocfs2/dlm/dlmdebug.c
@@ -853,67 +853,34 @@ static const struct file_operations debug_state_fops = {
/* files in subroot */
void dlm_debug_init(struct dlm_ctxt *dlm)
{
- struct dlm_debug_ctxt *dc = dlm->dlm_debug_ctxt;
-
/* for dumping dlm_ctxt */
- dc->debug_state_dentry = debugfs_create_file(DLM_DEBUGFS_DLM_STATE,
- S_IFREG|S_IRUSR,
- dlm->dlm_debugfs_subroot,
- dlm, &debug_state_fops);
+ debugfs_create_file(DLM_DEBUGFS_DLM_STATE, S_IFREG|S_IRUSR,
+ dlm->dlm_debugfs_subroot, dlm, &debug_state_fops);
/* for dumping lockres */
- dc->debug_lockres_dentry =
- debugfs_create_file(DLM_DEBUGFS_LOCKING_STATE,
- S_IFREG|S_IRUSR,
- dlm->dlm_debugfs_subroot,
- dlm, &debug_lockres_fops);
+ debugfs_create_file(DLM_DEBUGFS_LOCKING_STATE, S_IFREG|S_IRUSR,
+ dlm->dlm_debugfs_subroot, dlm, &debug_lockres_fops);
/* for dumping mles */
- dc->debug_mle_dentry = debugfs_create_file(DLM_DEBUGFS_MLE_STATE,
- S_IFREG|S_IRUSR,
- dlm->dlm_debugfs_subroot,
- dlm, &debug_mle_fops);
+ debugfs_create_file(DLM_DEBUGFS_MLE_STATE, S_IFREG|S_IRUSR,
+ dlm->dlm_debugfs_subroot, dlm, &debug_mle_fops);
/* for dumping lockres on the purge list */
- dc->debug_purgelist_dentry =
- debugfs_create_file(DLM_DEBUGFS_PURGE_LIST,
- S_IFREG|S_IRUSR,
- dlm->dlm_debugfs_subroot,
- dlm, &debug_purgelist_fops);
-}
-
-void dlm_debug_shutdown(struct dlm_ctxt *dlm)
-{
- struct dlm_debug_ctxt *dc = dlm->dlm_debug_ctxt;
-
- if (dc) {
- debugfs_remove(dc->debug_purgelist_dentry);
- debugfs_remove(dc->debug_mle_dentry);
- debugfs_remove(dc->debug_lockres_dentry);
- debugfs_remove(dc->debug_state_dentry);
- kfree(dc);
- dc = NULL;
- }
+ debugfs_create_file(DLM_DEBUGFS_PURGE_LIST, S_IFREG|S_IRUSR,
+ dlm->dlm_debugfs_subroot, dlm,
+ &debug_purgelist_fops);
}
/* subroot - domain dir */
-int dlm_create_debugfs_subroot(struct dlm_ctxt *dlm)
+void dlm_create_debugfs_subroot(struct dlm_ctxt *dlm)
{
- dlm->dlm_debug_ctxt = kzalloc(sizeof(struct dlm_debug_ctxt),
- GFP_KERNEL);
- if (!dlm->dlm_debug_ctxt) {
- mlog_errno(-ENOMEM);
- return -ENOMEM;
- }
-
dlm->dlm_debugfs_subroot = debugfs_create_dir(dlm->name,
dlm_debugfs_root);
- return 0;
}
void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm)
{
- debugfs_remove(dlm->dlm_debugfs_subroot);
+ debugfs_remove_recursive(dlm->dlm_debugfs_subroot);
}
/* debugfs root */
diff --git a/fs/ocfs2/dlm/dlmdebug.h b/fs/ocfs2/dlm/dlmdebug.h
index 7d0c7c9013ce..f8fd8680a4b6 100644
--- a/fs/ocfs2/dlm/dlmdebug.h
+++ b/fs/ocfs2/dlm/dlmdebug.h
@@ -14,13 +14,6 @@ void dlm_print_one_mle(struct dlm_master_list_entry *mle);
#ifdef CONFIG_DEBUG_FS
-struct dlm_debug_ctxt {
- struct dentry *debug_state_dentry;
- struct dentry *debug_lockres_dentry;
- struct dentry *debug_mle_dentry;
- struct dentry *debug_purgelist_dentry;
-};
-
struct debug_lockres {
int dl_len;
char *dl_buf;
@@ -29,9 +22,8 @@ struct debug_lockres {
};
void dlm_debug_init(struct dlm_ctxt *dlm);
-void dlm_debug_shutdown(struct dlm_ctxt *dlm);
-int dlm_create_debugfs_subroot(struct dlm_ctxt *dlm);
+void dlm_create_debugfs_subroot(struct dlm_ctxt *dlm);
void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm);
void dlm_create_debugfs_root(void);
@@ -42,12 +34,8 @@ void dlm_destroy_debugfs_root(void);
static inline void dlm_debug_init(struct dlm_ctxt *dlm)
{
}
-static inline void dlm_debug_shutdown(struct dlm_ctxt *dlm)
-{
-}
-static inline int dlm_create_debugfs_subroot(struct dlm_ctxt *dlm)
+static inline void dlm_create_debugfs_subroot(struct dlm_ctxt *dlm)
{
- return 0;
}
static inline void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm)
{
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 7338b5d4647c..ee6f459f9770 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -387,7 +387,6 @@ static void dlm_destroy_dlm_worker(struct dlm_ctxt *dlm)
static void dlm_complete_dlm_shutdown(struct dlm_ctxt *dlm)
{
dlm_unregister_domain_handlers(dlm);
- dlm_debug_shutdown(dlm);
dlm_complete_thread(dlm);
dlm_complete_recovery_thread(dlm);
dlm_destroy_dlm_worker(dlm);
@@ -1938,7 +1937,6 @@ bail:
if (status) {
dlm_unregister_domain_handlers(dlm);
- dlm_debug_shutdown(dlm);
dlm_complete_thread(dlm);
dlm_complete_recovery_thread(dlm);
dlm_destroy_dlm_worker(dlm);
@@ -1992,9 +1990,7 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
dlm->key = key;
dlm->node_num = o2nm_this_node();
- ret = dlm_create_debugfs_subroot(dlm);
- if (ret < 0)
- goto leave;
+ dlm_create_debugfs_subroot(dlm);
spin_lock_init(&dlm->spinlock);
spin_lock_init(&dlm->master_lock);
@@ -2056,6 +2052,7 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
mlog(0, "context init: refcount %u\n",
kref_read(&dlm->dlm_refs));
+ ret = 0;
leave:
if (ret < 0 && dlm) {
if (dlm->master_hash)
diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c
index e78657742bd8..3883633e82eb 100644
--- a/fs/ocfs2/dlm/dlmunlock.c
+++ b/fs/ocfs2/dlm/dlmunlock.c
@@ -90,7 +90,8 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm,
enum dlm_status status;
int actions = 0;
int in_use;
- u8 owner;
+ u8 owner;
+ int recovery_wait = 0;
mlog(0, "master_node = %d, valblk = %d\n", master_node,
flags & LKM_VALBLK);
@@ -193,9 +194,12 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm,
}
if (flags & LKM_CANCEL)
lock->cancel_pending = 0;
- else
- lock->unlock_pending = 0;
-
+ else {
+ if (!lock->unlock_pending)
+ recovery_wait = 1;
+ else
+ lock->unlock_pending = 0;
+ }
}
/* get an extra ref on lock. if we are just switching
@@ -229,6 +233,17 @@ leave:
spin_unlock(&res->spinlock);
wake_up(&res->wq);
+ if (recovery_wait) {
+ spin_lock(&res->spinlock);
+ /* Unlock request will directly succeed after owner dies,
+ * and the lock is already removed from grant list. We have to
+ * wait for RECOVERING done or we miss the chance to purge it
+ * since the removement is much faster than RECOVERING proc.
+ */
+ __dlm_wait_on_lockres_flags(res, DLM_LOCK_RES_RECOVERING);
+ spin_unlock(&res->spinlock);
+ }
+
/* let the caller's final dlm_lock_put handle the actual kfree */
if (actions & DLM_UNLOCK_FREE_LOCK) {
/* this should always be coupled with list removal */
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 14207234fa3d..6e774c5ea13b 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -2508,9 +2508,7 @@ bail:
ocfs2_inode_unlock(inode, ex);
}
- if (local_bh)
- brelse(local_bh);
-
+ brelse(local_bh);
return status;
}
@@ -2593,8 +2591,7 @@ int ocfs2_inode_lock_atime(struct inode *inode,
*level = 1;
if (ocfs2_should_update_atime(inode, vfsmnt))
ocfs2_update_inode_atime(inode, bh);
- if (bh)
- brelse(bh);
+ brelse(bh);
} else
*level = 0;
@@ -3012,8 +3009,6 @@ struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void)
kref_init(&dlm_debug->d_refcnt);
INIT_LIST_HEAD(&dlm_debug->d_lockres_tracking);
- dlm_debug->d_locking_state = NULL;
- dlm_debug->d_locking_filter = NULL;
dlm_debug->d_filter_secs = 0;
out:
return dlm_debug;
@@ -3282,27 +3277,19 @@ static void ocfs2_dlm_init_debug(struct ocfs2_super *osb)
{
struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug;
- dlm_debug->d_locking_state = debugfs_create_file("locking_state",
- S_IFREG|S_IRUSR,
- osb->osb_debug_root,
- osb,
- &ocfs2_dlm_debug_fops);
+ debugfs_create_file("locking_state", S_IFREG|S_IRUSR,
+ osb->osb_debug_root, osb, &ocfs2_dlm_debug_fops);
- dlm_debug->d_locking_filter = debugfs_create_u32("locking_filter",
- 0600,
- osb->osb_debug_root,
- &dlm_debug->d_filter_secs);
+ debugfs_create_u32("locking_filter", 0600, osb->osb_debug_root,
+ &dlm_debug->d_filter_secs);
}
static void ocfs2_dlm_shutdown_debug(struct ocfs2_super *osb)
{
struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug;
- if (dlm_debug) {
- debugfs_remove(dlm_debug->d_locking_state);
- debugfs_remove(dlm_debug->d_locking_filter);
+ if (dlm_debug)
ocfs2_put_dlm_debug(dlm_debug);
- }
}
int ocfs2_dlm_init(struct ocfs2_super *osb)
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index e66a249fe07c..e3e2d1b2af51 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -590,8 +590,7 @@ int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster,
*extent_flags = rec->e_flags;
}
out:
- if (eb_bh)
- brelse(eb_bh);
+ brelse(eb_bh);
return ret;
}
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 4435df3e5adb..2e982db3e1ae 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -706,7 +706,9 @@ leave:
* Thus, we need to explicitly order the zeroed pages.
*/
static handle_t *ocfs2_zero_start_ordered_transaction(struct inode *inode,
- struct buffer_head *di_bh)
+ struct buffer_head *di_bh,
+ loff_t start_byte,
+ loff_t length)
{
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
handle_t *handle = NULL;
@@ -722,7 +724,7 @@ static handle_t *ocfs2_zero_start_ordered_transaction(struct inode *inode,
goto out;
}
- ret = ocfs2_jbd2_file_inode(handle, inode);
+ ret = ocfs2_jbd2_inode_add_write(handle, inode, start_byte, length);
if (ret < 0) {
mlog_errno(ret);
goto out;
@@ -761,7 +763,9 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
BUG_ON(abs_to > (((u64)index + 1) << PAGE_SHIFT));
BUG_ON(abs_from & (inode->i_blkbits - 1));
- handle = ocfs2_zero_start_ordered_transaction(inode, di_bh);
+ handle = ocfs2_zero_start_ordered_transaction(inode, di_bh,
+ abs_from,
+ abs_to - abs_from);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
goto out;
@@ -2126,7 +2130,6 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
struct dentry *dentry = file->f_path.dentry;
struct inode *inode = d_inode(dentry);
struct buffer_head *di_bh = NULL;
- loff_t end;
/*
* We start with a read level meta lock and only jump to an ex
@@ -2190,8 +2193,6 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
}
}
- end = pos + count;
-
ret = ocfs2_check_range_for_refcount(inode, pos, count);
if (ret == 1) {
ocfs2_inode_unlock(inode, meta_level);
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 7ad9d6590818..7c9dfd50c1c1 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -534,7 +534,7 @@ static int ocfs2_read_locked_inode(struct inode *inode,
*/
mlog_bug_on_msg(!!(fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL)) !=
!!(args->fi_flags & OCFS2_FI_FLAG_SYSFILE),
- "Inode %llu: system file state is ambigous\n",
+ "Inode %llu: system file state is ambiguous\n",
(unsigned long long)args->fi_blkno);
if (S_ISCHR(le16_to_cpu(fe->i_mode)) ||
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index c0fe6ed08ab1..3103ba7f97a2 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -144,7 +144,6 @@ static inline void ocfs2_ci_set_new(struct ocfs2_super *osb,
void ocfs2_orphan_scan_init(struct ocfs2_super *osb);
void ocfs2_orphan_scan_start(struct ocfs2_super *osb);
void ocfs2_orphan_scan_stop(struct ocfs2_super *osb);
-void ocfs2_orphan_scan_exit(struct ocfs2_super *osb);
void ocfs2_complete_recovery(struct work_struct *work);
void ocfs2_wait_for_recovery(struct ocfs2_super *osb);
@@ -232,8 +231,8 @@ static inline void ocfs2_checkpoint_inode(struct inode *inode)
* ocfs2_journal_access_*() unless you intend to
* manage the checksum by hand.
* ocfs2_journal_dirty - Mark a journalled buffer as having dirty data.
- * ocfs2_jbd2_file_inode - Mark an inode so that its data goes out before
- * the current handle commits.
+ * ocfs2_jbd2_inode_add_write - Mark an inode with range so that its data goes
+ * out before the current handle commits.
*/
/* You must always start_trans with a number of buffs > 0, but it's
@@ -441,7 +440,7 @@ static inline int ocfs2_mknod_credits(struct super_block *sb, int is_dir,
* previous dirblock update in the free list */
static inline int ocfs2_link_credits(struct super_block *sb)
{
- return 2*OCFS2_INODE_UPDATE_CREDITS + 4 +
+ return 2 * OCFS2_INODE_UPDATE_CREDITS + 4 +
ocfs2_quota_trans_credits(sb);
}
@@ -575,37 +574,12 @@ static inline int ocfs2_calc_bg_discontig_credits(struct super_block *sb)
return ocfs2_extent_recs_per_gd(sb);
}
-static inline int ocfs2_calc_tree_trunc_credits(struct super_block *sb,
- unsigned int clusters_to_del,
- struct ocfs2_dinode *fe,
- struct ocfs2_extent_list *last_el)
+static inline int ocfs2_jbd2_inode_add_write(handle_t *handle, struct inode *inode,
+ loff_t start_byte, loff_t length)
{
- /* for dinode + all headers in this pass + update to next leaf */
- u16 next_free = le16_to_cpu(last_el->l_next_free_rec);
- u16 tree_depth = le16_to_cpu(fe->id2.i_list.l_tree_depth);
- int credits = 1 + tree_depth + 1;
- int i;
-
- i = next_free - 1;
- BUG_ON(i < 0);
-
- /* We may be deleting metadata blocks, so metadata alloc dinode +
- one desc. block for each possible delete. */
- if (tree_depth && next_free == 1 &&
- ocfs2_rec_clusters(last_el, &last_el->l_recs[i]) == clusters_to_del)
- credits += 1 + tree_depth;
-
- /* update to the truncate log. */
- credits += OCFS2_TRUNCATE_LOG_UPDATE;
-
- credits += ocfs2_quota_trans_credits(sb);
-
- return credits;
-}
-
-static inline int ocfs2_jbd2_file_inode(handle_t *handle, struct inode *inode)
-{
- return jbd2_journal_inode_add_write(handle, &OCFS2_I(inode)->ip_jinode);
+ return jbd2_journal_inode_ranged_write(handle,
+ &OCFS2_I(inode)->ip_jinode,
+ start_byte, length);
}
static inline int ocfs2_begin_ordered_truncate(struct inode *inode,
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 6f8e1c4fdb9c..8ea51cf27b97 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -2486,7 +2486,6 @@ int ocfs2_create_inode_in_orphan(struct inode *dir,
struct inode *inode = NULL;
struct inode *orphan_dir = NULL;
struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
- struct ocfs2_dinode *di = NULL;
handle_t *handle = NULL;
char orphan_name[OCFS2_ORPHAN_NAMELEN + 1];
struct buffer_head *parent_di_bh = NULL;
@@ -2552,7 +2551,6 @@ int ocfs2_create_inode_in_orphan(struct inode *dir,
goto leave;
}
- di = (struct ocfs2_dinode *)new_di_bh->b_data;
status = ocfs2_orphan_add(osb, handle, inode, new_di_bh, orphan_name,
&orphan_insert, orphan_dir, false);
if (status < 0) {
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index fddbbd60f434..9150cfa4df7d 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -223,8 +223,6 @@ struct ocfs2_orphan_scan {
struct ocfs2_dlm_debug {
struct kref d_refcnt;
- struct dentry *d_locking_state;
- struct dentry *d_locking_filter;
u32 d_filter_secs;
struct list_head d_lockres_tracking;
};
@@ -401,7 +399,6 @@ struct ocfs2_super
struct ocfs2_dlm_debug *osb_dlm_debug;
struct dentry *osb_debug_root;
- struct dentry *osb_ctxt;
wait_queue_head_t recovery_event;
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 8b2f39506648..c81e86c62380 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1080,10 +1080,8 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
osb->osb_debug_root = debugfs_create_dir(osb->uuid_str,
ocfs2_debugfs_root);
- osb->osb_ctxt = debugfs_create_file("fs_state", S_IFREG|S_IRUSR,
- osb->osb_debug_root,
- osb,
- &ocfs2_osb_debug_fops);
+ debugfs_create_file("fs_state", S_IFREG|S_IRUSR, osb->osb_debug_root,
+ osb, &ocfs2_osb_debug_fops);
if (ocfs2_meta_ecc(osb))
ocfs2_blockcheck_stats_debugfs_install( &osb->osb_ecc_stats,
@@ -1861,8 +1859,6 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
kset_unregister(osb->osb_dev_kset);
- debugfs_remove(osb->osb_ctxt);
-
/* Orphan scan should be stopped as early as possible */
ocfs2_orphan_scan_stop(osb);
@@ -1918,7 +1914,7 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
ocfs2_dlm_shutdown(osb, hangup_needed);
ocfs2_blockcheck_stats_debugfs_remove(&osb->osb_ecc_stats);
- debugfs_remove(osb->osb_debug_root);
+ debugfs_remove_recursive(osb->osb_debug_root);
if (hangup_needed)
ocfs2_cluster_hangup(osb->uuid_str, strlen(osb->uuid_str));
diff --git a/fs/open.c b/fs/open.c
index a59abe3c669a..c60cd22cc052 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -818,6 +818,14 @@ static int do_dentry_open(struct file *f,
if (!f->f_mapping->a_ops || !f->f_mapping->a_ops->direct_IO)
return -EINVAL;
}
+
+ /*
+ * XXX: Huge page cache doesn't support writing yet. Drop all page
+ * cache for this file before processing writes.
+ */
+ if ((f->f_mode & FMODE_WRITE) && filemap_nr_thps(inode->i_mapping))
+ truncate_pagecache(inode, 0);
+
return 0;
cleanup_all:
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 465ea0153b2a..ac9247371871 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -8,7 +8,6 @@
#include <linux/mmzone.h>
#include <linux/proc_fs.h>
#include <linux/percpu.h>
-#include <linux/quicklist.h>
#include <linux/seq_file.h>
#include <linux/swap.h>
#include <linux/vmstat.h>
@@ -106,9 +105,6 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
global_zone_page_state(NR_KERNEL_STACK_KB));
show_val_kb(m, "PageTables: ",
global_zone_page_state(NR_PAGETABLE));
-#ifdef CONFIG_QUICKLIST
- show_val_kb(m, "Quicklists: ", quicklist_total_size());
-#endif
show_val_kb(m, "NFS_Unstable: ",
global_node_page_state(NR_UNSTABLE_NFS));
@@ -136,6 +132,10 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
global_node_page_state(NR_SHMEM_THPS) * HPAGE_PMD_NR);
show_val_kb(m, "ShmemPmdMapped: ",
global_node_page_state(NR_SHMEM_PMDMAPPED) * HPAGE_PMD_NR);
+ show_val_kb(m, "FileHugePages: ",
+ global_node_page_state(NR_FILE_THPS) * HPAGE_PMD_NR);
+ show_val_kb(m, "FilePmdMapped: ",
+ global_node_page_state(NR_FILE_PMDMAPPED) * HPAGE_PMD_NR);
#endif
#ifdef CONFIG_CMA
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index bf43d1d60059..9442631fd4af 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -417,6 +417,7 @@ struct mem_size_stats {
unsigned long lazyfree;
unsigned long anonymous_thp;
unsigned long shmem_thp;
+ unsigned long file_thp;
unsigned long swap;
unsigned long shared_hugetlb;
unsigned long private_hugetlb;
@@ -461,7 +462,7 @@ static void smaps_page_accumulate(struct mem_size_stats *mss,
static void smaps_account(struct mem_size_stats *mss, struct page *page,
bool compound, bool young, bool dirty, bool locked)
{
- int i, nr = compound ? 1 << compound_order(page) : 1;
+ int i, nr = compound ? compound_nr(page) : 1;
unsigned long size = nr * PAGE_SIZE;
/*
@@ -588,7 +589,7 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
else if (is_zone_device_page(page))
/* pass */;
else
- VM_BUG_ON_PAGE(1, page);
+ mss->file_thp += HPAGE_PMD_SIZE;
smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd), locked);
}
#else
@@ -809,6 +810,7 @@ static void __show_smap(struct seq_file *m, const struct mem_size_stats *mss,
SEQ_PUT_DEC(" kB\nLazyFree: ", mss->lazyfree);
SEQ_PUT_DEC(" kB\nAnonHugePages: ", mss->anonymous_thp);
SEQ_PUT_DEC(" kB\nShmemPmdMapped: ", mss->shmem_thp);
+ SEQ_PUT_DEC(" kB\nFilePmdMapped: ", mss->file_thp);
SEQ_PUT_DEC(" kB\nShared_Hugetlb: ", mss->shared_hugetlb);
seq_put_decimal_ull_width(m, " kB\nPrivate_Hugetlb: ",
mss->private_hugetlb >> 10, 7);
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index d952d5962e93..1ffb179f35d2 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -370,21 +370,23 @@ static int
xfs_dio_write_end_io(
struct kiocb *iocb,
ssize_t size,
+ int error,
unsigned flags)
{
struct inode *inode = file_inode(iocb->ki_filp);
struct xfs_inode *ip = XFS_I(inode);
loff_t offset = iocb->ki_pos;
unsigned int nofs_flag;
- int error = 0;
trace_xfs_end_io_direct_write(ip, offset, size);
if (XFS_FORCED_SHUTDOWN(ip->i_mount))
return -EIO;
- if (size <= 0)
- return size;
+ if (error)
+ return error;
+ if (!size)
+ return 0;
/*
* Capture amount written on completion as we can't reliably account
@@ -441,6 +443,10 @@ out:
return error;
}
+static const struct iomap_dio_ops xfs_dio_write_ops = {
+ .end_io = xfs_dio_write_end_io,
+};
+
/*
* xfs_file_dio_aio_write - handle direct IO writes
*
@@ -541,7 +547,7 @@ xfs_file_dio_aio_write(
}
trace_xfs_file_direct_write(ip, count, iocb->ki_pos);
- ret = iomap_dio_rw(iocb, from, &xfs_iomap_ops, xfs_dio_write_end_io);
+ ret = iomap_dio_rw(iocb, from, &xfs_iomap_ops, &xfs_dio_write_ops);
/*
* If unaligned, this is the only IO in-flight. If it has not yet
diff --git a/include/asm-generic/pgalloc.h b/include/asm-generic/pgalloc.h
index 8476175c07e7..6f8cc06ee44e 100644
--- a/include/asm-generic/pgalloc.h
+++ b/include/asm-generic/pgalloc.h
@@ -102,11 +102,6 @@ static inline void pte_free(struct mm_struct *mm, struct page *pte_page)
__free_page(pte_page);
}
-#else /* CONFIG_MMU */
-
-/* This is enough for a nommu architecture */
-#define check_pgt_cache() do { } while (0)
-
#endif /* CONFIG_MMU */
#endif /* __ASM_GENERIC_PGALLOC_H */
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 75d9d68a6de7..818691846c90 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -1002,9 +1002,8 @@ static inline int pmd_none_or_trans_huge_or_clear_bad(pmd_t *pmd)
* need this). If THP is not enabled, the pmd can't go away under the
* code even if MADV_DONTNEED runs, but if THP is enabled we need to
* run a pmd_trans_unstable before walking the ptes after
- * split_huge_page_pmd returns (because it may have run when the pmd
- * become null, but then a page fault can map in a THP and not a
- * regular page).
+ * split_huge_pmd returns (because it may have run when the pmd become
+ * null, but then a page fault can map in a THP and not a regular page).
*/
static inline int pmd_trans_unstable(pmd_t *pmd)
{
@@ -1126,7 +1125,7 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
static inline void init_espfix_bsp(void) { }
#endif
-extern void __init pgd_cache_init(void);
+extern void __init pgtable_cache_init(void);
#ifndef __HAVE_ARCH_PFN_MODIFY_ALLOWED
static inline bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index d9db32fb75ee..f3ea78b0c91c 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1524,10 +1524,14 @@ struct blk_integrity_iter {
};
typedef blk_status_t (integrity_processing_fn) (struct blk_integrity_iter *);
+typedef void (integrity_prepare_fn) (struct request *);
+typedef void (integrity_complete_fn) (struct request *, unsigned int);
struct blk_integrity_profile {
integrity_processing_fn *generate_fn;
integrity_processing_fn *verify_fn;
+ integrity_prepare_fn *prepare_fn;
+ integrity_complete_fn *complete_fn;
const char *name;
};
diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index 9569e7c786d3..4b898cdbdf05 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -129,11 +129,8 @@ static inline bool compaction_failed(enum compact_result result)
return false;
}
-/*
- * Compaction has backed off for some reason. It might be throttling or
- * lock contention. Retrying is still worthwhile.
- */
-static inline bool compaction_withdrawn(enum compact_result result)
+/* Compaction needs reclaim to be performed first, so it can continue. */
+static inline bool compaction_needs_reclaim(enum compact_result result)
{
/*
* Compaction backed off due to watermark checks for order-0
@@ -142,6 +139,16 @@ static inline bool compaction_withdrawn(enum compact_result result)
if (result == COMPACT_SKIPPED)
return true;
+ return false;
+}
+
+/*
+ * Compaction has backed off for some reason after doing some work or none
+ * at all. It might be throttling or lock contention. Retrying might be still
+ * worthwhile, but with a higher priority if allowed.
+ */
+static inline bool compaction_withdrawn(enum compact_result result)
+{
/*
* If compaction is deferred for high-order allocations, it is
* because sync compaction recently failed. If this is the case
@@ -207,6 +214,11 @@ static inline bool compaction_failed(enum compact_result result)
return false;
}
+static inline bool compaction_needs_reclaim(enum compact_result result)
+{
+ return false;
+}
+
static inline bool compaction_withdrawn(enum compact_result result)
{
return true;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 866268c2c6e3..b0c6b0d34d02 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -429,6 +429,7 @@ int pagecache_write_end(struct file *, struct address_space *mapping,
* @i_pages: Cached pages.
* @gfp_mask: Memory allocation flags to use for allocating pages.
* @i_mmap_writable: Number of VM_SHARED mappings.
+ * @nr_thps: Number of THPs in the pagecache (non-shmem only).
* @i_mmap: Tree of private and shared mappings.
* @i_mmap_rwsem: Protects @i_mmap and @i_mmap_writable.
* @nrpages: Number of page entries, protected by the i_pages lock.
@@ -446,6 +447,10 @@ struct address_space {
struct xarray i_pages;
gfp_t gfp_mask;
atomic_t i_mmap_writable;
+#ifdef CONFIG_READ_ONLY_THP_FOR_FS
+ /* number of thp, only for non-shmem files */
+ atomic_t nr_thps;
+#endif
struct rb_root_cached i_mmap;
struct rw_semaphore i_mmap_rwsem;
unsigned long nrpages;
@@ -2798,6 +2803,33 @@ static inline errseq_t filemap_sample_wb_err(struct address_space *mapping)
return errseq_sample(&mapping->wb_err);
}
+static inline int filemap_nr_thps(struct address_space *mapping)
+{
+#ifdef CONFIG_READ_ONLY_THP_FOR_FS
+ return atomic_read(&mapping->nr_thps);
+#else
+ return 0;
+#endif
+}
+
+static inline void filemap_nr_thps_inc(struct address_space *mapping)
+{
+#ifdef CONFIG_READ_ONLY_THP_FOR_FS
+ atomic_inc(&mapping->nr_thps);
+#else
+ WARN_ON_ONCE(1);
+#endif
+}
+
+static inline void filemap_nr_thps_dec(struct address_space *mapping)
+{
+#ifdef CONFIG_READ_ONLY_THP_FOR_FS
+ atomic_dec(&mapping->nr_thps);
+#else
+ WARN_ON_ONCE(1);
+#endif
+}
+
extern int vfs_fsync_range(struct file *file, loff_t start, loff_t end,
int datasync);
extern int vfs_fsync(struct file *file, int datasync);
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 45ede62aa85b..61c9ffd89b05 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -267,6 +267,15 @@ static inline bool thp_migration_supported(void)
return IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION);
}
+static inline struct list_head *page_deferred_list(struct page *page)
+{
+ /*
+ * Global or memcg deferred list in the second tail pages is
+ * occupied by compound_head.
+ */
+ return &page[2].deferred_list;
+}
+
#else /* CONFIG_TRANSPARENT_HUGEPAGE */
#define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; })
#define HPAGE_PMD_MASK ({ BUILD_BUG(); 0; })
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index edfca4278319..53fc34f930d0 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -454,7 +454,7 @@ static inline pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
static inline struct hstate *page_hstate(struct page *page)
{
VM_BUG_ON_PAGE(!PageHuge(page), page);
- return size_to_hstate(PAGE_SIZE << compound_order(page));
+ return size_to_hstate(page_size(page));
}
static inline unsigned hstate_index_to_shift(unsigned index)
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 2afe6fdc1dda..b4a017093b69 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -245,7 +245,10 @@ struct vmbus_channel_offer {
} pipe;
} u;
/*
- * The sub_channel_index is defined in win8.
+ * The sub_channel_index is defined in Win8: a value of zero means a
+ * primary channel and a value of non-zero means a sub-channel.
+ *
+ * Before Win8, the field is reserved, meaning it's always zero.
*/
u16 sub_channel_index;
u16 reserved3;
@@ -423,6 +426,9 @@ enum vmbus_channel_message_type {
CHANNELMSG_COUNT
};
+/* Hyper-V supports about 2048 channels, and the RELIDs start with 1. */
+#define INVALID_RELID U32_MAX
+
struct vmbus_channel_message_header {
enum vmbus_channel_message_type msgtype;
u32 padding;
@@ -934,6 +940,11 @@ static inline bool is_hvsock_channel(const struct vmbus_channel *c)
VMBUS_CHANNEL_TLNPI_PROVIDER_OFFER);
}
+static inline bool is_sub_channel(const struct vmbus_channel *c)
+{
+ return c->offermsg.offer.sub_channel_index != 0;
+}
+
static inline void set_channel_affinity_state(struct vmbus_channel *c,
enum hv_numa_policy policy)
{
@@ -1149,6 +1160,9 @@ struct hv_driver {
int (*remove)(struct hv_device *);
void (*shutdown)(struct hv_device *);
+ int (*suspend)(struct hv_device *);
+ int (*resume)(struct hv_device *);
+
};
/* Base device object */
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index c0a78c069117..1361637c369d 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -473,7 +473,7 @@ extern struct i2c_client *
devm_i2c_new_dummy_device(struct device *dev, struct i2c_adapter *adap, u16 address);
extern struct i2c_client *
-i2c_new_secondary_device(struct i2c_client *client,
+i2c_new_ancillary_device(struct i2c_client *client,
const char *name,
u16 default_addr);
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index bc499ceae392..7aa5d6117936 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -188,10 +188,14 @@ sector_t iomap_bmap(struct address_space *mapping, sector_t bno,
*/
#define IOMAP_DIO_UNWRITTEN (1 << 0) /* covers unwritten extent(s) */
#define IOMAP_DIO_COW (1 << 1) /* covers COW extent(s) */
-typedef int (iomap_dio_end_io_t)(struct kiocb *iocb, ssize_t ret,
- unsigned flags);
+
+struct iomap_dio_ops {
+ int (*end_io)(struct kiocb *iocb, ssize_t size, int error,
+ unsigned flags);
+};
+
ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
- const struct iomap_ops *ops, iomap_dio_end_io_t end_io);
+ const struct iomap_ops *ops, const struct iomap_dio_ops *dops);
int iomap_dio_iopoll(struct kiocb *kiocb, bool spin);
#ifdef CONFIG_SWAP
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index df03825ad1a1..603fbc4e2f70 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1410,8 +1410,6 @@ extern int jbd2_journal_clear_err (journal_t *);
extern int jbd2_journal_bmap(journal_t *, unsigned long, unsigned long long *);
extern int jbd2_journal_force_commit(journal_t *);
extern int jbd2_journal_force_commit_nested(journal_t *);
-extern int jbd2_journal_inode_add_write(handle_t *handle, struct jbd2_inode *inode);
-extern int jbd2_journal_inode_add_wait(handle_t *handle, struct jbd2_inode *inode);
extern int jbd2_journal_inode_ranged_write(handle_t *handle,
struct jbd2_inode *inode, loff_t start_byte,
loff_t length);
diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h
index 082d1d2a5216..bc45ea1efbf7 100644
--- a/include/linux/khugepaged.h
+++ b/include/linux/khugepaged.h
@@ -15,6 +15,14 @@ extern int __khugepaged_enter(struct mm_struct *mm);
extern void __khugepaged_exit(struct mm_struct *mm);
extern int khugepaged_enter_vma_merge(struct vm_area_struct *vma,
unsigned long vm_flags);
+#ifdef CONFIG_SHMEM
+extern void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr);
+#else
+static inline void collapse_pte_mapped_thp(struct mm_struct *mm,
+ unsigned long addr)
+{
+}
+#endif
#define khugepaged_enabled() \
(transparent_hugepage_flags & \
@@ -73,6 +81,10 @@ static inline int khugepaged_enter_vma_merge(struct vm_area_struct *vma,
{
return 0;
}
+static inline void collapse_pte_mapped_thp(struct mm_struct *mm,
+ unsigned long addr)
+{
+}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif /* _LINUX_KHUGEPAGED_H */
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index ad8f1a397ae4..9b60863429cc 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -128,9 +128,8 @@ struct mem_cgroup_per_node {
struct mem_cgroup_reclaim_iter iter[DEF_PRIORITY + 1];
-#ifdef CONFIG_MEMCG_KMEM
struct memcg_shrinker_map __rcu *shrinker_map;
-#endif
+
struct rb_node tree_node; /* RB tree node */
unsigned long usage_in_excess;/* Set to the value by which */
/* the soft limit is exceeded*/
@@ -331,6 +330,10 @@ struct mem_cgroup {
struct list_head event_list;
spinlock_t event_list_lock;
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ struct deferred_split deferred_split_queue;
+#endif
+
struct mem_cgroup_per_node *nodeinfo[0];
/* WARNING: nodeinfo must be the last member here */
};
@@ -1311,6 +1314,11 @@ static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
} while ((memcg = parent_mem_cgroup(memcg)));
return false;
}
+
+extern int memcg_expand_shrinker_maps(int new_id);
+
+extern void memcg_set_shrinker_bit(struct mem_cgroup *memcg,
+ int nid, int shrinker_id);
#else
#define mem_cgroup_sockets_enabled 0
static inline void mem_cgroup_sk_alloc(struct sock *sk) { };
@@ -1319,6 +1327,11 @@ static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
{
return false;
}
+
+static inline void memcg_set_shrinker_bit(struct mem_cgroup *memcg,
+ int nid, int shrinker_id)
+{
+}
#endif
struct kmem_cache *memcg_kmem_get_cache(struct kmem_cache *cachep);
@@ -1390,10 +1403,6 @@ static inline int memcg_cache_id(struct mem_cgroup *memcg)
return memcg ? memcg->kmemcg_id : -1;
}
-extern int memcg_expand_shrinker_maps(int new_id);
-
-extern void memcg_set_shrinker_bit(struct mem_cgroup *memcg,
- int nid, int shrinker_id);
#else
static inline int memcg_kmem_charge(struct page *page, gfp_t gfp, int order)
@@ -1435,8 +1444,6 @@ static inline void memcg_put_cache_ids(void)
{
}
-static inline void memcg_set_shrinker_bit(struct mem_cgroup *memcg,
- int nid, int shrinker_id) { }
#endif /* CONFIG_MEMCG_KMEM */
#endif /* _LINUX_MEMCONTROL_H */
diff --git a/include/linux/memory.h b/include/linux/memory.h
index 02e633f3ede0..0ebb105eb261 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -25,7 +25,6 @@
struct memory_block {
unsigned long start_section_nr;
- unsigned long end_section_nr;
unsigned long state; /* serialized by the dev->lock */
int section_count; /* serialized by mem_sysfs_mutex */
int online_type; /* for passing data to online routine */
@@ -80,9 +79,9 @@ struct mem_section;
#define IPC_CALLBACK_PRI 10
#ifndef CONFIG_MEMORY_HOTPLUG_SPARSE
-static inline int memory_dev_init(void)
+static inline void memory_dev_init(void)
{
- return 0;
+ return;
}
static inline int register_memory_notifier(struct notifier_block *nb)
{
@@ -113,7 +112,7 @@ extern int register_memory_isolate_notifier(struct notifier_block *nb);
extern void unregister_memory_isolate_notifier(struct notifier_block *nb);
int create_memory_block_devices(unsigned long start, unsigned long size);
void remove_memory_block_devices(unsigned long start, unsigned long size);
-extern int memory_dev_init(void);
+extern void memory_dev_init(void);
extern int memory_notify(unsigned long val, void *v);
extern int memory_isolate_notify(unsigned long val, void *v);
extern struct memory_block *find_memory_block(struct mem_section *);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7cf955feb823..294a67b94147 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -805,6 +805,24 @@ static inline void set_compound_order(struct page *page, unsigned int order)
page[1].compound_order = order;
}
+/* Returns the number of pages in this potentially compound page. */
+static inline unsigned long compound_nr(struct page *page)
+{
+ return 1UL << compound_order(page);
+}
+
+/* Returns the number of bytes in this potentially compound page. */
+static inline unsigned long page_size(struct page *page)
+{
+ return PAGE_SIZE << compound_order(page);
+}
+
+/* Returns the number of bits needed for the number of bytes in a page */
+static inline unsigned int page_shift(struct page *page)
+{
+ return PAGE_SHIFT + compound_order(page);
+}
+
void free_compound_page(struct page *page);
#ifdef CONFIG_MMU
@@ -1057,8 +1075,9 @@ static inline void put_user_page(struct page *page)
put_page(page);
}
-void put_user_pages_dirty(struct page **pages, unsigned long npages);
-void put_user_pages_dirty_lock(struct page **pages, unsigned long npages);
+void put_user_pages_dirty_lock(struct page **pages, unsigned long npages,
+ bool make_dirty);
+
void put_user_pages(struct page **pages, unsigned long npages);
#if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
@@ -1405,7 +1424,11 @@ extern void pagefault_out_of_memory(void);
extern void show_free_areas(unsigned int flags, nodemask_t *nodemask);
+#ifdef CONFIG_MMU
extern bool can_do_mlock(void);
+#else
+static inline bool can_do_mlock(void) { return false; }
+#endif
extern int user_shm_lock(size_t, struct user_struct *);
extern void user_shm_unlock(size_t, struct user_struct *);
@@ -2305,6 +2328,8 @@ extern int install_special_mapping(struct mm_struct *mm,
unsigned long addr, unsigned long len,
unsigned long flags, struct page **pages);
+unsigned long randomize_stack_top(unsigned long stack_top);
+
extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
extern unsigned long mmap_region(struct file *file, unsigned long addr,
@@ -2568,6 +2593,7 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
#define FOLL_COW 0x4000 /* internal GUP flag */
#define FOLL_ANON 0x8000 /* don't do file mappings */
#define FOLL_LONGTERM 0x10000 /* mapping lifetime is indefinite: see below */
+#define FOLL_SPLIT_PMD 0x20000 /* split huge pmd before returning */
/*
* NOTE on FOLL_LONGTERM:
@@ -2845,5 +2871,12 @@ void __init setup_nr_node_ids(void);
static inline void setup_nr_node_ids(void) {}
#endif
+extern int memcmp_pages(struct page *page1, struct page *page2);
+
+static inline int pages_identical(struct page *page1, struct page *page2)
+{
+ return !memcmp_pages(page1, page2);
+}
+
#endif /* __KERNEL__ */
#endif /* _LINUX_MM_H */
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 0b739f360cec..5183e0d77dfa 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -138,6 +138,7 @@ struct page {
struct { /* Second tail page of compound page */
unsigned long _compound_pad_1; /* compound_head */
unsigned long _compound_pad_2;
+ /* For both global and memcg */
struct list_head deferred_list;
};
struct { /* Page table pages */
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 3f38c30d2f13..bda20282746b 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -235,6 +235,8 @@ enum node_stat_item {
NR_SHMEM, /* shmem pages (included tmpfs/GEM pages) */
NR_SHMEM_THPS,
NR_SHMEM_PMDMAPPED,
+ NR_FILE_THPS,
+ NR_FILE_PMDMAPPED,
NR_ANON_THPS,
NR_UNSTABLE_NFS, /* NFS unstable pages */
NR_VMSCAN_WRITE,
@@ -677,6 +679,14 @@ struct zonelist {
extern struct page *mem_map;
#endif
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+struct deferred_split {
+ spinlock_t split_queue_lock;
+ struct list_head split_queue;
+ unsigned long split_queue_len;
+};
+#endif
+
/*
* On NUMA machines, each NUMA node would have a pg_data_t to describe
* it's memory layout. On UMA machines there is a single pglist_data which
@@ -756,9 +766,7 @@ typedef struct pglist_data {
#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- spinlock_t split_queue_lock;
- struct list_head split_queue;
- unsigned long split_queue_len;
+ struct deferred_split deferred_split_queue;
#endif
/* Fields commonly accessed by the page reclaim scanner */
diff --git a/include/linux/page_ext.h b/include/linux/page_ext.h
index 09592951725c..682fd465df06 100644
--- a/include/linux/page_ext.h
+++ b/include/linux/page_ext.h
@@ -18,6 +18,7 @@ struct page_ext_operations {
enum page_ext_flags {
PAGE_EXT_OWNER,
+ PAGE_EXT_OWNER_ACTIVE,
#if defined(CONFIG_IDLE_PAGE_TRACKING) && !defined(CONFIG_64BIT)
PAGE_EXT_YOUNG,
PAGE_EXT_IDLE,
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index c7552459a15f..37a4d9e32cd3 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -333,6 +333,16 @@ static inline struct page *grab_cache_page_nowait(struct address_space *mapping,
mapping_gfp_mask(mapping));
}
+static inline struct page *find_subpage(struct page *page, pgoff_t offset)
+{
+ if (PageHuge(page))
+ return page;
+
+ VM_BUG_ON_PAGE(PageTail(page), page);
+
+ return page + (offset & (compound_nr(page) - 1));
+}
+
struct page *find_get_entry(struct address_space *mapping, pgoff_t offset);
struct page *find_lock_entry(struct address_space *mapping, pgoff_t offset);
unsigned find_get_entries(struct address_space *mapping, pgoff_t start,
diff --git a/include/linux/quicklist.h b/include/linux/quicklist.h
deleted file mode 100644
index 034982c98c8b..000000000000
--- a/include/linux/quicklist.h
+++ /dev/null
@@ -1,94 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef LINUX_QUICKLIST_H
-#define LINUX_QUICKLIST_H
-/*
- * Fast allocations and disposal of pages. Pages must be in the condition
- * as needed after allocation when they are freed. Per cpu lists of pages
- * are kept that only contain node local pages.
- *
- * (C) 2007, SGI. Christoph Lameter <cl@linux.com>
- */
-#include <linux/kernel.h>
-#include <linux/gfp.h>
-#include <linux/percpu.h>
-
-#ifdef CONFIG_QUICKLIST
-
-struct quicklist {
- void *page;
- int nr_pages;
-};
-
-DECLARE_PER_CPU(struct quicklist, quicklist)[CONFIG_NR_QUICK];
-
-/*
- * The two key functions quicklist_alloc and quicklist_free are inline so
- * that they may be custom compiled for the platform.
- * Specifying a NULL ctor can remove constructor support. Specifying
- * a constant quicklist allows the determination of the exact address
- * in the per cpu area.
- *
- * The fast patch in quicklist_alloc touched only a per cpu cacheline and
- * the first cacheline of the page itself. There is minmal overhead involved.
- */
-static inline void *quicklist_alloc(int nr, gfp_t flags, void (*ctor)(void *))
-{
- struct quicklist *q;
- void **p = NULL;
-
- q =&get_cpu_var(quicklist)[nr];
- p = q->page;
- if (likely(p)) {
- q->page = p[0];
- p[0] = NULL;
- q->nr_pages--;
- }
- put_cpu_var(quicklist);
- if (likely(p))
- return p;
-
- p = (void *)__get_free_page(flags | __GFP_ZERO);
- if (ctor && p)
- ctor(p);
- return p;
-}
-
-static inline void __quicklist_free(int nr, void (*dtor)(void *), void *p,
- struct page *page)
-{
- struct quicklist *q;
-
- q = &get_cpu_var(quicklist)[nr];
- *(void **)p = q->page;
- q->page = p;
- q->nr_pages++;
- put_cpu_var(quicklist);
-}
-
-static inline void quicklist_free(int nr, void (*dtor)(void *), void *pp)
-{
- __quicklist_free(nr, dtor, pp, virt_to_page(pp));
-}
-
-static inline void quicklist_free_page(int nr, void (*dtor)(void *),
- struct page *page)
-{
- __quicklist_free(nr, dtor, page_address(page), page);
-}
-
-void quicklist_trim(int nr, void (*dtor)(void *),
- unsigned long min_pages, unsigned long max_free);
-
-unsigned long quicklist_total_size(void);
-
-#else
-
-static inline unsigned long quicklist_total_size(void)
-{
- return 0;
-}
-
-#endif
-
-#endif /* LINUX_QUICKLIST_H */
-
diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h
index 9443cafd1969..0f80123650e2 100644
--- a/include/linux/shrinker.h
+++ b/include/linux/shrinker.h
@@ -69,7 +69,7 @@ struct shrinker {
/* These are for internal use */
struct list_head list;
-#ifdef CONFIG_MEMCG_KMEM
+#ifdef CONFIG_MEMCG
/* ID in shrinker_idr */
int id;
#endif
@@ -81,6 +81,11 @@ struct shrinker {
/* Flags */
#define SHRINKER_NUMA_AWARE (1 << 0)
#define SHRINKER_MEMCG_AWARE (1 << 1)
+/*
+ * It just makes sense when the shrinker is also MEMCG_AWARE for now,
+ * non-MEMCG_AWARE shrinker should not have this flag set.
+ */
+#define SHRINKER_NONSLAB (1 << 2)
extern int prealloc_shrinker(struct shrinker *shrinker);
extern void register_shrinker_prepared(struct shrinker *shrinker);
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 56c9c7eed34e..ab2b98ad76e1 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -595,68 +595,6 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
return __kmalloc_node(size, flags, node);
}
-struct memcg_cache_array {
- struct rcu_head rcu;
- struct kmem_cache *entries[0];
-};
-
-/*
- * This is the main placeholder for memcg-related information in kmem caches.
- * Both the root cache and the child caches will have it. For the root cache,
- * this will hold a dynamically allocated array large enough to hold
- * information about the currently limited memcgs in the system. To allow the
- * array to be accessed without taking any locks, on relocation we free the old
- * version only after a grace period.
- *
- * Root and child caches hold different metadata.
- *
- * @root_cache: Common to root and child caches. NULL for root, pointer to
- * the root cache for children.
- *
- * The following fields are specific to root caches.
- *
- * @memcg_caches: kmemcg ID indexed table of child caches. This table is
- * used to index child cachces during allocation and cleared
- * early during shutdown.
- *
- * @root_caches_node: List node for slab_root_caches list.
- *
- * @children: List of all child caches. While the child caches are also
- * reachable through @memcg_caches, a child cache remains on
- * this list until it is actually destroyed.
- *
- * The following fields are specific to child caches.
- *
- * @memcg: Pointer to the memcg this cache belongs to.
- *
- * @children_node: List node for @root_cache->children list.
- *
- * @kmem_caches_node: List node for @memcg->kmem_caches list.
- */
-struct memcg_cache_params {
- struct kmem_cache *root_cache;
- union {
- struct {
- struct memcg_cache_array __rcu *memcg_caches;
- struct list_head __root_caches_node;
- struct list_head children;
- bool dying;
- };
- struct {
- struct mem_cgroup *memcg;
- struct list_head children_node;
- struct list_head kmem_caches_node;
- struct percpu_ref refcnt;
-
- void (*work_fn)(struct kmem_cache *);
- union {
- struct rcu_head rcu_head;
- struct work_struct work;
- };
- };
- };
-};
-
int memcg_update_all_caches(int num_memcgs);
/**
diff --git a/include/linux/t10-pi.h b/include/linux/t10-pi.h
index 3e2a80cc7b56..96305a64a5a7 100644
--- a/include/linux/t10-pi.h
+++ b/include/linux/t10-pi.h
@@ -53,18 +53,4 @@ extern const struct blk_integrity_profile t10_pi_type1_ip;
extern const struct blk_integrity_profile t10_pi_type3_crc;
extern const struct blk_integrity_profile t10_pi_type3_ip;
-#ifdef CONFIG_BLK_DEV_INTEGRITY
-extern void t10_pi_prepare(struct request *rq, u8 protection_type);
-extern void t10_pi_complete(struct request *rq, u8 protection_type,
- unsigned int intervals);
-#else
-static inline void t10_pi_complete(struct request *rq, u8 protection_type,
- unsigned int intervals)
-{
-}
-static inline void t10_pi_prepare(struct request *rq, u8 protection_type)
-{
-}
-#endif
-
#endif
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index dfa718ffdd4f..4e7809408073 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -53,15 +53,21 @@ struct vmap_area {
unsigned long va_start;
unsigned long va_end;
- /*
- * Largest available free size in subtree.
- */
- unsigned long subtree_max_size;
- unsigned long flags;
struct rb_node rb_node; /* address sorted rbtree */
struct list_head list; /* address sorted list */
- struct llist_node purge_list; /* "lazy purge" list */
- struct vm_struct *vm;
+
+ /*
+ * The following three variables can be packed, because
+ * a vmap_area object is always one of the three states:
+ * 1) in "free" tree (root is vmap_area_root)
+ * 2) in "busy" tree (root is free_vmap_area_root)
+ * 3) in purge list (head is vmap_purge_list)
+ */
+ union {
+ unsigned long subtree_max_size; /* in "free" tree */
+ struct vm_struct *vm; /* in "busy" tree */
+ struct llist_node purge_list; /* in purge list */
+ };
};
/*
diff --git a/include/linux/zpool.h b/include/linux/zpool.h
index 7238865e75b0..51bf43076165 100644
--- a/include/linux/zpool.h
+++ b/include/linux/zpool.h
@@ -46,6 +46,8 @@ const char *zpool_get_type(struct zpool *pool);
void zpool_destroy_pool(struct zpool *pool);
+bool zpool_malloc_support_movable(struct zpool *pool);
+
int zpool_malloc(struct zpool *pool, size_t size, gfp_t gfp,
unsigned long *handle);
@@ -90,6 +92,7 @@ struct zpool_driver {
struct zpool *zpool);
void (*destroy)(void *pool);
+ bool malloc_support_movable;
int (*malloc)(void *pool, size_t size, gfp_t gfp,
unsigned long *handle);
void (*free)(void *pool, unsigned long handle);
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 96ee9d94b73e..ea57526a5b89 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -28,6 +28,7 @@ struct io_uring_sqe {
__u16 poll_events;
__u32 sync_range_flags;
__u32 msg_flags;
+ __u32 timeout_flags;
};
__u64 user_data; /* data to be passed back at completion time */
union {
@@ -61,6 +62,7 @@ struct io_uring_sqe {
#define IORING_OP_SYNC_FILE_RANGE 8
#define IORING_OP_SENDMSG 9
#define IORING_OP_RECVMSG 10
+#define IORING_OP_TIMEOUT 11
/*
* sqe->fsync_flags
diff --git a/init/main.c b/init/main.c
index 653693da8da6..208b8fa1808e 100644
--- a/init/main.c
+++ b/init/main.c
@@ -507,7 +507,7 @@ void __init __weak mem_encrypt_init(void) { }
void __init __weak poking_init(void) { }
-void __init __weak pgd_cache_init(void) { }
+void __init __weak pgtable_cache_init(void) { }
bool initcall_debug;
core_param(initcall_debug, initcall_debug, bool, 0644);
@@ -556,6 +556,7 @@ static void __init mm_init(void)
report_meminit();
mem_init();
kmem_cache_init();
+ kmemleak_init();
pgtable_init();
debug_objects_mem_init();
vmalloc_init();
@@ -564,7 +565,6 @@ static void __init mm_init(void)
init_espfix_bsp();
/* Should be run after espfix64 is set up. */
pti_init();
- pgd_cache_init();
}
void __init __weak arch_call_rest_init(void)
@@ -594,7 +594,6 @@ asmlinkage __visible void __init start_kernel(void)
page_address_init();
pr_notice("%s", linux_banner);
setup_arch(&command_line);
- mm_init_cpumask(&init_mm);
setup_command_line(command_line);
setup_nr_cpu_ids();
setup_per_cpu_areas();
@@ -740,7 +739,6 @@ asmlinkage __visible void __init start_kernel(void)
initrd_start = 0;
}
#endif
- kmemleak_init();
setup_per_cpu_pageset();
numa_policy_init();
acpi_early_init();
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index cc0d0cf114e3..a70f7209cda3 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -14,8 +14,9 @@
#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/fs.h>
+#include <linux/fs_context.h>
+#include <linux/fs_parser.h>
#include <linux/kdev_t.h>
-#include <linux/parser.h>
#include <linux/filter.h>
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
@@ -583,58 +584,52 @@ static const struct super_operations bpf_super_ops = {
enum {
OPT_MODE,
- OPT_ERR,
};
-static const match_table_t bpf_mount_tokens = {
- { OPT_MODE, "mode=%o" },
- { OPT_ERR, NULL },
+static const struct fs_parameter_spec bpf_param_specs[] = {
+ fsparam_u32oct ("mode", OPT_MODE),
+ {}
+};
+
+static const struct fs_parameter_description bpf_fs_parameters = {
+ .name = "bpf",
+ .specs = bpf_param_specs,
};
struct bpf_mount_opts {
umode_t mode;
};
-static int bpf_parse_options(char *data, struct bpf_mount_opts *opts)
+static int bpf_parse_param(struct fs_context *fc, struct fs_parameter *param)
{
- substring_t args[MAX_OPT_ARGS];
- int option, token;
- char *ptr;
+ struct bpf_mount_opts *opts = fc->fs_private;
+ struct fs_parse_result result;
+ int opt;
- opts->mode = S_IRWXUGO;
-
- while ((ptr = strsep(&data, ",")) != NULL) {
- if (!*ptr)
- continue;
-
- token = match_token(ptr, bpf_mount_tokens, args);
- switch (token) {
- case OPT_MODE:
- if (match_octal(&args[0], &option))
- return -EINVAL;
- opts->mode = option & S_IALLUGO;
- break;
+ opt = fs_parse(fc, &bpf_fs_parameters, param, &result);
+ if (opt < 0)
/* We might like to report bad mount options here, but
* traditionally we've ignored all mount options, so we'd
* better continue to ignore non-existing options for bpf.
*/
- }
+ return opt == -ENOPARAM ? 0 : opt;
+
+ switch (opt) {
+ case OPT_MODE:
+ opts->mode = result.uint_32 & S_IALLUGO;
+ break;
}
return 0;
}
-static int bpf_fill_super(struct super_block *sb, void *data, int silent)
+static int bpf_fill_super(struct super_block *sb, struct fs_context *fc)
{
static const struct tree_descr bpf_rfiles[] = { { "" } };
- struct bpf_mount_opts opts;
+ struct bpf_mount_opts *opts = fc->fs_private;
struct inode *inode;
int ret;
- ret = bpf_parse_options(data, &opts);
- if (ret)
- return ret;
-
ret = simple_fill_super(sb, BPF_FS_MAGIC, bpf_rfiles);
if (ret)
return ret;
@@ -644,21 +639,50 @@ static int bpf_fill_super(struct super_block *sb, void *data, int silent)
inode = sb->s_root->d_inode;
inode->i_op = &bpf_dir_iops;
inode->i_mode &= ~S_IALLUGO;
- inode->i_mode |= S_ISVTX | opts.mode;
+ inode->i_mode |= S_ISVTX | opts->mode;
return 0;
}
-static struct dentry *bpf_mount(struct file_system_type *type, int flags,
- const char *dev_name, void *data)
+static int bpf_get_tree(struct fs_context *fc)
+{
+ return get_tree_nodev(fc, bpf_fill_super);
+}
+
+static void bpf_free_fc(struct fs_context *fc)
{
- return mount_nodev(type, flags, data, bpf_fill_super);
+ kfree(fc->fs_private);
+}
+
+static const struct fs_context_operations bpf_context_ops = {
+ .free = bpf_free_fc,
+ .parse_param = bpf_parse_param,
+ .get_tree = bpf_get_tree,
+};
+
+/*
+ * Set up the filesystem mount context.
+ */
+static int bpf_init_fs_context(struct fs_context *fc)
+{
+ struct bpf_mount_opts *opts;
+
+ opts = kzalloc(sizeof(struct bpf_mount_opts), GFP_KERNEL);
+ if (!opts)
+ return -ENOMEM;
+
+ opts->mode = S_IRWXUGO;
+
+ fc->fs_private = opts;
+ fc->ops = &bpf_context_ops;
+ return 0;
}
static struct file_system_type bpf_fs_type = {
.owner = THIS_MODULE,
.name = "bpf",
- .mount = bpf_mount,
+ .init_fs_context = bpf_init_fs_context,
+ .parameters = &bpf_fs_parameters,
.kill_sb = kill_litter_super,
};
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 84fa00497c49..94d38a39d72e 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -26,6 +26,7 @@
#include <linux/percpu-rwsem.h>
#include <linux/task_work.h>
#include <linux/shmem_fs.h>
+#include <linux/khugepaged.h>
#include <linux/uprobes.h>
@@ -143,17 +144,19 @@ static loff_t vaddr_to_offset(struct vm_area_struct *vma, unsigned long vaddr)
*
* @vma: vma that holds the pte pointing to page
* @addr: address the old @page is mapped at
- * @page: the cowed page we are replacing by kpage
- * @kpage: the modified page we replace page by
+ * @old_page: the page we are replacing by new_page
+ * @new_page: the modified page we replace page by
*
- * Returns 0 on success, -EFAULT on failure.
+ * If @new_page is NULL, only unmap @old_page.
+ *
+ * Returns 0 on success, negative error code otherwise.
*/
static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
struct page *old_page, struct page *new_page)
{
struct mm_struct *mm = vma->vm_mm;
struct page_vma_mapped_walk pvmw = {
- .page = old_page,
+ .page = compound_head(old_page),
.vma = vma,
.address = addr,
};
@@ -164,12 +167,12 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm, addr,
addr + PAGE_SIZE);
- VM_BUG_ON_PAGE(PageTransHuge(old_page), old_page);
-
- err = mem_cgroup_try_charge(new_page, vma->vm_mm, GFP_KERNEL, &memcg,
- false);
- if (err)
- return err;
+ if (new_page) {
+ err = mem_cgroup_try_charge(new_page, vma->vm_mm, GFP_KERNEL,
+ &memcg, false);
+ if (err)
+ return err;
+ }
/* For try_to_free_swap() and munlock_vma_page() below */
lock_page(old_page);
@@ -177,15 +180,20 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
mmu_notifier_invalidate_range_start(&range);
err = -EAGAIN;
if (!page_vma_mapped_walk(&pvmw)) {
- mem_cgroup_cancel_charge(new_page, memcg, false);
+ if (new_page)
+ mem_cgroup_cancel_charge(new_page, memcg, false);
goto unlock;
}
VM_BUG_ON_PAGE(addr != pvmw.address, old_page);
- get_page(new_page);
- page_add_new_anon_rmap(new_page, vma, addr, false);
- mem_cgroup_commit_charge(new_page, memcg, false, false);
- lru_cache_add_active_or_unevictable(new_page, vma);
+ if (new_page) {
+ get_page(new_page);
+ page_add_new_anon_rmap(new_page, vma, addr, false);
+ mem_cgroup_commit_charge(new_page, memcg, false, false);
+ lru_cache_add_active_or_unevictable(new_page, vma);
+ } else
+ /* no new page, just dec_mm_counter for old_page */
+ dec_mm_counter(mm, MM_ANONPAGES);
if (!PageAnon(old_page)) {
dec_mm_counter(mm, mm_counter_file(old_page));
@@ -194,8 +202,9 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
flush_cache_page(vma, addr, pte_pfn(*pvmw.pte));
ptep_clear_flush_notify(vma, addr, pvmw.pte);
- set_pte_at_notify(mm, addr, pvmw.pte,
- mk_pte(new_page, vma->vm_page_prot));
+ if (new_page)
+ set_pte_at_notify(mm, addr, pvmw.pte,
+ mk_pte(new_page, vma->vm_page_prot));
page_remove_rmap(old_page, false);
if (!page_mapped(old_page))
@@ -464,6 +473,7 @@ int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,
struct page *old_page, *new_page;
struct vm_area_struct *vma;
int ret, is_register, ref_ctr_updated = 0;
+ bool orig_page_huge = false;
is_register = is_swbp_insn(&opcode);
uprobe = container_of(auprobe, struct uprobe, arch);
@@ -471,7 +481,7 @@ int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,
retry:
/* Read the page with vaddr into memory */
ret = get_user_pages_remote(NULL, mm, vaddr, 1,
- FOLL_FORCE | FOLL_SPLIT, &old_page, &vma, NULL);
+ FOLL_FORCE | FOLL_SPLIT_PMD, &old_page, &vma, NULL);
if (ret <= 0)
return ret;
@@ -488,6 +498,10 @@ retry:
ref_ctr_updated = 1;
}
+ ret = 0;
+ if (!is_register && !PageAnon(old_page))
+ goto put_old;
+
ret = anon_vma_prepare(vma);
if (ret)
goto put_old;
@@ -501,8 +515,33 @@ retry:
copy_highpage(new_page, old_page);
copy_to_page(new_page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE);
+ if (!is_register) {
+ struct page *orig_page;
+ pgoff_t index;
+
+ VM_BUG_ON_PAGE(!PageAnon(old_page), old_page);
+
+ index = vaddr_to_offset(vma, vaddr & PAGE_MASK) >> PAGE_SHIFT;
+ orig_page = find_get_page(vma->vm_file->f_inode->i_mapping,
+ index);
+
+ if (orig_page) {
+ if (PageUptodate(orig_page) &&
+ pages_identical(new_page, orig_page)) {
+ /* let go new_page */
+ put_page(new_page);
+ new_page = NULL;
+
+ if (PageCompound(orig_page))
+ orig_page_huge = true;
+ }
+ put_page(orig_page);
+ }
+ }
+
ret = __replace_page(vma, vaddr, old_page, new_page);
- put_page(new_page);
+ if (new_page)
+ put_page(new_page);
put_old:
put_page(old_page);
@@ -513,6 +552,10 @@ put_old:
if (ret && is_register && ref_ctr_updated)
update_ref_ctr(uprobe, mm, -1);
+ /* try collapse pmd for compound page */
+ if (!ret && orig_page_huge)
+ collapse_pte_mapped_thp(mm, vaddr);
+
return ret;
}
diff --git a/kernel/resource.c b/kernel/resource.c
index 74877e9d90ca..76036a41143b 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -487,8 +487,8 @@ int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages,
while (start < end &&
!find_next_iomem_res(start, end, flags, IORES_DESC_NONE,
false, &res)) {
- pfn = (res.start + PAGE_SIZE - 1) >> PAGE_SHIFT;
- end_pfn = (res.end + 1) >> PAGE_SHIFT;
+ pfn = PFN_UP(res.start);
+ end_pfn = PFN_DOWN(res.end + 1);
if (end_pfn > pfn)
ret = (*func)(pfn, end_pfn - pfn, arg);
if (ret)
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index c892c6280c9f..8dad5aa600ea 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -238,7 +238,6 @@ static void do_idle(void)
tick_nohz_idle_enter();
while (!need_resched()) {
- check_pgt_cache();
rmb();
local_irq_disable();
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 078950d9605b..00fcea236eba 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -264,7 +264,8 @@ extern struct ctl_table epoll_table[];
extern struct ctl_table firmware_config_table[];
#endif
-#ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
+#if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \
+ defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT)
int sysctl_legacy_va_layout;
#endif
@@ -1573,7 +1574,8 @@ static struct ctl_table vm_table[] = {
.proc_handler = proc_dointvec,
.extra1 = SYSCTL_ZERO,
},
-#ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
+#if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \
+ defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT)
{
.procname = "legacy_va_layout",
.data = &sysctl_legacy_va_layout,
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index e0e14780a13d..6b1b1703a646 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -576,17 +576,18 @@ config DEBUG_KMEMLEAK
In order to access the kmemleak file, debugfs needs to be
mounted (usually at /sys/kernel/debug).
-config DEBUG_KMEMLEAK_EARLY_LOG_SIZE
- int "Maximum kmemleak early log entries"
+config DEBUG_KMEMLEAK_MEM_POOL_SIZE
+ int "Kmemleak memory pool size"
depends on DEBUG_KMEMLEAK
- range 200 40000
- default 400
+ range 200 1000000
+ default 16000
help
Kmemleak must track all the memory allocations to avoid
reporting false positives. Since memory may be allocated or
- freed before kmemleak is initialised, an early log buffer is
- used to store these actions. If kmemleak reports "early log
- buffer exceeded", please increase this value.
+ freed before kmemleak is fully initialised, use a static pool
+ of metadata objects to track such callbacks. After kmemleak is
+ fully initialised, this memory pool acts as an emergency one
+ if slab allocations fail.
config DEBUG_KMEMLEAK_TEST
tristate "Simple test for the kernel memory leak detector"
diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan
index 7fa97a8b5717..6c9682ce0254 100644
--- a/lib/Kconfig.kasan
+++ b/lib/Kconfig.kasan
@@ -134,6 +134,14 @@ config KASAN_S390_4_LEVEL_PAGING
to 3TB of RAM with KASan enabled). This options allows to force
4-level paging instead.
+config KASAN_SW_TAGS_IDENTIFY
+ bool "Enable memory corruption identification"
+ depends on KASAN_SW_TAGS
+ help
+ This option enables best-effort identification of bug type
+ (use-after-free or out-of-bounds) at the cost of increased
+ memory consumption.
+
config TEST_KASAN
tristate "Module for testing KASAN for bug detection"
depends on m && KASAN
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index f1e0569b4539..639d5e7014c1 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -878,7 +878,7 @@ static inline bool page_copy_sane(struct page *page, size_t offset, size_t n)
head = compound_head(page);
v += (page - head) << PAGE_SHIFT;
- if (likely(n <= v && v <= (PAGE_SIZE << compound_order(head))))
+ if (likely(n <= v && v <= (page_size(head))))
return true;
WARN_ON(1);
return false;
diff --git a/lib/show_mem.c b/lib/show_mem.c
index 5c86ef4c899f..1c26c14ffbb9 100644
--- a/lib/show_mem.c
+++ b/lib/show_mem.c
@@ -6,7 +6,6 @@
*/
#include <linux/mm.h>
-#include <linux/quicklist.h>
#include <linux/cma.h>
void show_mem(unsigned int filter, nodemask_t *nodemask)
@@ -39,10 +38,6 @@ void show_mem(unsigned int filter, nodemask_t *nodemask)
#ifdef CONFIG_CMA
printk("%lu pages cma reserved\n", totalcma_pages);
#endif
-#ifdef CONFIG_QUICKLIST
- printk("%lu pages in pagetable cache\n",
- quicklist_total_size());
-#endif
#ifdef CONFIG_MEMORY_FAILURE
printk("%lu pages hwpoisoned\n", atomic_long_read(&num_poisoned_pages));
#endif
diff --git a/lib/test_kasan.c b/lib/test_kasan.c
index b63b367a94e8..49cc4d570a40 100644
--- a/lib/test_kasan.c
+++ b/lib/test_kasan.c
@@ -18,6 +18,9 @@
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/uaccess.h>
+#include <linux/io.h>
+
+#include <asm/page.h>
/*
* Note: test functions are marked noinline so that their names appear in
@@ -337,6 +340,42 @@ static noinline void __init kmalloc_uaf2(void)
kfree(ptr2);
}
+static noinline void __init kfree_via_page(void)
+{
+ char *ptr;
+ size_t size = 8;
+ struct page *page;
+ unsigned long offset;
+
+ pr_info("invalid-free false positive (via page)\n");
+ ptr = kmalloc(size, GFP_KERNEL);
+ if (!ptr) {
+ pr_err("Allocation failed\n");
+ return;
+ }
+
+ page = virt_to_page(ptr);
+ offset = offset_in_page(ptr);
+ kfree(page_address(page) + offset);
+}
+
+static noinline void __init kfree_via_phys(void)
+{
+ char *ptr;
+ size_t size = 8;
+ phys_addr_t phys;
+
+ pr_info("invalid-free false positive (via phys)\n");
+ ptr = kmalloc(size, GFP_KERNEL);
+ if (!ptr) {
+ pr_err("Allocation failed\n");
+ return;
+ }
+
+ phys = virt_to_phys(ptr);
+ kfree(phys_to_virt(phys));
+}
+
static noinline void __init kmem_cache_oob(void)
{
char *p;
@@ -737,6 +776,8 @@ static int __init kmalloc_tests_init(void)
kmalloc_uaf();
kmalloc_uaf_memset();
kmalloc_uaf2();
+ kfree_via_page();
+ kfree_via_phys();
kmem_cache_oob();
memcg_accounted_kmem_cache();
kasan_stack_oob();
diff --git a/mm/Kconfig b/mm/Kconfig
index 2fe4902ad755..a5dae9a7eb51 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -273,11 +273,6 @@ config BOUNCE
by default when ZONE_DMA or HIGHMEM is selected, but you
may say n to override this.
-config NR_QUICK
- int
- depends on QUICKLIST
- default "1"
-
config VIRT_TO_BUS
bool
help
@@ -717,6 +712,17 @@ config GUP_BENCHMARK
config GUP_GET_PTE_LOW_HIGH
bool
+config READ_ONLY_THP_FOR_FS
+ bool "Read-only THP for filesystems (EXPERIMENTAL)"
+ depends on TRANSPARENT_HUGE_PAGECACHE && SHMEM
+
+ help
+ Allow khugepaged to put read-only file-backed pages in THP.
+
+ This is marked experimental because it is a new feature. Write
+ support of file THPs will be developed in the next few release
+ cycles.
+
config ARCH_HAS_PTE_SPECIAL
bool
diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug
index 82b6a20898bd..327b3ebf23bf 100644
--- a/mm/Kconfig.debug
+++ b/mm/Kconfig.debug
@@ -21,7 +21,9 @@ config DEBUG_PAGEALLOC
Also, the state of page tracking structures is checked more often as
pages are being allocated and freed, as unexpected state changes
often happen for same reasons as memory corruption (e.g. double free,
- use-after-free).
+ use-after-free). The error reports for these checks can be augmented
+ with stack traces of last allocation and freeing of the page, when
+ PAGE_OWNER is also selected and enabled on boot.
For architectures which don't enable ARCH_SUPPORTS_DEBUG_PAGEALLOC,
fill the pages with poison patterns after free_pages() and verify
diff --git a/mm/Makefile b/mm/Makefile
index d0b295c3b764..d996846697ef 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -21,6 +21,9 @@ KCOV_INSTRUMENT_memcontrol.o := n
KCOV_INSTRUMENT_mmzone.o := n
KCOV_INSTRUMENT_vmstat.o := n
+CFLAGS_init-mm.o += $(call cc-disable-warning, override-init)
+CFLAGS_init-mm.o += $(call cc-disable-warning, initializer-overrides)
+
mmu-y := nommu.o
mmu-$(CONFIG_MMU) := highmem.o memory.o mincore.o \
mlock.o mmap.o mmu_gather.o mprotect.o mremap.o \
@@ -72,7 +75,6 @@ obj-$(CONFIG_FAILSLAB) += failslab.o
obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o
obj-$(CONFIG_MEMTEST) += memtest.o
obj-$(CONFIG_MIGRATION) += migrate.o
-obj-$(CONFIG_QUICKLIST) += quicklist.o
obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += huge_memory.o khugepaged.o
obj-$(CONFIG_PAGE_COUNTER) += page_counter.o
obj-$(CONFIG_MEMCG) += memcontrol.o vmpressure.o
diff --git a/mm/compaction.c b/mm/compaction.c
index 952dc2fb24e5..ce08b39d85d4 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -969,7 +969,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
* is safe to read and it's 0 for tail pages.
*/
if (unlikely(PageCompound(page))) {
- low_pfn += (1UL << compound_order(page)) - 1;
+ low_pfn += compound_nr(page) - 1;
goto isolate_fail;
}
}
@@ -1737,8 +1737,7 @@ static unsigned long fast_find_migrateblock(struct compact_control *cc)
* starting at the block pointed to by the migrate scanner pfn within
* compact_control.
*/
-static isolate_migrate_t isolate_migratepages(struct zone *zone,
- struct compact_control *cc)
+static isolate_migrate_t isolate_migratepages(struct compact_control *cc)
{
unsigned long block_start_pfn;
unsigned long block_end_pfn;
@@ -1756,8 +1755,8 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
*/
low_pfn = fast_find_migrateblock(cc);
block_start_pfn = pageblock_start_pfn(low_pfn);
- if (block_start_pfn < zone->zone_start_pfn)
- block_start_pfn = zone->zone_start_pfn;
+ if (block_start_pfn < cc->zone->zone_start_pfn)
+ block_start_pfn = cc->zone->zone_start_pfn;
/*
* fast_find_migrateblock marks a pageblock skipped so to avoid
@@ -1787,8 +1786,8 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
if (!(low_pfn % (SWAP_CLUSTER_MAX * pageblock_nr_pages)))
cond_resched();
- page = pageblock_pfn_to_page(block_start_pfn, block_end_pfn,
- zone);
+ page = pageblock_pfn_to_page(block_start_pfn,
+ block_end_pfn, cc->zone);
if (!page)
continue;
@@ -2078,6 +2077,17 @@ compact_zone(struct compact_control *cc, struct capture_control *capc)
const bool sync = cc->mode != MIGRATE_ASYNC;
bool update_cached;
+ /*
+ * These counters track activities during zone compaction. Initialize
+ * them before compacting a new zone.
+ */
+ cc->total_migrate_scanned = 0;
+ cc->total_free_scanned = 0;
+ cc->nr_migratepages = 0;
+ cc->nr_freepages = 0;
+ INIT_LIST_HEAD(&cc->freepages);
+ INIT_LIST_HEAD(&cc->migratepages);
+
cc->migratetype = gfpflags_to_migratetype(cc->gfp_mask);
ret = compaction_suitable(cc->zone, cc->order, cc->alloc_flags,
cc->classzone_idx);
@@ -2158,7 +2168,7 @@ compact_zone(struct compact_control *cc, struct capture_control *capc)
cc->rescan = true;
}
- switch (isolate_migratepages(cc->zone, cc)) {
+ switch (isolate_migratepages(cc)) {
case ISOLATE_ABORT:
ret = COMPACT_CONTENDED;
putback_movable_pages(&cc->migratepages);
@@ -2281,10 +2291,6 @@ static enum compact_result compact_zone_order(struct zone *zone, int order,
{
enum compact_result ret;
struct compact_control cc = {
- .nr_freepages = 0,
- .nr_migratepages = 0,
- .total_migrate_scanned = 0,
- .total_free_scanned = 0,
.order = order,
.search_order = order,
.gfp_mask = gfp_mask,
@@ -2305,8 +2311,6 @@ static enum compact_result compact_zone_order(struct zone *zone, int order,
if (capture)
current->capture_control = &capc;
- INIT_LIST_HEAD(&cc.freepages);
- INIT_LIST_HEAD(&cc.migratepages);
ret = compact_zone(&cc, &capc);
@@ -2408,8 +2412,6 @@ static void compact_node(int nid)
struct zone *zone;
struct compact_control cc = {
.order = -1,
- .total_migrate_scanned = 0,
- .total_free_scanned = 0,
.mode = MIGRATE_SYNC,
.ignore_skip_hint = true,
.whole_zone = true,
@@ -2423,11 +2425,7 @@ static void compact_node(int nid)
if (!populated_zone(zone))
continue;
- cc.nr_freepages = 0;
- cc.nr_migratepages = 0;
cc.zone = zone;
- INIT_LIST_HEAD(&cc.freepages);
- INIT_LIST_HEAD(&cc.migratepages);
compact_zone(&cc, NULL);
@@ -2529,8 +2527,6 @@ static void kcompactd_do_work(pg_data_t *pgdat)
struct compact_control cc = {
.order = pgdat->kcompactd_max_order,
.search_order = pgdat->kcompactd_max_order,
- .total_migrate_scanned = 0,
- .total_free_scanned = 0,
.classzone_idx = pgdat->kcompactd_classzone_idx,
.mode = MIGRATE_SYNC_LIGHT,
.ignore_skip_hint = false,
@@ -2554,16 +2550,10 @@ static void kcompactd_do_work(pg_data_t *pgdat)
COMPACT_CONTINUE)
continue;
- cc.nr_freepages = 0;
- cc.nr_migratepages = 0;
- cc.total_migrate_scanned = 0;
- cc.total_free_scanned = 0;
- cc.zone = zone;
- INIT_LIST_HEAD(&cc.freepages);
- INIT_LIST_HEAD(&cc.migratepages);
-
if (kthread_should_stop())
return;
+
+ cc.zone = zone;
status = compact_zone(&cc, NULL);
if (status == COMPACT_SUCCESS) {
diff --git a/mm/filemap.c b/mm/filemap.c
index 40667c2f3383..1146fcfa3215 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -126,7 +126,7 @@ static void page_cache_delete(struct address_space *mapping,
/* hugetlb pages are represented by a single entry in the xarray */
if (!PageHuge(page)) {
xas_set_order(&xas, page->index, compound_order(page));
- nr = 1U << compound_order(page);
+ nr = compound_nr(page);
}
VM_BUG_ON_PAGE(!PageLocked(page), page);
@@ -203,8 +203,9 @@ static void unaccount_page_cache_page(struct address_space *mapping,
__mod_node_page_state(page_pgdat(page), NR_SHMEM, -nr);
if (PageTransHuge(page))
__dec_node_page_state(page, NR_SHMEM_THPS);
- } else {
- VM_BUG_ON_PAGE(PageTransHuge(page), page);
+ } else if (PageTransHuge(page)) {
+ __dec_node_page_state(page, NR_FILE_THPS);
+ filemap_nr_thps_dec(mapping);
}
/*
@@ -281,11 +282,11 @@ EXPORT_SYMBOL(delete_from_page_cache);
* @pvec: pagevec with pages to delete
*
* The function walks over mapping->i_pages and removes pages passed in @pvec
- * from the mapping. The function expects @pvec to be sorted by page index.
+ * from the mapping. The function expects @pvec to be sorted by page index
+ * and is optimised for it to be dense.
* It tolerates holes in @pvec (mapping entries at those indices are not
* modified). The function expects only THP head pages to be present in the
- * @pvec and takes care to delete all corresponding tail pages from the
- * mapping as well.
+ * @pvec.
*
* The function expects the i_pages lock to be held.
*/
@@ -294,40 +295,43 @@ static void page_cache_delete_batch(struct address_space *mapping,
{
XA_STATE(xas, &mapping->i_pages, pvec->pages[0]->index);
int total_pages = 0;
- int i = 0, tail_pages = 0;
+ int i = 0;
struct page *page;
mapping_set_update(&xas, mapping);
xas_for_each(&xas, page, ULONG_MAX) {
- if (i >= pagevec_count(pvec) && !tail_pages)
+ if (i >= pagevec_count(pvec))
break;
+
+ /* A swap/dax/shadow entry got inserted? Skip it. */
if (xa_is_value(page))
continue;
- if (!tail_pages) {
- /*
- * Some page got inserted in our range? Skip it. We
- * have our pages locked so they are protected from
- * being removed.
- */
- if (page != pvec->pages[i]) {
- VM_BUG_ON_PAGE(page->index >
- pvec->pages[i]->index, page);
- continue;
- }
- WARN_ON_ONCE(!PageLocked(page));
- if (PageTransHuge(page) && !PageHuge(page))
- tail_pages = HPAGE_PMD_NR - 1;
+ /*
+ * A page got inserted in our range? Skip it. We have our
+ * pages locked so they are protected from being removed.
+ * If we see a page whose index is higher than ours, it
+ * means our page has been removed, which shouldn't be
+ * possible because we're holding the PageLock.
+ */
+ if (page != pvec->pages[i]) {
+ VM_BUG_ON_PAGE(page->index > pvec->pages[i]->index,
+ page);
+ continue;
+ }
+
+ WARN_ON_ONCE(!PageLocked(page));
+
+ if (page->index == xas.xa_index)
page->mapping = NULL;
- /*
- * Leave page->index set: truncation lookup relies
- * upon it
- */
+ /* Leave page->index set: truncation lookup relies on it */
+
+ /*
+ * Move to the next page in the vector if this is a regular
+ * page or the index is of the last sub-page of this compound
+ * page.
+ */
+ if (page->index + compound_nr(page) - 1 == xas.xa_index)
i++;
- } else {
- VM_BUG_ON_PAGE(page->index + HPAGE_PMD_NR - tail_pages
- != pvec->pages[i]->index, page);
- tail_pages--;
- }
xas_store(&xas, NULL);
total_pages++;
}
@@ -408,7 +412,8 @@ int __filemap_fdatawrite_range(struct address_space *mapping, loff_t start,
.range_end = end,
};
- if (!mapping_cap_writeback_dirty(mapping))
+ if (!mapping_cap_writeback_dirty(mapping) ||
+ !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
return 0;
wbc_attach_fdatawrite_inode(&wbc, mapping->host);
@@ -617,10 +622,13 @@ int filemap_fdatawait_keep_errors(struct address_space *mapping)
}
EXPORT_SYMBOL(filemap_fdatawait_keep_errors);
+/* Returns true if writeback might be needed or already in progress. */
static bool mapping_needs_writeback(struct address_space *mapping)
{
- return (!dax_mapping(mapping) && mapping->nrpages) ||
- (dax_mapping(mapping) && mapping->nrexceptional);
+ if (dax_mapping(mapping))
+ return mapping->nrexceptional;
+
+ return mapping->nrpages;
}
int filemap_write_and_wait(struct address_space *mapping)
@@ -1516,7 +1524,7 @@ EXPORT_SYMBOL(page_cache_prev_miss);
struct page *find_get_entry(struct address_space *mapping, pgoff_t offset)
{
XA_STATE(xas, &mapping->i_pages, offset);
- struct page *head, *page;
+ struct page *page;
rcu_read_lock();
repeat:
@@ -1531,25 +1539,19 @@ repeat:
if (!page || xa_is_value(page))
goto out;
- head = compound_head(page);
- if (!page_cache_get_speculative(head))
+ if (!page_cache_get_speculative(page))
goto repeat;
- /* The page was split under us? */
- if (compound_head(page) != head) {
- put_page(head);
- goto repeat;
- }
-
/*
- * Has the page moved?
+ * Has the page moved or been split?
* This is part of the lockless pagecache protocol. See
* include/linux/pagemap.h for details.
*/
if (unlikely(page != xas_reload(&xas))) {
- put_page(head);
+ put_page(page);
goto repeat;
}
+ page = find_subpage(page, offset);
out:
rcu_read_unlock();
@@ -1646,7 +1648,7 @@ repeat:
}
/* Has the page been truncated? */
- if (unlikely(page->mapping != mapping)) {
+ if (unlikely(compound_head(page)->mapping != mapping)) {
unlock_page(page);
put_page(page);
goto repeat;
@@ -1731,7 +1733,6 @@ unsigned find_get_entries(struct address_space *mapping,
rcu_read_lock();
xas_for_each(&xas, page, ULONG_MAX) {
- struct page *head;
if (xas_retry(&xas, page))
continue;
/*
@@ -1742,17 +1743,13 @@ unsigned find_get_entries(struct address_space *mapping,
if (xa_is_value(page))
goto export;
- head = compound_head(page);
- if (!page_cache_get_speculative(head))
+ if (!page_cache_get_speculative(page))
goto retry;
- /* The page was split under us? */
- if (compound_head(page) != head)
- goto put_page;
-
- /* Has the page moved? */
+ /* Has the page moved or been split? */
if (unlikely(page != xas_reload(&xas)))
goto put_page;
+ page = find_subpage(page, xas.xa_index);
export:
indices[ret] = xas.xa_index;
@@ -1761,7 +1758,7 @@ export:
break;
continue;
put_page:
- put_page(head);
+ put_page(page);
retry:
xas_reset(&xas);
}
@@ -1803,33 +1800,27 @@ unsigned find_get_pages_range(struct address_space *mapping, pgoff_t *start,
rcu_read_lock();
xas_for_each(&xas, page, end) {
- struct page *head;
if (xas_retry(&xas, page))
continue;
/* Skip over shadow, swap and DAX entries */
if (xa_is_value(page))
continue;
- head = compound_head(page);
- if (!page_cache_get_speculative(head))
+ if (!page_cache_get_speculative(page))
goto retry;
- /* The page was split under us? */
- if (compound_head(page) != head)
- goto put_page;
-
- /* Has the page moved? */
+ /* Has the page moved or been split? */
if (unlikely(page != xas_reload(&xas)))
goto put_page;
- pages[ret] = page;
+ pages[ret] = find_subpage(page, xas.xa_index);
if (++ret == nr_pages) {
*start = xas.xa_index + 1;
goto out;
}
continue;
put_page:
- put_page(head);
+ put_page(page);
retry:
xas_reset(&xas);
}
@@ -1874,7 +1865,6 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
rcu_read_lock();
for (page = xas_load(&xas); page; page = xas_next(&xas)) {
- struct page *head;
if (xas_retry(&xas, page))
continue;
/*
@@ -1884,24 +1874,19 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
if (xa_is_value(page))
break;
- head = compound_head(page);
- if (!page_cache_get_speculative(head))
+ if (!page_cache_get_speculative(page))
goto retry;
- /* The page was split under us? */
- if (compound_head(page) != head)
- goto put_page;
-
- /* Has the page moved? */
+ /* Has the page moved or been split? */
if (unlikely(page != xas_reload(&xas)))
goto put_page;
- pages[ret] = page;
+ pages[ret] = find_subpage(page, xas.xa_index);
if (++ret == nr_pages)
break;
continue;
put_page:
- put_page(head);
+ put_page(page);
retry:
xas_reset(&xas);
}
@@ -1937,7 +1922,6 @@ unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index,
rcu_read_lock();
xas_for_each_marked(&xas, page, end, tag) {
- struct page *head;
if (xas_retry(&xas, page))
continue;
/*
@@ -1948,26 +1932,21 @@ unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index,
if (xa_is_value(page))
continue;
- head = compound_head(page);
- if (!page_cache_get_speculative(head))
+ if (!page_cache_get_speculative(page))
goto retry;
- /* The page was split under us? */
- if (compound_head(page) != head)
- goto put_page;
-
- /* Has the page moved? */
+ /* Has the page moved or been split? */
if (unlikely(page != xas_reload(&xas)))
goto put_page;
- pages[ret] = page;
+ pages[ret] = find_subpage(page, xas.xa_index);
if (++ret == nr_pages) {
*index = xas.xa_index + 1;
goto out;
}
continue;
put_page:
- put_page(head);
+ put_page(page);
retry:
xas_reset(&xas);
}
@@ -2562,12 +2541,12 @@ retry_find:
goto out_retry;
/* Did it get truncated? */
- if (unlikely(page->mapping != mapping)) {
+ if (unlikely(compound_head(page)->mapping != mapping)) {
unlock_page(page);
put_page(page);
goto retry_find;
}
- VM_BUG_ON_PAGE(page->index != offset, page);
+ VM_BUG_ON_PAGE(page_to_pgoff(page) != offset, page);
/*
* We have a locked page in the page cache, now we need to check
@@ -2648,7 +2627,7 @@ void filemap_map_pages(struct vm_fault *vmf,
pgoff_t last_pgoff = start_pgoff;
unsigned long max_idx;
XA_STATE(xas, &mapping->i_pages, start_pgoff);
- struct page *head, *page;
+ struct page *page;
rcu_read_lock();
xas_for_each(&xas, page, end_pgoff) {
@@ -2657,24 +2636,19 @@ void filemap_map_pages(struct vm_fault *vmf,
if (xa_is_value(page))
goto next;
- head = compound_head(page);
-
/*
* Check for a locked page first, as a speculative
* reference may adversely influence page migration.
*/
- if (PageLocked(head))
+ if (PageLocked(page))
goto next;
- if (!page_cache_get_speculative(head))
+ if (!page_cache_get_speculative(page))
goto next;
- /* The page was split under us? */
- if (compound_head(page) != head)
- goto skip;
-
- /* Has the page moved? */
+ /* Has the page moved or been split? */
if (unlikely(page != xas_reload(&xas)))
goto skip;
+ page = find_subpage(page, xas.xa_index);
if (!PageUptodate(page) ||
PageReadahead(page) ||
diff --git a/mm/gup.c b/mm/gup.c
index 98f13ab37bac..60c3915c8ee6 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -29,85 +29,70 @@ struct follow_page_context {
unsigned int page_mask;
};
-typedef int (*set_dirty_func_t)(struct page *page);
-
-static void __put_user_pages_dirty(struct page **pages,
- unsigned long npages,
- set_dirty_func_t sdf)
-{
- unsigned long index;
-
- for (index = 0; index < npages; index++) {
- struct page *page = compound_head(pages[index]);
-
- /*
- * Checking PageDirty at this point may race with
- * clear_page_dirty_for_io(), but that's OK. Two key cases:
- *
- * 1) This code sees the page as already dirty, so it skips
- * the call to sdf(). That could happen because
- * clear_page_dirty_for_io() called page_mkclean(),
- * followed by set_page_dirty(). However, now the page is
- * going to get written back, which meets the original
- * intention of setting it dirty, so all is well:
- * clear_page_dirty_for_io() goes on to call
- * TestClearPageDirty(), and write the page back.
- *
- * 2) This code sees the page as clean, so it calls sdf().
- * The page stays dirty, despite being written back, so it
- * gets written back again in the next writeback cycle.
- * This is harmless.
- */
- if (!PageDirty(page))
- sdf(page);
-
- put_user_page(page);
- }
-}
-
/**
- * put_user_pages_dirty() - release and dirty an array of gup-pinned pages
- * @pages: array of pages to be marked dirty and released.
+ * put_user_pages_dirty_lock() - release and optionally dirty gup-pinned pages
+ * @pages: array of pages to be maybe marked dirty, and definitely released.
* @npages: number of pages in the @pages array.
+ * @make_dirty: whether to mark the pages dirty
*
* "gup-pinned page" refers to a page that has had one of the get_user_pages()
* variants called on that page.
*
* For each page in the @pages array, make that page (or its head page, if a
- * compound page) dirty, if it was previously listed as clean. Then, release
- * the page using put_user_page().
+ * compound page) dirty, if @make_dirty is true, and if the page was previously
+ * listed as clean. In any case, releases all pages using put_user_page(),
+ * possibly via put_user_pages(), for the non-dirty case.
*
* Please see the put_user_page() documentation for details.
*
- * set_page_dirty(), which does not lock the page, is used here.
- * Therefore, it is the caller's responsibility to ensure that this is
- * safe. If not, then put_user_pages_dirty_lock() should be called instead.
+ * set_page_dirty_lock() is used internally. If instead, set_page_dirty() is
+ * required, then the caller should a) verify that this is really correct,
+ * because _lock() is usually required, and b) hand code it:
+ * set_page_dirty_lock(), put_user_page().
*
*/
-void put_user_pages_dirty(struct page **pages, unsigned long npages)
+void put_user_pages_dirty_lock(struct page **pages, unsigned long npages,
+ bool make_dirty)
{
- __put_user_pages_dirty(pages, npages, set_page_dirty);
-}
-EXPORT_SYMBOL(put_user_pages_dirty);
+ unsigned long index;
-/**
- * put_user_pages_dirty_lock() - release and dirty an array of gup-pinned pages
- * @pages: array of pages to be marked dirty and released.
- * @npages: number of pages in the @pages array.
- *
- * For each page in the @pages array, make that page (or its head page, if a
- * compound page) dirty, if it was previously listed as clean. Then, release
- * the page using put_user_page().
- *
- * Please see the put_user_page() documentation for details.
- *
- * This is just like put_user_pages_dirty(), except that it invokes
- * set_page_dirty_lock(), instead of set_page_dirty().
- *
- */
-void put_user_pages_dirty_lock(struct page **pages, unsigned long npages)
-{
- __put_user_pages_dirty(pages, npages, set_page_dirty_lock);
+ /*
+ * TODO: this can be optimized for huge pages: if a series of pages is
+ * physically contiguous and part of the same compound page, then a
+ * single operation to the head page should suffice.
+ */
+
+ if (!make_dirty) {
+ put_user_pages(pages, npages);
+ return;
+ }
+
+ for (index = 0; index < npages; index++) {
+ struct page *page = compound_head(pages[index]);
+ /*
+ * Checking PageDirty at this point may race with
+ * clear_page_dirty_for_io(), but that's OK. Two key
+ * cases:
+ *
+ * 1) This code sees the page as already dirty, so it
+ * skips the call to set_page_dirty(). That could happen
+ * because clear_page_dirty_for_io() called
+ * page_mkclean(), followed by set_page_dirty().
+ * However, now the page is going to get written back,
+ * which meets the original intention of setting it
+ * dirty, so all is well: clear_page_dirty_for_io() goes
+ * on to call TestClearPageDirty(), and write the page
+ * back.
+ *
+ * 2) This code sees the page as clean, so it calls
+ * set_page_dirty(). The page stays dirty, despite being
+ * written back, so it gets written back again in the
+ * next writeback cycle. This is harmless.
+ */
+ if (!PageDirty(page))
+ set_page_dirty_lock(page);
+ put_user_page(page);
+ }
}
EXPORT_SYMBOL(put_user_pages_dirty_lock);
@@ -399,7 +384,7 @@ retry_locked:
spin_unlock(ptl);
return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);
}
- if (flags & FOLL_SPLIT) {
+ if (flags & (FOLL_SPLIT | FOLL_SPLIT_PMD)) {
int ret;
page = pmd_page(*pmd);
if (is_huge_zero_page(page)) {
@@ -408,7 +393,7 @@ retry_locked:
split_huge_pmd(vma, pmd, address);
if (pmd_trans_unstable(pmd))
ret = -EBUSY;
- } else {
+ } else if (flags & FOLL_SPLIT) {
if (unlikely(!try_get_page(page))) {
spin_unlock(ptl);
return ERR_PTR(-ENOMEM);
@@ -420,6 +405,10 @@ retry_locked:
put_page(page);
if (pmd_none(*pmd))
return no_page_table(vma, flags);
+ } else { /* flags & FOLL_SPLIT_PMD */
+ spin_unlock(ptl);
+ split_huge_pmd(vma, pmd, address);
+ ret = pte_alloc(mm, pmd) ? -ENOMEM : 0;
}
return ret ? ERR_PTR(ret) :
@@ -1460,7 +1449,7 @@ check_again:
* gup may start from a tail page. Advance step by the left
* part.
*/
- step = (1 << compound_order(head)) - (pages[i] - head);
+ step = compound_nr(head) - (pages[i] - head);
/*
* If we get a page from the CMA zone, since we are going to
* be pinning these entries, we might as well move them out
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index de1f15969e27..73fc517c08d2 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -496,11 +496,25 @@ pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
return pmd;
}
-static inline struct list_head *page_deferred_list(struct page *page)
+#ifdef CONFIG_MEMCG
+static inline struct deferred_split *get_deferred_split_queue(struct page *page)
{
- /* ->lru in the tail pages is occupied by compound_head. */
- return &page[2].deferred_list;
+ struct mem_cgroup *memcg = compound_head(page)->mem_cgroup;
+ struct pglist_data *pgdat = NODE_DATA(page_to_nid(page));
+
+ if (memcg)
+ return &memcg->deferred_split_queue;
+ else
+ return &pgdat->deferred_split_queue;
+}
+#else
+static inline struct deferred_split *get_deferred_split_queue(struct page *page)
+{
+ struct pglist_data *pgdat = NODE_DATA(page_to_nid(page));
+
+ return &pgdat->deferred_split_queue;
}
+#endif
void prep_transhuge_page(struct page *page)
{
@@ -2497,6 +2511,8 @@ static void __split_huge_page(struct page *page, struct list_head *list,
struct page *head = compound_head(page);
pg_data_t *pgdat = page_pgdat(head);
struct lruvec *lruvec;
+ struct address_space *swap_cache = NULL;
+ unsigned long offset = 0;
int i;
lruvec = mem_cgroup_page_lruvec(head, pgdat);
@@ -2504,6 +2520,14 @@ static void __split_huge_page(struct page *page, struct list_head *list,
/* complete memcg works before add pages to LRU */
mem_cgroup_split_huge_fixup(head);
+ if (PageAnon(head) && PageSwapCache(head)) {
+ swp_entry_t entry = { .val = page_private(head) };
+
+ offset = swp_offset(entry);
+ swap_cache = swap_address_space(entry);
+ xa_lock(&swap_cache->i_pages);
+ }
+
for (i = HPAGE_PMD_NR - 1; i >= 1; i--) {
__split_huge_page_tail(head, i, lruvec, list);
/* Some pages can be beyond i_size: drop them from page cache */
@@ -2513,6 +2537,12 @@ static void __split_huge_page(struct page *page, struct list_head *list,
if (IS_ENABLED(CONFIG_SHMEM) && PageSwapBacked(head))
shmem_uncharge(head->mapping->host, 1);
put_page(head + i);
+ } else if (!PageAnon(page)) {
+ __xa_store(&head->mapping->i_pages, head[i].index,
+ head + i, 0);
+ } else if (swap_cache) {
+ __xa_store(&swap_cache->i_pages, offset + i,
+ head + i, 0);
}
}
@@ -2523,10 +2553,12 @@ static void __split_huge_page(struct page *page, struct list_head *list,
/* See comment in __split_huge_page_tail() */
if (PageAnon(head)) {
/* Additional pin to swap cache */
- if (PageSwapCache(head))
+ if (PageSwapCache(head)) {
page_ref_add(head, 2);
- else
+ xa_unlock(&swap_cache->i_pages);
+ } else {
page_ref_inc(head);
+ }
} else {
/* Additional pin to page cache */
page_ref_add(head, 2);
@@ -2673,6 +2705,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
{
struct page *head = compound_head(page);
struct pglist_data *pgdata = NODE_DATA(page_to_nid(head));
+ struct deferred_split *ds_queue = get_deferred_split_queue(page);
struct anon_vma *anon_vma = NULL;
struct address_space *mapping = NULL;
int count, mapcount, extra_pins, ret;
@@ -2759,17 +2792,17 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
}
/* Prevent deferred_split_scan() touching ->_refcount */
- spin_lock(&pgdata->split_queue_lock);
+ spin_lock(&ds_queue->split_queue_lock);
count = page_count(head);
mapcount = total_mapcount(head);
if (!mapcount && page_ref_freeze(head, 1 + extra_pins)) {
if (!list_empty(page_deferred_list(head))) {
- pgdata->split_queue_len--;
+ ds_queue->split_queue_len--;
list_del(page_deferred_list(head));
}
if (mapping)
__dec_node_page_state(page, NR_SHMEM_THPS);
- spin_unlock(&pgdata->split_queue_lock);
+ spin_unlock(&ds_queue->split_queue_lock);
__split_huge_page(page, list, end, flags);
if (PageSwapCache(head)) {
swp_entry_t entry = { .val = page_private(head) };
@@ -2786,7 +2819,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
dump_page(page, "total_mapcount(head) > 0");
BUG();
}
- spin_unlock(&pgdata->split_queue_lock);
+ spin_unlock(&ds_queue->split_queue_lock);
fail: if (mapping)
xa_unlock(&mapping->i_pages);
spin_unlock_irqrestore(&pgdata->lru_lock, flags);
@@ -2808,53 +2841,86 @@ out:
void free_transhuge_page(struct page *page)
{
- struct pglist_data *pgdata = NODE_DATA(page_to_nid(page));
+ struct deferred_split *ds_queue = get_deferred_split_queue(page);
unsigned long flags;
- spin_lock_irqsave(&pgdata->split_queue_lock, flags);
+ spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
if (!list_empty(page_deferred_list(page))) {
- pgdata->split_queue_len--;
+ ds_queue->split_queue_len--;
list_del(page_deferred_list(page));
}
- spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
+ spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);
free_compound_page(page);
}
void deferred_split_huge_page(struct page *page)
{
- struct pglist_data *pgdata = NODE_DATA(page_to_nid(page));
+ struct deferred_split *ds_queue = get_deferred_split_queue(page);
+#ifdef CONFIG_MEMCG
+ struct mem_cgroup *memcg = compound_head(page)->mem_cgroup;
+#endif
unsigned long flags;
VM_BUG_ON_PAGE(!PageTransHuge(page), page);
- spin_lock_irqsave(&pgdata->split_queue_lock, flags);
+ /*
+ * The try_to_unmap() in page reclaim path might reach here too,
+ * this may cause a race condition to corrupt deferred split queue.
+ * And, if page reclaim is already handling the same page, it is
+ * unnecessary to handle it again in shrinker.
+ *
+ * Check PageSwapCache to determine if the page is being
+ * handled by page reclaim since THP swap would add the page into
+ * swap cache before calling try_to_unmap().
+ */
+ if (PageSwapCache(page))
+ return;
+
+ spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
if (list_empty(page_deferred_list(page))) {
count_vm_event(THP_DEFERRED_SPLIT_PAGE);
- list_add_tail(page_deferred_list(page), &pgdata->split_queue);
- pgdata->split_queue_len++;
+ list_add_tail(page_deferred_list(page), &ds_queue->split_queue);
+ ds_queue->split_queue_len++;
+#ifdef CONFIG_MEMCG
+ if (memcg)
+ memcg_set_shrinker_bit(memcg, page_to_nid(page),
+ deferred_split_shrinker.id);
+#endif
}
- spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
+ spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);
}
static unsigned long deferred_split_count(struct shrinker *shrink,
struct shrink_control *sc)
{
struct pglist_data *pgdata = NODE_DATA(sc->nid);
- return READ_ONCE(pgdata->split_queue_len);
+ struct deferred_split *ds_queue = &pgdata->deferred_split_queue;
+
+#ifdef CONFIG_MEMCG
+ if (sc->memcg)
+ ds_queue = &sc->memcg->deferred_split_queue;
+#endif
+ return READ_ONCE(ds_queue->split_queue_len);
}
static unsigned long deferred_split_scan(struct shrinker *shrink,
struct shrink_control *sc)
{
struct pglist_data *pgdata = NODE_DATA(sc->nid);
+ struct deferred_split *ds_queue = &pgdata->deferred_split_queue;
unsigned long flags;
LIST_HEAD(list), *pos, *next;
struct page *page;
int split = 0;
- spin_lock_irqsave(&pgdata->split_queue_lock, flags);
+#ifdef CONFIG_MEMCG
+ if (sc->memcg)
+ ds_queue = &sc->memcg->deferred_split_queue;
+#endif
+
+ spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
/* Take pin on all head pages to avoid freeing them under us */
- list_for_each_safe(pos, next, &pgdata->split_queue) {
+ list_for_each_safe(pos, next, &ds_queue->split_queue) {
page = list_entry((void *)pos, struct page, mapping);
page = compound_head(page);
if (get_page_unless_zero(page)) {
@@ -2862,12 +2928,12 @@ static unsigned long deferred_split_scan(struct shrinker *shrink,
} else {
/* We lost race with put_compound_page() */
list_del_init(page_deferred_list(page));
- pgdata->split_queue_len--;
+ ds_queue->split_queue_len--;
}
if (!--sc->nr_to_scan)
break;
}
- spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
+ spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);
list_for_each_safe(pos, next, &list) {
page = list_entry((void *)pos, struct page, mapping);
@@ -2881,15 +2947,15 @@ next:
put_page(page);
}
- spin_lock_irqsave(&pgdata->split_queue_lock, flags);
- list_splice_tail(&list, &pgdata->split_queue);
- spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
+ spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
+ list_splice_tail(&list, &ds_queue->split_queue);
+ spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);
/*
* Stop shrinker if we didn't split any page, but the queue is empty.
* This can happen if pages were freed under us.
*/
- if (!split && list_empty(&pgdata->split_queue))
+ if (!split && list_empty(&ds_queue->split_queue))
return SHRINK_STOP;
return split;
}
@@ -2898,7 +2964,8 @@ static struct shrinker deferred_split_shrinker = {
.count_objects = deferred_split_count,
.scan_objects = deferred_split_scan,
.seeks = DEFAULT_SEEKS,
- .flags = SHRINKER_NUMA_AWARE,
+ .flags = SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE |
+ SHRINKER_NONSLAB,
};
#ifdef CONFIG_DEBUG_FS
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 6d7296dd11b8..ef37c85423a5 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1405,12 +1405,25 @@ pgoff_t __basepage_index(struct page *page)
}
static struct page *alloc_buddy_huge_page(struct hstate *h,
- gfp_t gfp_mask, int nid, nodemask_t *nmask)
+ gfp_t gfp_mask, int nid, nodemask_t *nmask,
+ nodemask_t *node_alloc_noretry)
{
int order = huge_page_order(h);
struct page *page;
+ bool alloc_try_hard = true;
- gfp_mask |= __GFP_COMP|__GFP_RETRY_MAYFAIL|__GFP_NOWARN;
+ /*
+ * By default we always try hard to allocate the page with
+ * __GFP_RETRY_MAYFAIL flag. However, if we are allocating pages in
+ * a loop (to adjust global huge page counts) and previous allocation
+ * failed, do not continue to try hard on the same node. Use the
+ * node_alloc_noretry bitmap to manage this state information.
+ */
+ if (node_alloc_noretry && node_isset(nid, *node_alloc_noretry))
+ alloc_try_hard = false;
+ gfp_mask |= __GFP_COMP|__GFP_NOWARN;
+ if (alloc_try_hard)
+ gfp_mask |= __GFP_RETRY_MAYFAIL;
if (nid == NUMA_NO_NODE)
nid = numa_mem_id();
page = __alloc_pages_nodemask(gfp_mask, order, nid, nmask);
@@ -1419,6 +1432,22 @@ static struct page *alloc_buddy_huge_page(struct hstate *h,
else
__count_vm_event(HTLB_BUDDY_PGALLOC_FAIL);
+ /*
+ * If we did not specify __GFP_RETRY_MAYFAIL, but still got a page this
+ * indicates an overall state change. Clear bit so that we resume
+ * normal 'try hard' allocations.
+ */
+ if (node_alloc_noretry && page && !alloc_try_hard)
+ node_clear(nid, *node_alloc_noretry);
+
+ /*
+ * If we tried hard to get a page but failed, set bit so that
+ * subsequent attempts will not try as hard until there is an
+ * overall state change.
+ */
+ if (node_alloc_noretry && !page && alloc_try_hard)
+ node_set(nid, *node_alloc_noretry);
+
return page;
}
@@ -1427,7 +1456,8 @@ static struct page *alloc_buddy_huge_page(struct hstate *h,
* should use this function to get new hugetlb pages
*/
static struct page *alloc_fresh_huge_page(struct hstate *h,
- gfp_t gfp_mask, int nid, nodemask_t *nmask)
+ gfp_t gfp_mask, int nid, nodemask_t *nmask,
+ nodemask_t *node_alloc_noretry)
{
struct page *page;
@@ -1435,7 +1465,7 @@ static struct page *alloc_fresh_huge_page(struct hstate *h,
page = alloc_gigantic_page(h, gfp_mask, nid, nmask);
else
page = alloc_buddy_huge_page(h, gfp_mask,
- nid, nmask);
+ nid, nmask, node_alloc_noretry);
if (!page)
return NULL;
@@ -1450,14 +1480,16 @@ static struct page *alloc_fresh_huge_page(struct hstate *h,
* Allocates a fresh page to the hugetlb allocator pool in the node interleaved
* manner.
*/
-static int alloc_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed)
+static int alloc_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed,
+ nodemask_t *node_alloc_noretry)
{
struct page *page;
int nr_nodes, node;
gfp_t gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE;
for_each_node_mask_to_alloc(h, nr_nodes, node, nodes_allowed) {
- page = alloc_fresh_huge_page(h, gfp_mask, node, nodes_allowed);
+ page = alloc_fresh_huge_page(h, gfp_mask, node, nodes_allowed,
+ node_alloc_noretry);
if (page)
break;
}
@@ -1601,7 +1633,7 @@ static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask,
goto out_unlock;
spin_unlock(&hugetlb_lock);
- page = alloc_fresh_huge_page(h, gfp_mask, nid, nmask);
+ page = alloc_fresh_huge_page(h, gfp_mask, nid, nmask, NULL);
if (!page)
return NULL;
@@ -1637,7 +1669,7 @@ struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask,
if (hstate_is_gigantic(h))
return NULL;
- page = alloc_fresh_huge_page(h, gfp_mask, nid, nmask);
+ page = alloc_fresh_huge_page(h, gfp_mask, nid, nmask, NULL);
if (!page)
return NULL;
@@ -2207,13 +2239,33 @@ static void __init gather_bootmem_prealloc(void)
static void __init hugetlb_hstate_alloc_pages(struct hstate *h)
{
unsigned long i;
+ nodemask_t *node_alloc_noretry;
+
+ if (!hstate_is_gigantic(h)) {
+ /*
+ * Bit mask controlling how hard we retry per-node allocations.
+ * Ignore errors as lower level routines can deal with
+ * node_alloc_noretry == NULL. If this kmalloc fails at boot
+ * time, we are likely in bigger trouble.
+ */
+ node_alloc_noretry = kmalloc(sizeof(*node_alloc_noretry),
+ GFP_KERNEL);
+ } else {
+ /* allocations done at boot time */
+ node_alloc_noretry = NULL;
+ }
+
+ /* bit mask controlling how hard we retry per-node allocations */
+ if (node_alloc_noretry)
+ nodes_clear(*node_alloc_noretry);
for (i = 0; i < h->max_huge_pages; ++i) {
if (hstate_is_gigantic(h)) {
if (!alloc_bootmem_huge_page(h))
break;
} else if (!alloc_pool_huge_page(h,
- &node_states[N_MEMORY]))
+ &node_states[N_MEMORY],
+ node_alloc_noretry))
break;
cond_resched();
}
@@ -2225,6 +2277,8 @@ static void __init hugetlb_hstate_alloc_pages(struct hstate *h)
h->max_huge_pages, buf, i);
h->max_huge_pages = i;
}
+
+ kfree(node_alloc_noretry);
}
static void __init hugetlb_init_hstates(void)
@@ -2323,6 +2377,17 @@ static int set_max_huge_pages(struct hstate *h, unsigned long count, int nid,
nodemask_t *nodes_allowed)
{
unsigned long min_count, ret;
+ NODEMASK_ALLOC(nodemask_t, node_alloc_noretry, GFP_KERNEL);
+
+ /*
+ * Bit mask controlling how hard we retry per-node allocations.
+ * If we can not allocate the bit mask, do not attempt to allocate
+ * the requested huge pages.
+ */
+ if (node_alloc_noretry)
+ nodes_clear(*node_alloc_noretry);
+ else
+ return -ENOMEM;
spin_lock(&hugetlb_lock);
@@ -2356,6 +2421,7 @@ static int set_max_huge_pages(struct hstate *h, unsigned long count, int nid,
if (hstate_is_gigantic(h) && !IS_ENABLED(CONFIG_CONTIG_ALLOC)) {
if (count > persistent_huge_pages(h)) {
spin_unlock(&hugetlb_lock);
+ NODEMASK_FREE(node_alloc_noretry);
return -EINVAL;
}
/* Fall through to decrease pool */
@@ -2388,7 +2454,8 @@ static int set_max_huge_pages(struct hstate *h, unsigned long count, int nid,
/* yield cpu to avoid soft lockup */
cond_resched();
- ret = alloc_pool_huge_page(h, nodes_allowed);
+ ret = alloc_pool_huge_page(h, nodes_allowed,
+ node_alloc_noretry);
spin_lock(&hugetlb_lock);
if (!ret)
goto out;
@@ -2429,6 +2496,8 @@ out:
h->max_huge_pages = persistent_huge_pages(h);
spin_unlock(&hugetlb_lock);
+ NODEMASK_FREE(node_alloc_noretry);
+
return 0;
}
diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c
index 68c2f2f3c05b..f1930fa0b445 100644
--- a/mm/hugetlb_cgroup.c
+++ b/mm/hugetlb_cgroup.c
@@ -139,7 +139,7 @@ static void hugetlb_cgroup_move_parent(int idx, struct hugetlb_cgroup *h_cg,
if (!page_hcg || page_hcg != h_cg)
goto out;
- nr_pages = 1 << compound_order(page);
+ nr_pages = compound_nr(page);
if (!parent) {
parent = root_h_cgroup;
/* root has no limit */
diff --git a/mm/init-mm.c b/mm/init-mm.c
index a787a319211e..fb1e15028ef0 100644
--- a/mm/init-mm.c
+++ b/mm/init-mm.c
@@ -35,6 +35,6 @@ struct mm_struct init_mm = {
.arg_lock = __SPIN_LOCK_UNLOCKED(init_mm.arg_lock),
.mmlist = LIST_HEAD_INIT(init_mm.mmlist),
.user_ns = &init_user_ns,
- .cpu_bitmap = { [BITS_TO_LONGS(NR_CPUS)] = 0},
+ .cpu_bitmap = CPU_BITS_NONE,
INIT_MM_CONTEXT(init_mm)
};
diff --git a/mm/kasan/common.c b/mm/kasan/common.c
index 95d16a42db6b..6814d6d6a023 100644
--- a/mm/kasan/common.c
+++ b/mm/kasan/common.c
@@ -304,7 +304,6 @@ size_t kasan_metadata_size(struct kmem_cache *cache)
struct kasan_alloc_meta *get_alloc_info(struct kmem_cache *cache,
const void *object)
{
- BUILD_BUG_ON(sizeof(struct kasan_alloc_meta) > 32);
return (void *)object + cache->kasan_info.alloc_meta_offset;
}
@@ -315,14 +314,31 @@ struct kasan_free_meta *get_free_info(struct kmem_cache *cache,
return (void *)object + cache->kasan_info.free_meta_offset;
}
+
+static void kasan_set_free_info(struct kmem_cache *cache,
+ void *object, u8 tag)
+{
+ struct kasan_alloc_meta *alloc_meta;
+ u8 idx = 0;
+
+ alloc_meta = get_alloc_info(cache, object);
+
+#ifdef CONFIG_KASAN_SW_TAGS_IDENTIFY
+ idx = alloc_meta->free_track_idx;
+ alloc_meta->free_pointer_tag[idx] = tag;
+ alloc_meta->free_track_idx = (idx + 1) % KASAN_NR_FREE_STACKS;
+#endif
+
+ set_track(&alloc_meta->free_track[idx], GFP_NOWAIT);
+}
+
void kasan_poison_slab(struct page *page)
{
unsigned long i;
- for (i = 0; i < (1 << compound_order(page)); i++)
+ for (i = 0; i < compound_nr(page); i++)
page_kasan_tag_reset(page + i);
- kasan_poison_shadow(page_address(page),
- PAGE_SIZE << compound_order(page),
+ kasan_poison_shadow(page_address(page), page_size(page),
KASAN_KMALLOC_REDZONE);
}
@@ -452,7 +468,8 @@ static bool __kasan_slab_free(struct kmem_cache *cache, void *object,
unlikely(!(cache->flags & SLAB_KASAN)))
return false;
- set_track(&get_alloc_info(cache, object)->free_track, GFP_NOWAIT);
+ kasan_set_free_info(cache, object, tag);
+
quarantine_put(get_free_info(cache, object), cache);
return IS_ENABLED(CONFIG_KASAN_GENERIC);
@@ -524,7 +541,7 @@ void * __must_check kasan_kmalloc_large(const void *ptr, size_t size,
page = virt_to_page(ptr);
redzone_start = round_up((unsigned long)(ptr + size),
KASAN_SHADOW_SCALE_SIZE);
- redzone_end = (unsigned long)ptr + (PAGE_SIZE << compound_order(page));
+ redzone_end = (unsigned long)ptr + page_size(page);
kasan_unpoison_shadow(ptr, size);
kasan_poison_shadow((void *)redzone_start, redzone_end - redzone_start,
@@ -560,8 +577,7 @@ void kasan_poison_kfree(void *ptr, unsigned long ip)
kasan_report_invalid_free(ptr, ip);
return;
}
- kasan_poison_shadow(ptr, PAGE_SIZE << compound_order(page),
- KASAN_FREE_PAGE);
+ kasan_poison_shadow(ptr, page_size(page), KASAN_FREE_PAGE);
} else {
__kasan_slab_free(page->slab_cache, ptr, ip, false);
}
diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h
index 014f19e76247..35cff6bbb716 100644
--- a/mm/kasan/kasan.h
+++ b/mm/kasan/kasan.h
@@ -95,9 +95,19 @@ struct kasan_track {
depot_stack_handle_t stack;
};
+#ifdef CONFIG_KASAN_SW_TAGS_IDENTIFY
+#define KASAN_NR_FREE_STACKS 5
+#else
+#define KASAN_NR_FREE_STACKS 1
+#endif
+
struct kasan_alloc_meta {
struct kasan_track alloc_track;
- struct kasan_track free_track;
+ struct kasan_track free_track[KASAN_NR_FREE_STACKS];
+#ifdef CONFIG_KASAN_SW_TAGS_IDENTIFY
+ u8 free_pointer_tag[KASAN_NR_FREE_STACKS];
+ u8 free_track_idx;
+#endif
};
struct qlist_node {
@@ -146,6 +156,8 @@ void kasan_report(unsigned long addr, size_t size,
bool is_write, unsigned long ip);
void kasan_report_invalid_free(void *object, unsigned long ip);
+struct page *kasan_addr_to_page(const void *addr);
+
#if defined(CONFIG_KASAN_GENERIC) && \
(defined(CONFIG_SLAB) || defined(CONFIG_SLUB))
void quarantine_put(struct kasan_free_meta *info, struct kmem_cache *cache);
diff --git a/mm/kasan/report.c b/mm/kasan/report.c
index 0e5f965f1882..621782100eaa 100644
--- a/mm/kasan/report.c
+++ b/mm/kasan/report.c
@@ -111,7 +111,7 @@ static void print_track(struct kasan_track *track, const char *prefix)
}
}
-static struct page *addr_to_page(const void *addr)
+struct page *kasan_addr_to_page(const void *addr)
{
if ((addr >= (void *)PAGE_OFFSET) &&
(addr < high_memory))
@@ -151,15 +151,38 @@ static void describe_object_addr(struct kmem_cache *cache, void *object,
(void *)(object_addr + cache->object_size));
}
+static struct kasan_track *kasan_get_free_track(struct kmem_cache *cache,
+ void *object, u8 tag)
+{
+ struct kasan_alloc_meta *alloc_meta;
+ int i = 0;
+
+ alloc_meta = get_alloc_info(cache, object);
+
+#ifdef CONFIG_KASAN_SW_TAGS_IDENTIFY
+ for (i = 0; i < KASAN_NR_FREE_STACKS; i++) {
+ if (alloc_meta->free_pointer_tag[i] == tag)
+ break;
+ }
+ if (i == KASAN_NR_FREE_STACKS)
+ i = alloc_meta->free_track_idx;
+#endif
+
+ return &alloc_meta->free_track[i];
+}
+
static void describe_object(struct kmem_cache *cache, void *object,
- const void *addr)
+ const void *addr, u8 tag)
{
struct kasan_alloc_meta *alloc_info = get_alloc_info(cache, object);
if (cache->flags & SLAB_KASAN) {
+ struct kasan_track *free_track;
+
print_track(&alloc_info->alloc_track, "Allocated");
pr_err("\n");
- print_track(&alloc_info->free_track, "Freed");
+ free_track = kasan_get_free_track(cache, object, tag);
+ print_track(free_track, "Freed");
pr_err("\n");
}
@@ -344,9 +367,9 @@ static void print_address_stack_frame(const void *addr)
print_decoded_frame_descr(frame_descr);
}
-static void print_address_description(void *addr)
+static void print_address_description(void *addr, u8 tag)
{
- struct page *page = addr_to_page(addr);
+ struct page *page = kasan_addr_to_page(addr);
dump_stack();
pr_err("\n");
@@ -355,7 +378,7 @@ static void print_address_description(void *addr)
struct kmem_cache *cache = page->slab_cache;
void *object = nearest_obj(cache, page, addr);
- describe_object(cache, object, addr);
+ describe_object(cache, object, addr, tag);
}
if (kernel_or_module_addr(addr) && !init_task_stack_addr(addr)) {
@@ -435,13 +458,14 @@ static bool report_enabled(void)
void kasan_report_invalid_free(void *object, unsigned long ip)
{
unsigned long flags;
+ u8 tag = get_tag(object);
+ object = reset_tag(object);
start_report(&flags);
pr_err("BUG: KASAN: double-free or invalid-free in %pS\n", (void *)ip);
- print_tags(get_tag(object), reset_tag(object));
- object = reset_tag(object);
+ print_tags(tag, object);
pr_err("\n");
- print_address_description(object);
+ print_address_description(object, tag);
pr_err("\n");
print_shadow_for_address(object);
end_report(&flags);
@@ -479,7 +503,7 @@ void __kasan_report(unsigned long addr, size_t size, bool is_write, unsigned lon
pr_err("\n");
if (addr_has_shadow(untagged_addr)) {
- print_address_description(untagged_addr);
+ print_address_description(untagged_addr, get_tag(tagged_addr));
pr_err("\n");
print_shadow_for_address(info.first_bad_addr);
} else {
diff --git a/mm/kasan/tags_report.c b/mm/kasan/tags_report.c
index 8eaf5f722271..969ae08f59d7 100644
--- a/mm/kasan/tags_report.c
+++ b/mm/kasan/tags_report.c
@@ -36,6 +36,30 @@
const char *get_bug_type(struct kasan_access_info *info)
{
+#ifdef CONFIG_KASAN_SW_TAGS_IDENTIFY
+ struct kasan_alloc_meta *alloc_meta;
+ struct kmem_cache *cache;
+ struct page *page;
+ const void *addr;
+ void *object;
+ u8 tag;
+ int i;
+
+ tag = get_tag(info->access_addr);
+ addr = reset_tag(info->access_addr);
+ page = kasan_addr_to_page(addr);
+ if (page && PageSlab(page)) {
+ cache = page->slab_cache;
+ object = nearest_obj(cache, page, (void *)addr);
+ alloc_meta = get_alloc_info(cache, object);
+
+ for (i = 0; i < KASAN_NR_FREE_STACKS; i++)
+ if (alloc_meta->free_pointer_tag[i] == tag)
+ return "use-after-free";
+ return "out-of-bounds";
+ }
+
+#endif
return "invalid-access";
}
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index ccede2425c3f..0a1b4b484ac5 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -48,6 +48,7 @@ enum scan_result {
SCAN_CGROUP_CHARGE_FAIL,
SCAN_EXCEED_SWAP_PTE,
SCAN_TRUNCATED,
+ SCAN_PAGE_HAS_PRIVATE,
};
#define CREATE_TRACE_POINTS
@@ -76,6 +77,8 @@ static __read_mostly DEFINE_HASHTABLE(mm_slots_hash, MM_SLOTS_HASH_BITS);
static struct kmem_cache *mm_slot_cache __read_mostly;
+#define MAX_PTE_MAPPED_THP 8
+
/**
* struct mm_slot - hash lookup from mm to mm_slot
* @hash: hash collision list
@@ -86,6 +89,10 @@ struct mm_slot {
struct hlist_node hash;
struct list_head mm_node;
struct mm_struct *mm;
+
+ /* pte-mapped THP in this mm */
+ int nr_pte_mapped_thp;
+ unsigned long pte_mapped_thp[MAX_PTE_MAPPED_THP];
};
/**
@@ -404,7 +411,11 @@ static bool hugepage_vma_check(struct vm_area_struct *vma,
(vm_flags & VM_NOHUGEPAGE) ||
test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
return false;
- if (shmem_file(vma->vm_file)) {
+
+ if (shmem_file(vma->vm_file) ||
+ (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) &&
+ vma->vm_file &&
+ (vm_flags & VM_DENYWRITE))) {
if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE))
return false;
return IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - vma->vm_pgoff,
@@ -456,8 +467,9 @@ int khugepaged_enter_vma_merge(struct vm_area_struct *vma,
unsigned long hstart, hend;
/*
- * khugepaged does not yet work on non-shmem files or special
- * mappings. And file-private shmem THP is not supported.
+ * khugepaged only supports read-only files for non-shmem files.
+ * khugepaged does not yet work on special mappings. And
+ * file-private shmem THP is not supported.
*/
if (!hugepage_vma_check(vma, vm_flags))
return 0;
@@ -1248,6 +1260,159 @@ static void collect_mm_slot(struct mm_slot *mm_slot)
}
#if defined(CONFIG_SHMEM) && defined(CONFIG_TRANSPARENT_HUGE_PAGECACHE)
+/*
+ * Notify khugepaged that given addr of the mm is pte-mapped THP. Then
+ * khugepaged should try to collapse the page table.
+ */
+static int khugepaged_add_pte_mapped_thp(struct mm_struct *mm,
+ unsigned long addr)
+{
+ struct mm_slot *mm_slot;
+
+ VM_BUG_ON(addr & ~HPAGE_PMD_MASK);
+
+ spin_lock(&khugepaged_mm_lock);
+ mm_slot = get_mm_slot(mm);
+ if (likely(mm_slot && mm_slot->nr_pte_mapped_thp < MAX_PTE_MAPPED_THP))
+ mm_slot->pte_mapped_thp[mm_slot->nr_pte_mapped_thp++] = addr;
+ spin_unlock(&khugepaged_mm_lock);
+ return 0;
+}
+
+/**
+ * Try to collapse a pte-mapped THP for mm at address haddr.
+ *
+ * This function checks whether all the PTEs in the PMD are pointing to the
+ * right THP. If so, retract the page table so the THP can refault in with
+ * as pmd-mapped.
+ */
+void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
+{
+ unsigned long haddr = addr & HPAGE_PMD_MASK;
+ struct vm_area_struct *vma = find_vma(mm, haddr);
+ struct page *hpage = NULL;
+ pte_t *start_pte, *pte;
+ pmd_t *pmd, _pmd;
+ spinlock_t *ptl;
+ int count = 0;
+ int i;
+
+ if (!vma || !vma->vm_file ||
+ vma->vm_start > haddr || vma->vm_end < haddr + HPAGE_PMD_SIZE)
+ return;
+
+ /*
+ * This vm_flags may not have VM_HUGEPAGE if the page was not
+ * collapsed by this mm. But we can still collapse if the page is
+ * the valid THP. Add extra VM_HUGEPAGE so hugepage_vma_check()
+ * will not fail the vma for missing VM_HUGEPAGE
+ */
+ if (!hugepage_vma_check(vma, vma->vm_flags | VM_HUGEPAGE))
+ return;
+
+ pmd = mm_find_pmd(mm, haddr);
+ if (!pmd)
+ return;
+
+ start_pte = pte_offset_map_lock(mm, pmd, haddr, &ptl);
+
+ /* step 1: check all mapped PTEs are to the right huge page */
+ for (i = 0, addr = haddr, pte = start_pte;
+ i < HPAGE_PMD_NR; i++, addr += PAGE_SIZE, pte++) {
+ struct page *page;
+
+ /* empty pte, skip */
+ if (pte_none(*pte))
+ continue;
+
+ /* page swapped out, abort */
+ if (!pte_present(*pte))
+ goto abort;
+
+ page = vm_normal_page(vma, addr, *pte);
+
+ if (!page || !PageCompound(page))
+ goto abort;
+
+ if (!hpage) {
+ hpage = compound_head(page);
+ /*
+ * The mapping of the THP should not change.
+ *
+ * Note that uprobe, debugger, or MAP_PRIVATE may
+ * change the page table, but the new page will
+ * not pass PageCompound() check.
+ */
+ if (WARN_ON(hpage->mapping != vma->vm_file->f_mapping))
+ goto abort;
+ }
+
+ /*
+ * Confirm the page maps to the correct subpage.
+ *
+ * Note that uprobe, debugger, or MAP_PRIVATE may change
+ * the page table, but the new page will not pass
+ * PageCompound() check.
+ */
+ if (WARN_ON(hpage + i != page))
+ goto abort;
+ count++;
+ }
+
+ /* step 2: adjust rmap */
+ for (i = 0, addr = haddr, pte = start_pte;
+ i < HPAGE_PMD_NR; i++, addr += PAGE_SIZE, pte++) {
+ struct page *page;
+
+ if (pte_none(*pte))
+ continue;
+ page = vm_normal_page(vma, addr, *pte);
+ page_remove_rmap(page, false);
+ }
+
+ pte_unmap_unlock(start_pte, ptl);
+
+ /* step 3: set proper refcount and mm_counters. */
+ if (hpage) {
+ page_ref_sub(hpage, count);
+ add_mm_counter(vma->vm_mm, mm_counter_file(hpage), -count);
+ }
+
+ /* step 4: collapse pmd */
+ ptl = pmd_lock(vma->vm_mm, pmd);
+ _pmd = pmdp_collapse_flush(vma, addr, pmd);
+ spin_unlock(ptl);
+ mm_dec_nr_ptes(mm);
+ pte_free(mm, pmd_pgtable(_pmd));
+ return;
+
+abort:
+ pte_unmap_unlock(start_pte, ptl);
+}
+
+static int khugepaged_collapse_pte_mapped_thps(struct mm_slot *mm_slot)
+{
+ struct mm_struct *mm = mm_slot->mm;
+ int i;
+
+ if (likely(mm_slot->nr_pte_mapped_thp == 0))
+ return 0;
+
+ if (!down_write_trylock(&mm->mmap_sem))
+ return -EBUSY;
+
+ if (unlikely(khugepaged_test_exit(mm)))
+ goto out;
+
+ for (i = 0; i < mm_slot->nr_pte_mapped_thp; i++)
+ collapse_pte_mapped_thp(mm, mm_slot->pte_mapped_thp[i]);
+
+out:
+ mm_slot->nr_pte_mapped_thp = 0;
+ up_write(&mm->mmap_sem);
+ return 0;
+}
+
static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
{
struct vm_area_struct *vma;
@@ -1256,7 +1421,22 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
i_mmap_lock_write(mapping);
vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
- /* probably overkill */
+ /*
+ * Check vma->anon_vma to exclude MAP_PRIVATE mappings that
+ * got written to. These VMAs are likely not worth investing
+ * down_write(mmap_sem) as PMD-mapping is likely to be split
+ * later.
+ *
+ * Not that vma->anon_vma check is racy: it can be set up after
+ * the check but before we took mmap_sem by the fault path.
+ * But page lock would prevent establishing any new ptes of the
+ * page, so we are safe.
+ *
+ * An alternative would be drop the check, but check that page
+ * table is clear before calling pmdp_collapse_flush() under
+ * ptl. It has higher chance to recover THP for the VMA, but
+ * has higher cost too.
+ */
if (vma->anon_vma)
continue;
addr = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
@@ -1269,9 +1449,10 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
continue;
/*
* We need exclusive mmap_sem to retract page table.
- * If trylock fails we would end up with pte-mapped THP after
- * re-fault. Not ideal, but it's more important to not disturb
- * the system too much.
+ *
+ * We use trylock due to lock inversion: we need to acquire
+ * mmap_sem while holding page lock. Fault path does it in
+ * reverse order. Trylock is a way to avoid deadlock.
*/
if (down_write_trylock(&vma->vm_mm->mmap_sem)) {
spinlock_t *ptl = pmd_lock(vma->vm_mm, pmd);
@@ -1281,18 +1462,21 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
up_write(&vma->vm_mm->mmap_sem);
mm_dec_nr_ptes(vma->vm_mm);
pte_free(vma->vm_mm, pmd_pgtable(_pmd));
+ } else {
+ /* Try again later */
+ khugepaged_add_pte_mapped_thp(vma->vm_mm, addr);
}
}
i_mmap_unlock_write(mapping);
}
/**
- * collapse_shmem - collapse small tmpfs/shmem pages into huge one.
+ * collapse_file - collapse filemap/tmpfs/shmem pages into huge one.
*
* Basic scheme is simple, details are more complex:
* - allocate and lock a new huge page;
* - scan page cache replacing old pages with the new one
- * + swap in pages if necessary;
+ * + swap/gup in pages if necessary;
* + fill in gaps;
* + keep old pages around in case rollback is required;
* - if replacing succeeds:
@@ -1304,10 +1488,11 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
* + restore gaps in the page cache;
* + unlock and free huge page;
*/
-static void collapse_shmem(struct mm_struct *mm,
- struct address_space *mapping, pgoff_t start,
+static void collapse_file(struct mm_struct *mm,
+ struct file *file, pgoff_t start,
struct page **hpage, int node)
{
+ struct address_space *mapping = file->f_mapping;
gfp_t gfp;
struct page *new_page;
struct mem_cgroup *memcg;
@@ -1315,7 +1500,9 @@ static void collapse_shmem(struct mm_struct *mm,
LIST_HEAD(pagelist);
XA_STATE_ORDER(xas, &mapping->i_pages, start, HPAGE_PMD_ORDER);
int nr_none = 0, result = SCAN_SUCCEED;
+ bool is_shmem = shmem_file(file);
+ VM_BUG_ON(!IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && !is_shmem);
VM_BUG_ON(start & (HPAGE_PMD_NR - 1));
/* Only allocate from the target node */
@@ -1347,7 +1534,8 @@ static void collapse_shmem(struct mm_struct *mm,
} while (1);
__SetPageLocked(new_page);
- __SetPageSwapBacked(new_page);
+ if (is_shmem)
+ __SetPageSwapBacked(new_page);
new_page->index = start;
new_page->mapping = mapping;
@@ -1362,41 +1550,75 @@ static void collapse_shmem(struct mm_struct *mm,
struct page *page = xas_next(&xas);
VM_BUG_ON(index != xas.xa_index);
- if (!page) {
- /*
- * Stop if extent has been truncated or hole-punched,
- * and is now completely empty.
- */
- if (index == start) {
- if (!xas_next_entry(&xas, end - 1)) {
- result = SCAN_TRUNCATED;
+ if (is_shmem) {
+ if (!page) {
+ /*
+ * Stop if extent has been truncated or
+ * hole-punched, and is now completely
+ * empty.
+ */
+ if (index == start) {
+ if (!xas_next_entry(&xas, end - 1)) {
+ result = SCAN_TRUNCATED;
+ goto xa_locked;
+ }
+ xas_set(&xas, index);
+ }
+ if (!shmem_charge(mapping->host, 1)) {
+ result = SCAN_FAIL;
goto xa_locked;
}
- xas_set(&xas, index);
+ xas_store(&xas, new_page);
+ nr_none++;
+ continue;
}
- if (!shmem_charge(mapping->host, 1)) {
- result = SCAN_FAIL;
+
+ if (xa_is_value(page) || !PageUptodate(page)) {
+ xas_unlock_irq(&xas);
+ /* swap in or instantiate fallocated page */
+ if (shmem_getpage(mapping->host, index, &page,
+ SGP_NOHUGE)) {
+ result = SCAN_FAIL;
+ goto xa_unlocked;
+ }
+ } else if (trylock_page(page)) {
+ get_page(page);
+ xas_unlock_irq(&xas);
+ } else {
+ result = SCAN_PAGE_LOCK;
goto xa_locked;
}
- xas_store(&xas, new_page + (index % HPAGE_PMD_NR));
- nr_none++;
- continue;
- }
-
- if (xa_is_value(page) || !PageUptodate(page)) {
- xas_unlock_irq(&xas);
- /* swap in or instantiate fallocated page */
- if (shmem_getpage(mapping->host, index, &page,
- SGP_NOHUGE)) {
+ } else { /* !is_shmem */
+ if (!page || xa_is_value(page)) {
+ xas_unlock_irq(&xas);
+ page_cache_sync_readahead(mapping, &file->f_ra,
+ file, index,
+ PAGE_SIZE);
+ /* drain pagevecs to help isolate_lru_page() */
+ lru_add_drain();
+ page = find_lock_page(mapping, index);
+ if (unlikely(page == NULL)) {
+ result = SCAN_FAIL;
+ goto xa_unlocked;
+ }
+ } else if (!PageUptodate(page)) {
+ xas_unlock_irq(&xas);
+ wait_on_page_locked(page);
+ if (!trylock_page(page)) {
+ result = SCAN_PAGE_LOCK;
+ goto xa_unlocked;
+ }
+ get_page(page);
+ } else if (PageDirty(page)) {
result = SCAN_FAIL;
- goto xa_unlocked;
+ goto xa_locked;
+ } else if (trylock_page(page)) {
+ get_page(page);
+ xas_unlock_irq(&xas);
+ } else {
+ result = SCAN_PAGE_LOCK;
+ goto xa_locked;
}
- } else if (trylock_page(page)) {
- get_page(page);
- xas_unlock_irq(&xas);
- } else {
- result = SCAN_PAGE_LOCK;
- goto xa_locked;
}
/*
@@ -1425,6 +1647,12 @@ static void collapse_shmem(struct mm_struct *mm,
goto out_unlock;
}
+ if (page_has_private(page) &&
+ !try_to_release_page(page, GFP_KERNEL)) {
+ result = SCAN_PAGE_HAS_PRIVATE;
+ goto out_unlock;
+ }
+
if (page_mapped(page))
unmap_mapping_pages(mapping, index, 1, false);
@@ -1454,7 +1682,7 @@ static void collapse_shmem(struct mm_struct *mm,
list_add_tail(&page->lru, &pagelist);
/* Finally, replace with the new page. */
- xas_store(&xas, new_page + (index % HPAGE_PMD_NR));
+ xas_store(&xas, new_page);
continue;
out_unlock:
unlock_page(page);
@@ -1462,12 +1690,20 @@ out_unlock:
goto xa_unlocked;
}
- __inc_node_page_state(new_page, NR_SHMEM_THPS);
+ if (is_shmem)
+ __inc_node_page_state(new_page, NR_SHMEM_THPS);
+ else {
+ __inc_node_page_state(new_page, NR_FILE_THPS);
+ filemap_nr_thps_inc(mapping);
+ }
+
if (nr_none) {
struct zone *zone = page_zone(new_page);
__mod_node_page_state(zone->zone_pgdat, NR_FILE_PAGES, nr_none);
- __mod_node_page_state(zone->zone_pgdat, NR_SHMEM, nr_none);
+ if (is_shmem)
+ __mod_node_page_state(zone->zone_pgdat,
+ NR_SHMEM, nr_none);
}
xa_locked:
@@ -1505,10 +1741,15 @@ xa_unlocked:
SetPageUptodate(new_page);
page_ref_add(new_page, HPAGE_PMD_NR - 1);
- set_page_dirty(new_page);
mem_cgroup_commit_charge(new_page, memcg, false, true);
+
+ if (is_shmem) {
+ set_page_dirty(new_page);
+ lru_cache_add_anon(new_page);
+ } else {
+ lru_cache_add_file(new_page);
+ }
count_memcg_events(memcg, THP_COLLAPSE_ALLOC, 1);
- lru_cache_add_anon(new_page);
/*
* Remove pte page tables, so we can re-fault the page as huge.
@@ -1523,7 +1764,9 @@ xa_unlocked:
/* Something went wrong: roll back page cache changes */
xas_lock_irq(&xas);
mapping->nrpages -= nr_none;
- shmem_uncharge(mapping->host, nr_none);
+
+ if (is_shmem)
+ shmem_uncharge(mapping->host, nr_none);
xas_set(&xas, start);
xas_for_each(&xas, page, end - 1) {
@@ -1563,11 +1806,11 @@ out:
/* TODO: tracepoints */
}
-static void khugepaged_scan_shmem(struct mm_struct *mm,
- struct address_space *mapping,
- pgoff_t start, struct page **hpage)
+static void khugepaged_scan_file(struct mm_struct *mm,
+ struct file *file, pgoff_t start, struct page **hpage)
{
struct page *page = NULL;
+ struct address_space *mapping = file->f_mapping;
XA_STATE(xas, &mapping->i_pages, start);
int present, swap;
int node = NUMA_NO_NODE;
@@ -1606,7 +1849,8 @@ static void khugepaged_scan_shmem(struct mm_struct *mm,
break;
}
- if (page_count(page) != 1 + page_mapcount(page)) {
+ if (page_count(page) !=
+ 1 + page_mapcount(page) + page_has_private(page)) {
result = SCAN_PAGE_COUNT;
break;
}
@@ -1631,19 +1875,23 @@ static void khugepaged_scan_shmem(struct mm_struct *mm,
result = SCAN_EXCEED_NONE_PTE;
} else {
node = khugepaged_find_target_node();
- collapse_shmem(mm, mapping, start, hpage, node);
+ collapse_file(mm, file, start, hpage, node);
}
}
/* TODO: tracepoints */
}
#else
-static void khugepaged_scan_shmem(struct mm_struct *mm,
- struct address_space *mapping,
- pgoff_t start, struct page **hpage)
+static void khugepaged_scan_file(struct mm_struct *mm,
+ struct file *file, pgoff_t start, struct page **hpage)
{
BUILD_BUG();
}
+
+static int khugepaged_collapse_pte_mapped_thps(struct mm_slot *mm_slot)
+{
+ return 0;
+}
#endif
static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
@@ -1668,6 +1916,7 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
khugepaged_scan.mm_slot = mm_slot;
}
spin_unlock(&khugepaged_mm_lock);
+ khugepaged_collapse_pte_mapped_thps(mm_slot);
mm = mm_slot->mm;
/*
@@ -1713,17 +1962,18 @@ skip:
VM_BUG_ON(khugepaged_scan.address < hstart ||
khugepaged_scan.address + HPAGE_PMD_SIZE >
hend);
- if (shmem_file(vma->vm_file)) {
+ if (IS_ENABLED(CONFIG_SHMEM) && vma->vm_file) {
struct file *file;
pgoff_t pgoff = linear_page_index(vma,
khugepaged_scan.address);
- if (!shmem_huge_enabled(vma))
+
+ if (shmem_file(vma->vm_file)
+ && !shmem_huge_enabled(vma))
goto skip;
file = get_file(vma->vm_file);
up_read(&mm->mmap_sem);
ret = 1;
- khugepaged_scan_shmem(mm, file->f_mapping,
- pgoff, hpage);
+ khugepaged_scan_file(mm, file, pgoff, hpage);
fput(file);
} else {
ret = khugepaged_scan_pmd(mm, vma,
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index f6e602918dac..03a8d84badad 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -168,6 +168,8 @@ struct kmemleak_object {
#define OBJECT_REPORTED (1 << 1)
/* flag set to not scan the object */
#define OBJECT_NO_SCAN (1 << 2)
+/* flag set to fully scan the object when scan_area allocation failed */
+#define OBJECT_FULL_SCAN (1 << 3)
#define HEX_PREFIX " "
/* number of bytes to print per line; must be 16 or 32 */
@@ -183,6 +185,10 @@ struct kmemleak_object {
static LIST_HEAD(object_list);
/* the list of gray-colored objects (see color_gray comment below) */
static LIST_HEAD(gray_list);
+/* memory pool allocation */
+static struct kmemleak_object mem_pool[CONFIG_DEBUG_KMEMLEAK_MEM_POOL_SIZE];
+static int mem_pool_free_count = ARRAY_SIZE(mem_pool);
+static LIST_HEAD(mem_pool_free_list);
/* search tree for object boundaries */
static struct rb_root object_tree_root = RB_ROOT;
/* rw_lock protecting the access to object_list and object_tree_root */
@@ -193,13 +199,11 @@ static struct kmem_cache *object_cache;
static struct kmem_cache *scan_area_cache;
/* set if tracing memory operations is enabled */
-static int kmemleak_enabled;
+static int kmemleak_enabled = 1;
/* same as above but only for the kmemleak_free() callback */
-static int kmemleak_free_enabled;
+static int kmemleak_free_enabled = 1;
/* set in the late_initcall if there were no errors */
static int kmemleak_initialized;
-/* enables or disables early logging of the memory operations */
-static int kmemleak_early_log = 1;
/* set if a kmemleak warning was issued */
static int kmemleak_warning;
/* set if a fatal kmemleak error has occurred */
@@ -227,49 +231,6 @@ static bool kmemleak_found_leaks;
static bool kmemleak_verbose;
module_param_named(verbose, kmemleak_verbose, bool, 0600);
-/*
- * Early object allocation/freeing logging. Kmemleak is initialized after the
- * kernel allocator. However, both the kernel allocator and kmemleak may
- * allocate memory blocks which need to be tracked. Kmemleak defines an
- * arbitrary buffer to hold the allocation/freeing information before it is
- * fully initialized.
- */
-
-/* kmemleak operation type for early logging */
-enum {
- KMEMLEAK_ALLOC,
- KMEMLEAK_ALLOC_PERCPU,
- KMEMLEAK_FREE,
- KMEMLEAK_FREE_PART,
- KMEMLEAK_FREE_PERCPU,
- KMEMLEAK_NOT_LEAK,
- KMEMLEAK_IGNORE,
- KMEMLEAK_SCAN_AREA,
- KMEMLEAK_NO_SCAN,
- KMEMLEAK_SET_EXCESS_REF
-};
-
-/*
- * Structure holding the information passed to kmemleak callbacks during the
- * early logging.
- */
-struct early_log {
- int op_type; /* kmemleak operation type */
- int min_count; /* minimum reference count */
- const void *ptr; /* allocated/freed memory block */
- union {
- size_t size; /* memory block size */
- unsigned long excess_ref; /* surplus reference passing */
- };
- unsigned long trace[MAX_TRACE]; /* stack trace */
- unsigned int trace_len; /* stack trace length */
-};
-
-/* early logging buffer and current position */
-static struct early_log
- early_log[CONFIG_DEBUG_KMEMLEAK_EARLY_LOG_SIZE] __initdata;
-static int crt_early_log __initdata;
-
static void kmemleak_disable(void);
/*
@@ -450,6 +411,54 @@ static int get_object(struct kmemleak_object *object)
}
/*
+ * Memory pool allocation and freeing. kmemleak_lock must not be held.
+ */
+static struct kmemleak_object *mem_pool_alloc(gfp_t gfp)
+{
+ unsigned long flags;
+ struct kmemleak_object *object;
+
+ /* try the slab allocator first */
+ if (object_cache) {
+ object = kmem_cache_alloc(object_cache, gfp_kmemleak_mask(gfp));
+ if (object)
+ return object;
+ }
+
+ /* slab allocation failed, try the memory pool */
+ write_lock_irqsave(&kmemleak_lock, flags);
+ object = list_first_entry_or_null(&mem_pool_free_list,
+ typeof(*object), object_list);
+ if (object)
+ list_del(&object->object_list);
+ else if (mem_pool_free_count)
+ object = &mem_pool[--mem_pool_free_count];
+ else
+ pr_warn_once("Memory pool empty, consider increasing CONFIG_DEBUG_KMEMLEAK_MEM_POOL_SIZE\n");
+ write_unlock_irqrestore(&kmemleak_lock, flags);
+
+ return object;
+}
+
+/*
+ * Return the object to either the slab allocator or the memory pool.
+ */
+static void mem_pool_free(struct kmemleak_object *object)
+{
+ unsigned long flags;
+
+ if (object < mem_pool || object >= mem_pool + ARRAY_SIZE(mem_pool)) {
+ kmem_cache_free(object_cache, object);
+ return;
+ }
+
+ /* add the object to the memory pool free list */
+ write_lock_irqsave(&kmemleak_lock, flags);
+ list_add(&object->object_list, &mem_pool_free_list);
+ write_unlock_irqrestore(&kmemleak_lock, flags);
+}
+
+/*
* RCU callback to free a kmemleak_object.
*/
static void free_object_rcu(struct rcu_head *rcu)
@@ -467,7 +476,7 @@ static void free_object_rcu(struct rcu_head *rcu)
hlist_del(&area->node);
kmem_cache_free(scan_area_cache, area);
}
- kmem_cache_free(object_cache, object);
+ mem_pool_free(object);
}
/*
@@ -485,7 +494,15 @@ static void put_object(struct kmemleak_object *object)
/* should only get here after delete_object was called */
WARN_ON(object->flags & OBJECT_ALLOCATED);
- call_rcu(&object->rcu, free_object_rcu);
+ /*
+ * It may be too early for the RCU callbacks, however, there is no
+ * concurrent object_list traversal when !object_cache and all objects
+ * came from the memory pool. Free the object directly.
+ */
+ if (object_cache)
+ call_rcu(&object->rcu, free_object_rcu);
+ else
+ free_object_rcu(&object->rcu);
}
/*
@@ -550,7 +567,7 @@ static struct kmemleak_object *create_object(unsigned long ptr, size_t size,
struct rb_node **link, *rb_parent;
unsigned long untagged_ptr;
- object = kmem_cache_alloc(object_cache, gfp_kmemleak_mask(gfp));
+ object = mem_pool_alloc(gfp);
if (!object) {
pr_warn("Cannot allocate a kmemleak_object structure\n");
kmemleak_disable();
@@ -689,9 +706,7 @@ static void delete_object_part(unsigned long ptr, size_t size)
/*
* Create one or two objects that may result from the memory block
* split. Note that partial freeing is only done by free_bootmem() and
- * this happens before kmemleak_init() is called. The path below is
- * only executed during early log recording in kmemleak_init(), so
- * GFP_KERNEL is enough.
+ * this happens before kmemleak_init() is called.
*/
start = object->pointer;
end = object->pointer + object->size;
@@ -763,7 +778,7 @@ static void add_scan_area(unsigned long ptr, size_t size, gfp_t gfp)
{
unsigned long flags;
struct kmemleak_object *object;
- struct kmemleak_scan_area *area;
+ struct kmemleak_scan_area *area = NULL;
object = find_and_get_object(ptr, 1);
if (!object) {
@@ -772,13 +787,16 @@ static void add_scan_area(unsigned long ptr, size_t size, gfp_t gfp)
return;
}
- area = kmem_cache_alloc(scan_area_cache, gfp_kmemleak_mask(gfp));
- if (!area) {
- pr_warn("Cannot allocate a scan area\n");
- goto out;
- }
+ if (scan_area_cache)
+ area = kmem_cache_alloc(scan_area_cache, gfp_kmemleak_mask(gfp));
spin_lock_irqsave(&object->lock, flags);
+ if (!area) {
+ pr_warn_once("Cannot allocate a scan area, scanning the full object\n");
+ /* mark the object for full scan to avoid false positives */
+ object->flags |= OBJECT_FULL_SCAN;
+ goto out_unlock;
+ }
if (size == SIZE_MAX) {
size = object->pointer + object->size - ptr;
} else if (ptr + size > object->pointer + object->size) {
@@ -795,7 +813,6 @@ static void add_scan_area(unsigned long ptr, size_t size, gfp_t gfp)
hlist_add_head(&area->node, &object->area_list);
out_unlock:
spin_unlock_irqrestore(&object->lock, flags);
-out:
put_object(object);
}
@@ -845,86 +862,6 @@ static void object_no_scan(unsigned long ptr)
put_object(object);
}
-/*
- * Log an early kmemleak_* call to the early_log buffer. These calls will be
- * processed later once kmemleak is fully initialized.
- */
-static void __init log_early(int op_type, const void *ptr, size_t size,
- int min_count)
-{
- unsigned long flags;
- struct early_log *log;
-
- if (kmemleak_error) {
- /* kmemleak stopped recording, just count the requests */
- crt_early_log++;
- return;
- }
-
- if (crt_early_log >= ARRAY_SIZE(early_log)) {
- crt_early_log++;
- kmemleak_disable();
- return;
- }
-
- /*
- * There is no need for locking since the kernel is still in UP mode
- * at this stage. Disabling the IRQs is enough.
- */
- local_irq_save(flags);
- log = &early_log[crt_early_log];
- log->op_type = op_type;
- log->ptr = ptr;
- log->size = size;
- log->min_count = min_count;
- log->trace_len = __save_stack_trace(log->trace);
- crt_early_log++;
- local_irq_restore(flags);
-}
-
-/*
- * Log an early allocated block and populate the stack trace.
- */
-static void early_alloc(struct early_log *log)
-{
- struct kmemleak_object *object;
- unsigned long flags;
- int i;
-
- if (!kmemleak_enabled || !log->ptr || IS_ERR(log->ptr))
- return;
-
- /*
- * RCU locking needed to ensure object is not freed via put_object().
- */
- rcu_read_lock();
- object = create_object((unsigned long)log->ptr, log->size,
- log->min_count, GFP_ATOMIC);
- if (!object)
- goto out;
- spin_lock_irqsave(&object->lock, flags);
- for (i = 0; i < log->trace_len; i++)
- object->trace[i] = log->trace[i];
- object->trace_len = log->trace_len;
- spin_unlock_irqrestore(&object->lock, flags);
-out:
- rcu_read_unlock();
-}
-
-/*
- * Log an early allocated block and populate the stack trace.
- */
-static void early_alloc_percpu(struct early_log *log)
-{
- unsigned int cpu;
- const void __percpu *ptr = log->ptr;
-
- for_each_possible_cpu(cpu) {
- log->ptr = per_cpu_ptr(ptr, cpu);
- early_alloc(log);
- }
-}
-
/**
* kmemleak_alloc - register a newly allocated object
* @ptr: pointer to beginning of the object
@@ -946,8 +883,6 @@ void __ref kmemleak_alloc(const void *ptr, size_t size, int min_count,
if (kmemleak_enabled && ptr && !IS_ERR(ptr))
create_object((unsigned long)ptr, size, min_count, gfp);
- else if (kmemleak_early_log)
- log_early(KMEMLEAK_ALLOC, ptr, size, min_count);
}
EXPORT_SYMBOL_GPL(kmemleak_alloc);
@@ -975,8 +910,6 @@ void __ref kmemleak_alloc_percpu(const void __percpu *ptr, size_t size,
for_each_possible_cpu(cpu)
create_object((unsigned long)per_cpu_ptr(ptr, cpu),
size, 0, gfp);
- else if (kmemleak_early_log)
- log_early(KMEMLEAK_ALLOC_PERCPU, ptr, size, 0);
}
EXPORT_SYMBOL_GPL(kmemleak_alloc_percpu);
@@ -1001,11 +934,6 @@ void __ref kmemleak_vmalloc(const struct vm_struct *area, size_t size, gfp_t gfp
create_object((unsigned long)area->addr, size, 2, gfp);
object_set_excess_ref((unsigned long)area,
(unsigned long)area->addr);
- } else if (kmemleak_early_log) {
- log_early(KMEMLEAK_ALLOC, area->addr, size, 2);
- /* reusing early_log.size for storing area->addr */
- log_early(KMEMLEAK_SET_EXCESS_REF,
- area, (unsigned long)area->addr, 0);
}
}
EXPORT_SYMBOL_GPL(kmemleak_vmalloc);
@@ -1023,8 +951,6 @@ void __ref kmemleak_free(const void *ptr)
if (kmemleak_free_enabled && ptr && !IS_ERR(ptr))
delete_object_full((unsigned long)ptr);
- else if (kmemleak_early_log)
- log_early(KMEMLEAK_FREE, ptr, 0, 0);
}
EXPORT_SYMBOL_GPL(kmemleak_free);
@@ -1043,8 +969,6 @@ void __ref kmemleak_free_part(const void *ptr, size_t size)
if (kmemleak_enabled && ptr && !IS_ERR(ptr))
delete_object_part((unsigned long)ptr, size);
- else if (kmemleak_early_log)
- log_early(KMEMLEAK_FREE_PART, ptr, size, 0);
}
EXPORT_SYMBOL_GPL(kmemleak_free_part);
@@ -1065,8 +989,6 @@ void __ref kmemleak_free_percpu(const void __percpu *ptr)
for_each_possible_cpu(cpu)
delete_object_full((unsigned long)per_cpu_ptr(ptr,
cpu));
- else if (kmemleak_early_log)
- log_early(KMEMLEAK_FREE_PERCPU, ptr, 0, 0);
}
EXPORT_SYMBOL_GPL(kmemleak_free_percpu);
@@ -1117,8 +1039,6 @@ void __ref kmemleak_not_leak(const void *ptr)
if (kmemleak_enabled && ptr && !IS_ERR(ptr))
make_gray_object((unsigned long)ptr);
- else if (kmemleak_early_log)
- log_early(KMEMLEAK_NOT_LEAK, ptr, 0, 0);
}
EXPORT_SYMBOL(kmemleak_not_leak);
@@ -1137,8 +1057,6 @@ void __ref kmemleak_ignore(const void *ptr)
if (kmemleak_enabled && ptr && !IS_ERR(ptr))
make_black_object((unsigned long)ptr);
- else if (kmemleak_early_log)
- log_early(KMEMLEAK_IGNORE, ptr, 0, 0);
}
EXPORT_SYMBOL(kmemleak_ignore);
@@ -1159,8 +1077,6 @@ void __ref kmemleak_scan_area(const void *ptr, size_t size, gfp_t gfp)
if (kmemleak_enabled && ptr && size && !IS_ERR(ptr))
add_scan_area((unsigned long)ptr, size, gfp);
- else if (kmemleak_early_log)
- log_early(KMEMLEAK_SCAN_AREA, ptr, size, 0);
}
EXPORT_SYMBOL(kmemleak_scan_area);
@@ -1179,8 +1095,6 @@ void __ref kmemleak_no_scan(const void *ptr)
if (kmemleak_enabled && ptr && !IS_ERR(ptr))
object_no_scan((unsigned long)ptr);
- else if (kmemleak_early_log)
- log_early(KMEMLEAK_NO_SCAN, ptr, 0, 0);
}
EXPORT_SYMBOL(kmemleak_no_scan);
@@ -1408,7 +1322,8 @@ static void scan_object(struct kmemleak_object *object)
if (!(object->flags & OBJECT_ALLOCATED))
/* already freed object */
goto out;
- if (hlist_empty(&object->area_list)) {
+ if (hlist_empty(&object->area_list) ||
+ object->flags & OBJECT_FULL_SCAN) {
void *start = (void *)object->pointer;
void *end = (void *)(object->pointer + object->size);
void *next;
@@ -1966,7 +1881,6 @@ static void kmemleak_disable(void)
/* stop any memory operation tracing */
kmemleak_enabled = 0;
- kmemleak_early_log = 0;
/* check whether it is too early for a kernel thread */
if (kmemleak_initialized)
@@ -1994,20 +1908,11 @@ static int __init kmemleak_boot_config(char *str)
}
early_param("kmemleak", kmemleak_boot_config);
-static void __init print_log_trace(struct early_log *log)
-{
- pr_notice("Early log backtrace:\n");
- stack_trace_print(log->trace, log->trace_len, 2);
-}
-
/*
* Kmemleak initialization.
*/
void __init kmemleak_init(void)
{
- int i;
- unsigned long flags;
-
#ifdef CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF
if (!kmemleak_skip_disable) {
kmemleak_disable();
@@ -2015,28 +1920,15 @@ void __init kmemleak_init(void)
}
#endif
+ if (kmemleak_error)
+ return;
+
jiffies_min_age = msecs_to_jiffies(MSECS_MIN_AGE);
jiffies_scan_wait = msecs_to_jiffies(SECS_SCAN_WAIT * 1000);
object_cache = KMEM_CACHE(kmemleak_object, SLAB_NOLEAKTRACE);
scan_area_cache = KMEM_CACHE(kmemleak_scan_area, SLAB_NOLEAKTRACE);
- if (crt_early_log > ARRAY_SIZE(early_log))
- pr_warn("Early log buffer exceeded (%d), please increase DEBUG_KMEMLEAK_EARLY_LOG_SIZE\n",
- crt_early_log);
-
- /* the kernel is still in UP mode, so disabling the IRQs is enough */
- local_irq_save(flags);
- kmemleak_early_log = 0;
- if (kmemleak_error) {
- local_irq_restore(flags);
- return;
- } else {
- kmemleak_enabled = 1;
- kmemleak_free_enabled = 1;
- }
- local_irq_restore(flags);
-
/* register the data/bss sections */
create_object((unsigned long)_sdata, _edata - _sdata,
KMEMLEAK_GREY, GFP_ATOMIC);
@@ -2047,57 +1939,6 @@ void __init kmemleak_init(void)
create_object((unsigned long)__start_ro_after_init,
__end_ro_after_init - __start_ro_after_init,
KMEMLEAK_GREY, GFP_ATOMIC);
-
- /*
- * This is the point where tracking allocations is safe. Automatic
- * scanning is started during the late initcall. Add the early logged
- * callbacks to the kmemleak infrastructure.
- */
- for (i = 0; i < crt_early_log; i++) {
- struct early_log *log = &early_log[i];
-
- switch (log->op_type) {
- case KMEMLEAK_ALLOC:
- early_alloc(log);
- break;
- case KMEMLEAK_ALLOC_PERCPU:
- early_alloc_percpu(log);
- break;
- case KMEMLEAK_FREE:
- kmemleak_free(log->ptr);
- break;
- case KMEMLEAK_FREE_PART:
- kmemleak_free_part(log->ptr, log->size);
- break;
- case KMEMLEAK_FREE_PERCPU:
- kmemleak_free_percpu(log->ptr);
- break;
- case KMEMLEAK_NOT_LEAK:
- kmemleak_not_leak(log->ptr);
- break;
- case KMEMLEAK_IGNORE:
- kmemleak_ignore(log->ptr);
- break;
- case KMEMLEAK_SCAN_AREA:
- kmemleak_scan_area(log->ptr, log->size, GFP_KERNEL);
- break;
- case KMEMLEAK_NO_SCAN:
- kmemleak_no_scan(log->ptr);
- break;
- case KMEMLEAK_SET_EXCESS_REF:
- object_set_excess_ref((unsigned long)log->ptr,
- log->excess_ref);
- break;
- default:
- kmemleak_warn("Unknown early log operation: %d\n",
- log->op_type);
- }
-
- if (kmemleak_warning) {
- print_log_trace(log);
- kmemleak_warning = 0;
- }
- }
}
/*
@@ -2126,7 +1967,8 @@ static int __init kmemleak_late_init(void)
mutex_unlock(&scan_mutex);
}
- pr_info("Kernel memory leak detector initialized\n");
+ pr_info("Kernel memory leak detector initialized (mem pool available: %d)\n",
+ mem_pool_free_count);
return 0;
}
diff --git a/mm/ksm.c b/mm/ksm.c
index 3dc4346411e4..dbee2eb4dd05 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1029,24 +1029,6 @@ static u32 calc_checksum(struct page *page)
return checksum;
}
-static int memcmp_pages(struct page *page1, struct page *page2)
-{
- char *addr1, *addr2;
- int ret;
-
- addr1 = kmap_atomic(page1);
- addr2 = kmap_atomic(page2);
- ret = memcmp(addr1, addr2, PAGE_SIZE);
- kunmap_atomic(addr2);
- kunmap_atomic(addr1);
- return ret;
-}
-
-static inline int pages_identical(struct page *page1, struct page *page2)
-{
- return !memcmp_pages(page1, page2);
-}
-
static int write_protect_page(struct vm_area_struct *vma, struct page *page,
pte_t *orig_pte)
{
diff --git a/mm/madvise.c b/mm/madvise.c
index 88babcc384b9..68ab988ad433 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -107,28 +107,14 @@ static long madvise_behavior(struct vm_area_struct *vma,
case MADV_MERGEABLE:
case MADV_UNMERGEABLE:
error = ksm_madvise(vma, start, end, behavior, &new_flags);
- if (error) {
- /*
- * madvise() returns EAGAIN if kernel resources, such as
- * slab, are temporarily unavailable.
- */
- if (error == -ENOMEM)
- error = -EAGAIN;
- goto out;
- }
+ if (error)
+ goto out_convert_errno;
break;
case MADV_HUGEPAGE:
case MADV_NOHUGEPAGE:
error = hugepage_madvise(vma, &new_flags, behavior);
- if (error) {
- /*
- * madvise() returns EAGAIN if kernel resources, such as
- * slab, are temporarily unavailable.
- */
- if (error == -ENOMEM)
- error = -EAGAIN;
- goto out;
- }
+ if (error)
+ goto out_convert_errno;
break;
}
@@ -154,15 +140,8 @@ static long madvise_behavior(struct vm_area_struct *vma,
goto out;
}
error = __split_vma(mm, vma, start, 1);
- if (error) {
- /*
- * madvise() returns EAGAIN if kernel resources, such as
- * slab, are temporarily unavailable.
- */
- if (error == -ENOMEM)
- error = -EAGAIN;
- goto out;
- }
+ if (error)
+ goto out_convert_errno;
}
if (end != vma->vm_end) {
@@ -171,15 +150,8 @@ static long madvise_behavior(struct vm_area_struct *vma,
goto out;
}
error = __split_vma(mm, vma, end, 0);
- if (error) {
- /*
- * madvise() returns EAGAIN if kernel resources, such as
- * slab, are temporarily unavailable.
- */
- if (error == -ENOMEM)
- error = -EAGAIN;
- goto out;
- }
+ if (error)
+ goto out_convert_errno;
}
success:
@@ -187,6 +159,14 @@ success:
* vm_flags is protected by the mmap_sem held in write mode.
*/
vma->vm_flags = new_flags;
+
+out_convert_errno:
+ /*
+ * madvise() returns EAGAIN if kernel resources, such as
+ * slab, are temporarily unavailable.
+ */
+ if (error == -ENOMEM)
+ error = -EAGAIN;
out:
return error;
}
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index f3c15bb07cce..2156ef775d04 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -57,6 +57,7 @@
#include <linux/lockdep.h>
#include <linux/file.h>
#include <linux/tracehook.h>
+#include <linux/psi.h>
#include <linux/seq_buf.h>
#include "internal.h"
#include <net/sock.h>
@@ -317,6 +318,7 @@ DEFINE_STATIC_KEY_FALSE(memcg_kmem_enabled_key);
EXPORT_SYMBOL(memcg_kmem_enabled_key);
struct workqueue_struct *memcg_kmem_cache_wq;
+#endif
static int memcg_shrinker_map_size;
static DEFINE_MUTEX(memcg_shrinker_map_mutex);
@@ -440,14 +442,6 @@ void memcg_set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id)
}
}
-#else /* CONFIG_MEMCG_KMEM */
-static int memcg_alloc_shrinker_maps(struct mem_cgroup *memcg)
-{
- return 0;
-}
-static void memcg_free_shrinker_maps(struct mem_cgroup *memcg) { }
-#endif /* CONFIG_MEMCG_KMEM */
-
/**
* mem_cgroup_css_from_page - css of the memcg associated with a page
* @page: page of interest
@@ -2270,21 +2264,22 @@ static void drain_all_stock(struct mem_cgroup *root_memcg)
for_each_online_cpu(cpu) {
struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu);
struct mem_cgroup *memcg;
+ bool flush = false;
+ rcu_read_lock();
memcg = stock->cached;
- if (!memcg || !stock->nr_pages || !css_tryget(&memcg->css))
- continue;
- if (!mem_cgroup_is_descendant(memcg, root_memcg)) {
- css_put(&memcg->css);
- continue;
- }
- if (!test_and_set_bit(FLUSHING_CACHED_CHARGE, &stock->flags)) {
+ if (memcg && stock->nr_pages &&
+ mem_cgroup_is_descendant(memcg, root_memcg))
+ flush = true;
+ rcu_read_unlock();
+
+ if (flush &&
+ !test_and_set_bit(FLUSHING_CACHED_CHARGE, &stock->flags)) {
if (cpu == curcpu)
drain_local_stock(&stock->work);
else
schedule_work_on(cpu, &stock->work);
}
- css_put(&memcg->css);
}
put_cpu();
mutex_unlock(&percpu_charge_mutex);
@@ -2359,11 +2354,67 @@ static void high_work_func(struct work_struct *work)
}
/*
+ * Clamp the maximum sleep time per allocation batch to 2 seconds. This is
+ * enough to still cause a significant slowdown in most cases, while still
+ * allowing diagnostics and tracing to proceed without becoming stuck.
+ */
+#define MEMCG_MAX_HIGH_DELAY_JIFFIES (2UL*HZ)
+
+/*
+ * When calculating the delay, we use these either side of the exponentiation to
+ * maintain precision and scale to a reasonable number of jiffies (see the table
+ * below.
+ *
+ * - MEMCG_DELAY_PRECISION_SHIFT: Extra precision bits while translating the
+ * overage ratio to a delay.
+ * - MEMCG_DELAY_SCALING_SHIFT: The number of bits to scale down down the
+ * proposed penalty in order to reduce to a reasonable number of jiffies, and
+ * to produce a reasonable delay curve.
+ *
+ * MEMCG_DELAY_SCALING_SHIFT just happens to be a number that produces a
+ * reasonable delay curve compared to precision-adjusted overage, not
+ * penalising heavily at first, but still making sure that growth beyond the
+ * limit penalises misbehaviour cgroups by slowing them down exponentially. For
+ * example, with a high of 100 megabytes:
+ *
+ * +-------+------------------------+
+ * | usage | time to allocate in ms |
+ * +-------+------------------------+
+ * | 100M | 0 |
+ * | 101M | 6 |
+ * | 102M | 25 |
+ * | 103M | 57 |
+ * | 104M | 102 |
+ * | 105M | 159 |
+ * | 106M | 230 |
+ * | 107M | 313 |
+ * | 108M | 409 |
+ * | 109M | 518 |
+ * | 110M | 639 |
+ * | 111M | 774 |
+ * | 112M | 921 |
+ * | 113M | 1081 |
+ * | 114M | 1254 |
+ * | 115M | 1439 |
+ * | 116M | 1638 |
+ * | 117M | 1849 |
+ * | 118M | 2000 |
+ * | 119M | 2000 |
+ * | 120M | 2000 |
+ * +-------+------------------------+
+ */
+ #define MEMCG_DELAY_PRECISION_SHIFT 20
+ #define MEMCG_DELAY_SCALING_SHIFT 14
+
+/*
* Scheduled by try_charge() to be executed from the userland return path
* and reclaims memory over the high limit.
*/
void mem_cgroup_handle_over_high(void)
{
+ unsigned long usage, high, clamped_high;
+ unsigned long pflags;
+ unsigned long penalty_jiffies, overage;
unsigned int nr_pages = current->memcg_nr_pages_over_high;
struct mem_cgroup *memcg;
@@ -2372,8 +2423,75 @@ void mem_cgroup_handle_over_high(void)
memcg = get_mem_cgroup_from_mm(current->mm);
reclaim_high(memcg, nr_pages, GFP_KERNEL);
- css_put(&memcg->css);
current->memcg_nr_pages_over_high = 0;
+
+ /*
+ * memory.high is breached and reclaim is unable to keep up. Throttle
+ * allocators proactively to slow down excessive growth.
+ *
+ * We use overage compared to memory.high to calculate the number of
+ * jiffies to sleep (penalty_jiffies). Ideally this value should be
+ * fairly lenient on small overages, and increasingly harsh when the
+ * memcg in question makes it clear that it has no intention of stopping
+ * its crazy behaviour, so we exponentially increase the delay based on
+ * overage amount.
+ */
+
+ usage = page_counter_read(&memcg->memory);
+ high = READ_ONCE(memcg->high);
+
+ if (usage <= high)
+ goto out;
+
+ /*
+ * Prevent division by 0 in overage calculation by acting as if it was a
+ * threshold of 1 page
+ */
+ clamped_high = max(high, 1UL);
+
+ overage = div_u64((u64)(usage - high) << MEMCG_DELAY_PRECISION_SHIFT,
+ clamped_high);
+
+ penalty_jiffies = ((u64)overage * overage * HZ)
+ >> (MEMCG_DELAY_PRECISION_SHIFT + MEMCG_DELAY_SCALING_SHIFT);
+
+ /*
+ * Factor in the task's own contribution to the overage, such that four
+ * N-sized allocations are throttled approximately the same as one
+ * 4N-sized allocation.
+ *
+ * MEMCG_CHARGE_BATCH pages is nominal, so work out how much smaller or
+ * larger the current charge patch is than that.
+ */
+ penalty_jiffies = penalty_jiffies * nr_pages / MEMCG_CHARGE_BATCH;
+
+ /*
+ * Clamp the max delay per usermode return so as to still keep the
+ * application moving forwards and also permit diagnostics, albeit
+ * extremely slowly.
+ */
+ penalty_jiffies = min(penalty_jiffies, MEMCG_MAX_HIGH_DELAY_JIFFIES);
+
+ /*
+ * Don't sleep if the amount of jiffies this memcg owes us is so low
+ * that it's not even worth doing, in an attempt to be nice to those who
+ * go only a small amount over their memory.high value and maybe haven't
+ * been aggressively reclaimed enough yet.
+ */
+ if (penalty_jiffies <= HZ / 100)
+ goto out;
+
+ /*
+ * If we exit early, we're guaranteed to die (since
+ * schedule_timeout_killable sets TASK_KILLABLE). This means we don't
+ * need to account for any ill-begotten jiffies to pay them off later.
+ */
+ psi_memstall_enter(&pflags);
+ schedule_timeout_killable(penalty_jiffies);
+ psi_memstall_leave(&pflags);
+
+out:
+ css_put(&memcg->css);
}
static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
@@ -3512,6 +3630,9 @@ static ssize_t mem_cgroup_write(struct kernfs_open_file *of,
ret = mem_cgroup_resize_max(memcg, nr_pages, true);
break;
case _KMEM:
+ pr_warn_once("kmem.limit_in_bytes is deprecated and will be removed. "
+ "Please report your usecase to linux-mm@kvack.org if you "
+ "depend on this functionality.\n");
ret = memcg_update_kmem_max(memcg, nr_pages);
break;
case _TCP:
@@ -4805,11 +4926,6 @@ static void mem_cgroup_id_put_many(struct mem_cgroup *memcg, unsigned int n)
}
}
-static inline void mem_cgroup_id_get(struct mem_cgroup *memcg)
-{
- mem_cgroup_id_get_many(memcg, 1);
-}
-
static inline void mem_cgroup_id_put(struct mem_cgroup *memcg)
{
mem_cgroup_id_put_many(memcg, 1);
@@ -4955,6 +5071,11 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
memcg->cgwb_frn[i].done =
__WB_COMPLETION_INIT(&memcg_cgwb_frn_waitq);
#endif
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ spin_lock_init(&memcg->deferred_split_queue.split_queue_lock);
+ INIT_LIST_HEAD(&memcg->deferred_split_queue.split_queue);
+ memcg->deferred_split_queue.split_queue_len = 0;
+#endif
idr_replace(&mem_cgroup_idr, memcg, memcg->id.id);
return memcg;
fail:
@@ -5333,6 +5454,14 @@ static int mem_cgroup_move_account(struct page *page,
__mod_memcg_state(to, NR_WRITEBACK, nr_pages);
}
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ if (compound && !list_empty(page_deferred_list(page))) {
+ spin_lock(&from->deferred_split_queue.split_queue_lock);
+ list_del_init(page_deferred_list(page));
+ from->deferred_split_queue.split_queue_len--;
+ spin_unlock(&from->deferred_split_queue.split_queue_lock);
+ }
+#endif
/*
* It is safe to change page->mem_cgroup here because the page
* is referenced, charged, and isolated - we can't race with
@@ -5341,6 +5470,17 @@ static int mem_cgroup_move_account(struct page *page,
/* caller should have done css_get */
page->mem_cgroup = to;
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ if (compound && list_empty(page_deferred_list(page))) {
+ spin_lock(&to->deferred_split_queue.split_queue_lock);
+ list_add_tail(page_deferred_list(page),
+ &to->deferred_split_queue.split_queue);
+ to->deferred_split_queue.split_queue_len++;
+ spin_unlock(&to->deferred_split_queue.split_queue_lock);
+ }
+#endif
+
spin_unlock_irqrestore(&from->move_lock, flags);
ret = 0;
@@ -6511,7 +6651,7 @@ static void uncharge_page(struct page *page, struct uncharge_gather *ug)
unsigned int nr_pages = 1;
if (PageTransHuge(page)) {
- nr_pages <<= compound_order(page);
+ nr_pages = compound_nr(page);
ug->nr_huge += nr_pages;
}
if (PageAnon(page))
@@ -6523,7 +6663,7 @@ static void uncharge_page(struct page *page, struct uncharge_gather *ug)
}
ug->pgpgout++;
} else {
- ug->nr_kmem += 1 << compound_order(page);
+ ug->nr_kmem += compound_nr(page);
__ClearPageKmemcg(page);
}
diff --git a/mm/memfd.c b/mm/memfd.c
index 650e65a46b9c..2647c898990c 100644
--- a/mm/memfd.c
+++ b/mm/memfd.c
@@ -39,6 +39,7 @@ static void memfd_tag_pins(struct xa_state *xas)
xas_for_each(xas, page, ULONG_MAX) {
if (xa_is_value(page))
continue;
+ page = find_subpage(page, xas->xa_index);
if (page_count(page) - page_mapcount(page) > 1)
xas_set_mark(xas, MEMFD_TAG_PINNED);
@@ -88,6 +89,7 @@ static int memfd_wait_for_pins(struct address_space *mapping)
bool clear = true;
if (xa_is_value(page))
continue;
+ page = find_subpage(page, xas.xa_index);
if (page_count(page) - page_mapcount(page) != 1) {
/*
* On the last scan, we clean up all those tags
diff --git a/mm/memory.c b/mm/memory.c
index b1dff75640b7..b1ca51a079f2 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -518,7 +518,7 @@ static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr,
(long long)pte_val(pte), (long long)pmd_val(*pmd));
if (page)
dump_page(page, "bad pte");
- pr_alert("addr:%p vm_flags:%08lx anon_vma:%p mapping:%p index:%lx\n",
+ pr_alert("addr:%px vm_flags:%08lx anon_vma:%px mapping:%px index:%lx\n",
(void *)addr, vma->vm_flags, vma->anon_vma, mapping, index);
pr_alert("file:%pD fault:%ps mmap:%ps readpage:%ps\n",
vma->vm_file,
@@ -1026,6 +1026,9 @@ again:
if (pte_none(ptent))
continue;
+ if (need_resched())
+ break;
+
if (pte_present(ptent)) {
struct page *page;
@@ -1093,7 +1096,6 @@ again:
if (unlikely(details))
continue;
- entry = pte_to_swp_entry(ptent);
if (!non_swap_entry(entry))
rss[MM_SWAPENTS]--;
else if (is_migration_entry(entry)) {
@@ -1124,8 +1126,11 @@ again:
if (force_flush) {
force_flush = 0;
tlb_flush_mmu(tlb);
- if (addr != end)
- goto again;
+ }
+
+ if (addr != end) {
+ cond_resched();
+ goto again;
}
return addr;
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index c73f09913165..b1be791f772d 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -632,33 +632,30 @@ static void generic_online_page(struct page *page, unsigned int order)
#endif
}
-static int online_pages_blocks(unsigned long start, unsigned long nr_pages)
-{
- unsigned long end = start + nr_pages;
- int order, onlined_pages = 0;
-
- while (start < end) {
- order = min(MAX_ORDER - 1,
- get_order(PFN_PHYS(end) - PFN_PHYS(start)));
- (*online_page_callback)(pfn_to_page(start), order);
-
- onlined_pages += (1UL << order);
- start += (1UL << order);
- }
- return onlined_pages;
-}
-
static int online_pages_range(unsigned long start_pfn, unsigned long nr_pages,
void *arg)
{
- unsigned long onlined_pages = *(unsigned long *)arg;
+ const unsigned long end_pfn = start_pfn + nr_pages;
+ unsigned long pfn;
+ int order;
- if (PageReserved(pfn_to_page(start_pfn)))
- onlined_pages += online_pages_blocks(start_pfn, nr_pages);
+ /*
+ * Online the pages. The callback might decide to keep some pages
+ * PG_reserved (to add them to the buddy later), but we still account
+ * them as being online/belonging to this zone ("present").
+ */
+ for (pfn = start_pfn; pfn < end_pfn; pfn += 1ul << order) {
+ order = min(MAX_ORDER - 1, get_order(PFN_PHYS(end_pfn - pfn)));
+ /* __free_pages_core() wants pfns to be aligned to the order */
+ if (WARN_ON_ONCE(!IS_ALIGNED(pfn, 1ul << order)))
+ order = 0;
+ (*online_page_callback)(pfn_to_page(pfn), order);
+ }
- online_mem_sections(start_pfn, start_pfn + nr_pages);
+ /* mark all involved sections as online */
+ online_mem_sections(start_pfn, end_pfn);
- *(unsigned long *)arg = onlined_pages;
+ *(unsigned long *)arg += nr_pages;
return 0;
}
@@ -714,8 +711,13 @@ static void __meminit resize_pgdat_range(struct pglist_data *pgdat, unsigned lon
pgdat->node_start_pfn = start_pfn;
pgdat->node_spanned_pages = max(start_pfn + nr_pages, old_end_pfn) - pgdat->node_start_pfn;
-}
+}
+/*
+ * Associate the pfn range with the given zone, initializing the memmaps
+ * and resizing the pgdat/zone data to span the added pages. After this
+ * call, all affected pages are PG_reserved.
+ */
void __ref move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
unsigned long nr_pages, struct vmem_altmap *altmap)
{
@@ -804,20 +806,6 @@ struct zone * zone_for_pfn_range(int online_type, int nid, unsigned start_pfn,
return default_zone_for_pfn(nid, start_pfn, nr_pages);
}
-/*
- * Associates the given pfn range with the given node and the zone appropriate
- * for the given online type.
- */
-static struct zone * __meminit move_pfn_range(int online_type, int nid,
- unsigned long start_pfn, unsigned long nr_pages)
-{
- struct zone *zone;
-
- zone = zone_for_pfn_range(online_type, nid, start_pfn, nr_pages);
- move_pfn_range_to_zone(zone, start_pfn, nr_pages, NULL);
- return zone;
-}
-
int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_type)
{
unsigned long flags;
@@ -840,7 +828,8 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
put_device(&mem->dev);
/* associate pfn range with the zone */
- zone = move_pfn_range(online_type, nid, pfn, nr_pages);
+ zone = zone_for_pfn_range(online_type, nid, pfn, nr_pages);
+ move_pfn_range_to_zone(zone, pfn, nr_pages, NULL);
arg.start_pfn = pfn;
arg.nr_pages = nr_pages;
@@ -864,6 +853,7 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
ret = walk_system_ram_range(pfn, nr_pages, &onlined_pages,
online_pages_range);
if (ret) {
+ /* not a single memory resource was applicable */
if (need_zonelists_rebuild)
zone_pcp_reset(zone);
goto failed_addition;
@@ -877,27 +867,22 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
shuffle_zone(zone);
- if (onlined_pages) {
- node_states_set_node(nid, &arg);
- if (need_zonelists_rebuild)
- build_all_zonelists(NULL);
- else
- zone_pcp_update(zone);
- }
+ node_states_set_node(nid, &arg);
+ if (need_zonelists_rebuild)
+ build_all_zonelists(NULL);
+ else
+ zone_pcp_update(zone);
init_per_zone_wmark_min();
- if (onlined_pages) {
- kswapd_run(nid);
- kcompactd_run(nid);
- }
+ kswapd_run(nid);
+ kcompactd_run(nid);
vm_total_pages = nr_free_pagecache_pages();
writeback_set_ratelimit();
- if (onlined_pages)
- memory_notify(MEM_ONLINE, &arg);
+ memory_notify(MEM_ONLINE, &arg);
mem_hotplug_done();
return 0;
@@ -933,8 +918,11 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
if (!pgdat)
return NULL;
+ pgdat->per_cpu_nodestats =
+ alloc_percpu(struct per_cpu_nodestat);
arch_refresh_nodedata(nid, pgdat);
} else {
+ int cpu;
/*
* Reset the nr_zones, order and classzone_idx before reuse.
* Note that kswapd will init kswapd_classzone_idx properly
@@ -943,6 +931,12 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
pgdat->nr_zones = 0;
pgdat->kswapd_order = 0;
pgdat->kswapd_classzone_idx = 0;
+ for_each_online_cpu(cpu) {
+ struct per_cpu_nodestat *p;
+
+ p = per_cpu_ptr(pgdat->per_cpu_nodestats, cpu);
+ memset(p, 0, sizeof(*p));
+ }
}
/* we can use NODE_DATA(nid) from here */
@@ -952,7 +946,6 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
/* init node's zones as empty zones, we don't have any present pages.*/
free_area_init_core_hotplug(nid);
- pgdat->per_cpu_nodestats = alloc_percpu(struct per_cpu_nodestat);
/*
* The node we allocated has no zone fallback lists. For avoiding
@@ -1309,7 +1302,7 @@ static unsigned long scan_movable_pages(unsigned long start, unsigned long end)
head = compound_head(page);
if (page_huge_active(head))
return pfn;
- skip = (1 << compound_order(head)) - (page - head);
+ skip = compound_nr(head) - (page - head);
pfn += skip - 1;
}
return 0;
@@ -1347,7 +1340,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
if (PageHuge(page)) {
struct page *head = compound_head(page);
- pfn = page_to_pfn(head) + (1<<compound_order(head)) - 1;
+ pfn = page_to_pfn(head) + compound_nr(head) - 1;
isolate_huge_page(head, &source);
continue;
} else if (PageTransHuge(page))
@@ -1662,7 +1655,7 @@ static int check_memblock_offlined_cb(struct memory_block *mem, void *arg)
phys_addr_t beginpa, endpa;
beginpa = PFN_PHYS(section_nr_to_pfn(mem->start_section_nr));
- endpa = PFN_PHYS(section_nr_to_pfn(mem->end_section_nr + 1))-1;
+ endpa = beginpa + memory_block_size_bytes() - 1;
pr_warn("removing memory fails, because memory [%pa-%pa] is onlined\n",
&beginpa, &endpa);
@@ -1800,7 +1793,7 @@ void __remove_memory(int nid, u64 start, u64 size)
{
/*
- * trigger BUG() is some memory is not offlined prior to calling this
+ * trigger BUG() if some memory is not offlined prior to calling this
* function
*/
if (try_remove_memory(nid, start, size))
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index f000771558d8..464406e8da91 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1512,10 +1512,6 @@ static int kernel_migrate_pages(pid_t pid, unsigned long maxnode,
if (nodes_empty(*new))
goto out_put;
- nodes_and(*new, *new, node_states[N_MEMORY]);
- if (nodes_empty(*new))
- goto out_put;
-
err = security_task_movememory(task);
if (err)
goto out_put;
diff --git a/mm/migrate.c b/mm/migrate.c
index 9f4ed4e985c1..73d476d690b1 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -460,7 +460,7 @@ int migrate_page_move_mapping(struct address_space *mapping,
for (i = 1; i < HPAGE_PMD_NR; i++) {
xas_next(&xas);
- xas_store(&xas, newpage + i);
+ xas_store(&xas, newpage);
}
}
@@ -1892,7 +1892,7 @@ static int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
VM_BUG_ON_PAGE(compound_order(page) && !PageTransHuge(page), page);
/* Avoid migrating to a node that is nearly full */
- if (!migrate_balanced_pgdat(pgdat, 1UL << compound_order(page)))
+ if (!migrate_balanced_pgdat(pgdat, compound_nr(page)))
return 0;
if (isolate_lru_page(page))
@@ -2218,17 +2218,15 @@ again:
pte_t pte;
pte = *ptep;
- pfn = pte_pfn(pte);
if (pte_none(pte)) {
mpfn = MIGRATE_PFN_MIGRATE;
migrate->cpages++;
- pfn = 0;
goto next;
}
if (!pte_present(pte)) {
- mpfn = pfn = 0;
+ mpfn = 0;
/*
* Only care about unaddressable device page special
@@ -2245,10 +2243,10 @@ again:
if (is_write_device_private_entry(entry))
mpfn |= MIGRATE_PFN_WRITE;
} else {
+ pfn = pte_pfn(pte);
if (is_zero_pfn(pfn)) {
mpfn = MIGRATE_PFN_MIGRATE;
migrate->cpages++;
- pfn = 0;
goto next;
}
page = vm_normal_page(migrate->vma, addr, pte);
@@ -2258,10 +2256,9 @@ again:
/* FIXME support THP */
if (!page || !page->mapping || PageTransCompound(page)) {
- mpfn = pfn = 0;
+ mpfn = 0;
goto next;
}
- pfn = page_to_pfn(page);
/*
* By getting a reference on the page we pin it and that blocks
diff --git a/mm/mmap.c b/mm/mmap.c
index 6bc21fca20bc..f1e8c7f93e04 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1358,6 +1358,9 @@ static inline u64 file_mmap_size_max(struct file *file, struct inode *inode)
if (S_ISBLK(inode->i_mode))
return MAX_LFS_FILESIZE;
+ if (S_ISSOCK(inode->i_mode))
+ return MAX_LFS_FILESIZE;
+
/* Special "we do even unsigned file positions" case */
if (file->f_mode & FMODE_UNSIGNED_OFFSET)
return 0;
@@ -2274,12 +2277,9 @@ find_vma_prev(struct mm_struct *mm, unsigned long addr,
if (vma) {
*pprev = vma->vm_prev;
} else {
- struct rb_node *rb_node = mm->mm_rb.rb_node;
- *pprev = NULL;
- while (rb_node) {
- *pprev = rb_entry(rb_node, struct vm_area_struct, vm_rb);
- rb_node = rb_node->rb_right;
- }
+ struct rb_node *rb_node = rb_last(&mm->mm_rb);
+
+ *pprev = rb_node ? rb_entry(rb_node, struct vm_area_struct, vm_rb) : NULL;
}
return vma;
}
diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c
index 8c943a6e1696..7d70e5c78f97 100644
--- a/mm/mmu_gather.c
+++ b/mm/mmu_gather.c
@@ -271,8 +271,6 @@ void tlb_finish_mmu(struct mmu_gather *tlb,
tlb_flush_mmu(tlb);
- /* keep the page table cache within bounds */
- check_pgt_cache();
#ifndef CONFIG_HAVE_MMU_GATHER_NO_GATHER
tlb_batch_list_free(tlb);
#endif
diff --git a/mm/nommu.c b/mm/nommu.c
index fed1b6e9c89b..99b7ec318824 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -108,7 +108,7 @@ unsigned int kobjsize(const void *objp)
* The ksize() function is only guaranteed to work for pointers
* returned by kmalloc(). So handle arbitrary pointers here.
*/
- return PAGE_SIZE << compound_order(page);
+ return page_size(page);
}
/**
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index eda2e2a0bdc6..c1d9496b4c43 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -73,7 +73,7 @@ static inline bool is_memcg_oom(struct oom_control *oc)
/**
* oom_cpuset_eligible() - check task eligiblity for kill
* @start: task struct of which task to consider
- * @mask: nodemask passed to page allocator for mempolicy ooms
+ * @oc: pointer to struct oom_control
*
* Task eligibility is determined by whether or not a candidate task, @tsk,
* shares the same mempolicy nodes as current if it is bound by such a policy
@@ -287,7 +287,7 @@ static enum oom_constraint constrained_alloc(struct oom_control *oc)
!nodes_subset(node_states[N_MEMORY], *oc->nodemask)) {
oc->totalpages = total_swap_pages;
for_each_node_mask(nid, *oc->nodemask)
- oc->totalpages += node_spanned_pages(nid);
+ oc->totalpages += node_present_pages(nid);
return CONSTRAINT_MEMORY_POLICY;
}
@@ -300,7 +300,7 @@ static enum oom_constraint constrained_alloc(struct oom_control *oc)
if (cpuset_limited) {
oc->totalpages = total_swap_pages;
for_each_node_mask(nid, cpuset_current_mems_allowed)
- oc->totalpages += node_spanned_pages(nid);
+ oc->totalpages += node_present_pages(nid);
return CONSTRAINT_CPUSET;
}
return CONSTRAINT_NONE;
@@ -884,12 +884,13 @@ static void __oom_kill_process(struct task_struct *victim, const char *message)
*/
do_send_sig_info(SIGKILL, SEND_SIG_PRIV, victim, PIDTYPE_TGID);
mark_oom_victim(victim);
- pr_err("%s: Killed process %d (%s) total-vm:%lukB, anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n",
- message, task_pid_nr(victim), victim->comm,
- K(victim->mm->total_vm),
- K(get_mm_counter(victim->mm, MM_ANONPAGES)),
- K(get_mm_counter(victim->mm, MM_FILEPAGES)),
- K(get_mm_counter(victim->mm, MM_SHMEMPAGES)));
+ pr_err("%s: Killed process %d (%s) total-vm:%lukB, anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB, UID:%u pgtables:%lukB oom_score_adj:%hd\n",
+ message, task_pid_nr(victim), victim->comm, K(mm->total_vm),
+ K(get_mm_counter(mm, MM_ANONPAGES)),
+ K(get_mm_counter(mm, MM_FILEPAGES)),
+ K(get_mm_counter(mm, MM_SHMEMPAGES)),
+ from_kuid(&init_user_ns, task_uid(victim)),
+ mm_pgtables_bytes(mm), victim->signal->oom_score_adj);
task_unlock(victim);
/*
@@ -1068,9 +1069,10 @@ bool out_of_memory(struct oom_control *oc)
* The OOM killer does not compensate for IO-less reclaim.
* pagefault_out_of_memory lost its gfp context so we have to
* make sure exclude 0 mask - all other users should have at least
- * ___GFP_DIRECT_RECLAIM to get here.
+ * ___GFP_DIRECT_RECLAIM to get here. But mem_cgroup_oom() has to
+ * invoke the OOM killer even if it is a GFP_NOFS allocation.
*/
- if (oc->gfp_mask && !(oc->gfp_mask & __GFP_FS))
+ if (oc->gfp_mask && !(oc->gfp_mask & __GFP_FS) && !is_memcg_oom(oc))
return true;
/*
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index ff5484fdbdf9..3334a769eb91 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -670,6 +670,7 @@ out:
void free_compound_page(struct page *page)
{
+ mem_cgroup_uncharge(page);
__free_pages_ok(page, compound_order(page));
}
@@ -3955,14 +3956,22 @@ should_compact_retry(struct alloc_context *ac, int order, int alloc_flags,
goto check_priority;
/*
+ * compaction was skipped because there are not enough order-0 pages
+ * to work with, so we retry only if it looks like reclaim can help.
+ */
+ if (compaction_needs_reclaim(compact_result)) {
+ ret = compaction_zonelist_suitable(ac, order, alloc_flags);
+ goto out;
+ }
+
+ /*
* make sure the compaction wasn't deferred or didn't bail out early
* due to locks contention before we declare that we should give up.
- * But do not retry if the given zonelist is not suitable for
- * compaction.
+ * But the next retry should use a higher priority if allowed, so
+ * we don't just keep bailing out endlessly.
*/
if (compaction_withdrawn(compact_result)) {
- ret = compaction_zonelist_suitable(ac, order, alloc_flags);
- goto out;
+ goto check_priority;
}
/*
@@ -6638,9 +6647,11 @@ static unsigned long __init calc_memmap_size(unsigned long spanned_pages,
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static void pgdat_init_split_queue(struct pglist_data *pgdat)
{
- spin_lock_init(&pgdat->split_queue_lock);
- INIT_LIST_HEAD(&pgdat->split_queue);
- pgdat->split_queue_len = 0;
+ struct deferred_split *ds_queue = &pgdat->deferred_split_queue;
+
+ spin_lock_init(&ds_queue->split_queue_lock);
+ INIT_LIST_HEAD(&ds_queue->split_queue);
+ ds_queue->split_queue_len = 0;
}
#else
static void pgdat_init_split_queue(struct pglist_data *pgdat) {}
@@ -8196,7 +8207,7 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count,
if (!hugepage_migration_supported(page_hstate(head)))
goto unmovable;
- skip_pages = (1 << compound_order(head)) - (page - head);
+ skip_pages = compound_nr(head) - (page - head);
iter += skip_pages - 1;
continue;
}
diff --git a/mm/page_owner.c b/mm/page_owner.c
index addcbb2ae4e4..dee931184788 100644
--- a/mm/page_owner.c
+++ b/mm/page_owner.c
@@ -24,6 +24,9 @@ struct page_owner {
short last_migrate_reason;
gfp_t gfp_mask;
depot_stack_handle_t handle;
+#ifdef CONFIG_DEBUG_PAGEALLOC
+ depot_stack_handle_t free_handle;
+#endif
};
static bool page_owner_disabled = true;
@@ -102,19 +105,6 @@ static inline struct page_owner *get_page_owner(struct page_ext *page_ext)
return (void *)page_ext + page_owner_ops.offset;
}
-void __reset_page_owner(struct page *page, unsigned int order)
-{
- int i;
- struct page_ext *page_ext;
-
- for (i = 0; i < (1 << order); i++) {
- page_ext = lookup_page_ext(page + i);
- if (unlikely(!page_ext))
- continue;
- __clear_bit(PAGE_EXT_OWNER, &page_ext->flags);
- }
-}
-
static inline bool check_recursive_alloc(unsigned long *entries,
unsigned int nr_entries,
unsigned long ip)
@@ -154,18 +144,50 @@ static noinline depot_stack_handle_t save_stack(gfp_t flags)
return handle;
}
-static inline void __set_page_owner_handle(struct page_ext *page_ext,
- depot_stack_handle_t handle, unsigned int order, gfp_t gfp_mask)
+void __reset_page_owner(struct page *page, unsigned int order)
{
+ int i;
+ struct page_ext *page_ext;
+#ifdef CONFIG_DEBUG_PAGEALLOC
+ depot_stack_handle_t handle = 0;
struct page_owner *page_owner;
- page_owner = get_page_owner(page_ext);
- page_owner->handle = handle;
- page_owner->order = order;
- page_owner->gfp_mask = gfp_mask;
- page_owner->last_migrate_reason = -1;
+ if (debug_pagealloc_enabled())
+ handle = save_stack(GFP_NOWAIT | __GFP_NOWARN);
+#endif
- __set_bit(PAGE_EXT_OWNER, &page_ext->flags);
+ for (i = 0; i < (1 << order); i++) {
+ page_ext = lookup_page_ext(page + i);
+ if (unlikely(!page_ext))
+ continue;
+ __clear_bit(PAGE_EXT_OWNER_ACTIVE, &page_ext->flags);
+#ifdef CONFIG_DEBUG_PAGEALLOC
+ if (debug_pagealloc_enabled()) {
+ page_owner = get_page_owner(page_ext);
+ page_owner->free_handle = handle;
+ }
+#endif
+ }
+}
+
+static inline void __set_page_owner_handle(struct page *page,
+ struct page_ext *page_ext, depot_stack_handle_t handle,
+ unsigned int order, gfp_t gfp_mask)
+{
+ struct page_owner *page_owner;
+ int i;
+
+ for (i = 0; i < (1 << order); i++) {
+ page_owner = get_page_owner(page_ext);
+ page_owner->handle = handle;
+ page_owner->order = order;
+ page_owner->gfp_mask = gfp_mask;
+ page_owner->last_migrate_reason = -1;
+ __set_bit(PAGE_EXT_OWNER, &page_ext->flags);
+ __set_bit(PAGE_EXT_OWNER_ACTIVE, &page_ext->flags);
+
+ page_ext = lookup_page_ext(page + i);
+ }
}
noinline void __set_page_owner(struct page *page, unsigned int order,
@@ -178,7 +200,7 @@ noinline void __set_page_owner(struct page *page, unsigned int order,
return;
handle = save_stack(gfp_mask);
- __set_page_owner_handle(page_ext, handle, order, gfp_mask);
+ __set_page_owner_handle(page, page_ext, handle, order, gfp_mask);
}
void __set_page_owner_migrate_reason(struct page *page, int reason)
@@ -204,8 +226,11 @@ void __split_page_owner(struct page *page, unsigned int order)
page_owner = get_page_owner(page_ext);
page_owner->order = 0;
- for (i = 1; i < (1 << order); i++)
- __copy_page_owner(page, page + i);
+ for (i = 1; i < (1 << order); i++) {
+ page_ext = lookup_page_ext(page + i);
+ page_owner = get_page_owner(page_ext);
+ page_owner->order = 0;
+ }
}
void __copy_page_owner(struct page *oldpage, struct page *newpage)
@@ -235,6 +260,7 @@ void __copy_page_owner(struct page *oldpage, struct page *newpage)
* the new page, which will be freed.
*/
__set_bit(PAGE_EXT_OWNER, &new_ext->flags);
+ __set_bit(PAGE_EXT_OWNER_ACTIVE, &new_ext->flags);
}
void pagetypeinfo_showmixedcount_print(struct seq_file *m,
@@ -294,7 +320,7 @@ void pagetypeinfo_showmixedcount_print(struct seq_file *m,
if (unlikely(!page_ext))
continue;
- if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags))
+ if (!test_bit(PAGE_EXT_OWNER_ACTIVE, &page_ext->flags))
continue;
page_owner = get_page_owner(page_ext);
@@ -405,20 +431,36 @@ void __dump_page_owner(struct page *page)
mt = gfpflags_to_migratetype(gfp_mask);
if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags)) {
- pr_alert("page_owner info is not active (free page?)\n");
+ pr_alert("page_owner info is not present (never set?)\n");
return;
}
+ if (test_bit(PAGE_EXT_OWNER_ACTIVE, &page_ext->flags))
+ pr_alert("page_owner tracks the page as allocated\n");
+ else
+ pr_alert("page_owner tracks the page as freed\n");
+
+ pr_alert("page last allocated via order %u, migratetype %s, gfp_mask %#x(%pGg)\n",
+ page_owner->order, migratetype_names[mt], gfp_mask, &gfp_mask);
+
handle = READ_ONCE(page_owner->handle);
if (!handle) {
- pr_alert("page_owner info is not active (free page?)\n");
- return;
+ pr_alert("page_owner allocation stack trace missing\n");
+ } else {
+ nr_entries = stack_depot_fetch(handle, &entries);
+ stack_trace_print(entries, nr_entries, 0);
}
- nr_entries = stack_depot_fetch(handle, &entries);
- pr_alert("page allocated via order %u, migratetype %s, gfp_mask %#x(%pGg)\n",
- page_owner->order, migratetype_names[mt], gfp_mask, &gfp_mask);
- stack_trace_print(entries, nr_entries, 0);
+#ifdef CONFIG_DEBUG_PAGEALLOC
+ handle = READ_ONCE(page_owner->free_handle);
+ if (!handle) {
+ pr_alert("page_owner free stack trace missing\n");
+ } else {
+ nr_entries = stack_depot_fetch(handle, &entries);
+ pr_alert("page last free stack trace:\n");
+ stack_trace_print(entries, nr_entries, 0);
+ }
+#endif
if (page_owner->last_migrate_reason != -1)
pr_alert("page has been migrated, last migrate reason: %s\n",
@@ -481,9 +523,23 @@ read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos)
if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags))
continue;
+ /*
+ * Although we do have the info about past allocation of free
+ * pages, it's not relevant for current memory usage.
+ */
+ if (!test_bit(PAGE_EXT_OWNER_ACTIVE, &page_ext->flags))
+ continue;
+
page_owner = get_page_owner(page_ext);
/*
+ * Don't print "tail" pages of high-order allocations as that
+ * would inflate the stats.
+ */
+ if (!IS_ALIGNED(pfn, 1 << page_owner->order))
+ continue;
+
+ /*
* Access to page_ext->handle isn't synchronous so we should
* be careful to access it.
*/
@@ -562,7 +618,8 @@ static void init_pages_in_zone(pg_data_t *pgdat, struct zone *zone)
continue;
/* Found early allocated page */
- __set_page_owner_handle(page_ext, early_handle, 0, 0);
+ __set_page_owner_handle(page, page_ext, early_handle,
+ 0, 0);
count++;
}
cond_resched();
diff --git a/mm/page_poison.c b/mm/page_poison.c
index 21d4f97cb49b..34b9181ee5d1 100644
--- a/mm/page_poison.c
+++ b/mm/page_poison.c
@@ -101,7 +101,7 @@ static void unpoison_page(struct page *page)
/*
* Page poisoning when enabled poisons each and every page
* that is freed to buddy. Thus no extra check is done to
- * see if a page was posioned.
+ * see if a page was poisoned.
*/
check_poison_mem(addr, PAGE_SIZE);
kunmap_atomic(addr);
diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
index 11df03e71288..eff4b4520c8d 100644
--- a/mm/page_vma_mapped.c
+++ b/mm/page_vma_mapped.c
@@ -153,8 +153,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
if (unlikely(PageHuge(pvmw->page))) {
/* when pud is not present, pte will be NULL */
- pvmw->pte = huge_pte_offset(mm, pvmw->address,
- PAGE_SIZE << compound_order(page));
+ pvmw->pte = huge_pte_offset(mm, pvmw->address, page_size(page));
if (!pvmw->pte)
return false;
diff --git a/mm/quicklist.c b/mm/quicklist.c
deleted file mode 100644
index 5e98ac78e410..000000000000
--- a/mm/quicklist.c
+++ /dev/null
@@ -1,103 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Quicklist support.
- *
- * Quicklists are light weight lists of pages that have a defined state
- * on alloc and free. Pages must be in the quicklist specific defined state
- * (zero by default) when the page is freed. It seems that the initial idea
- * for such lists first came from Dave Miller and then various other people
- * improved on it.
- *
- * Copyright (C) 2007 SGI,
- * Christoph Lameter <cl@linux.com>
- * Generalized, added support for multiple lists and
- * constructors / destructors.
- */
-#include <linux/kernel.h>
-
-#include <linux/gfp.h>
-#include <linux/mm.h>
-#include <linux/mmzone.h>
-#include <linux/quicklist.h>
-
-DEFINE_PER_CPU(struct quicklist [CONFIG_NR_QUICK], quicklist);
-
-#define FRACTION_OF_NODE_MEM 16
-
-static unsigned long max_pages(unsigned long min_pages)
-{
- unsigned long node_free_pages, max;
- int node = numa_node_id();
- struct zone *zones = NODE_DATA(node)->node_zones;
- int num_cpus_on_node;
-
- node_free_pages =
-#ifdef CONFIG_ZONE_DMA
- zone_page_state(&zones[ZONE_DMA], NR_FREE_PAGES) +
-#endif
-#ifdef CONFIG_ZONE_DMA32
- zone_page_state(&zones[ZONE_DMA32], NR_FREE_PAGES) +
-#endif
- zone_page_state(&zones[ZONE_NORMAL], NR_FREE_PAGES);
-
- max = node_free_pages / FRACTION_OF_NODE_MEM;
-
- num_cpus_on_node = cpumask_weight(cpumask_of_node(node));
- max /= num_cpus_on_node;
-
- return max(max, min_pages);
-}
-
-static long min_pages_to_free(struct quicklist *q,
- unsigned long min_pages, long max_free)
-{
- long pages_to_free;
-
- pages_to_free = q->nr_pages - max_pages(min_pages);
-
- return min(pages_to_free, max_free);
-}
-
-/*
- * Trim down the number of pages in the quicklist
- */
-void quicklist_trim(int nr, void (*dtor)(void *),
- unsigned long min_pages, unsigned long max_free)
-{
- long pages_to_free;
- struct quicklist *q;
-
- q = &get_cpu_var(quicklist)[nr];
- if (q->nr_pages > min_pages) {
- pages_to_free = min_pages_to_free(q, min_pages, max_free);
-
- while (pages_to_free > 0) {
- /*
- * We pass a gfp_t of 0 to quicklist_alloc here
- * because we will never call into the page allocator.
- */
- void *p = quicklist_alloc(nr, 0, NULL);
-
- if (dtor)
- dtor(p);
- free_page((unsigned long)p);
- pages_to_free--;
- }
- }
- put_cpu_var(quicklist);
-}
-
-unsigned long quicklist_total_size(void)
-{
- unsigned long count = 0;
- int cpu;
- struct quicklist *ql, *q;
-
- for_each_online_cpu(cpu) {
- ql = per_cpu(quicklist, cpu);
- for (q = ql; q < ql + CONFIG_NR_QUICK; q++)
- count += q->nr_pages;
- }
- return count;
-}
-
diff --git a/mm/rmap.c b/mm/rmap.c
index 003377e24232..d9a23bb773bf 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -898,15 +898,13 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
*/
mmu_notifier_range_init(&range, MMU_NOTIFY_PROTECTION_PAGE,
0, vma, vma->vm_mm, address,
- min(vma->vm_end, address +
- (PAGE_SIZE << compound_order(page))));
+ min(vma->vm_end, address + page_size(page)));
mmu_notifier_invalidate_range_start(&range);
while (page_vma_mapped_walk(&pvmw)) {
- unsigned long cstart;
int ret = 0;
- cstart = address = pvmw.address;
+ address = pvmw.address;
if (pvmw.pte) {
pte_t entry;
pte_t *pte = pvmw.pte;
@@ -933,7 +931,6 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
entry = pmd_wrprotect(entry);
entry = pmd_mkclean(entry);
set_pmd_at(vma->vm_mm, address, pmd, entry);
- cstart &= PMD_MASK;
ret = 1;
#else
/* unexpected pmd-mapped page? */
@@ -1192,8 +1189,10 @@ void page_add_file_rmap(struct page *page, bool compound)
}
if (!atomic_inc_and_test(compound_mapcount_ptr(page)))
goto out;
- VM_BUG_ON_PAGE(!PageSwapBacked(page), page);
- __inc_node_page_state(page, NR_SHMEM_PMDMAPPED);
+ if (PageSwapBacked(page))
+ __inc_node_page_state(page, NR_SHMEM_PMDMAPPED);
+ else
+ __inc_node_page_state(page, NR_FILE_PMDMAPPED);
} else {
if (PageTransCompound(page) && page_mapping(page)) {
VM_WARN_ON_ONCE(!PageLocked(page));
@@ -1232,8 +1231,10 @@ static void page_remove_file_rmap(struct page *page, bool compound)
}
if (!atomic_add_negative(-1, compound_mapcount_ptr(page)))
goto out;
- VM_BUG_ON_PAGE(!PageSwapBacked(page), page);
- __dec_node_page_state(page, NR_SHMEM_PMDMAPPED);
+ if (PageSwapBacked(page))
+ __dec_node_page_state(page, NR_SHMEM_PMDMAPPED);
+ else
+ __dec_node_page_state(page, NR_FILE_PMDMAPPED);
} else {
if (!atomic_add_negative(-1, &page->_mapcount))
goto out;
@@ -1374,8 +1375,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
*/
mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
address,
- min(vma->vm_end, address +
- (PAGE_SIZE << compound_order(page))));
+ min(vma->vm_end, address + page_size(page)));
if (PageHuge(page)) {
/*
* If sharing is possible, start and end will be adjusted
@@ -1524,8 +1524,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) {
pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
if (PageHuge(page)) {
- int nr = 1 << compound_order(page);
- hugetlb_count_sub(nr, mm);
+ hugetlb_count_sub(compound_nr(page), mm);
set_huge_swap_pte_at(mm, address,
pvmw.pte, pteval,
vma_mmu_pagesize(vma));
diff --git a/mm/shmem.c b/mm/shmem.c
index 0f7fd4a85db6..30ce722c23fa 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -609,7 +609,7 @@ static int shmem_add_to_page_cache(struct page *page,
{
XA_STATE_ORDER(xas, &mapping->i_pages, index, compound_order(page));
unsigned long i = 0;
- unsigned long nr = 1UL << compound_order(page);
+ unsigned long nr = compound_nr(page);
VM_BUG_ON_PAGE(PageTail(page), page);
VM_BUG_ON_PAGE(index != round_down(index, nr), page);
@@ -631,7 +631,7 @@ static int shmem_add_to_page_cache(struct page *page,
if (xas_error(&xas))
goto unlock;
next:
- xas_store(&xas, page + i);
+ xas_store(&xas, page);
if (++i < nr) {
xas_next(&xas);
goto next;
@@ -1734,7 +1734,7 @@ unlock:
* vm. If we swap it in we mark it dirty since we also free the swap
* entry since a page cannot live in both the swap and page cache.
*
- * fault_mm and fault_type are only supplied by shmem_fault:
+ * vmf and fault_type are only supplied by shmem_fault:
* otherwise they are NULL.
*/
static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
@@ -1884,7 +1884,7 @@ alloc_nohuge:
lru_cache_add_anon(page);
spin_lock_irq(&info->lock);
- info->alloced += 1 << compound_order(page);
+ info->alloced += compound_nr(page);
inode->i_blocks += BLOCKS_PER_PAGE << compound_order(page);
shmem_recalc_inode(inode);
spin_unlock_irq(&info->lock);
@@ -1925,7 +1925,7 @@ clear:
struct page *head = compound_head(page);
int i;
- for (i = 0; i < (1 << compound_order(head)); i++) {
+ for (i = 0; i < compound_nr(head); i++) {
clear_highpage(head + i);
flush_dcache_page(head + i);
}
@@ -1952,7 +1952,7 @@ clear:
* Error recovery.
*/
unacct:
- shmem_inode_unacct_blocks(inode, 1 << compound_order(page));
+ shmem_inode_unacct_blocks(inode, compound_nr(page));
if (PageTransHuge(page)) {
unlock_page(page);
diff --git a/mm/slab.h b/mm/slab.h
index 9057b8056b07..68e455f2b698 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -30,6 +30,69 @@ struct kmem_cache {
struct list_head list; /* List of all slab caches on the system */
};
+#else /* !CONFIG_SLOB */
+
+struct memcg_cache_array {
+ struct rcu_head rcu;
+ struct kmem_cache *entries[0];
+};
+
+/*
+ * This is the main placeholder for memcg-related information in kmem caches.
+ * Both the root cache and the child caches will have it. For the root cache,
+ * this will hold a dynamically allocated array large enough to hold
+ * information about the currently limited memcgs in the system. To allow the
+ * array to be accessed without taking any locks, on relocation we free the old
+ * version only after a grace period.
+ *
+ * Root and child caches hold different metadata.
+ *
+ * @root_cache: Common to root and child caches. NULL for root, pointer to
+ * the root cache for children.
+ *
+ * The following fields are specific to root caches.
+ *
+ * @memcg_caches: kmemcg ID indexed table of child caches. This table is
+ * used to index child cachces during allocation and cleared
+ * early during shutdown.
+ *
+ * @root_caches_node: List node for slab_root_caches list.
+ *
+ * @children: List of all child caches. While the child caches are also
+ * reachable through @memcg_caches, a child cache remains on
+ * this list until it is actually destroyed.
+ *
+ * The following fields are specific to child caches.
+ *
+ * @memcg: Pointer to the memcg this cache belongs to.
+ *
+ * @children_node: List node for @root_cache->children list.
+ *
+ * @kmem_caches_node: List node for @memcg->kmem_caches list.
+ */
+struct memcg_cache_params {
+ struct kmem_cache *root_cache;
+ union {
+ struct {
+ struct memcg_cache_array __rcu *memcg_caches;
+ struct list_head __root_caches_node;
+ struct list_head children;
+ bool dying;
+ };
+ struct {
+ struct mem_cgroup *memcg;
+ struct list_head children_node;
+ struct list_head kmem_caches_node;
+ struct percpu_ref refcnt;
+
+ void (*work_fn)(struct kmem_cache *);
+ union {
+ struct rcu_head rcu_head;
+ struct work_struct work;
+ };
+ };
+ };
+};
#endif /* CONFIG_SLOB */
#ifdef CONFIG_SLAB
@@ -174,6 +237,7 @@ int __kmem_cache_shrink(struct kmem_cache *);
void __kmemcg_cache_deactivate(struct kmem_cache *s);
void __kmemcg_cache_deactivate_after_rcu(struct kmem_cache *s);
void slab_kmem_cache_release(struct kmem_cache *);
+void kmem_cache_shrink_all(struct kmem_cache *s);
struct seq_file;
struct file;
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 807490fe217a..6491c3a41805 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -981,6 +981,43 @@ int kmem_cache_shrink(struct kmem_cache *cachep)
}
EXPORT_SYMBOL(kmem_cache_shrink);
+/**
+ * kmem_cache_shrink_all - shrink a cache and all memcg caches for root cache
+ * @s: The cache pointer
+ */
+void kmem_cache_shrink_all(struct kmem_cache *s)
+{
+ struct kmem_cache *c;
+
+ if (!IS_ENABLED(CONFIG_MEMCG_KMEM) || !is_root_cache(s)) {
+ kmem_cache_shrink(s);
+ return;
+ }
+
+ get_online_cpus();
+ get_online_mems();
+ kasan_cache_shrink(s);
+ __kmem_cache_shrink(s);
+
+ /*
+ * We have to take the slab_mutex to protect from the memcg list
+ * modification.
+ */
+ mutex_lock(&slab_mutex);
+ for_each_memcg_cache(c, s) {
+ /*
+ * Don't need to shrink deactivated memcg caches.
+ */
+ if (s->flags & SLAB_DEACTIVATED)
+ continue;
+ kasan_cache_shrink(c);
+ __kmem_cache_shrink(c);
+ }
+ mutex_unlock(&slab_mutex);
+ put_online_mems();
+ put_online_cpus();
+}
+
bool slab_is_available(void)
{
return slab_state >= UP;
diff --git a/mm/slob.c b/mm/slob.c
index 7f421d0ca9ab..cf377beab962 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -539,7 +539,7 @@ size_t __ksize(const void *block)
sp = virt_to_page(block);
if (unlikely(!PageSlab(sp)))
- return PAGE_SIZE << compound_order(sp);
+ return page_size(sp);
align = max_t(size_t, ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
m = (unsigned int *)(block - align);
diff --git a/mm/slub.c b/mm/slub.c
index 8834563cdb4b..42c1b3af3c98 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -829,7 +829,7 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page)
return 1;
start = page_address(page);
- length = PAGE_SIZE << compound_order(page);
+ length = page_size(page);
end = start + length;
remainder = length % s->size;
if (!remainder)
@@ -1074,13 +1074,14 @@ static void setup_object_debug(struct kmem_cache *s, struct page *page,
init_tracking(s, object);
}
-static void setup_page_debug(struct kmem_cache *s, void *addr, int order)
+static
+void setup_page_debug(struct kmem_cache *s, struct page *page, void *addr)
{
if (!(s->flags & SLAB_POISON))
return;
metadata_access_enable();
- memset(addr, POISON_INUSE, PAGE_SIZE << order);
+ memset(addr, POISON_INUSE, page_size(page));
metadata_access_disable();
}
@@ -1340,8 +1341,8 @@ slab_flags_t kmem_cache_flags(unsigned int object_size,
#else /* !CONFIG_SLUB_DEBUG */
static inline void setup_object_debug(struct kmem_cache *s,
struct page *page, void *object) {}
-static inline void setup_page_debug(struct kmem_cache *s,
- void *addr, int order) {}
+static inline
+void setup_page_debug(struct kmem_cache *s, struct page *page, void *addr) {}
static inline int alloc_debug_processing(struct kmem_cache *s,
struct page *page, void *object, unsigned long addr) { return 0; }
@@ -1639,7 +1640,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
struct kmem_cache_order_objects oo = s->oo;
gfp_t alloc_gfp;
void *start, *p, *next;
- int idx, order;
+ int idx;
bool shuffle;
flags &= gfp_allowed_mask;
@@ -1673,7 +1674,6 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
page->objects = oo_objects(oo);
- order = compound_order(page);
page->slab_cache = s;
__SetPageSlab(page);
if (page_is_pfmemalloc(page))
@@ -1683,7 +1683,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
start = page_address(page);
- setup_page_debug(s, start, order);
+ setup_page_debug(s, page, start);
shuffle = shuffle_freelist(s, page);
@@ -2004,6 +2004,7 @@ static inline unsigned long next_tid(unsigned long tid)
return tid + TID_STEP;
}
+#ifdef SLUB_DEBUG_CMPXCHG
static inline unsigned int tid_to_cpu(unsigned long tid)
{
return tid % TID_STEP;
@@ -2013,6 +2014,7 @@ static inline unsigned long tid_to_event(unsigned long tid)
{
return tid / TID_STEP;
}
+#endif
static inline unsigned int init_tid(int cpu)
{
@@ -3930,7 +3932,7 @@ size_t __ksize(const void *object)
if (unlikely(!PageSlab(page))) {
WARN_ON(!PageCompound(page));
- return PAGE_SIZE << compound_order(page);
+ return page_size(page);
}
return slab_ksize(page->slab_cache);
@@ -5298,7 +5300,7 @@ static ssize_t shrink_store(struct kmem_cache *s,
const char *buf, size_t length)
{
if (buf[0] == '1')
- kmem_cache_shrink(s);
+ kmem_cache_shrink_all(s);
else
return -EINVAL;
return length;
diff --git a/mm/sparse.c b/mm/sparse.c
index 72f010d9bff5..bf32de9e666b 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -11,6 +11,8 @@
#include <linux/export.h>
#include <linux/spinlock.h>
#include <linux/vmalloc.h>
+#include <linux/swap.h>
+#include <linux/swapops.h>
#include "internal.h"
#include <asm/dma.h>
@@ -470,6 +472,12 @@ struct page __init *__populate_section_memmap(unsigned long pfn,
static void *sparsemap_buf __meminitdata;
static void *sparsemap_buf_end __meminitdata;
+static inline void __meminit sparse_buffer_free(unsigned long size)
+{
+ WARN_ON(!sparsemap_buf || size == 0);
+ memblock_free_early(__pa(sparsemap_buf), size);
+}
+
static void __init sparse_buffer_init(unsigned long size, int nid)
{
phys_addr_t addr = __pa(MAX_DMA_ADDRESS);
@@ -486,7 +494,7 @@ static void __init sparse_buffer_fini(void)
unsigned long size = sparsemap_buf_end - sparsemap_buf;
if (sparsemap_buf && size > 0)
- memblock_free_early(__pa(sparsemap_buf), size);
+ sparse_buffer_free(size);
sparsemap_buf = NULL;
}
@@ -495,11 +503,15 @@ void * __meminit sparse_buffer_alloc(unsigned long size)
void *ptr = NULL;
if (sparsemap_buf) {
- ptr = PTR_ALIGN(sparsemap_buf, size);
+ ptr = (void *) roundup((unsigned long)sparsemap_buf, size);
if (ptr + size > sparsemap_buf_end)
ptr = NULL;
- else
+ else {
+ /* Free redundant aligned space */
+ if ((unsigned long)(ptr - sparsemap_buf) > 0)
+ sparse_buffer_free((unsigned long)(ptr - sparsemap_buf));
sparsemap_buf = ptr + size;
+ }
}
return ptr;
}
@@ -867,7 +879,7 @@ int __meminit sparse_add_section(int nid, unsigned long start_pfn,
*/
page_init_poison(pfn_to_page(start_pfn), sizeof(struct page) * nr_pages);
- ms = __pfn_to_section(start_pfn);
+ ms = __nr_to_section(section_nr);
set_section_nid(section_nr, nid);
section_mark_present(ms);
@@ -884,9 +896,6 @@ static void clear_hwpoisoned_pages(struct page *memmap, int nr_pages)
{
int i;
- if (!memmap)
- return;
-
/*
* A further optimization is to have per section refcounted
* num_poisoned_pages. But that would need more space per memmap, so
@@ -898,7 +907,7 @@ static void clear_hwpoisoned_pages(struct page *memmap, int nr_pages)
for (i = 0; i < nr_pages; i++) {
if (PageHWPoison(&memmap[i])) {
- atomic_long_sub(1, &num_poisoned_pages);
+ num_poisoned_pages_dec();
ClearPageHWPoison(&memmap[i]);
}
}
diff --git a/mm/swap.c b/mm/swap.c
index ae300397dfda..784dc1620620 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -71,12 +71,12 @@ static void __page_cache_release(struct page *page)
spin_unlock_irqrestore(&pgdat->lru_lock, flags);
}
__ClearPageWaiters(page);
- mem_cgroup_uncharge(page);
}
static void __put_single_page(struct page *page)
{
__page_cache_release(page);
+ mem_cgroup_uncharge(page);
free_unref_page(page);
}
@@ -515,7 +515,6 @@ static void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec,
del_page_from_lru_list(page, lruvec, lru + active);
ClearPageActive(page);
ClearPageReferenced(page);
- add_page_to_lru_list(page, lruvec, lru);
if (PageWriteback(page) || PageDirty(page)) {
/*
@@ -523,13 +522,14 @@ static void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec,
* It can make readahead confusing. But race window
* is _really_ small and it's non-critical problem.
*/
+ add_page_to_lru_list(page, lruvec, lru);
SetPageReclaim(page);
} else {
/*
* The page's writeback ends up during pagevec
* We moves tha page into tail of inactive.
*/
- list_move_tail(&page->lru, &lruvec->lists[lru]);
+ add_page_to_lru_list_tail(page, lruvec, lru);
__count_vm_event(PGROTATED);
}
@@ -844,17 +844,15 @@ void lru_add_page_tail(struct page *page, struct page *page_tail,
get_page(page_tail);
list_add_tail(&page_tail->lru, list);
} else {
- struct list_head *list_head;
/*
* Head page has not yet been counted, as an hpage,
* so we must account for each subpage individually.
*
- * Use the standard add function to put page_tail on the list,
- * but then correct its position so they all end up in order.
+ * Put page_tail on the list at the correct position
+ * so they all end up in order.
*/
- add_page_to_lru_list(page_tail, lruvec, page_lru(page_tail));
- list_head = page_tail->lru.prev;
- list_move_tail(&page_tail->lru, list_head);
+ add_page_to_lru_list_tail(page_tail, lruvec,
+ page_lru(page_tail));
}
if (!PageUnevictable(page))
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 8368621a0fc7..8e7ce9a9bc5e 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -116,7 +116,7 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp)
struct address_space *address_space = swap_address_space(entry);
pgoff_t idx = swp_offset(entry);
XA_STATE_ORDER(xas, &address_space->i_pages, idx, compound_order(page));
- unsigned long i, nr = 1UL << compound_order(page);
+ unsigned long i, nr = compound_nr(page);
VM_BUG_ON_PAGE(!PageLocked(page), page);
VM_BUG_ON_PAGE(PageSwapCache(page), page);
@@ -133,7 +133,7 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp)
for (i = 0; i < nr; i++) {
VM_BUG_ON_PAGE(xas.xa_index != idx + i, page);
set_page_private(page + i, entry.val + i);
- xas_store(&xas, page + i);
+ xas_store(&xas, page);
xas_next(&xas);
}
address_space->nrpages += nr;
@@ -168,7 +168,7 @@ void __delete_from_swap_cache(struct page *page, swp_entry_t entry)
for (i = 0; i < nr; i++) {
void *entry = xas_store(&xas, NULL);
- VM_BUG_ON_PAGE(entry != page + i, entry);
+ VM_BUG_ON_PAGE(entry != page, entry);
set_page_private(page + i, 0);
xas_next(&xas);
}
diff --git a/mm/util.c b/mm/util.c
index e6351a80f248..3ad6db9a722e 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -16,6 +16,13 @@
#include <linux/hugetlb.h>
#include <linux/vmalloc.h>
#include <linux/userfaultfd_k.h>
+#include <linux/elf.h>
+#include <linux/elf-randomize.h>
+#include <linux/personality.h>
+#include <linux/random.h>
+#include <linux/processor.h>
+#include <linux/sizes.h>
+#include <linux/compat.h>
#include <linux/uaccess.h>
@@ -293,7 +300,105 @@ int vma_is_stack_for_current(struct vm_area_struct *vma)
return (vma->vm_start <= KSTK_ESP(t) && vma->vm_end >= KSTK_ESP(t));
}
-#if defined(CONFIG_MMU) && !defined(HAVE_ARCH_PICK_MMAP_LAYOUT)
+#ifndef STACK_RND_MASK
+#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12)) /* 8MB of VA */
+#endif
+
+unsigned long randomize_stack_top(unsigned long stack_top)
+{
+ unsigned long random_variable = 0;
+
+ if (current->flags & PF_RANDOMIZE) {
+ random_variable = get_random_long();
+ random_variable &= STACK_RND_MASK;
+ random_variable <<= PAGE_SHIFT;
+ }
+#ifdef CONFIG_STACK_GROWSUP
+ return PAGE_ALIGN(stack_top) + random_variable;
+#else
+ return PAGE_ALIGN(stack_top) - random_variable;
+#endif
+}
+
+#ifdef CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
+unsigned long arch_randomize_brk(struct mm_struct *mm)
+{
+ /* Is the current task 32bit ? */
+ if (!IS_ENABLED(CONFIG_64BIT) || is_compat_task())
+ return randomize_page(mm->brk, SZ_32M);
+
+ return randomize_page(mm->brk, SZ_1G);
+}
+
+unsigned long arch_mmap_rnd(void)
+{
+ unsigned long rnd;
+
+#ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
+ if (is_compat_task())
+ rnd = get_random_long() & ((1UL << mmap_rnd_compat_bits) - 1);
+ else
+#endif /* CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS */
+ rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1);
+
+ return rnd << PAGE_SHIFT;
+}
+
+static int mmap_is_legacy(struct rlimit *rlim_stack)
+{
+ if (current->personality & ADDR_COMPAT_LAYOUT)
+ return 1;
+
+ if (rlim_stack->rlim_cur == RLIM_INFINITY)
+ return 1;
+
+ return sysctl_legacy_va_layout;
+}
+
+/*
+ * Leave enough space between the mmap area and the stack to honour ulimit in
+ * the face of randomisation.
+ */
+#define MIN_GAP (SZ_128M)
+#define MAX_GAP (STACK_TOP / 6 * 5)
+
+static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack)
+{
+ unsigned long gap = rlim_stack->rlim_cur;
+ unsigned long pad = stack_guard_gap;
+
+ /* Account for stack randomization if necessary */
+ if (current->flags & PF_RANDOMIZE)
+ pad += (STACK_RND_MASK << PAGE_SHIFT);
+
+ /* Values close to RLIM_INFINITY can overflow. */
+ if (gap + pad > gap)
+ gap += pad;
+
+ if (gap < MIN_GAP)
+ gap = MIN_GAP;
+ else if (gap > MAX_GAP)
+ gap = MAX_GAP;
+
+ return PAGE_ALIGN(STACK_TOP - gap - rnd);
+}
+
+void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
+{
+ unsigned long random_factor = 0UL;
+
+ if (current->flags & PF_RANDOMIZE)
+ random_factor = arch_mmap_rnd();
+
+ if (mmap_is_legacy(rlim_stack)) {
+ mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
+ mm->get_unmapped_area = arch_get_unmapped_area;
+ } else {
+ mm->mmap_base = mmap_base(random_factor, rlim_stack);
+ mm->get_unmapped_area = arch_get_unmapped_area_topdown;
+ }
+}
+#elif defined(CONFIG_MMU) && !defined(HAVE_ARCH_PICK_MMAP_LAYOUT)
void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
{
mm->mmap_base = TASK_UNMAPPED_BASE;
@@ -521,7 +626,7 @@ bool page_mapped(struct page *page)
return true;
if (PageHuge(page))
return false;
- for (i = 0; i < (1 << compound_order(page)); i++) {
+ for (i = 0; i < compound_nr(page); i++) {
if (atomic_read(&page[i]._mapcount) >= 0)
return true;
}
@@ -783,3 +888,16 @@ out_mm:
out:
return res;
}
+
+int memcmp_pages(struct page *page1, struct page *page2)
+{
+ char *addr1, *addr2;
+ int ret;
+
+ addr1 = kmap_atomic(page1);
+ addr2 = kmap_atomic(page2);
+ ret = memcmp(addr1, addr2, PAGE_SIZE);
+ kunmap_atomic(addr2);
+ kunmap_atomic(addr1);
+ return ret;
+}
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index c1246d77cf75..fcadd3e25c0c 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -329,8 +329,6 @@ EXPORT_SYMBOL(vmalloc_to_pfn);
#define DEBUG_AUGMENT_PROPAGATE_CHECK 0
#define DEBUG_AUGMENT_LOWEST_MATCH_CHECK 0
-#define VM_LAZY_FREE 0x02
-#define VM_VM_AREA 0x04
static DEFINE_SPINLOCK(vmap_area_lock);
/* Export for kexec only */
@@ -1116,7 +1114,7 @@ retry:
va->va_start = addr;
va->va_end = addr + size;
- va->flags = 0;
+ va->vm = NULL;
insert_vmap_area(va, &vmap_area_root, &vmap_area_list);
spin_unlock(&vmap_area_lock);
@@ -1282,7 +1280,14 @@ static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end)
llist_for_each_entry_safe(va, n_va, valist, purge_list) {
unsigned long nr = (va->va_end - va->va_start) >> PAGE_SHIFT;
- __free_vmap_area(va);
+ /*
+ * Finally insert or merge lazily-freed area. It is
+ * detached and there is no need to "unlink" it from
+ * anything.
+ */
+ merge_or_add_vmap_area(va,
+ &free_vmap_area_root, &free_vmap_area_list);
+
atomic_long_sub(nr, &vmap_lazy_nr);
if (atomic_long_read(&vmap_lazy_nr) < resched_threshold)
@@ -1324,6 +1329,10 @@ static void free_vmap_area_noflush(struct vmap_area *va)
{
unsigned long nr_lazy;
+ spin_lock(&vmap_area_lock);
+ unlink_va(va, &vmap_area_root);
+ spin_unlock(&vmap_area_lock);
+
nr_lazy = atomic_long_add_return((va->va_end - va->va_start) >>
PAGE_SHIFT, &vmap_lazy_nr);
@@ -1918,7 +1927,6 @@ void __init vmalloc_init(void)
if (WARN_ON_ONCE(!va))
continue;
- va->flags = VM_VM_AREA;
va->va_start = (unsigned long)tmp->addr;
va->va_end = va->va_start + tmp->size;
va->vm = tmp;
@@ -2016,7 +2024,6 @@ static void setup_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va,
vm->size = va->va_end - va->va_start;
vm->caller = caller;
va->vm = vm;
- va->flags |= VM_VM_AREA;
spin_unlock(&vmap_area_lock);
}
@@ -2121,10 +2128,10 @@ struct vm_struct *find_vm_area(const void *addr)
struct vmap_area *va;
va = find_vmap_area((unsigned long)addr);
- if (va && va->flags & VM_VM_AREA)
- return va->vm;
+ if (!va)
+ return NULL;
- return NULL;
+ return va->vm;
}
/**
@@ -2143,14 +2150,12 @@ struct vm_struct *remove_vm_area(const void *addr)
might_sleep();
- va = find_vmap_area((unsigned long)addr);
- if (va && va->flags & VM_VM_AREA) {
+ spin_lock(&vmap_area_lock);
+ va = __find_vmap_area((unsigned long)addr);
+ if (va && va->vm) {
struct vm_struct *vm = va->vm;
- spin_lock(&vmap_area_lock);
va->vm = NULL;
- va->flags &= ~VM_VM_AREA;
- va->flags |= VM_LAZY_FREE;
spin_unlock(&vmap_area_lock);
kasan_free_shadow(vm);
@@ -2158,6 +2163,8 @@ struct vm_struct *remove_vm_area(const void *addr)
return vm;
}
+
+ spin_unlock(&vmap_area_lock);
return NULL;
}
@@ -2402,7 +2409,6 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
nr_pages = get_vm_area_size(area) >> PAGE_SHIFT;
array_size = (nr_pages * sizeof(struct page *));
- area->nr_pages = nr_pages;
/* Please note that the recursion is strictly bounded. */
if (array_size > PAGE_SIZE) {
pages = __vmalloc_node(array_size, 1, nested_gfp|highmem_mask,
@@ -2410,13 +2416,16 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
} else {
pages = kmalloc_node(array_size, nested_gfp, node);
}
- area->pages = pages;
- if (!area->pages) {
+
+ if (!pages) {
remove_vm_area(area->addr);
kfree(area);
return NULL;
}
+ area->pages = pages;
+ area->nr_pages = nr_pages;
+
for (i = 0; i < area->nr_pages; i++) {
struct page *page;
@@ -2851,7 +2860,7 @@ long vread(char *buf, char *addr, unsigned long count)
if (!count)
break;
- if (!(va->flags & VM_VM_AREA))
+ if (!va->vm)
continue;
vm = va->vm;
@@ -2931,7 +2940,7 @@ long vwrite(char *buf, char *addr, unsigned long count)
if (!count)
break;
- if (!(va->flags & VM_VM_AREA))
+ if (!va->vm)
continue;
vm = va->vm;
@@ -3450,6 +3459,22 @@ static void show_numa_info(struct seq_file *m, struct vm_struct *v)
}
}
+static void show_purge_info(struct seq_file *m)
+{
+ struct llist_node *head;
+ struct vmap_area *va;
+
+ head = READ_ONCE(vmap_purge_list.first);
+ if (head == NULL)
+ return;
+
+ llist_for_each_entry(va, head, purge_list) {
+ seq_printf(m, "0x%pK-0x%pK %7ld unpurged vm_area\n",
+ (void *)va->va_start, (void *)va->va_end,
+ va->va_end - va->va_start);
+ }
+}
+
static int s_show(struct seq_file *m, void *p)
{
struct vmap_area *va;
@@ -3458,14 +3483,13 @@ static int s_show(struct seq_file *m, void *p)
va = list_entry(p, struct vmap_area, list);
/*
- * s_show can encounter race with remove_vm_area, !VM_VM_AREA on
- * behalf of vmap area is being tear down or vm_map_ram allocation.
+ * s_show can encounter race with remove_vm_area, !vm on behalf
+ * of vmap area is being tear down or vm_map_ram allocation.
*/
- if (!(va->flags & VM_VM_AREA)) {
- seq_printf(m, "0x%pK-0x%pK %7ld %s\n",
+ if (!va->vm) {
+ seq_printf(m, "0x%pK-0x%pK %7ld vm_map_ram\n",
(void *)va->va_start, (void *)va->va_end,
- va->va_end - va->va_start,
- va->flags & VM_LAZY_FREE ? "unpurged vm_area" : "vm_map_ram");
+ va->va_end - va->va_start);
return 0;
}
@@ -3504,6 +3528,16 @@ static int s_show(struct seq_file *m, void *p)
show_numa_info(m, v);
seq_putc(m, '\n');
+
+ /*
+ * As a final step, dump "unpurged" areas. Note,
+ * that entire "/proc/vmallocinfo" output will not
+ * be address sorted, because the purge list is not
+ * sorted.
+ */
+ if (list_is_last(&va->list, &vmap_area_list))
+ show_purge_info(m);
+
return 0;
}
diff --git a/mm/vmscan.c b/mm/vmscan.c
index a6c5d0b28321..4911754c93b7 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -171,11 +171,22 @@ int vm_swappiness = 60;
*/
unsigned long vm_total_pages;
+static void set_task_reclaim_state(struct task_struct *task,
+ struct reclaim_state *rs)
+{
+ /* Check for an overwrite */
+ WARN_ON_ONCE(rs && task->reclaim_state);
+
+ /* Check for the nulling of an already-nulled member */
+ WARN_ON_ONCE(!rs && !task->reclaim_state);
+
+ task->reclaim_state = rs;
+}
+
static LIST_HEAD(shrinker_list);
static DECLARE_RWSEM(shrinker_rwsem);
-#ifdef CONFIG_MEMCG_KMEM
-
+#ifdef CONFIG_MEMCG
/*
* We allow subsystems to populate their shrinker-related
* LRU lists before register_shrinker_prepared() is called
@@ -227,30 +238,7 @@ static void unregister_memcg_shrinker(struct shrinker *shrinker)
idr_remove(&shrinker_idr, id);
up_write(&shrinker_rwsem);
}
-#else /* CONFIG_MEMCG_KMEM */
-static int prealloc_memcg_shrinker(struct shrinker *shrinker)
-{
- return 0;
-}
-
-static void unregister_memcg_shrinker(struct shrinker *shrinker)
-{
-}
-#endif /* CONFIG_MEMCG_KMEM */
-
-static void set_task_reclaim_state(struct task_struct *task,
- struct reclaim_state *rs)
-{
- /* Check for an overwrite */
- WARN_ON_ONCE(rs && task->reclaim_state);
-
- /* Check for the nulling of an already-nulled member */
- WARN_ON_ONCE(!rs && !task->reclaim_state);
- task->reclaim_state = rs;
-}
-
-#ifdef CONFIG_MEMCG
static bool global_reclaim(struct scan_control *sc)
{
return !sc->target_mem_cgroup;
@@ -305,6 +293,15 @@ static bool memcg_congested(pg_data_t *pgdat,
}
#else
+static int prealloc_memcg_shrinker(struct shrinker *shrinker)
+{
+ return 0;
+}
+
+static void unregister_memcg_shrinker(struct shrinker *shrinker)
+{
+}
+
static bool global_reclaim(struct scan_control *sc)
{
return true;
@@ -591,7 +588,7 @@ static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
return freed;
}
-#ifdef CONFIG_MEMCG_KMEM
+#ifdef CONFIG_MEMCG
static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
struct mem_cgroup *memcg, int priority)
{
@@ -599,7 +596,7 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
unsigned long ret, freed = 0;
int i;
- if (!memcg_kmem_enabled() || !mem_cgroup_online(memcg))
+ if (!mem_cgroup_online(memcg))
return 0;
if (!down_read_trylock(&shrinker_rwsem))
@@ -625,6 +622,11 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
continue;
}
+ /* Call non-slab shrinkers even though kmem is disabled */
+ if (!memcg_kmem_enabled() &&
+ !(shrinker->flags & SHRINKER_NONSLAB))
+ continue;
+
ret = do_shrink_slab(&sc, shrinker, priority);
if (ret == SHRINK_EMPTY) {
clear_bit(i, map->map);
@@ -661,13 +663,13 @@ unlock:
up_read(&shrinker_rwsem);
return freed;
}
-#else /* CONFIG_MEMCG_KMEM */
+#else /* CONFIG_MEMCG */
static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
struct mem_cgroup *memcg, int priority)
{
return 0;
}
-#endif /* CONFIG_MEMCG_KMEM */
+#endif /* CONFIG_MEMCG */
/**
* shrink_slab - shrink slab caches
@@ -1149,7 +1151,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
VM_BUG_ON_PAGE(PageActive(page), page);
- nr_pages = 1 << compound_order(page);
+ nr_pages = compound_nr(page);
/* Account the number of base pages even though THP */
sc->nr_scanned += nr_pages;
@@ -1487,10 +1489,9 @@ free_it:
* Is there need to periodically free_page_list? It would
* appear not as the counts should be low
*/
- if (unlikely(PageTransHuge(page))) {
- mem_cgroup_uncharge(page);
+ if (unlikely(PageTransHuge(page)))
(*get_compound_page_dtor(page))(page);
- } else
+ else
list_add(&page->lru, &free_pages);
continue;
@@ -1705,7 +1706,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
VM_BUG_ON_PAGE(!PageLRU(page), page);
- nr_pages = 1 << compound_order(page);
+ nr_pages = compound_nr(page);
total_scan += nr_pages;
if (page_zonenum(page) > sc->reclaim_idx) {
@@ -1911,7 +1912,6 @@ static unsigned noinline_for_stack move_pages_to_lru(struct lruvec *lruvec,
if (unlikely(PageCompound(page))) {
spin_unlock_irq(&pgdat->lru_lock);
- mem_cgroup_uncharge(page);
(*get_compound_page_dtor(page))(page);
spin_lock_irq(&pgdat->lru_lock);
} else
@@ -2586,7 +2586,6 @@ static bool in_reclaim_compaction(struct scan_control *sc)
*/
static inline bool should_continue_reclaim(struct pglist_data *pgdat,
unsigned long nr_reclaimed,
- unsigned long nr_scanned,
struct scan_control *sc)
{
unsigned long pages_for_compaction;
@@ -2597,40 +2596,18 @@ static inline bool should_continue_reclaim(struct pglist_data *pgdat,
if (!in_reclaim_compaction(sc))
return false;
- /* Consider stopping depending on scan and reclaim activity */
- if (sc->gfp_mask & __GFP_RETRY_MAYFAIL) {
- /*
- * For __GFP_RETRY_MAYFAIL allocations, stop reclaiming if the
- * full LRU list has been scanned and we are still failing
- * to reclaim pages. This full LRU scan is potentially
- * expensive but a __GFP_RETRY_MAYFAIL caller really wants to succeed
- */
- if (!nr_reclaimed && !nr_scanned)
- return false;
- } else {
- /*
- * For non-__GFP_RETRY_MAYFAIL allocations which can presumably
- * fail without consequence, stop if we failed to reclaim
- * any pages from the last SWAP_CLUSTER_MAX number of
- * pages that were scanned. This will return to the
- * caller faster at the risk reclaim/compaction and
- * the resulting allocation attempt fails
- */
- if (!nr_reclaimed)
- return false;
- }
-
/*
- * If we have not reclaimed enough pages for compaction and the
- * inactive lists are large enough, continue reclaiming
+ * Stop if we failed to reclaim any pages from the last SWAP_CLUSTER_MAX
+ * number of pages that were scanned. This will return to the caller
+ * with the risk reclaim/compaction and the resulting allocation attempt
+ * fails. In the past we have tried harder for __GFP_RETRY_MAYFAIL
+ * allocations through requiring that the full LRU list has been scanned
+ * first, by assuming that zero delta of sc->nr_scanned means full LRU
+ * scan, but that approximation was wrong, and there were corner cases
+ * where always a non-zero amount of pages were scanned.
*/
- pages_for_compaction = compact_gap(sc->order);
- inactive_lru_pages = node_page_state(pgdat, NR_INACTIVE_FILE);
- if (get_nr_swap_pages() > 0)
- inactive_lru_pages += node_page_state(pgdat, NR_INACTIVE_ANON);
- if (sc->nr_reclaimed < pages_for_compaction &&
- inactive_lru_pages > pages_for_compaction)
- return true;
+ if (!nr_reclaimed)
+ return false;
/* If compaction would go ahead or the allocation would succeed, stop */
for (z = 0; z <= sc->reclaim_idx; z++) {
@@ -2647,7 +2624,17 @@ static inline bool should_continue_reclaim(struct pglist_data *pgdat,
;
}
}
- return true;
+
+ /*
+ * If we have not reclaimed enough pages for compaction and the
+ * inactive lists are large enough, continue reclaiming
+ */
+ pages_for_compaction = compact_gap(sc->order);
+ inactive_lru_pages = node_page_state(pgdat, NR_INACTIVE_FILE);
+ if (get_nr_swap_pages() > 0)
+ inactive_lru_pages += node_page_state(pgdat, NR_INACTIVE_ANON);
+
+ return inactive_lru_pages > pages_for_compaction;
}
static bool pgdat_memcg_congested(pg_data_t *pgdat, struct mem_cgroup *memcg)
@@ -2664,10 +2651,6 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
do {
struct mem_cgroup *root = sc->target_mem_cgroup;
- struct mem_cgroup_reclaim_cookie reclaim = {
- .pgdat = pgdat,
- .priority = sc->priority,
- };
unsigned long node_lru_pages = 0;
struct mem_cgroup *memcg;
@@ -2676,7 +2659,7 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
nr_reclaimed = sc->nr_reclaimed;
nr_scanned = sc->nr_scanned;
- memcg = mem_cgroup_iter(root, NULL, &reclaim);
+ memcg = mem_cgroup_iter(root, NULL, NULL);
do {
unsigned long lru_pages;
unsigned long reclaimed;
@@ -2719,21 +2702,7 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
sc->nr_scanned - scanned,
sc->nr_reclaimed - reclaimed);
- /*
- * Kswapd have to scan all memory cgroups to fulfill
- * the overall scan target for the node.
- *
- * Limit reclaim, on the other hand, only cares about
- * nr_to_reclaim pages to be reclaimed and it will
- * retry with decreasing priority if one round over the
- * whole hierarchy is not sufficient.
- */
- if (!current_is_kswapd() &&
- sc->nr_reclaimed >= sc->nr_to_reclaim) {
- mem_cgroup_iter_break(root, memcg);
- break;
- }
- } while ((memcg = mem_cgroup_iter(root, memcg, &reclaim)));
+ } while ((memcg = mem_cgroup_iter(root, memcg, NULL)));
if (reclaim_state) {
sc->nr_reclaimed += reclaim_state->reclaimed_slab;
@@ -2810,7 +2779,7 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
wait_iff_congested(BLK_RW_ASYNC, HZ/10);
} while (should_continue_reclaim(pgdat, sc->nr_reclaimed - nr_reclaimed,
- sc->nr_scanned - nr_scanned, sc));
+ sc));
/*
* Kswapd gives up on balancing particular nodes after too
diff --git a/mm/vmstat.c b/mm/vmstat.c
index fd7e16ca6996..6afc892a148a 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1158,6 +1158,8 @@ const char * const vmstat_text[] = {
"nr_shmem",
"nr_shmem_hugepages",
"nr_shmem_pmdmapped",
+ "nr_file_hugepages",
+ "nr_file_pmdmapped",
"nr_anon_transparent_hugepages",
"nr_unstable",
"nr_vmscan_write",
diff --git a/mm/z3fold.c b/mm/z3fold.c
index 75b7962439ff..05bdf90646e7 100644
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -41,7 +41,6 @@
#include <linux/workqueue.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
-#include <linux/wait.h>
#include <linux/zpool.h>
#include <linux/magic.h>
@@ -146,8 +145,6 @@ struct z3fold_header {
* @release_wq: workqueue for safe page release
* @work: work_struct for safe page release
* @inode: inode for z3fold pseudo filesystem
- * @destroying: bool to stop migration once we start destruction
- * @isolated: int to count the number of pages currently in isolation
*
* This structure is allocated at pool creation time and maintains metadata
* pertaining to a particular z3fold pool.
@@ -166,11 +163,8 @@ struct z3fold_pool {
const struct zpool_ops *zpool_ops;
struct workqueue_struct *compact_wq;
struct workqueue_struct *release_wq;
- struct wait_queue_head isolate_wait;
struct work_struct work;
struct inode *inode;
- bool destroying;
- int isolated;
};
/*
@@ -301,14 +295,11 @@ static void z3fold_unregister_migration(struct z3fold_pool *pool)
}
/* Initializes the z3fold header of a newly allocated z3fold page */
-static struct z3fold_header *init_z3fold_page(struct page *page,
+static struct z3fold_header *init_z3fold_page(struct page *page, bool headless,
struct z3fold_pool *pool, gfp_t gfp)
{
struct z3fold_header *zhdr = page_address(page);
- struct z3fold_buddy_slots *slots = alloc_slots(pool, gfp);
-
- if (!slots)
- return NULL;
+ struct z3fold_buddy_slots *slots;
INIT_LIST_HEAD(&page->lru);
clear_bit(PAGE_HEADLESS, &page->private);
@@ -316,6 +307,12 @@ static struct z3fold_header *init_z3fold_page(struct page *page,
clear_bit(NEEDS_COMPACTING, &page->private);
clear_bit(PAGE_STALE, &page->private);
clear_bit(PAGE_CLAIMED, &page->private);
+ if (headless)
+ return zhdr;
+
+ slots = alloc_slots(pool, gfp);
+ if (!slots)
+ return NULL;
spin_lock_init(&zhdr->page_lock);
kref_init(&zhdr->refcount);
@@ -372,9 +369,10 @@ static inline int __idx(struct z3fold_header *zhdr, enum buddy bud)
* Encodes the handle of a particular buddy within a z3fold page
* Pool lock should be held as this function accesses first_num
*/
-static unsigned long encode_handle(struct z3fold_header *zhdr, enum buddy bud)
+static unsigned long __encode_handle(struct z3fold_header *zhdr,
+ struct z3fold_buddy_slots *slots,
+ enum buddy bud)
{
- struct z3fold_buddy_slots *slots;
unsigned long h = (unsigned long)zhdr;
int idx = 0;
@@ -391,11 +389,15 @@ static unsigned long encode_handle(struct z3fold_header *zhdr, enum buddy bud)
if (bud == LAST)
h |= (zhdr->last_chunks << BUDDY_SHIFT);
- slots = zhdr->slots;
slots->slot[idx] = h;
return (unsigned long)&slots->slot[idx];
}
+static unsigned long encode_handle(struct z3fold_header *zhdr, enum buddy bud)
+{
+ return __encode_handle(zhdr, zhdr->slots, bud);
+}
+
/* Returns the z3fold page where a given handle is stored */
static inline struct z3fold_header *handle_to_z3fold_header(unsigned long h)
{
@@ -630,6 +632,7 @@ static void do_compact_page(struct z3fold_header *zhdr, bool locked)
}
if (unlikely(PageIsolated(page) ||
+ test_bit(PAGE_CLAIMED, &page->private) ||
test_bit(PAGE_STALE, &page->private))) {
z3fold_page_unlock(zhdr);
return;
@@ -775,7 +778,6 @@ static struct z3fold_pool *z3fold_create_pool(const char *name, gfp_t gfp,
goto out_c;
spin_lock_init(&pool->lock);
spin_lock_init(&pool->stale_lock);
- init_waitqueue_head(&pool->isolate_wait);
pool->unbuddied = __alloc_percpu(sizeof(struct list_head)*NCHUNKS, 2);
if (!pool->unbuddied)
goto out_pool;
@@ -815,15 +817,6 @@ out:
return NULL;
}
-static bool pool_isolated_are_drained(struct z3fold_pool *pool)
-{
- bool ret;
-
- spin_lock(&pool->lock);
- ret = pool->isolated == 0;
- spin_unlock(&pool->lock);
- return ret;
-}
/**
* z3fold_destroy_pool() - destroys an existing z3fold pool
* @pool: the z3fold pool to be destroyed
@@ -833,22 +826,6 @@ static bool pool_isolated_are_drained(struct z3fold_pool *pool)
static void z3fold_destroy_pool(struct z3fold_pool *pool)
{
kmem_cache_destroy(pool->c_handle);
- /*
- * We set pool-> destroying under lock to ensure that
- * z3fold_page_isolate() sees any changes to destroying. This way we
- * avoid the need for any memory barriers.
- */
-
- spin_lock(&pool->lock);
- pool->destroying = true;
- spin_unlock(&pool->lock);
-
- /*
- * We need to ensure that no pages are being migrated while we destroy
- * these workqueues, as migration can queue work on either of the
- * workqueues.
- */
- wait_event(pool->isolate_wait, !pool_isolated_are_drained(pool));
/*
* We need to destroy pool->compact_wq before pool->release_wq,
@@ -956,7 +933,7 @@ retry:
if (!page)
return -ENOMEM;
- zhdr = init_z3fold_page(page, pool, gfp);
+ zhdr = init_z3fold_page(page, bud == HEADLESS, pool, gfp);
if (!zhdr) {
__free_page(page);
return -ENOMEM;
@@ -1132,6 +1109,7 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
struct z3fold_header *zhdr = NULL;
struct page *page = NULL;
struct list_head *pos;
+ struct z3fold_buddy_slots slots;
unsigned long first_handle = 0, middle_handle = 0, last_handle = 0;
spin_lock(&pool->lock);
@@ -1150,16 +1128,22 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
/* this bit could have been set by free, in which case
* we pass over to the next page in the pool.
*/
- if (test_and_set_bit(PAGE_CLAIMED, &page->private))
+ if (test_and_set_bit(PAGE_CLAIMED, &page->private)) {
+ page = NULL;
continue;
+ }
- if (unlikely(PageIsolated(page)))
+ if (unlikely(PageIsolated(page))) {
+ clear_bit(PAGE_CLAIMED, &page->private);
+ page = NULL;
continue;
+ }
+ zhdr = page_address(page);
if (test_bit(PAGE_HEADLESS, &page->private))
break;
- zhdr = page_address(page);
if (!z3fold_page_trylock(zhdr)) {
+ clear_bit(PAGE_CLAIMED, &page->private);
zhdr = NULL;
continue; /* can't evict at this point */
}
@@ -1177,26 +1161,30 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
if (!test_bit(PAGE_HEADLESS, &page->private)) {
/*
- * We need encode the handles before unlocking, since
- * we can race with free that will set
- * (first|last)_chunks to 0
+ * We need encode the handles before unlocking, and
+ * use our local slots structure because z3fold_free
+ * can zero out zhdr->slots and we can't do much
+ * about that
*/
first_handle = 0;
last_handle = 0;
middle_handle = 0;
if (zhdr->first_chunks)
- first_handle = encode_handle(zhdr, FIRST);
+ first_handle = __encode_handle(zhdr, &slots,
+ FIRST);
if (zhdr->middle_chunks)
- middle_handle = encode_handle(zhdr, MIDDLE);
+ middle_handle = __encode_handle(zhdr, &slots,
+ MIDDLE);
if (zhdr->last_chunks)
- last_handle = encode_handle(zhdr, LAST);
+ last_handle = __encode_handle(zhdr, &slots,
+ LAST);
/*
* it's safe to unlock here because we hold a
* reference to this page
*/
z3fold_page_unlock(zhdr);
} else {
- first_handle = encode_handle(zhdr, HEADLESS);
+ first_handle = __encode_handle(zhdr, &slots, HEADLESS);
last_handle = middle_handle = 0;
}
@@ -1226,9 +1214,9 @@ next:
spin_lock(&pool->lock);
list_add(&page->lru, &pool->lru);
spin_unlock(&pool->lock);
+ clear_bit(PAGE_CLAIMED, &page->private);
} else {
z3fold_page_lock(zhdr);
- clear_bit(PAGE_CLAIMED, &page->private);
if (kref_put(&zhdr->refcount,
release_z3fold_page_locked)) {
atomic64_dec(&pool->pages_nr);
@@ -1243,6 +1231,7 @@ next:
list_add(&page->lru, &pool->lru);
spin_unlock(&pool->lock);
z3fold_page_unlock(zhdr);
+ clear_bit(PAGE_CLAIMED, &page->private);
}
/* We started off locked to we need to lock the pool back */
@@ -1339,28 +1328,6 @@ static u64 z3fold_get_pool_size(struct z3fold_pool *pool)
return atomic64_read(&pool->pages_nr);
}
-/*
- * z3fold_dec_isolated() expects to be called while pool->lock is held.
- */
-static void z3fold_dec_isolated(struct z3fold_pool *pool)
-{
- assert_spin_locked(&pool->lock);
- VM_BUG_ON(pool->isolated <= 0);
- pool->isolated--;
-
- /*
- * If we have no more isolated pages, we have to see if
- * z3fold_destroy_pool() is waiting for a signal.
- */
- if (pool->isolated == 0 && waitqueue_active(&pool->isolate_wait))
- wake_up_all(&pool->isolate_wait);
-}
-
-static void z3fold_inc_isolated(struct z3fold_pool *pool)
-{
- pool->isolated++;
-}
-
static bool z3fold_page_isolate(struct page *page, isolate_mode_t mode)
{
struct z3fold_header *zhdr;
@@ -1369,7 +1336,8 @@ static bool z3fold_page_isolate(struct page *page, isolate_mode_t mode)
VM_BUG_ON_PAGE(!PageMovable(page), page);
VM_BUG_ON_PAGE(PageIsolated(page), page);
- if (test_bit(PAGE_HEADLESS, &page->private))
+ if (test_bit(PAGE_HEADLESS, &page->private) ||
+ test_bit(PAGE_CLAIMED, &page->private))
return false;
zhdr = page_address(page);
@@ -1387,34 +1355,6 @@ static bool z3fold_page_isolate(struct page *page, isolate_mode_t mode)
spin_lock(&pool->lock);
if (!list_empty(&page->lru))
list_del(&page->lru);
- /*
- * We need to check for destruction while holding pool->lock, as
- * otherwise destruction could see 0 isolated pages, and
- * proceed.
- */
- if (unlikely(pool->destroying)) {
- spin_unlock(&pool->lock);
- /*
- * If this page isn't stale, somebody else holds a
- * reference to it. Let't drop our refcount so that they
- * can call the release logic.
- */
- if (unlikely(kref_put(&zhdr->refcount,
- release_z3fold_page_locked))) {
- /*
- * If we get here we have kref problems, so we
- * should freak out.
- */
- WARN(1, "Z3fold is experiencing kref problems\n");
- z3fold_page_unlock(zhdr);
- return false;
- }
- z3fold_page_unlock(zhdr);
- return false;
- }
-
-
- z3fold_inc_isolated(pool);
spin_unlock(&pool->lock);
z3fold_page_unlock(zhdr);
return true;
@@ -1483,10 +1423,6 @@ static int z3fold_page_migrate(struct address_space *mapping, struct page *newpa
queue_work_on(new_zhdr->cpu, pool->compact_wq, &new_zhdr->work);
- spin_lock(&pool->lock);
- z3fold_dec_isolated(pool);
- spin_unlock(&pool->lock);
-
page_mapcount_reset(page);
put_page(page);
return 0;
@@ -1506,14 +1442,10 @@ static void z3fold_page_putback(struct page *page)
INIT_LIST_HEAD(&page->lru);
if (kref_put(&zhdr->refcount, release_z3fold_page_locked)) {
atomic64_dec(&pool->pages_nr);
- spin_lock(&pool->lock);
- z3fold_dec_isolated(pool);
- spin_unlock(&pool->lock);
return;
}
spin_lock(&pool->lock);
list_add(&page->lru, &pool->lru);
- z3fold_dec_isolated(pool);
spin_unlock(&pool->lock);
z3fold_page_unlock(zhdr);
}
diff --git a/mm/zpool.c b/mm/zpool.c
index a2dd9107857d..863669212070 100644
--- a/mm/zpool.c
+++ b/mm/zpool.c
@@ -239,6 +239,22 @@ const char *zpool_get_type(struct zpool *zpool)
}
/**
+ * zpool_malloc_support_movable() - Check if the zpool support
+ * allocate movable memory
+ * @zpool: The zpool to check
+ *
+ * This returns if the zpool support allocate movable memory.
+ *
+ * Implementations must guarantee this to be thread-safe.
+ *
+ * Returns: true if if the zpool support allocate movable memory, false if not
+ */
+bool zpool_malloc_support_movable(struct zpool *zpool)
+{
+ return zpool->driver->malloc_support_movable;
+}
+
+/**
* zpool_malloc() - Allocate memory
* @zpool: The zpool to allocate from.
* @size: The amount of memory to allocate.
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index e98bb6ab4f7e..2b2b9aae8a3c 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -443,15 +443,16 @@ static u64 zs_zpool_total_size(void *pool)
}
static struct zpool_driver zs_zpool_driver = {
- .type = "zsmalloc",
- .owner = THIS_MODULE,
- .create = zs_zpool_create,
- .destroy = zs_zpool_destroy,
- .malloc = zs_zpool_malloc,
- .free = zs_zpool_free,
- .map = zs_zpool_map,
- .unmap = zs_zpool_unmap,
- .total_size = zs_zpool_total_size,
+ .type = "zsmalloc",
+ .owner = THIS_MODULE,
+ .create = zs_zpool_create,
+ .destroy = zs_zpool_destroy,
+ .malloc_support_movable = true,
+ .malloc = zs_zpool_malloc,
+ .free = zs_zpool_free,
+ .map = zs_zpool_map,
+ .unmap = zs_zpool_unmap,
+ .total_size = zs_zpool_total_size,
};
MODULE_ALIAS("zpool-zsmalloc");
@@ -476,10 +477,6 @@ static inline int get_zspage_inuse(struct zspage *zspage)
return zspage->inuse;
}
-static inline void set_zspage_inuse(struct zspage *zspage, int val)
-{
- zspage->inuse = val;
-}
static inline void mod_zspage_inuse(struct zspage *zspage, int val)
{
diff --git a/mm/zswap.c b/mm/zswap.c
index 0e22744a76cb..46a322316e52 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -856,7 +856,6 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
/* extract swpentry from data */
zhdr = zpool_map_handle(pool, handle, ZPOOL_MM_RO);
swpentry = zhdr->swpentry; /* here */
- zpool_unmap_handle(pool, handle);
tree = zswap_trees[swp_type(swpentry)];
offset = swp_offset(swpentry);
@@ -866,6 +865,7 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
if (!entry) {
/* entry was invalidated */
spin_unlock(&tree->lock);
+ zpool_unmap_handle(pool, handle);
return 0;
}
spin_unlock(&tree->lock);
@@ -886,15 +886,13 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
case ZSWAP_SWAPCACHE_NEW: /* page is locked */
/* decompress */
dlen = PAGE_SIZE;
- src = (u8 *)zpool_map_handle(entry->pool->zpool, entry->handle,
- ZPOOL_MM_RO) + sizeof(struct zswap_header);
+ src = (u8 *)zhdr + sizeof(struct zswap_header);
dst = kmap_atomic(page);
tfm = *get_cpu_ptr(entry->pool->tfm);
ret = crypto_comp_decompress(tfm, src, entry->length,
dst, &dlen);
put_cpu_ptr(entry->pool->tfm);
kunmap_atomic(dst);
- zpool_unmap_handle(entry->pool->zpool, entry->handle);
BUG_ON(ret);
BUG_ON(dlen != PAGE_SIZE);
@@ -940,6 +938,7 @@ fail:
spin_unlock(&tree->lock);
end:
+ zpool_unmap_handle(pool, handle);
return ret;
}
@@ -997,6 +996,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
char *buf;
u8 *src, *dst;
struct zswap_header zhdr = { .swpentry = swp_entry(type, offset) };
+ gfp_t gfp;
/* THP isn't supported */
if (PageTransHuge(page)) {
@@ -1070,9 +1070,10 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
/* store */
hlen = zpool_evictable(entry->pool->zpool) ? sizeof(zhdr) : 0;
- ret = zpool_malloc(entry->pool->zpool, hlen + dlen,
- __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM,
- &handle);
+ gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
+ if (zpool_malloc_support_movable(entry->pool->zpool))
+ gfp |= __GFP_HIGHMEM | __GFP_MOVABLE;
+ ret = zpool_malloc(entry->pool->zpool, hlen + dlen, gfp, &handle);
if (ret == -ENOSPC) {
zswap_reject_compress_poor++;
goto put_dstmem;
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index 947b8ff0227e..bba3104f128f 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -206,14 +206,7 @@ static int xdp_umem_map_pages(struct xdp_umem *umem)
static void xdp_umem_unpin_pages(struct xdp_umem *umem)
{
- unsigned int i;
-
- for (i = 0; i < umem->npgs; i++) {
- struct page *page = umem->pgs[i];
-
- set_page_dirty_lock(page);
- put_page(page);
- }
+ put_user_pages_dirty_lock(umem->pgs, umem->npgs, true);
kfree(umem->pgs);
umem->pgs = NULL;
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index c2f1af3b6a7c..fa8fbb8fa3c8 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -977,7 +977,7 @@ static int xsk_mmap(struct file *file, struct socket *sock,
/* Matches the smp_wmb() in xsk_init_queue */
smp_rmb();
qpg = virt_to_head_page(q->ring);
- if (size > (PAGE_SIZE << compound_order(qpg)))
+ if (size > page_size(qpg))
return -EINVAL;
pfn = virt_to_phys(q->ring) >> PAGE_SHIFT;
diff --git a/sound/firewire/dice/dice-alesis.c b/sound/firewire/dice/dice-alesis.c
index 218292bdace6..f5b325263b67 100644
--- a/sound/firewire/dice/dice-alesis.c
+++ b/sound/firewire/dice/dice-alesis.c
@@ -15,7 +15,7 @@ alesis_io14_tx_pcm_chs[MAX_STREAMS][SND_DICE_RATE_MODE_COUNT] = {
static const unsigned int
alesis_io26_tx_pcm_chs[MAX_STREAMS][SND_DICE_RATE_MODE_COUNT] = {
- {10, 10, 8}, /* Tx0 = Analog + S/PDIF. */
+ {10, 10, 4}, /* Tx0 = Analog + S/PDIF. */
{16, 8, 0}, /* Tx1 = ADAT1 + ADAT2. */
};
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 91e71be42fa4..240f4ca76391 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -2485,8 +2485,7 @@ static const struct pci_device_id azx_ids[] = {
AZX_DCAPS_PM_RUNTIME },
/* AMD Raven */
{ PCI_DEVICE(0x1022, 0x15e3),
- .driver_data = AZX_DRIVER_GENERIC | AZX_DCAPS_PRESET_ATI_SB |
- AZX_DCAPS_PM_RUNTIME },
+ .driver_data = AZX_DRIVER_GENERIC | AZX_DCAPS_PRESET_AMD_SB },
/* ATI HDMI */
{ PCI_DEVICE(0x1002, 0x0002),
.driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS },
diff --git a/sound/pci/hda/patch_analog.c b/sound/pci/hda/patch_analog.c
index e283966bdbb1..bc9dd8e6fd86 100644
--- a/sound/pci/hda/patch_analog.c
+++ b/sound/pci/hda/patch_analog.c
@@ -357,6 +357,7 @@ static const struct hda_fixup ad1986a_fixups[] = {
static const struct snd_pci_quirk ad1986a_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x30af, "HP B2800", AD1986A_FIXUP_LAPTOP_IMIC),
+ SND_PCI_QUIRK(0x1043, 0x1153, "ASUS M9V", AD1986A_FIXUP_LAPTOP_IMIC),
SND_PCI_QUIRK(0x1043, 0x1443, "ASUS Z99He", AD1986A_FIXUP_EAPD),
SND_PCI_QUIRK(0x1043, 0x1447, "ASUS A8JN", AD1986A_FIXUP_EAPD),
SND_PCI_QUIRK_MASK(0x1043, 0xff00, 0x8100, "ASUS P5", AD1986A_FIXUP_3STACK),
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index da1695418731..b000b36ac3c6 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -5817,6 +5817,7 @@ enum {
ALC292_FIXUP_DELL_E7X,
ALC292_FIXUP_DISABLE_AAMIX,
ALC293_FIXUP_DISABLE_AAMIX_MULTIJACK,
+ ALC298_FIXUP_ALIENWARE_MIC_NO_PRESENCE,
ALC298_FIXUP_DELL1_MIC_NO_PRESENCE,
ALC298_FIXUP_DELL_AIO_MIC_NO_PRESENCE,
ALC275_FIXUP_DELL_XPS,
@@ -5871,6 +5872,7 @@ enum {
ALC256_FIXUP_ASUS_MIC_NO_PRESENCE,
ALC299_FIXUP_PREDATOR_SPK,
ALC294_FIXUP_ASUS_INTSPK_HEADSET_MIC,
+ ALC256_FIXUP_MEDION_HEADSET_NO_PRESENCE,
};
static const struct hda_fixup alc269_fixups[] = {
@@ -6506,6 +6508,15 @@ static const struct hda_fixup alc269_fixups[] = {
.chained = true,
.chain_id = ALC292_FIXUP_DISABLE_AAMIX
},
+ [ALC298_FIXUP_ALIENWARE_MIC_NO_PRESENCE] = {
+ .type = HDA_FIXUP_PINS,
+ .v.pins = (const struct hda_pintbl[]) {
+ { 0x18, 0x01a1913c }, /* headset mic w/o jack detect */
+ { }
+ },
+ .chained_before = true,
+ .chain_id = ALC269_FIXUP_HEADSET_MODE,
+ },
[ALC298_FIXUP_DELL1_MIC_NO_PRESENCE] = {
.type = HDA_FIXUP_PINS,
.v.pins = (const struct hda_pintbl[]) {
@@ -6927,6 +6938,16 @@ static const struct hda_fixup alc269_fixups[] = {
.chained = true,
.chain_id = ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC
},
+ [ALC256_FIXUP_MEDION_HEADSET_NO_PRESENCE] = {
+ .type = HDA_FIXUP_PINS,
+ .v.pins = (const struct hda_pintbl[]) {
+ { 0x19, 0x04a11040 },
+ { 0x21, 0x04211020 },
+ { }
+ },
+ .chained = true,
+ .chain_id = ALC256_FIXUP_ASUS_HEADSET_MODE
+ },
};
static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -7190,6 +7211,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x17aa, 0x9e54, "LENOVO NB", ALC269_FIXUP_LENOVO_EAPD),
SND_PCI_QUIRK(0x19e5, 0x3204, "Huawei MACH-WX9", ALC256_FIXUP_HUAWEI_MACH_WX9_PINS),
SND_PCI_QUIRK(0x1b7d, 0xa831, "Ordissimo EVE2 ", ALC269VB_FIXUP_ORDISSIMO_EVE2), /* Also known as Malata PC-B1303 */
+ SND_PCI_QUIRK(0x10ec, 0x118c, "Medion EE4254 MD62100", ALC256_FIXUP_MEDION_HEADSET_NO_PRESENCE),
#if 0
/* Below is a quirk table taken from the old code.
@@ -7358,6 +7380,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = {
{.id = ALC295_FIXUP_CHROME_BOOK, .name = "alc-chrome-book"},
{.id = ALC299_FIXUP_PREDATOR_SPK, .name = "predator-spk"},
{.id = ALC298_FIXUP_HUAWEI_MBX_STEREO, .name = "huawei-mbx-stereo"},
+ {.id = ALC256_FIXUP_MEDION_HEADSET_NO_PRESENCE, .name = "alc256-medion-headset"},
{}
};
#define ALC225_STANDARD_PINS \
@@ -7770,6 +7793,11 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
{0x17, 0x90170110},
{0x1a, 0x03011020},
{0x21, 0x03211030}),
+ SND_HDA_PIN_QUIRK(0x10ec0298, 0x1028, "Dell", ALC298_FIXUP_ALIENWARE_MIC_NO_PRESENCE,
+ {0x12, 0xb7a60140},
+ {0x17, 0x90170110},
+ {0x1a, 0x03a11030},
+ {0x21, 0x03211020}),
SND_HDA_PIN_QUIRK(0x10ec0299, 0x1028, "Dell", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE,
ALC225_STANDARD_PINS,
{0x12, 0xb7a60130},
diff --git a/sound/soc/atmel/atmel_ssc_dai.c b/sound/soc/atmel/atmel_ssc_dai.c
index 48e9eef34c0f..ca603397651c 100644
--- a/sound/soc/atmel/atmel_ssc_dai.c
+++ b/sound/soc/atmel/atmel_ssc_dai.c
@@ -116,19 +116,16 @@ static struct atmel_pcm_dma_params ssc_dma_params[NUM_SSC_DEVICES][2] = {
static struct atmel_ssc_info ssc_info[NUM_SSC_DEVICES] = {
{
.name = "ssc0",
- .lock = __SPIN_LOCK_UNLOCKED(ssc_info[0].lock),
.dir_mask = SSC_DIR_MASK_UNUSED,
.initialized = 0,
},
{
.name = "ssc1",
- .lock = __SPIN_LOCK_UNLOCKED(ssc_info[1].lock),
.dir_mask = SSC_DIR_MASK_UNUSED,
.initialized = 0,
},
{
.name = "ssc2",
- .lock = __SPIN_LOCK_UNLOCKED(ssc_info[2].lock),
.dir_mask = SSC_DIR_MASK_UNUSED,
.initialized = 0,
},
@@ -317,13 +314,10 @@ static int atmel_ssc_startup(struct snd_pcm_substream *substream,
snd_soc_dai_set_dma_data(dai, substream, dma_params);
- spin_lock_irq(&ssc_p->lock);
- if (ssc_p->dir_mask & dir_mask) {
- spin_unlock_irq(&ssc_p->lock);
+ if (ssc_p->dir_mask & dir_mask)
return -EBUSY;
- }
+
ssc_p->dir_mask |= dir_mask;
- spin_unlock_irq(&ssc_p->lock);
return 0;
}
@@ -355,7 +349,6 @@ static void atmel_ssc_shutdown(struct snd_pcm_substream *substream,
dir_mask = 1 << dir;
- spin_lock_irq(&ssc_p->lock);
ssc_p->dir_mask &= ~dir_mask;
if (!ssc_p->dir_mask) {
if (ssc_p->initialized) {
@@ -369,7 +362,6 @@ static void atmel_ssc_shutdown(struct snd_pcm_substream *substream,
ssc_p->cmr_div = ssc_p->tcmr_period = ssc_p->rcmr_period = 0;
ssc_p->forced_divider = 0;
}
- spin_unlock_irq(&ssc_p->lock);
/* Shutdown the SSC clock. */
pr_debug("atmel_ssc_dai: Stopping clock\n");
diff --git a/sound/soc/atmel/atmel_ssc_dai.h b/sound/soc/atmel/atmel_ssc_dai.h
index ae764cb541c7..3470b966e449 100644
--- a/sound/soc/atmel/atmel_ssc_dai.h
+++ b/sound/soc/atmel/atmel_ssc_dai.h
@@ -93,7 +93,6 @@ struct atmel_ssc_state {
struct atmel_ssc_info {
char *name;
struct ssc_device *ssc;
- spinlock_t lock; /* lock for dir_mask */
unsigned short dir_mask; /* 0=unused, 1=playback, 2=capture */
unsigned short initialized; /* true if SSC has been initialized */
unsigned short daifmt;
diff --git a/sound/soc/codecs/pcm3168a.c b/sound/soc/codecs/pcm3168a.c
index 50ed86d45c26..88b75695fbf7 100644
--- a/sound/soc/codecs/pcm3168a.c
+++ b/sound/soc/codecs/pcm3168a.c
@@ -21,8 +21,7 @@
#define PCM3168A_FORMATS (SNDRV_PCM_FMTBIT_S16_LE | \
SNDRV_PCM_FMTBIT_S24_3LE | \
- SNDRV_PCM_FMTBIT_S24_LE | \
- SNDRV_PCM_FMTBIT_S32_LE)
+ SNDRV_PCM_FMTBIT_S24_LE)
#define PCM3168A_FMT_I2S 0x0
#define PCM3168A_FMT_LEFT_J 0x1
diff --git a/sound/soc/fsl/fsl_sai.c b/sound/soc/fsl/fsl_sai.c
index ef0b74693093..b517e4bc1b87 100644
--- a/sound/soc/fsl/fsl_sai.c
+++ b/sound/soc/fsl/fsl_sai.c
@@ -628,6 +628,16 @@ static int fsl_sai_startup(struct snd_pcm_substream *substream,
FSL_SAI_CR3_TRCE_MASK,
FSL_SAI_CR3_TRCE);
+ /*
+ * EDMA controller needs period size to be a multiple of
+ * tx/rx maxburst
+ */
+ if (sai->soc_data->use_edma)
+ snd_pcm_hw_constraint_step(substream->runtime, 0,
+ SNDRV_PCM_HW_PARAM_PERIOD_SIZE,
+ tx ? sai->dma_params_tx.maxburst :
+ sai->dma_params_rx.maxburst);
+
ret = snd_pcm_hw_constraint_list(substream->runtime, 0,
SNDRV_PCM_HW_PARAM_RATE, &fsl_sai_rate_constraints);
@@ -1026,30 +1036,35 @@ static int fsl_sai_remove(struct platform_device *pdev)
static const struct fsl_sai_soc_data fsl_sai_vf610_data = {
.use_imx_pcm = false,
+ .use_edma = false,
.fifo_depth = 32,
.reg_offset = 0,
};
static const struct fsl_sai_soc_data fsl_sai_imx6sx_data = {
.use_imx_pcm = true,
+ .use_edma = false,
.fifo_depth = 32,
.reg_offset = 0,
};
static const struct fsl_sai_soc_data fsl_sai_imx7ulp_data = {
.use_imx_pcm = true,
+ .use_edma = false,
.fifo_depth = 16,
.reg_offset = 8,
};
static const struct fsl_sai_soc_data fsl_sai_imx8mq_data = {
.use_imx_pcm = true,
+ .use_edma = false,
.fifo_depth = 128,
.reg_offset = 8,
};
static const struct fsl_sai_soc_data fsl_sai_imx8qm_data = {
.use_imx_pcm = true,
+ .use_edma = true,
.fifo_depth = 64,
.reg_offset = 0,
};
diff --git a/sound/soc/fsl/fsl_sai.h b/sound/soc/fsl/fsl_sai.h
index b12cb578f6d0..76b15deea80c 100644
--- a/sound/soc/fsl/fsl_sai.h
+++ b/sound/soc/fsl/fsl_sai.h
@@ -157,6 +157,7 @@
struct fsl_sai_soc_data {
bool use_imx_pcm;
+ bool use_edma;
unsigned int fifo_depth;
unsigned int reg_offset;
};
diff --git a/sound/soc/sh/rcar/ssi.c b/sound/soc/sh/rcar/ssi.c
index f6a7466622ea..fc5d089868df 100644
--- a/sound/soc/sh/rcar/ssi.c
+++ b/sound/soc/sh/rcar/ssi.c
@@ -286,6 +286,11 @@ static int rsnd_ssi_master_clk_start(struct rsnd_mod *mod,
if (rsnd_ssi_is_multi_slave(mod, io))
return 0;
+ if (rsnd_runtime_is_tdm_split(io))
+ chan = rsnd_io_converted_chan(io);
+
+ chan = rsnd_channel_normalization(chan);
+
if (ssi->usrcnt > 0) {
if (ssi->rate != rate) {
dev_err(dev, "SSI parent/child should use same rate\n");
@@ -300,11 +305,6 @@ static int rsnd_ssi_master_clk_start(struct rsnd_mod *mod,
return 0;
}
- if (rsnd_runtime_is_tdm_split(io))
- chan = rsnd_io_converted_chan(io);
-
- chan = rsnd_channel_normalization(chan);
-
main_rate = rsnd_ssi_clk_query(rdai, rate, chan, &idx);
if (!main_rate) {
dev_err(dev, "unsupported clock rate\n");
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index 35f48e9c5ead..88978a3036c4 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -978,7 +978,7 @@ static void soc_cleanup_component(struct snd_soc_component *component)
/* For framework level robustness */
snd_soc_component_set_jack(component, NULL, NULL);
- list_del(&component->card_list);
+ list_del_init(&component->card_list);
snd_soc_dapm_free(snd_soc_component_get_dapm(component));
soc_cleanup_component_debugfs(component);
component->card = NULL;
diff --git a/sound/soc/ti/Kconfig b/sound/soc/ti/Kconfig
index 87a9b9dd4e98..29f61053ab62 100644
--- a/sound/soc/ti/Kconfig
+++ b/sound/soc/ti/Kconfig
@@ -200,11 +200,18 @@ config SND_SOC_DM365_AIC3X_CODEC
config SND_SOC_DM365_VOICE_CODEC
bool "Voice Codec - CQ93VC"
- select MFD_DAVINCI_VOICECODEC
- select SND_SOC_CQ0093VC
help
Say Y if you want to add support for SoC On-chip voice codec
endchoice
+config SND_SOC_DM365_VOICE_CODEC_MODULE
+ def_tristate y
+ depends on SND_SOC_DM365_VOICE_CODEC && SND_SOC
+ select MFD_DAVINCI_VOICECODEC
+ select SND_SOC_CQ0093VC
+ help
+ The is an internal symbol needed to ensure that the codec
+ and MFD driver can be built as loadable modules if necessary.
+
endmenu
diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
index 25faf2d3c639..fbfde996fee7 100644
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c
@@ -1658,6 +1658,8 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip,
case 0x25ce: /* Mytek devices */
case 0x278b: /* Rotel? */
case 0x2ab6: /* T+A devices */
+ case 0x3842: /* EVGA */
+ case 0xc502: /* HiBy devices */
if (fp->dsd_raw)
return SNDRV_PCM_FMTBIT_DSD_U32_BE;
break;
diff --git a/tools/hv/Build b/tools/hv/Build
new file mode 100644
index 000000000000..6cf51fa4b306
--- /dev/null
+++ b/tools/hv/Build
@@ -0,0 +1,3 @@
+hv_kvp_daemon-y += hv_kvp_daemon.o
+hv_vss_daemon-y += hv_vss_daemon.o
+hv_fcopy_daemon-y += hv_fcopy_daemon.o
diff --git a/tools/hv/Makefile b/tools/hv/Makefile
index 5db5e62cebda..b57143d9459c 100644
--- a/tools/hv/Makefile
+++ b/tools/hv/Makefile
@@ -1,28 +1,55 @@
# SPDX-License-Identifier: GPL-2.0
# Makefile for Hyper-V tools
-
-WARNINGS = -Wall -Wextra
-CFLAGS = $(WARNINGS) -g $(shell getconf LFS_CFLAGS)
-
-CFLAGS += -D__EXPORTED_HEADERS__ -I../../include/uapi -I../../include
+include ../scripts/Makefile.include
sbindir ?= /usr/sbin
libexecdir ?= /usr/libexec
sharedstatedir ?= /var/lib
-ALL_PROGRAMS := hv_kvp_daemon hv_vss_daemon hv_fcopy_daemon
+ifeq ($(srctree),)
+srctree := $(patsubst %/,%,$(dir $(CURDIR)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+endif
+
+# Do not use make's built-in rules
+# (this improves performance and avoids hard-to-debug behaviour);
+MAKEFLAGS += -r
+
+override CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include
+
+ALL_TARGETS := hv_kvp_daemon hv_vss_daemon hv_fcopy_daemon
+ALL_PROGRAMS := $(patsubst %,$(OUTPUT)%,$(ALL_TARGETS))
ALL_SCRIPTS := hv_get_dhcp_info.sh hv_get_dns_info.sh hv_set_ifconfig.sh
all: $(ALL_PROGRAMS)
-%: %.c
- $(CC) $(CFLAGS) -o $@ $^
+export srctree OUTPUT CC LD CFLAGS
+include $(srctree)/tools/build/Makefile.include
+
+HV_KVP_DAEMON_IN := $(OUTPUT)hv_kvp_daemon-in.o
+$(HV_KVP_DAEMON_IN): FORCE
+ $(Q)$(MAKE) $(build)=hv_kvp_daemon
+$(OUTPUT)hv_kvp_daemon: $(HV_KVP_DAEMON_IN)
+ $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@
+
+HV_VSS_DAEMON_IN := $(OUTPUT)hv_vss_daemon-in.o
+$(HV_VSS_DAEMON_IN): FORCE
+ $(Q)$(MAKE) $(build)=hv_vss_daemon
+$(OUTPUT)hv_vss_daemon: $(HV_VSS_DAEMON_IN)
+ $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@
+
+HV_FCOPY_DAEMON_IN := $(OUTPUT)hv_fcopy_daemon-in.o
+$(HV_FCOPY_DAEMON_IN): FORCE
+ $(Q)$(MAKE) $(build)=hv_fcopy_daemon
+$(OUTPUT)hv_fcopy_daemon: $(HV_FCOPY_DAEMON_IN)
+ $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@
clean:
- $(RM) hv_kvp_daemon hv_vss_daemon hv_fcopy_daemon
+ rm -f $(ALL_PROGRAMS)
+ find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.d' -delete
-install: all
+install: $(ALL_PROGRAMS)
install -d -m 755 $(DESTDIR)$(sbindir); \
install -d -m 755 $(DESTDIR)$(libexecdir)/hypervkvpd; \
install -d -m 755 $(DESTDIR)$(sharedstatedir); \
@@ -33,3 +60,7 @@ install: all
for script in $(ALL_SCRIPTS); do \
install $$script -m 755 $(DESTDIR)$(libexecdir)/hypervkvpd/$${script%.sh}; \
done
+
+FORCE:
+
+.PHONY: all install clean FORCE prepare
diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c
index 59753b3917bb..2a9890c8395a 100644
--- a/tools/power/x86/intel-speed-select/isst-config.c
+++ b/tools/power/x86/intel-speed-select/isst-config.c
@@ -38,6 +38,7 @@ static int fact_avx = 0xFF;
static unsigned long long fact_trl;
static int out_format_json;
static int cmd_help;
+static int force_online_offline;
/* clos related */
static int current_clos = -1;
@@ -138,14 +139,14 @@ int out_format_is_json(void)
int get_physical_package_id(int cpu)
{
return parse_int_file(
- 1, "/sys/devices/system/cpu/cpu%d/topology/physical_package_id",
+ 0, "/sys/devices/system/cpu/cpu%d/topology/physical_package_id",
cpu);
}
int get_physical_core_id(int cpu)
{
return parse_int_file(
- 1, "/sys/devices/system/cpu/cpu%d/topology/core_id", cpu);
+ 0, "/sys/devices/system/cpu/cpu%d/topology/core_id", cpu);
}
int get_physical_die_id(int cpu)
@@ -165,6 +166,26 @@ int get_topo_max_cpus(void)
return topo_max_cpus;
}
+static void set_cpu_online_offline(int cpu, int state)
+{
+ char buffer[128];
+ int fd;
+
+ snprintf(buffer, sizeof(buffer),
+ "/sys/devices/system/cpu/cpu%d/online", cpu);
+
+ fd = open(buffer, O_WRONLY);
+ if (fd < 0)
+ err(-1, "%s open failed", buffer);
+
+ if (state)
+ write(fd, "1\n", 2);
+ else
+ write(fd, "0\n", 2);
+
+ close(fd);
+}
+
#define MAX_PACKAGE_COUNT 8
#define MAX_DIE_PER_PACKAGE 2
static void for_each_online_package_in_set(void (*callback)(int, void *, void *,
@@ -402,6 +423,9 @@ void set_cpu_mask_from_punit_coremask(int cpu, unsigned long long core_mask,
int j;
for (j = 0; j < topo_max_cpus; ++j) {
+ if (!CPU_ISSET_S(j, present_cpumask_size, present_cpumask))
+ continue;
+
if (cpu_map[j].pkg_id == pkg_id &&
cpu_map[j].die_id == die_id &&
cpu_map[j].punit_cpu_core == i) {
@@ -484,7 +508,7 @@ int isst_send_mbox_command(unsigned int cpu, unsigned char command,
int write = 0;
int clos_id, core_id, ret = 0;
- debug_printf("CLOS %d\n", cpu);
+ debug_printf("CPU %d\n", cpu);
if (parameter & BIT(MBOX_CMD_WRITE_BIT)) {
value = req_data;
@@ -649,8 +673,8 @@ static void exec_on_get_ctdp_cpu(int cpu, void *arg1, void *arg2, void *arg3,
if (ret)
perror("get_tdp_*");
else
- isst_display_result(cpu, outf, "perf-profile", (char *)arg3,
- *(unsigned int *)arg4);
+ isst_ctdp_display_core_info(cpu, outf, arg3,
+ *(unsigned int *)arg4);
}
#define _get_tdp_level(desc, suffix, object, help) \
@@ -733,9 +757,34 @@ static void set_tdp_level_for_cpu(int cpu, void *arg1, void *arg2, void *arg3,
ret = isst_set_tdp_level(cpu, tdp_level);
if (ret)
perror("set_tdp_level_for_cpu");
- else
+ else {
isst_display_result(cpu, outf, "perf-profile", "set_tdp_level",
ret);
+ if (force_online_offline) {
+ struct isst_pkg_ctdp_level_info ctdp_level;
+ int pkg_id = get_physical_package_id(cpu);
+ int die_id = get_physical_die_id(cpu);
+
+ fprintf(stderr, "Option is set to online/offline\n");
+ ctdp_level.core_cpumask_size =
+ alloc_cpu_set(&ctdp_level.core_cpumask);
+ isst_get_coremask_info(cpu, tdp_level, &ctdp_level);
+ if (ctdp_level.cpu_count) {
+ int i, max_cpus = get_topo_max_cpus();
+ for (i = 0; i < max_cpus; ++i) {
+ if (pkg_id != get_physical_package_id(i) || die_id != get_physical_die_id(i))
+ continue;
+ if (CPU_ISSET_S(i, ctdp_level.core_cpumask_size, ctdp_level.core_cpumask)) {
+ fprintf(stderr, "online cpu %d\n", i);
+ set_cpu_online_offline(i, 1);
+ } else {
+ fprintf(stderr, "offline cpu %d\n", i);
+ set_cpu_online_offline(i, 0);
+ }
+ }
+ }
+ }
+ }
}
static void set_tdp_level(void)
@@ -744,6 +793,8 @@ static void set_tdp_level(void)
fprintf(stderr, "Set Config TDP level\n");
fprintf(stderr,
"\t Arguments: -l|--level : Specify tdp level\n");
+ fprintf(stderr,
+ "\t Optional Arguments: -o | online : online/offline for the tdp level\n");
exit(0);
}
@@ -1082,6 +1133,40 @@ static void dump_clos_config(void)
isst_ctdp_display_information_end(outf);
}
+static void get_clos_info_for_cpu(int cpu, void *arg1, void *arg2, void *arg3,
+ void *arg4)
+{
+ int enable, ret, prio_type;
+
+ ret = isst_clos_get_clos_information(cpu, &enable, &prio_type);
+ if (ret)
+ perror("isst_clos_get_info");
+ else
+ isst_clos_display_clos_information(cpu, outf, enable, prio_type);
+}
+
+static void dump_clos_info(void)
+{
+ if (cmd_help) {
+ fprintf(stderr,
+ "Print Intel Speed Select Technology core power information\n");
+ fprintf(stderr, "\tSpecify targeted cpu id with [--cpu|-c]\n");
+ exit(0);
+ }
+
+ if (!max_target_cpus) {
+ fprintf(stderr,
+ "Invalid target cpu. Specify with [-c|--cpu]\n");
+ exit(0);
+ }
+
+ isst_ctdp_display_information_start(outf);
+ for_each_online_target_cpu_in_set(get_clos_info_for_cpu, NULL,
+ NULL, NULL, NULL);
+ isst_ctdp_display_information_end(outf);
+
+}
+
static void set_clos_config_for_cpu(int cpu, void *arg1, void *arg2, void *arg3,
void *arg4)
{
@@ -1198,7 +1283,7 @@ static void get_clos_assoc_for_cpu(int cpu, void *arg1, void *arg2, void *arg3,
if (ret)
perror("isst_clos_get_assoc_status");
else
- isst_display_result(cpu, outf, "core-power", "get-assoc", clos);
+ isst_clos_display_assoc_information(cpu, outf, clos);
}
static void get_clos_assoc(void)
@@ -1208,13 +1293,17 @@ static void get_clos_assoc(void)
fprintf(stderr, "\tSpecify targeted cpu id with [--cpu|-c]\n");
exit(0);
}
- if (max_target_cpus)
- for_each_online_target_cpu_in_set(get_clos_assoc_for_cpu, NULL,
- NULL, NULL, NULL);
- else {
+
+ if (!max_target_cpus) {
fprintf(stderr,
"Invalid target cpu. Specify with [-c|--cpu]\n");
+ exit(0);
}
+
+ isst_ctdp_display_information_start(outf);
+ for_each_online_target_cpu_in_set(get_clos_assoc_for_cpu, NULL,
+ NULL, NULL, NULL);
+ isst_ctdp_display_information_end(outf);
}
static struct process_cmd_struct isst_cmds[] = {
@@ -1231,10 +1320,11 @@ static struct process_cmd_struct isst_cmds[] = {
{ "turbo-freq", "info", dump_fact_config },
{ "turbo-freq", "enable", set_fact_enable },
{ "turbo-freq", "disable", set_fact_disable },
- { "core-power", "info", dump_clos_config },
+ { "core-power", "info", dump_clos_info },
{ "core-power", "enable", set_clos_enable },
{ "core-power", "disable", set_clos_disable },
{ "core-power", "config", set_clos_config },
+ { "core-power", "get-config", dump_clos_config },
{ "core-power", "assoc", set_clos_assoc },
{ "core-power", "get-assoc", get_clos_assoc },
{ NULL, NULL, NULL }
@@ -1316,6 +1406,7 @@ static void parse_cmd_args(int argc, int start, char **argv)
static struct option long_options[] = {
{ "bucket", required_argument, 0, 'b' },
{ "level", required_argument, 0, 'l' },
+ { "online", required_argument, 0, 'o' },
{ "trl-type", required_argument, 0, 'r' },
{ "trl", required_argument, 0, 't' },
{ "help", no_argument, 0, 'h' },
@@ -1332,7 +1423,7 @@ static void parse_cmd_args(int argc, int start, char **argv)
option_index = start;
optind = start + 1;
- while ((opt = getopt_long(argc, argv, "b:l:t:c:d:e:n:m:p:w:h",
+ while ((opt = getopt_long(argc, argv, "b:l:t:c:d:e:n:m:p:w:ho",
long_options, &option_index)) != -1) {
switch (opt) {
case 'b':
@@ -1344,6 +1435,9 @@ static void parse_cmd_args(int argc, int start, char **argv)
case 'l':
tdp_level = atoi(optarg);
break;
+ case 'o':
+ force_online_offline = 1;
+ break;
case 't':
sscanf(optarg, "0x%llx", &fact_trl);
break;
@@ -1362,7 +1456,6 @@ static void parse_cmd_args(int argc, int start, char **argv)
/* CLOS related */
case 'c':
current_clos = atoi(optarg);
- printf("clos %d\n", current_clos);
break;
case 'd':
clos_desired = atoi(optarg);
@@ -1433,6 +1526,7 @@ static void core_power_help(void)
printf("\tenable\n");
printf("\tdisable\n");
printf("\tconfig\n");
+ printf("\tget-config\n");
printf("\tassoc\n");
printf("\tget-assoc\n");
}
diff --git a/tools/power/x86/intel-speed-select/isst-core.c b/tools/power/x86/intel-speed-select/isst-core.c
index 0bf341ad9697..6dee5332c9d3 100644
--- a/tools/power/x86/intel-speed-select/isst-core.c
+++ b/tools/power/x86/intel-speed-select/isst-core.c
@@ -619,6 +619,31 @@ int isst_get_process_ctdp(int cpu, int tdp_level, struct isst_pkg_ctdp *pkg_dev)
return 0;
}
+int isst_clos_get_clos_information(int cpu, int *enable, int *type)
+{
+ unsigned int resp;
+ int ret;
+
+ ret = isst_send_mbox_command(cpu, CONFIG_CLOS, CLOS_PM_QOS_CONFIG, 0, 0,
+ &resp);
+ if (ret)
+ return ret;
+
+ debug_printf("cpu:%d CLOS_PM_QOS_CONFIG resp:%x\n", cpu, resp);
+
+ if (resp & BIT(1))
+ *enable = 1;
+ else
+ *enable = 0;
+
+ if (resp & BIT(2))
+ *type = 1;
+ else
+ *type = 0;
+
+ return 0;
+}
+
int isst_pm_qos_config(int cpu, int enable_clos, int priority_type)
{
unsigned int req, resp;
diff --git a/tools/power/x86/intel-speed-select/isst-display.c b/tools/power/x86/intel-speed-select/isst-display.c
index df4aa99c4e92..40346d534f78 100644
--- a/tools/power/x86/intel-speed-select/isst-display.c
+++ b/tools/power/x86/intel-speed-select/isst-display.c
@@ -287,6 +287,26 @@ static void _isst_fact_display_information(int cpu, FILE *outf, int level,
format_and_print(outf, base_level + 2, header, value);
}
+void isst_ctdp_display_core_info(int cpu, FILE *outf, char *prefix,
+ unsigned int val)
+{
+ char header[256];
+ char value[256];
+
+ snprintf(header, sizeof(header), "package-%d",
+ get_physical_package_id(cpu));
+ format_and_print(outf, 1, header, NULL);
+ snprintf(header, sizeof(header), "die-%d", get_physical_die_id(cpu));
+ format_and_print(outf, 2, header, NULL);
+ snprintf(header, sizeof(header), "cpu-%d", cpu);
+ format_and_print(outf, 3, header, NULL);
+
+ snprintf(value, sizeof(value), "%u", val);
+ format_and_print(outf, 4, prefix, value);
+
+ format_and_print(outf, 1, NULL, NULL);
+}
+
void isst_ctdp_display_information(int cpu, FILE *outf, int tdp_level,
struct isst_pkg_ctdp *pkg_dev)
{
@@ -503,6 +523,57 @@ void isst_clos_display_information(int cpu, FILE *outf, int clos,
format_and_print(outf, 1, NULL, NULL);
}
+void isst_clos_display_clos_information(int cpu, FILE *outf,
+ int clos_enable, int type)
+{
+ char header[256];
+ char value[256];
+
+ snprintf(header, sizeof(header), "package-%d",
+ get_physical_package_id(cpu));
+ format_and_print(outf, 1, header, NULL);
+ snprintf(header, sizeof(header), "die-%d", get_physical_die_id(cpu));
+ format_and_print(outf, 2, header, NULL);
+ snprintf(header, sizeof(header), "cpu-%d", cpu);
+ format_and_print(outf, 3, header, NULL);
+
+ snprintf(header, sizeof(header), "core-power");
+ format_and_print(outf, 4, header, NULL);
+
+ snprintf(header, sizeof(header), "enable-status");
+ snprintf(value, sizeof(value), "%d", clos_enable);
+ format_and_print(outf, 5, header, value);
+
+ snprintf(header, sizeof(header), "priority-type");
+ snprintf(value, sizeof(value), "%d", type);
+ format_and_print(outf, 5, header, value);
+
+ format_and_print(outf, 1, NULL, NULL);
+}
+
+void isst_clos_display_assoc_information(int cpu, FILE *outf, int clos)
+{
+ char header[256];
+ char value[256];
+
+ snprintf(header, sizeof(header), "package-%d",
+ get_physical_package_id(cpu));
+ format_and_print(outf, 1, header, NULL);
+ snprintf(header, sizeof(header), "die-%d", get_physical_die_id(cpu));
+ format_and_print(outf, 2, header, NULL);
+ snprintf(header, sizeof(header), "cpu-%d", cpu);
+ format_and_print(outf, 3, header, NULL);
+
+ snprintf(header, sizeof(header), "get-assoc");
+ format_and_print(outf, 4, header, NULL);
+
+ snprintf(header, sizeof(header), "clos");
+ snprintf(value, sizeof(value), "%d", clos);
+ format_and_print(outf, 5, header, value);
+
+ format_and_print(outf, 1, NULL, NULL);
+}
+
void isst_display_result(int cpu, FILE *outf, char *feature, char *cmd,
int result)
{
diff --git a/tools/power/x86/intel-speed-select/isst.h b/tools/power/x86/intel-speed-select/isst.h
index 2f7f62765eb6..d280b27d600d 100644
--- a/tools/power/x86/intel-speed-select/isst.h
+++ b/tools/power/x86/intel-speed-select/isst.h
@@ -187,12 +187,16 @@ extern int isst_send_msr_command(unsigned int cpu, unsigned int command,
int write, unsigned long long *req_resp);
extern int isst_get_ctdp_levels(int cpu, struct isst_pkg_ctdp *pkg_dev);
+extern int isst_get_coremask_info(int cpu, int config_index,
+ struct isst_pkg_ctdp_level_info *ctdp_level);
extern int isst_get_process_ctdp(int cpu, int tdp_level,
struct isst_pkg_ctdp *pkg_dev);
extern void isst_get_process_ctdp_complete(int cpu,
struct isst_pkg_ctdp *pkg_dev);
extern void isst_ctdp_display_information(int cpu, FILE *outf, int tdp_level,
struct isst_pkg_ctdp *pkg_dev);
+extern void isst_ctdp_display_core_info(int cpu, FILE *outf, char *prefix,
+ unsigned int val);
extern void isst_ctdp_display_information_start(FILE *outf);
extern void isst_ctdp_display_information_end(FILE *outf);
extern void isst_pbf_display_information(int cpu, FILE *outf, int level,
@@ -223,10 +227,14 @@ extern int isst_clos_associate(int cpu, int clos);
extern int isst_clos_get_assoc_status(int cpu, int *clos_id);
extern void isst_clos_display_information(int cpu, FILE *outf, int clos,
struct isst_clos_config *clos_config);
-
+extern void isst_clos_display_assoc_information(int cpu, FILE *outf, int clos);
extern int isst_read_reg(unsigned short reg, unsigned int *val);
extern int isst_write_reg(int reg, unsigned int val);
extern void isst_display_result(int cpu, FILE *outf, char *feature, char *cmd,
int result);
+
+extern int isst_clos_get_clos_information(int cpu, int *enable, int *type);
+extern void isst_clos_display_clos_information(int cpu, FILE *outf,
+ int clos_enable, int type);
#endif
diff --git a/usr/Makefile b/usr/Makefile
index 6a89eb019275..e6f7cb2f81db 100644
--- a/usr/Makefile
+++ b/usr/Makefile
@@ -11,6 +11,9 @@ datafile_y = initramfs_data.cpio$(suffix_y)
datafile_d_y = .$(datafile_y).d
AFLAGS_initramfs_data.o += -DINITRAMFS_IMAGE="usr/$(datafile_y)"
+# clean rules do not have CONFIG_INITRAMFS_COMPRESSION. So clean up after all
+# possible compression formats.
+clean-files += initramfs_data.cpio*
# Generate builtin.o based on initramfs_data.o
obj-$(CONFIG_BLK_DEV_INITRD) := initramfs_data.o
OpenPOWER on IntegriCloud