diff options
Diffstat (limited to 'mm/shmem.c')
-rw-r--r-- | mm/shmem.c | 208 |
1 files changed, 125 insertions, 83 deletions
diff --git a/mm/shmem.c b/mm/shmem.c index fbcb3c96a186..07a1d22807be 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -34,6 +34,7 @@ #include <linux/swap.h> #include <linux/uio.h> #include <linux/khugepaged.h> +#include <linux/hugetlb.h> #include <asm/tlbflush.h> /* for arch/microblaze update_mmu_cache() */ @@ -188,6 +189,38 @@ static inline void shmem_unacct_blocks(unsigned long flags, long pages) vm_unacct_memory(pages * VM_ACCT(PAGE_SIZE)); } +static inline bool shmem_inode_acct_block(struct inode *inode, long pages) +{ + struct shmem_inode_info *info = SHMEM_I(inode); + struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); + + if (shmem_acct_block(info->flags, pages)) + return false; + + if (sbinfo->max_blocks) { + if (percpu_counter_compare(&sbinfo->used_blocks, + sbinfo->max_blocks - pages) > 0) + goto unacct; + percpu_counter_add(&sbinfo->used_blocks, pages); + } + + return true; + +unacct: + shmem_unacct_blocks(info->flags, pages); + return false; +} + +static inline void shmem_inode_unacct_blocks(struct inode *inode, long pages) +{ + struct shmem_inode_info *info = SHMEM_I(inode); + struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); + + if (sbinfo->max_blocks) + percpu_counter_sub(&sbinfo->used_blocks, pages); + shmem_unacct_blocks(info->flags, pages); +} + static const struct super_operations shmem_ops; static const struct address_space_operations shmem_aops; static const struct file_operations shmem_file_operations; @@ -249,23 +282,20 @@ static void shmem_recalc_inode(struct inode *inode) freed = info->alloced - info->swapped - inode->i_mapping->nrpages; if (freed > 0) { - struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); - if (sbinfo->max_blocks) - percpu_counter_add(&sbinfo->used_blocks, -freed); info->alloced -= freed; inode->i_blocks -= freed * BLOCKS_PER_PAGE; - shmem_unacct_blocks(info->flags, freed); + shmem_inode_unacct_blocks(inode, freed); } } bool shmem_charge(struct inode *inode, long pages) { struct shmem_inode_info *info = SHMEM_I(inode); - struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); unsigned long flags; - if (shmem_acct_block(info->flags, pages)) + if (!shmem_inode_acct_block(inode, pages)) return false; + spin_lock_irqsave(&info->lock, flags); info->alloced += pages; inode->i_blocks += pages * BLOCKS_PER_PAGE; @@ -273,26 +303,12 @@ bool shmem_charge(struct inode *inode, long pages) spin_unlock_irqrestore(&info->lock, flags); inode->i_mapping->nrpages += pages; - if (!sbinfo->max_blocks) - return true; - if (percpu_counter_compare(&sbinfo->used_blocks, - sbinfo->max_blocks - pages) > 0) { - inode->i_mapping->nrpages -= pages; - spin_lock_irqsave(&info->lock, flags); - info->alloced -= pages; - shmem_recalc_inode(inode); - spin_unlock_irqrestore(&info->lock, flags); - shmem_unacct_blocks(info->flags, pages); - return false; - } - percpu_counter_add(&sbinfo->used_blocks, pages); return true; } void shmem_uncharge(struct inode *inode, long pages) { struct shmem_inode_info *info = SHMEM_I(inode); - struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); unsigned long flags; spin_lock_irqsave(&info->lock, flags); @@ -301,9 +317,7 @@ void shmem_uncharge(struct inode *inode, long pages) shmem_recalc_inode(inode); spin_unlock_irqrestore(&info->lock, flags); - if (sbinfo->max_blocks) - percpu_counter_sub(&sbinfo->used_blocks, pages); - shmem_unacct_blocks(info->flags, pages); + shmem_inode_unacct_blocks(inode, pages); } /* @@ -1452,9 +1466,10 @@ static struct page *shmem_alloc_page(gfp_t gfp, } static struct page *shmem_alloc_and_acct_page(gfp_t gfp, - struct shmem_inode_info *info, struct shmem_sb_info *sbinfo, + struct inode *inode, pgoff_t index, bool huge) { + struct shmem_inode_info *info = SHMEM_I(inode); struct page *page; int nr; int err = -ENOSPC; @@ -1463,14 +1478,8 @@ static struct page *shmem_alloc_and_acct_page(gfp_t gfp, huge = false; nr = huge ? HPAGE_PMD_NR : 1; - if (shmem_acct_block(info->flags, nr)) + if (!shmem_inode_acct_block(inode, nr)) goto failed; - if (sbinfo->max_blocks) { - if (percpu_counter_compare(&sbinfo->used_blocks, - sbinfo->max_blocks - nr) > 0) - goto unacct; - percpu_counter_add(&sbinfo->used_blocks, nr); - } if (huge) page = shmem_alloc_hugepage(gfp, info, index); @@ -1483,10 +1492,7 @@ static struct page *shmem_alloc_and_acct_page(gfp_t gfp, } err = -ENOMEM; - if (sbinfo->max_blocks) - percpu_counter_add(&sbinfo->used_blocks, -nr); -unacct: - shmem_unacct_blocks(info->flags, nr); + shmem_inode_unacct_blocks(inode, nr); failed: return ERR_PTR(err); } @@ -1644,7 +1650,7 @@ repeat: if (swap.val) { /* Look it up and read it in.. */ - page = lookup_swap_cache(swap); + page = lookup_swap_cache(swap, NULL, 0); if (!page) { /* Or update major stats only when swapin succeeds?? */ if (fault_type) { @@ -1751,10 +1757,9 @@ repeat: } alloc_huge: - page = shmem_alloc_and_acct_page(gfp, info, sbinfo, - index, true); + page = shmem_alloc_and_acct_page(gfp, inode, index, true); if (IS_ERR(page)) { -alloc_nohuge: page = shmem_alloc_and_acct_page(gfp, info, sbinfo, +alloc_nohuge: page = shmem_alloc_and_acct_page(gfp, inode, index, false); } if (IS_ERR(page)) { @@ -1876,10 +1881,7 @@ clear: * Error recovery. */ unacct: - if (sbinfo->max_blocks) - percpu_counter_sub(&sbinfo->used_blocks, - 1 << compound_order(page)); - shmem_unacct_blocks(info->flags, 1 << compound_order(page)); + shmem_inode_unacct_blocks(inode, 1 << compound_order(page)); if (PageTransHuge(page)) { unlock_page(page); @@ -2206,16 +2208,16 @@ bool shmem_mapping(struct address_space *mapping) return mapping->a_ops == &shmem_aops; } -int shmem_mcopy_atomic_pte(struct mm_struct *dst_mm, - pmd_t *dst_pmd, - struct vm_area_struct *dst_vma, - unsigned long dst_addr, - unsigned long src_addr, - struct page **pagep) +static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm, + pmd_t *dst_pmd, + struct vm_area_struct *dst_vma, + unsigned long dst_addr, + unsigned long src_addr, + bool zeropage, + struct page **pagep) { struct inode *inode = file_inode(dst_vma->vm_file); struct shmem_inode_info *info = SHMEM_I(inode); - struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); struct address_space *mapping = inode->i_mapping; gfp_t gfp = mapping_gfp_mask(mapping); pgoff_t pgoff = linear_page_index(dst_vma, dst_addr); @@ -2227,33 +2229,30 @@ int shmem_mcopy_atomic_pte(struct mm_struct *dst_mm, int ret; ret = -ENOMEM; - if (shmem_acct_block(info->flags, 1)) + if (!shmem_inode_acct_block(inode, 1)) goto out; - if (sbinfo->max_blocks) { - if (percpu_counter_compare(&sbinfo->used_blocks, - sbinfo->max_blocks) >= 0) - goto out_unacct_blocks; - percpu_counter_inc(&sbinfo->used_blocks); - } if (!*pagep) { page = shmem_alloc_page(gfp, info, pgoff); if (!page) - goto out_dec_used_blocks; - - page_kaddr = kmap_atomic(page); - ret = copy_from_user(page_kaddr, (const void __user *)src_addr, - PAGE_SIZE); - kunmap_atomic(page_kaddr); - - /* fallback to copy_from_user outside mmap_sem */ - if (unlikely(ret)) { - *pagep = page; - if (sbinfo->max_blocks) - percpu_counter_add(&sbinfo->used_blocks, -1); - shmem_unacct_blocks(info->flags, 1); - /* don't free the page */ - return -EFAULT; + goto out_unacct_blocks; + + if (!zeropage) { /* mcopy_atomic */ + page_kaddr = kmap_atomic(page); + ret = copy_from_user(page_kaddr, + (const void __user *)src_addr, + PAGE_SIZE); + kunmap_atomic(page_kaddr); + + /* fallback to copy_from_user outside mmap_sem */ + if (unlikely(ret)) { + *pagep = page; + shmem_inode_unacct_blocks(inode, 1); + /* don't free the page */ + return -EFAULT; + } + } else { /* mfill_zeropage_atomic */ + clear_highpage(page); } } else { page = *pagep; @@ -2314,14 +2313,33 @@ out_release_uncharge: out_release: unlock_page(page); put_page(page); -out_dec_used_blocks: - if (sbinfo->max_blocks) - percpu_counter_add(&sbinfo->used_blocks, -1); out_unacct_blocks: - shmem_unacct_blocks(info->flags, 1); + shmem_inode_unacct_blocks(inode, 1); goto out; } +int shmem_mcopy_atomic_pte(struct mm_struct *dst_mm, + pmd_t *dst_pmd, + struct vm_area_struct *dst_vma, + unsigned long dst_addr, + unsigned long src_addr, + struct page **pagep) +{ + return shmem_mfill_atomic_pte(dst_mm, dst_pmd, dst_vma, + dst_addr, src_addr, false, pagep); +} + +int shmem_mfill_zeropage_pte(struct mm_struct *dst_mm, + pmd_t *dst_pmd, + struct vm_area_struct *dst_vma, + unsigned long dst_addr) +{ + struct page *page = NULL; + + return shmem_mfill_atomic_pte(dst_mm, dst_pmd, dst_vma, + dst_addr, 0, true, &page); +} + #ifdef CONFIG_TMPFS static const struct inode_operations shmem_symlink_inode_operations; static const struct inode_operations shmem_short_symlink_operations; @@ -3635,7 +3653,7 @@ static int shmem_show_options(struct seq_file *seq, struct dentry *root) #define MFD_NAME_PREFIX_LEN (sizeof(MFD_NAME_PREFIX) - 1) #define MFD_NAME_MAX_LEN (NAME_MAX - MFD_NAME_PREFIX_LEN) -#define MFD_ALL_FLAGS (MFD_CLOEXEC | MFD_ALLOW_SEALING) +#define MFD_ALL_FLAGS (MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_HUGETLB) SYSCALL_DEFINE2(memfd_create, const char __user *, uname, @@ -3647,8 +3665,18 @@ SYSCALL_DEFINE2(memfd_create, char *name; long len; - if (flags & ~(unsigned int)MFD_ALL_FLAGS) - return -EINVAL; + if (!(flags & MFD_HUGETLB)) { + if (flags & ~(unsigned int)MFD_ALL_FLAGS) + return -EINVAL; + } else { + /* Sealing not supported in hugetlbfs (MFD_HUGETLB) */ + if (flags & MFD_ALLOW_SEALING) + return -EINVAL; + /* Allow huge page size encoding in flags. */ + if (flags & ~(unsigned int)(MFD_ALL_FLAGS | + (MFD_HUGE_MASK << MFD_HUGE_SHIFT))) + return -EINVAL; + } /* length includes terminating zero */ len = strnlen_user(uname, MFD_NAME_MAX_LEN + 1); @@ -3657,7 +3685,7 @@ SYSCALL_DEFINE2(memfd_create, if (len > MFD_NAME_MAX_LEN + 1) return -EINVAL; - name = kmalloc(len + MFD_NAME_PREFIX_LEN, GFP_TEMPORARY); + name = kmalloc(len + MFD_NAME_PREFIX_LEN, GFP_KERNEL); if (!name) return -ENOMEM; @@ -3679,16 +3707,30 @@ SYSCALL_DEFINE2(memfd_create, goto err_name; } - file = shmem_file_setup(name, 0, VM_NORESERVE); + if (flags & MFD_HUGETLB) { + struct user_struct *user = NULL; + + file = hugetlb_file_setup(name, 0, VM_NORESERVE, &user, + HUGETLB_ANONHUGE_INODE, + (flags >> MFD_HUGE_SHIFT) & + MFD_HUGE_MASK); + } else + file = shmem_file_setup(name, 0, VM_NORESERVE); if (IS_ERR(file)) { error = PTR_ERR(file); goto err_fd; } - info = SHMEM_I(file_inode(file)); file->f_mode |= FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE; file->f_flags |= O_RDWR | O_LARGEFILE; - if (flags & MFD_ALLOW_SEALING) + + if (flags & MFD_ALLOW_SEALING) { + /* + * flags check at beginning of function ensures + * this is not a hugetlbfs (MFD_HUGETLB) file. + */ + info = SHMEM_I(file_inode(file)); info->seals &= ~F_SEAL_SEAL; + } fd_install(fd, file); kfree(name); |