diff options
Diffstat (limited to 'fs/proc')
-rw-r--r-- | fs/proc/Kconfig | 12 | ||||
-rw-r--r-- | fs/proc/Makefile | 1 | ||||
-rw-r--r-- | fs/proc/array.c | 2 | ||||
-rw-r--r-- | fs/proc/base.c | 104 | ||||
-rw-r--r-- | fs/proc/bootconfig.c | 89 | ||||
-rw-r--r-- | fs/proc/cpuinfo.c | 12 | ||||
-rw-r--r-- | fs/proc/generic.c | 75 | ||||
-rw-r--r-- | fs/proc/inode.c | 76 | ||||
-rw-r--r-- | fs/proc/internal.h | 7 | ||||
-rw-r--r-- | fs/proc/kcore.c | 19 | ||||
-rw-r--r-- | fs/proc/kmsg.c | 14 | ||||
-rw-r--r-- | fs/proc/meminfo.c | 8 | ||||
-rw-r--r-- | fs/proc/namespaces.c | 24 | ||||
-rw-r--r-- | fs/proc/page.c | 82 | ||||
-rw-r--r-- | fs/proc/proc_net.c | 32 | ||||
-rw-r--r-- | fs/proc/proc_sysctl.c | 2 | ||||
-rw-r--r-- | fs/proc/root.c | 18 | ||||
-rw-r--r-- | fs/proc/stat.c | 68 | ||||
-rw-r--r-- | fs/proc/task_mmu.c | 90 | ||||
-rw-r--r-- | fs/proc/uptime.c | 3 | ||||
-rw-r--r-- | fs/proc/vmcore.c | 18 |
21 files changed, 500 insertions, 256 deletions
diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig index cb5629bd5fff..27ef84d99f59 100644 --- a/fs/proc/Kconfig +++ b/fs/proc/Kconfig @@ -42,8 +42,8 @@ config PROC_VMCORE bool "/proc/vmcore support" depends on PROC_FS && CRASH_DUMP default y - help - Exports the dump image of crashed kernel in ELF format. + help + Exports the dump image of crashed kernel in ELF format. config PROC_VMCORE_DEVICE_DUMP bool "Device Hardware/Firmware Log Collection" @@ -72,7 +72,7 @@ config PROC_SYSCTL a recompile of the kernel or reboot of the system. The primary interface is through /proc/sys. If you say Y here a tree of modifiable sysctl entries will be generated beneath the - /proc/sys directory. They are explained in the files + /proc/sys directory. They are explained in the files in <file:Documentation/admin-guide/sysctl/>. Note that enabling this option will enlarge the kernel by at least 8 KB. @@ -88,7 +88,7 @@ config PROC_PAGE_MONITOR Various /proc files exist to monitor process memory utilization: /proc/pid/smaps, /proc/pid/clear_refs, /proc/pid/pagemap, /proc/kpagecount, and /proc/kpageflags. Disabling these - interfaces will reduce the size of the kernel by approximately 4kb. + interfaces will reduce the size of the kernel by approximately 4kb. 
config PROC_CHILDREN bool "Include /proc/<pid>/task/<tid>/children file" @@ -103,3 +103,7 @@ config PROC_CHILDREN config PROC_PID_ARCH_STATUS def_bool n depends on PROC_FS + +config PROC_CPU_RESCTRL + def_bool n + depends on PROC_FS diff --git a/fs/proc/Makefile b/fs/proc/Makefile index ead487e80510..bd08616ed8ba 100644 --- a/fs/proc/Makefile +++ b/fs/proc/Makefile @@ -33,3 +33,4 @@ proc-$(CONFIG_PROC_KCORE) += kcore.o proc-$(CONFIG_PROC_VMCORE) += vmcore.o proc-$(CONFIG_PRINTK) += kmsg.o proc-$(CONFIG_PROC_PAGE_MONITOR) += page.o +proc-$(CONFIG_BOOT_CONFIG) += bootconfig.o diff --git a/fs/proc/array.c b/fs/proc/array.c index 46dcb6f0eccf..5efaf3708ec6 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -533,7 +533,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, nice = task_nice(task); /* convert nsec -> ticks */ - start_time = nsec_to_clock_t(task->real_start_time); + start_time = nsec_to_clock_t(task->start_boottime); seq_put_decimal_ull(m, "", pid_nr_ns(pid, ns)); seq_puts(m, " ("); diff --git a/fs/proc/base.c b/fs/proc/base.c index ebea9501afb8..c7c64272b0fa 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -94,6 +94,8 @@ #include <linux/sched/debug.h> #include <linux/sched/stat.h> #include <linux/posix-timers.h> +#include <linux/time_namespace.h> +#include <linux/resctrl.h> #include <trace/events/oom.h> #include "internal.h" #include "fd.h" @@ -1533,6 +1535,96 @@ static const struct file_operations proc_pid_sched_autogroup_operations = { #endif /* CONFIG_SCHED_AUTOGROUP */ +#ifdef CONFIG_TIME_NS +static int timens_offsets_show(struct seq_file *m, void *v) +{ + struct task_struct *p; + + p = get_proc_task(file_inode(m->file)); + if (!p) + return -ESRCH; + proc_timens_show_offsets(p, m); + + put_task_struct(p); + + return 0; +} + +static ssize_t timens_offsets_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct inode *inode = file_inode(file); + struct proc_timens_offset offsets[2]; + char 
*kbuf = NULL, *pos, *next_line; + struct task_struct *p; + int ret, noffsets; + + /* Only allow < page size writes at the beginning of the file */ + if ((*ppos != 0) || (count >= PAGE_SIZE)) + return -EINVAL; + + /* Slurp in the user data */ + kbuf = memdup_user_nul(buf, count); + if (IS_ERR(kbuf)) + return PTR_ERR(kbuf); + + /* Parse the user data */ + ret = -EINVAL; + noffsets = 0; + for (pos = kbuf; pos; pos = next_line) { + struct proc_timens_offset *off = &offsets[noffsets]; + int err; + + /* Find the end of line and ensure we don't look past it */ + next_line = strchr(pos, '\n'); + if (next_line) { + *next_line = '\0'; + next_line++; + if (*next_line == '\0') + next_line = NULL; + } + + err = sscanf(pos, "%u %lld %lu", &off->clockid, + &off->val.tv_sec, &off->val.tv_nsec); + if (err != 3 || off->val.tv_nsec >= NSEC_PER_SEC) + goto out; + noffsets++; + if (noffsets == ARRAY_SIZE(offsets)) { + if (next_line) + count = next_line - kbuf; + break; + } + } + + ret = -ESRCH; + p = get_proc_task(inode); + if (!p) + goto out; + ret = proc_timens_set_offset(file, p, offsets, noffsets); + put_task_struct(p); + if (ret) + goto out; + + ret = count; +out: + kfree(kbuf); + return ret; +} + +static int timens_offsets_open(struct inode *inode, struct file *filp) +{ + return single_open(filp, timens_offsets_show, inode); +} + +static const struct file_operations proc_timens_offsets_operations = { + .open = timens_offsets_open, + .read = seq_read, + .write = timens_offsets_write, + .llseek = seq_lseek, + .release = single_release, +}; +#endif /* CONFIG_TIME_NS */ + static ssize_t comm_write(struct file *file, const char __user *buf, size_t count, loff_t *offset) { @@ -1626,8 +1718,7 @@ static const char *proc_pid_get_link(struct dentry *dentry, if (error) goto out; - nd_jump_link(&path); - return NULL; + error = nd_jump_link(&path); out: return ERR_PTR(error); } @@ -3016,6 +3107,9 @@ static const struct pid_entry tgid_base_stuff[] = { #ifdef CONFIG_SCHED_AUTOGROUP 
REG("autogroup", S_IRUGO|S_IWUSR, proc_pid_sched_autogroup_operations), #endif +#ifdef CONFIG_TIME_NS + REG("timens_offsets", S_IRUGO|S_IWUSR, proc_timens_offsets_operations), +#endif REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations), #ifdef CONFIG_HAVE_ARCH_TRACEHOOK ONE("syscall", S_IRUSR, proc_pid_syscall), @@ -3061,6 +3155,9 @@ static const struct pid_entry tgid_base_stuff[] = { #ifdef CONFIG_CGROUPS ONE("cgroup", S_IRUGO, proc_cgroup_show), #endif +#ifdef CONFIG_PROC_CPU_RESCTRL + ONE("cpu_resctrl_groups", S_IRUGO, proc_resctrl_show), +#endif ONE("oom_score", S_IRUGO, proc_oom_score), REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), @@ -3461,6 +3558,9 @@ static const struct pid_entry tid_base_stuff[] = { #ifdef CONFIG_CGROUPS ONE("cgroup", S_IRUGO, proc_cgroup_show), #endif +#ifdef CONFIG_PROC_CPU_RESCTRL + ONE("cpu_resctrl_groups", S_IRUGO, proc_resctrl_show), +#endif ONE("oom_score", S_IRUGO, proc_oom_score), REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), diff --git a/fs/proc/bootconfig.c b/fs/proc/bootconfig.c new file mode 100644 index 000000000000..9955d75c0585 --- /dev/null +++ b/fs/proc/bootconfig.c @@ -0,0 +1,89 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * /proc/bootconfig - Extra boot configuration + */ +#include <linux/fs.h> +#include <linux/init.h> +#include <linux/printk.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> +#include <linux/bootconfig.h> +#include <linux/slab.h> + +static char *saved_boot_config; + +static int boot_config_proc_show(struct seq_file *m, void *v) +{ + if (saved_boot_config) + seq_puts(m, saved_boot_config); + return 0; +} + +/* Rest size of buffer */ +#define rest(dst, end) ((end) > (dst) ? 
(end) - (dst) : 0) + +/* Return the needed total length if @size is 0 */ +static int __init copy_xbc_key_value_list(char *dst, size_t size) +{ + struct xbc_node *leaf, *vnode; + const char *val; + char *key, *end = dst + size; + int ret = 0; + + key = kzalloc(XBC_KEYLEN_MAX, GFP_KERNEL); + + xbc_for_each_key_value(leaf, val) { + ret = xbc_node_compose_key(leaf, key, XBC_KEYLEN_MAX); + if (ret < 0) + break; + ret = snprintf(dst, rest(dst, end), "%s = ", key); + if (ret < 0) + break; + dst += ret; + vnode = xbc_node_get_child(leaf); + if (vnode && xbc_node_is_array(vnode)) { + xbc_array_for_each_value(vnode, val) { + ret = snprintf(dst, rest(dst, end), "\"%s\"%s", + val, vnode->next ? ", " : "\n"); + if (ret < 0) + goto out; + dst += ret; + } + } else { + ret = snprintf(dst, rest(dst, end), "\"%s\"\n", val); + if (ret < 0) + break; + dst += ret; + } + } +out: + kfree(key); + + return ret < 0 ? ret : dst - (end - size); +} + +static int __init proc_boot_config_init(void) +{ + int len; + + len = copy_xbc_key_value_list(NULL, 0); + if (len < 0) + return len; + + if (len > 0) { + saved_boot_config = kzalloc(len + 1, GFP_KERNEL); + if (!saved_boot_config) + return -ENOMEM; + + len = copy_xbc_key_value_list(saved_boot_config, len + 1); + if (len < 0) { + kfree(saved_boot_config); + return len; + } + } + + proc_create_single("bootconfig", 0, NULL, boot_config_proc_show); + + return 0; +} +fs_initcall(proc_boot_config_init); diff --git a/fs/proc/cpuinfo.c b/fs/proc/cpuinfo.c index 96f1087e372c..c1dea9b8222e 100644 --- a/fs/proc/cpuinfo.c +++ b/fs/proc/cpuinfo.c @@ -16,16 +16,16 @@ static int cpuinfo_open(struct inode *inode, struct file *file) return seq_open(file, &cpuinfo_op); } -static const struct file_operations proc_cpuinfo_operations = { - .open = cpuinfo_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, +static const struct proc_ops cpuinfo_proc_ops = { + .proc_open = cpuinfo_open, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + 
.proc_release = seq_release, }; static int __init proc_cpuinfo_init(void) { - proc_create("cpuinfo", 0, NULL, &proc_cpuinfo_operations); + proc_create("cpuinfo", 0, NULL, &cpuinfo_proc_ops); return 0; } fs_initcall(proc_cpuinfo_init); diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 64e9ee1b129e..3faed94e4b65 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -138,8 +138,12 @@ static int proc_getattr(const struct path *path, struct kstat *stat, { struct inode *inode = d_inode(path->dentry); struct proc_dir_entry *de = PDE(inode); - if (de && de->nlink) - set_nlink(inode, de->nlink); + if (de) { + nlink_t nlink = READ_ONCE(de->nlink); + if (nlink > 0) { + set_nlink(inode, nlink); + } + } generic_fillattr(inode, stat); return 0; @@ -159,7 +163,6 @@ static int __xlate_proc_name(const char *name, struct proc_dir_entry **ret, { const char *cp = name, *next; struct proc_dir_entry *de; - unsigned int len; de = *ret; if (!de) @@ -170,13 +173,12 @@ static int __xlate_proc_name(const char *name, struct proc_dir_entry **ret, if (!next) break; - len = next - cp; - de = pde_subdir_find(de, cp, len); + de = pde_subdir_find(de, cp, next - cp); if (!de) { WARN(1, "name '%s'\n", name); return -ENOENT; } - cp += len + 1; + cp = next + 1; } *residual = cp; *ret = de; @@ -362,6 +364,7 @@ struct proc_dir_entry *proc_register(struct proc_dir_entry *dir, write_unlock(&proc_subdir_lock); goto out_free_inum; } + dir->nlink++; write_unlock(&proc_subdir_lock); return dp; @@ -470,12 +473,9 @@ struct proc_dir_entry *proc_mkdir_data(const char *name, umode_t mode, ent = __proc_create(&parent, name, S_IFDIR | mode, 2); if (ent) { ent->data = data; - ent->proc_fops = &proc_dir_operations; + ent->proc_dir_ops = &proc_dir_operations; ent->proc_iops = &proc_dir_inode_operations; - parent->nlink++; ent = proc_register(parent, ent); - if (!ent) - parent->nlink--; } return ent; } @@ -503,12 +503,9 @@ struct proc_dir_entry *proc_create_mount_point(const char *name) ent = 
__proc_create(&parent, name, mode, 2); if (ent) { ent->data = NULL; - ent->proc_fops = NULL; + ent->proc_dir_ops = NULL; ent->proc_iops = NULL; - parent->nlink++; ent = proc_register(parent, ent); - if (!ent) - parent->nlink--; } return ent; } @@ -536,25 +533,23 @@ struct proc_dir_entry *proc_create_reg(const char *name, umode_t mode, struct proc_dir_entry *proc_create_data(const char *name, umode_t mode, struct proc_dir_entry *parent, - const struct file_operations *proc_fops, void *data) + const struct proc_ops *proc_ops, void *data) { struct proc_dir_entry *p; - BUG_ON(proc_fops == NULL); - p = proc_create_reg(name, mode, &parent, data); if (!p) return NULL; - p->proc_fops = proc_fops; + p->proc_ops = proc_ops; return proc_register(parent, p); } EXPORT_SYMBOL(proc_create_data); struct proc_dir_entry *proc_create(const char *name, umode_t mode, struct proc_dir_entry *parent, - const struct file_operations *proc_fops) + const struct proc_ops *proc_ops) { - return proc_create_data(name, mode, parent, proc_fops, NULL); + return proc_create_data(name, mode, parent, proc_ops, NULL); } EXPORT_SYMBOL(proc_create); @@ -576,11 +571,11 @@ static int proc_seq_release(struct inode *inode, struct file *file) return seq_release(inode, file); } -static const struct file_operations proc_seq_fops = { - .open = proc_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = proc_seq_release, +static const struct proc_ops proc_seq_ops = { + .proc_open = proc_seq_open, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_release = proc_seq_release, }; struct proc_dir_entry *proc_create_seq_private(const char *name, umode_t mode, @@ -592,7 +587,7 @@ struct proc_dir_entry *proc_create_seq_private(const char *name, umode_t mode, p = proc_create_reg(name, mode, &parent, data); if (!p) return NULL; - p->proc_fops = &proc_seq_fops; + p->proc_ops = &proc_seq_ops; p->seq_ops = ops; p->state_size = state_size; return proc_register(parent, p); @@ -606,11 +601,11 @@ static int 
proc_single_open(struct inode *inode, struct file *file) return single_open(file, de->single_show, de->data); } -static const struct file_operations proc_single_fops = { - .open = proc_single_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, +static const struct proc_ops proc_single_ops = { + .proc_open = proc_single_open, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_release = single_release, }; struct proc_dir_entry *proc_create_single_data(const char *name, umode_t mode, @@ -622,7 +617,7 @@ struct proc_dir_entry *proc_create_single_data(const char *name, umode_t mode, p = proc_create_reg(name, mode, &parent, data); if (!p) return NULL; - p->proc_fops = &proc_single_fops; + p->proc_ops = &proc_single_ops; p->single_show = show; return proc_register(parent, p); } @@ -666,8 +661,12 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent) len = strlen(fn); de = pde_subdir_find(parent, fn, len); - if (de) + if (de) { rb_erase(&de->subdir_node, &parent->subdir); + if (S_ISDIR(de->mode)) { + parent->nlink--; + } + } write_unlock(&proc_subdir_lock); if (!de) { WARN(1, "name '%s'\n", name); @@ -676,9 +675,6 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent) proc_entry_rundown(de); - if (S_ISDIR(de->mode)) - parent->nlink--; - de->nlink = 0; WARN(pde_subdir_first(de), "%s: removing non-empty directory '%s/%s', leaking at least '%s'\n", __func__, de->parent->name, de->name, pde_subdir_first(de)->name); @@ -714,13 +710,12 @@ int remove_proc_subtree(const char *name, struct proc_dir_entry *parent) de = next; continue; } - write_unlock(&proc_subdir_lock); - - proc_entry_rundown(de); next = de->parent; if (S_ISDIR(de->mode)) next->nlink--; - de->nlink = 0; + write_unlock(&proc_subdir_lock); + + proc_entry_rundown(de); if (de == root) break; pde_put(de); diff --git a/fs/proc/inode.c b/fs/proc/inode.c index dbe43a50caf2..6da18316d209 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -163,7 
+163,7 @@ static void close_pdeo(struct proc_dir_entry *pde, struct pde_opener *pdeo) pdeo->closing = true; spin_unlock(&pde->pde_unload_lock); file = pdeo->file; - pde->proc_fops->release(file_inode(file), file); + pde->proc_ops->proc_release(file_inode(file), file); spin_lock(&pde->pde_unload_lock); /* After ->release. */ list_del(&pdeo->lh); @@ -200,12 +200,12 @@ static loff_t proc_reg_llseek(struct file *file, loff_t offset, int whence) struct proc_dir_entry *pde = PDE(file_inode(file)); loff_t rv = -EINVAL; if (use_pde(pde)) { - typeof_member(struct file_operations, llseek) llseek; + typeof_member(struct proc_ops, proc_lseek) lseek; - llseek = pde->proc_fops->llseek; - if (!llseek) - llseek = default_llseek; - rv = llseek(file, offset, whence); + lseek = pde->proc_ops->proc_lseek; + if (!lseek) + lseek = default_llseek; + rv = lseek(file, offset, whence); unuse_pde(pde); } return rv; @@ -216,9 +216,9 @@ static ssize_t proc_reg_read(struct file *file, char __user *buf, size_t count, struct proc_dir_entry *pde = PDE(file_inode(file)); ssize_t rv = -EIO; if (use_pde(pde)) { - typeof_member(struct file_operations, read) read; + typeof_member(struct proc_ops, proc_read) read; - read = pde->proc_fops->read; + read = pde->proc_ops->proc_read; if (read) rv = read(file, buf, count, ppos); unuse_pde(pde); @@ -231,9 +231,9 @@ static ssize_t proc_reg_write(struct file *file, const char __user *buf, size_t struct proc_dir_entry *pde = PDE(file_inode(file)); ssize_t rv = -EIO; if (use_pde(pde)) { - typeof_member(struct file_operations, write) write; + typeof_member(struct proc_ops, proc_write) write; - write = pde->proc_fops->write; + write = pde->proc_ops->proc_write; if (write) rv = write(file, buf, count, ppos); unuse_pde(pde); @@ -246,9 +246,9 @@ static __poll_t proc_reg_poll(struct file *file, struct poll_table_struct *pts) struct proc_dir_entry *pde = PDE(file_inode(file)); __poll_t rv = DEFAULT_POLLMASK; if (use_pde(pde)) { - typeof_member(struct file_operations, 
poll) poll; + typeof_member(struct proc_ops, proc_poll) poll; - poll = pde->proc_fops->poll; + poll = pde->proc_ops->proc_poll; if (poll) rv = poll(file, pts); unuse_pde(pde); @@ -261,9 +261,9 @@ static long proc_reg_unlocked_ioctl(struct file *file, unsigned int cmd, unsigne struct proc_dir_entry *pde = PDE(file_inode(file)); long rv = -ENOTTY; if (use_pde(pde)) { - typeof_member(struct file_operations, unlocked_ioctl) ioctl; + typeof_member(struct proc_ops, proc_ioctl) ioctl; - ioctl = pde->proc_fops->unlocked_ioctl; + ioctl = pde->proc_ops->proc_ioctl; if (ioctl) rv = ioctl(file, cmd, arg); unuse_pde(pde); @@ -277,9 +277,9 @@ static long proc_reg_compat_ioctl(struct file *file, unsigned int cmd, unsigned struct proc_dir_entry *pde = PDE(file_inode(file)); long rv = -ENOTTY; if (use_pde(pde)) { - typeof_member(struct file_operations, compat_ioctl) compat_ioctl; + typeof_member(struct proc_ops, proc_compat_ioctl) compat_ioctl; - compat_ioctl = pde->proc_fops->compat_ioctl; + compat_ioctl = pde->proc_ops->proc_compat_ioctl; if (compat_ioctl) rv = compat_ioctl(file, cmd, arg); unuse_pde(pde); @@ -293,9 +293,9 @@ static int proc_reg_mmap(struct file *file, struct vm_area_struct *vma) struct proc_dir_entry *pde = PDE(file_inode(file)); int rv = -EIO; if (use_pde(pde)) { - typeof_member(struct file_operations, mmap) mmap; + typeof_member(struct proc_ops, proc_mmap) mmap; - mmap = pde->proc_fops->mmap; + mmap = pde->proc_ops->proc_mmap; if (mmap) rv = mmap(file, vma); unuse_pde(pde); @@ -312,9 +312,9 @@ proc_reg_get_unmapped_area(struct file *file, unsigned long orig_addr, unsigned long rv = -EIO; if (use_pde(pde)) { - typeof_member(struct file_operations, get_unmapped_area) get_area; + typeof_member(struct proc_ops, proc_get_unmapped_area) get_area; - get_area = pde->proc_fops->get_unmapped_area; + get_area = pde->proc_ops->proc_get_unmapped_area; #ifdef CONFIG_MMU if (!get_area) get_area = current->mm->get_unmapped_area; @@ -333,8 +333,8 @@ static int 
proc_reg_open(struct inode *inode, struct file *file) { struct proc_dir_entry *pde = PDE(inode); int rv = 0; - typeof_member(struct file_operations, open) open; - typeof_member(struct file_operations, release) release; + typeof_member(struct proc_ops, proc_open) open; + typeof_member(struct proc_ops, proc_release) release; struct pde_opener *pdeo; /* @@ -351,7 +351,7 @@ static int proc_reg_open(struct inode *inode, struct file *file) if (!use_pde(pde)) return -ENOENT; - release = pde->proc_fops->release; + release = pde->proc_ops->proc_release; if (release) { pdeo = kmem_cache_alloc(pde_opener_cache, GFP_KERNEL); if (!pdeo) { @@ -360,7 +360,7 @@ static int proc_reg_open(struct inode *inode, struct file *file) } } - open = pde->proc_fops->open; + open = pde->proc_ops->proc_open; if (open) rv = open(inode, file); @@ -468,21 +468,23 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de) inode->i_size = de->size; if (de->nlink) set_nlink(inode, de->nlink); - WARN_ON(!de->proc_iops); - inode->i_op = de->proc_iops; - if (de->proc_fops) { - if (S_ISREG(inode->i_mode)) { + + if (S_ISREG(inode->i_mode)) { + inode->i_op = de->proc_iops; + inode->i_fop = &proc_reg_file_ops; #ifdef CONFIG_COMPAT - if (!de->proc_fops->compat_ioctl) - inode->i_fop = - &proc_reg_file_ops_no_compat; - else -#endif - inode->i_fop = &proc_reg_file_ops; - } else { - inode->i_fop = de->proc_fops; + if (!de->proc_ops->proc_compat_ioctl) { + inode->i_fop = &proc_reg_file_ops_no_compat; } - } +#endif + } else if (S_ISDIR(inode->i_mode)) { + inode->i_op = de->proc_iops; + inode->i_fop = de->proc_dir_ops; + } else if (S_ISLNK(inode->i_mode)) { + inode->i_op = de->proc_iops; + inode->i_fop = NULL; + } else + BUG(); } else pde_put(de); return inode; diff --git a/fs/proc/internal.h b/fs/proc/internal.h index cd0c8d5ce9a1..41587276798e 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -39,7 +39,10 @@ struct proc_dir_entry { spinlock_t pde_unload_lock; struct completion 
*pde_unload_completion; const struct inode_operations *proc_iops; - const struct file_operations *proc_fops; + union { + const struct proc_ops *proc_ops; + const struct file_operations *proc_dir_ops; + }; const struct dentry_operations *proc_dops; union { const struct seq_operations *seq_ops; @@ -197,8 +200,8 @@ extern ssize_t proc_simple_write(struct file *, const char __user *, size_t, lof * inode.c */ struct pde_opener { - struct file *file; struct list_head lh; + struct file *file; bool closing; struct completion *c; } __randomize_layout; diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index f5834488b67d..8ba492d44e68 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -31,6 +31,7 @@ #include <linux/ioport.h> #include <linux/memory.h> #include <linux/sched/task.h> +#include <linux/security.h> #include <asm/sections.h> #include "internal.h" @@ -545,9 +546,14 @@ out: static int open_kcore(struct inode *inode, struct file *filp) { + int ret = security_locked_down(LOCKDOWN_KCORE); + if (!capable(CAP_SYS_RAWIO)) return -EPERM; + if (ret) + return ret; + filp->private_data = kmalloc(PAGE_SIZE, GFP_KERNEL); if (!filp->private_data) return -ENOMEM; @@ -568,11 +574,11 @@ static int release_kcore(struct inode *inode, struct file *file) return 0; } -static const struct file_operations proc_kcore_operations = { - .read = read_kcore, - .open = open_kcore, - .release = release_kcore, - .llseek = default_llseek, +static const struct proc_ops kcore_proc_ops = { + .proc_read = read_kcore, + .proc_open = open_kcore, + .proc_release = release_kcore, + .proc_lseek = default_llseek, }; /* just remember that we have to update kcore */ @@ -631,8 +637,7 @@ static void __init add_modules_range(void) static int __init proc_kcore_init(void) { - proc_root_kcore = proc_create("kcore", S_IRUSR, NULL, - &proc_kcore_operations); + proc_root_kcore = proc_create("kcore", S_IRUSR, NULL, &kcore_proc_ops); if (!proc_root_kcore) { pr_err("couldn't create /proc/kcore\n"); return 0; /* Always 
returns 0. */ diff --git a/fs/proc/kmsg.c b/fs/proc/kmsg.c index 4f4a2abb225e..ec1b7d2fb773 100644 --- a/fs/proc/kmsg.c +++ b/fs/proc/kmsg.c @@ -49,17 +49,17 @@ static __poll_t kmsg_poll(struct file *file, poll_table *wait) } -static const struct file_operations proc_kmsg_operations = { - .read = kmsg_read, - .poll = kmsg_poll, - .open = kmsg_open, - .release = kmsg_release, - .llseek = generic_file_llseek, +static const struct proc_ops kmsg_proc_ops = { + .proc_read = kmsg_read, + .proc_poll = kmsg_poll, + .proc_open = kmsg_open, + .proc_release = kmsg_release, + .proc_lseek = generic_file_llseek, }; static int __init proc_kmsg_init(void) { - proc_create("kmsg", S_IRUSR, NULL, &proc_kmsg_operations); + proc_create("kmsg", S_IRUSR, NULL, &kmsg_proc_ops); return 0; } fs_initcall(proc_kmsg_init); diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index 465ea0153b2a..8c1f1bb1a5ce 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -8,7 +8,6 @@ #include <linux/mmzone.h> #include <linux/proc_fs.h> #include <linux/percpu.h> -#include <linux/quicklist.h> #include <linux/seq_file.h> #include <linux/swap.h> #include <linux/vmstat.h> @@ -106,9 +105,6 @@ static int meminfo_proc_show(struct seq_file *m, void *v) global_zone_page_state(NR_KERNEL_STACK_KB)); show_val_kb(m, "PageTables: ", global_zone_page_state(NR_PAGETABLE)); -#ifdef CONFIG_QUICKLIST - show_val_kb(m, "Quicklists: ", quicklist_total_size()); -#endif show_val_kb(m, "NFS_Unstable: ", global_node_page_state(NR_UNSTABLE_NFS)); @@ -136,6 +132,10 @@ static int meminfo_proc_show(struct seq_file *m, void *v) global_node_page_state(NR_SHMEM_THPS) * HPAGE_PMD_NR); show_val_kb(m, "ShmemPmdMapped: ", global_node_page_state(NR_SHMEM_PMDMAPPED) * HPAGE_PMD_NR); + show_val_kb(m, "FileHugePages: ", + global_node_page_state(NR_FILE_THPS) * HPAGE_PMD_NR); + show_val_kb(m, "FilePmdMapped: ", + global_node_page_state(NR_FILE_PMDMAPPED) * HPAGE_PMD_NR); #endif #ifdef CONFIG_CMA diff --git a/fs/proc/namespaces.c 
b/fs/proc/namespaces.c index dd2b35f78b09..8e159fc78c0a 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c @@ -33,6 +33,10 @@ static const struct proc_ns_operations *ns_entries[] = { #ifdef CONFIG_CGROUPS &cgroupns_operations, #endif +#ifdef CONFIG_TIME_NS + &timens_operations, + &timens_for_children_operations, +#endif }; static const char *proc_ns_get_link(struct dentry *dentry, @@ -42,22 +46,26 @@ static const char *proc_ns_get_link(struct dentry *dentry, const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns_ops; struct task_struct *task; struct path ns_path; - void *error = ERR_PTR(-EACCES); + int error = -EACCES; if (!dentry) return ERR_PTR(-ECHILD); task = get_proc_task(inode); if (!task) - return error; + return ERR_PTR(-EACCES); - if (ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) { - error = ns_get_path(&ns_path, task, ns_ops); - if (!error) - nd_jump_link(&ns_path); - } + if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) + goto out; + + error = ns_get_path(&ns_path, task, ns_ops); + if (error) + goto out; + + error = nd_jump_link(&ns_path); +out: put_task_struct(task); - return error; + return ERR_PTR(error); } static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int buflen) diff --git a/fs/proc/page.c b/fs/proc/page.c index 544d1ee15aee..f909243d4a66 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c @@ -21,6 +21,21 @@ #define KPMMASK (KPMSIZE - 1) #define KPMBITS (KPMSIZE * BITS_PER_BYTE) +static inline unsigned long get_max_dump_pfn(void) +{ +#ifdef CONFIG_SPARSEMEM + /* + * The memmap of early sections is completely populated and marked + * online even if max_pfn does not fall on a section boundary - + * pfn_to_online_page() will succeed on all pages. Allow inspecting + * these memmaps. 
+ */ + return round_up(max_pfn, PAGES_PER_SECTION); +#else + return max_pfn; +#endif +} + /* /proc/kpagecount - an array exposing page counts * * Each entry is a u64 representing the corresponding @@ -29,6 +44,7 @@ static ssize_t kpagecount_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { + const unsigned long max_dump_pfn = get_max_dump_pfn(); u64 __user *out = (u64 __user *)buf; struct page *ppage; unsigned long src = *ppos; @@ -37,15 +53,19 @@ static ssize_t kpagecount_read(struct file *file, char __user *buf, u64 pcount; pfn = src / KPMSIZE; - count = min_t(size_t, count, (max_pfn * KPMSIZE) - src); if (src & KPMMASK || count & KPMMASK) return -EINVAL; + if (src >= max_dump_pfn * KPMSIZE) + return 0; + count = min_t(unsigned long, count, (max_dump_pfn * KPMSIZE) - src); while (count > 0) { - if (pfn_valid(pfn)) - ppage = pfn_to_page(pfn); - else - ppage = NULL; + /* + * TODO: ZONE_DEVICE support requires to identify + * memmaps that were actually initialized. + */ + ppage = pfn_to_online_page(pfn); + if (!ppage || PageSlab(ppage) || page_has_type(ppage)) pcount = 0; else @@ -69,9 +89,9 @@ static ssize_t kpagecount_read(struct file *file, char __user *buf, return ret; } -static const struct file_operations proc_kpagecount_operations = { - .llseek = mem_lseek, - .read = kpagecount_read, +static const struct proc_ops kpagecount_proc_ops = { + .proc_lseek = mem_lseek, + .proc_read = kpagecount_read, }; /* /proc/kpageflags - an array exposing page flags @@ -204,6 +224,7 @@ u64 stable_page_flags(struct page *page) static ssize_t kpageflags_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { + const unsigned long max_dump_pfn = get_max_dump_pfn(); u64 __user *out = (u64 __user *)buf; struct page *ppage; unsigned long src = *ppos; @@ -211,15 +232,18 @@ static ssize_t kpageflags_read(struct file *file, char __user *buf, ssize_t ret = 0; pfn = src / KPMSIZE; - count = min_t(unsigned long, count, (max_pfn * KPMSIZE) - src); if 
(src & KPMMASK || count & KPMMASK) return -EINVAL; + if (src >= max_dump_pfn * KPMSIZE) + return 0; + count = min_t(unsigned long, count, (max_dump_pfn * KPMSIZE) - src); while (count > 0) { - if (pfn_valid(pfn)) - ppage = pfn_to_page(pfn); - else - ppage = NULL; + /* + * TODO: ZONE_DEVICE support requires to identify + * memmaps that were actually initialized. + */ + ppage = pfn_to_online_page(pfn); if (put_user(stable_page_flags(ppage), out)) { ret = -EFAULT; @@ -239,15 +263,16 @@ static ssize_t kpageflags_read(struct file *file, char __user *buf, return ret; } -static const struct file_operations proc_kpageflags_operations = { - .llseek = mem_lseek, - .read = kpageflags_read, +static const struct proc_ops kpageflags_proc_ops = { + .proc_lseek = mem_lseek, + .proc_read = kpageflags_read, }; #ifdef CONFIG_MEMCG static ssize_t kpagecgroup_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { + const unsigned long max_dump_pfn = get_max_dump_pfn(); u64 __user *out = (u64 __user *)buf; struct page *ppage; unsigned long src = *ppos; @@ -256,15 +281,18 @@ static ssize_t kpagecgroup_read(struct file *file, char __user *buf, u64 ino; pfn = src / KPMSIZE; - count = min_t(unsigned long, count, (max_pfn * KPMSIZE) - src); if (src & KPMMASK || count & KPMMASK) return -EINVAL; + if (src >= max_dump_pfn * KPMSIZE) + return 0; + count = min_t(unsigned long, count, (max_dump_pfn * KPMSIZE) - src); while (count > 0) { - if (pfn_valid(pfn)) - ppage = pfn_to_page(pfn); - else - ppage = NULL; + /* + * TODO: ZONE_DEVICE support requires to identify + * memmaps that were actually initialized. 
+ */ + ppage = pfn_to_online_page(pfn); if (ppage) ino = page_cgroup_ino(ppage); @@ -289,18 +317,18 @@ static ssize_t kpagecgroup_read(struct file *file, char __user *buf, return ret; } -static const struct file_operations proc_kpagecgroup_operations = { - .llseek = mem_lseek, - .read = kpagecgroup_read, +static const struct proc_ops kpagecgroup_proc_ops = { + .proc_lseek = mem_lseek, + .proc_read = kpagecgroup_read, }; #endif /* CONFIG_MEMCG */ static int __init proc_page_init(void) { - proc_create("kpagecount", S_IRUSR, NULL, &proc_kpagecount_operations); - proc_create("kpageflags", S_IRUSR, NULL, &proc_kpageflags_operations); + proc_create("kpagecount", S_IRUSR, NULL, &kpagecount_proc_ops); + proc_create("kpageflags", S_IRUSR, NULL, &kpageflags_proc_ops); #ifdef CONFIG_MEMCG - proc_create("kpagecgroup", S_IRUSR, NULL, &proc_kpagecgroup_operations); + proc_create("kpagecgroup", S_IRUSR, NULL, &kpagecgroup_proc_ops); #endif return 0; } diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index 76ae278df1c4..4888c5224442 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -90,12 +90,12 @@ static int seq_release_net(struct inode *ino, struct file *f) return 0; } -static const struct file_operations proc_net_seq_fops = { - .open = seq_open_net, - .read = seq_read, - .write = proc_simple_write, - .llseek = seq_lseek, - .release = seq_release_net, +static const struct proc_ops proc_net_seq_ops = { + .proc_open = seq_open_net, + .proc_read = seq_read, + .proc_write = proc_simple_write, + .proc_lseek = seq_lseek, + .proc_release = seq_release_net, }; struct proc_dir_entry *proc_create_net_data(const char *name, umode_t mode, @@ -108,7 +108,7 @@ struct proc_dir_entry *proc_create_net_data(const char *name, umode_t mode, if (!p) return NULL; pde_force_lookup(p); - p->proc_fops = &proc_net_seq_fops; + p->proc_ops = &proc_net_seq_ops; p->seq_ops = ops; p->state_size = state_size; return proc_register(parent, p); @@ -152,7 +152,7 @@ struct proc_dir_entry 
*proc_create_net_data_write(const char *name, umode_t mode if (!p) return NULL; pde_force_lookup(p); - p->proc_fops = &proc_net_seq_fops; + p->proc_ops = &proc_net_seq_ops; p->seq_ops = ops; p->state_size = state_size; p->write = write; @@ -183,12 +183,12 @@ static int single_release_net(struct inode *ino, struct file *f) return single_release(ino, f); } -static const struct file_operations proc_net_single_fops = { - .open = single_open_net, - .read = seq_read, - .write = proc_simple_write, - .llseek = seq_lseek, - .release = single_release_net, +static const struct proc_ops proc_net_single_ops = { + .proc_open = single_open_net, + .proc_read = seq_read, + .proc_write = proc_simple_write, + .proc_lseek = seq_lseek, + .proc_release = single_release_net, }; struct proc_dir_entry *proc_create_net_single(const char *name, umode_t mode, @@ -201,7 +201,7 @@ struct proc_dir_entry *proc_create_net_single(const char *name, umode_t mode, if (!p) return NULL; pde_force_lookup(p); - p->proc_fops = &proc_net_single_fops; + p->proc_ops = &proc_net_single_ops; p->single_show = show; return proc_register(parent, p); } @@ -244,7 +244,7 @@ struct proc_dir_entry *proc_create_net_single_write(const char *name, umode_t mo if (!p) return NULL; pde_force_lookup(p); - p->proc_fops = &proc_net_single_fops; + p->proc_ops = &proc_net_single_ops; p->single_show = show; p->write = write; return proc_register(parent, p); diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index d80989b6c344..c75bb4632ed1 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -1720,7 +1720,7 @@ int __init proc_sys_init(void) proc_sys_root = proc_mkdir("sys", NULL); proc_sys_root->proc_iops = &proc_sys_dir_operations; - proc_sys_root->proc_fops = &proc_sys_dir_file_operations; + proc_sys_root->proc_dir_ops = &proc_sys_dir_file_operations; proc_sys_root->nlink = 0; return sysctl_init(); diff --git a/fs/proc/root.c b/fs/proc/root.c index 33f72d1b92cc..608233dfd29c 100644 --- a/fs/proc/root.c +++ 
b/fs/proc/root.c @@ -41,24 +41,19 @@ enum proc_param { Opt_hidepid, }; -static const struct fs_parameter_spec proc_param_specs[] = { +static const struct fs_parameter_spec proc_fs_parameters[] = { fsparam_u32("gid", Opt_gid), fsparam_u32("hidepid", Opt_hidepid), {} }; -static const struct fs_parameter_description proc_fs_parameters = { - .name = "proc", - .specs = proc_param_specs, -}; - static int proc_parse_param(struct fs_context *fc, struct fs_parameter *param) { struct proc_fs_context *ctx = fc->fs_private; struct fs_parse_result result; int opt; - opt = fs_parse(fc, &proc_fs_parameters, param, &result); + opt = fs_parse(fc, proc_fs_parameters, param, &result); if (opt < 0) return opt; @@ -71,7 +66,7 @@ static int proc_parse_param(struct fs_context *fc, struct fs_parameter *param) ctx->hidepid = result.uint_32; if (ctx->hidepid < HIDEPID_OFF || ctx->hidepid > HIDEPID_INVISIBLE) - return invalf(fc, "proc: hidepid value must be between 0 and 2.\n"); + return invalfc(fc, "hidepid value must be between 0 and 2.\n"); break; default: @@ -157,8 +152,7 @@ static int proc_get_tree(struct fs_context *fc) { struct proc_fs_context *ctx = fc->fs_private; - fc->s_fs_info = ctx->pid_ns; - return vfs_get_super(fc, vfs_get_keyed_super, proc_fill_super); + return get_tree_keyed(fc, proc_fill_super, ctx->pid_ns); } static void proc_fs_context_free(struct fs_context *fc) @@ -208,7 +202,7 @@ static void proc_kill_sb(struct super_block *sb) static struct file_system_type proc_fs_type = { .name = "proc", .init_fs_context = proc_init_fs_context, - .parameters = &proc_fs_parameters, + .parameters = proc_fs_parameters, .kill_sb = proc_kill_sb, .fs_flags = FS_USERNS_MOUNT | FS_DISALLOW_NOTIFY_PERM, }; @@ -293,7 +287,7 @@ struct proc_dir_entry proc_root = { .nlink = 2, .refcnt = REFCOUNT_INIT(1), .proc_iops = &proc_root_inode_operations, - .proc_fops = &proc_root_operations, + .proc_dir_ops = &proc_root_operations, .parent = &proc_root, .subdir = RB_ROOT, .name = "/proc", diff --git 
a/fs/proc/stat.c b/fs/proc/stat.c index 80c305f206bb..0449edf460f5 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c @@ -120,20 +120,23 @@ static int show_stat(struct seq_file *p, void *v) getboottime64(&boottime); for_each_possible_cpu(i) { - struct kernel_cpustat *kcs = &kcpustat_cpu(i); - - user += kcs->cpustat[CPUTIME_USER]; - nice += kcs->cpustat[CPUTIME_NICE]; - system += kcs->cpustat[CPUTIME_SYSTEM]; - idle += get_idle_time(kcs, i); - iowait += get_iowait_time(kcs, i); - irq += kcs->cpustat[CPUTIME_IRQ]; - softirq += kcs->cpustat[CPUTIME_SOFTIRQ]; - steal += kcs->cpustat[CPUTIME_STEAL]; - guest += kcs->cpustat[CPUTIME_GUEST]; - guest_nice += kcs->cpustat[CPUTIME_GUEST_NICE]; - sum += kstat_cpu_irqs_sum(i); - sum += arch_irq_stat_cpu(i); + struct kernel_cpustat kcpustat; + u64 *cpustat = kcpustat.cpustat; + + kcpustat_cpu_fetch(&kcpustat, i); + + user += cpustat[CPUTIME_USER]; + nice += cpustat[CPUTIME_NICE]; + system += cpustat[CPUTIME_SYSTEM]; + idle += get_idle_time(&kcpustat, i); + iowait += get_iowait_time(&kcpustat, i); + irq += cpustat[CPUTIME_IRQ]; + softirq += cpustat[CPUTIME_SOFTIRQ]; + steal += cpustat[CPUTIME_STEAL]; + guest += cpustat[CPUTIME_GUEST]; + guest_nice += cpustat[CPUTIME_GUEST_NICE]; + sum += kstat_cpu_irqs_sum(i); + sum += arch_irq_stat_cpu(i); for (j = 0; j < NR_SOFTIRQS; j++) { unsigned int softirq_stat = kstat_softirqs_cpu(j, i); @@ -157,19 +160,22 @@ static int show_stat(struct seq_file *p, void *v) seq_putc(p, '\n'); for_each_online_cpu(i) { - struct kernel_cpustat *kcs = &kcpustat_cpu(i); + struct kernel_cpustat kcpustat; + u64 *cpustat = kcpustat.cpustat; + + kcpustat_cpu_fetch(&kcpustat, i); /* Copy values here to work around gcc-2.95.3, gcc-2.96 */ - user = kcs->cpustat[CPUTIME_USER]; - nice = kcs->cpustat[CPUTIME_NICE]; - system = kcs->cpustat[CPUTIME_SYSTEM]; - idle = get_idle_time(kcs, i); - iowait = get_iowait_time(kcs, i); - irq = kcs->cpustat[CPUTIME_IRQ]; - softirq = kcs->cpustat[CPUTIME_SOFTIRQ]; - steal = 
kcs->cpustat[CPUTIME_STEAL]; - guest = kcs->cpustat[CPUTIME_GUEST]; - guest_nice = kcs->cpustat[CPUTIME_GUEST_NICE]; + user = cpustat[CPUTIME_USER]; + nice = cpustat[CPUTIME_NICE]; + system = cpustat[CPUTIME_SYSTEM]; + idle = get_idle_time(&kcpustat, i); + iowait = get_iowait_time(&kcpustat, i); + irq = cpustat[CPUTIME_IRQ]; + softirq = cpustat[CPUTIME_SOFTIRQ]; + steal = cpustat[CPUTIME_STEAL]; + guest = cpustat[CPUTIME_GUEST]; + guest_nice = cpustat[CPUTIME_GUEST_NICE]; seq_printf(p, "cpu%d", i); seq_put_decimal_ull(p, " ", nsec_to_clock_t(user)); seq_put_decimal_ull(p, " ", nsec_to_clock_t(nice)); @@ -217,16 +223,16 @@ static int stat_open(struct inode *inode, struct file *file) return single_open_size(file, show_stat, NULL, size); } -static const struct file_operations proc_stat_operations = { - .open = stat_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, +static const struct proc_ops stat_proc_ops = { + .proc_open = stat_open, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_release = single_release, }; static int __init proc_stat_init(void) { - proc_create("stat", 0, NULL, &proc_stat_operations); + proc_create("stat", 0, NULL, &stat_proc_ops); return 0; } fs_initcall(proc_stat_init); diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 731642e0f5a0..3ba9ae83bff5 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -#include <linux/mm.h> +#include <linux/pagewalk.h> #include <linux/vmacache.h> #include <linux/hugetlb.h> #include <linux/huge_mm.h> @@ -417,6 +417,7 @@ struct mem_size_stats { unsigned long lazyfree; unsigned long anonymous_thp; unsigned long shmem_thp; + unsigned long file_thp; unsigned long swap; unsigned long shared_hugetlb; unsigned long private_hugetlb; @@ -461,7 +462,7 @@ static void smaps_page_accumulate(struct mem_size_stats *mss, static void smaps_account(struct mem_size_stats *mss, struct page *page, bool compound, bool young, bool 
dirty, bool locked) { - int i, nr = compound ? 1 << compound_order(page) : 1; + int i, nr = compound ? compound_nr(page) : 1; unsigned long size = nr * PAGE_SIZE; /* @@ -504,7 +505,7 @@ static void smaps_account(struct mem_size_stats *mss, struct page *page, #ifdef CONFIG_SHMEM static int smaps_pte_hole(unsigned long addr, unsigned long end, - struct mm_walk *walk) + __always_unused int depth, struct mm_walk *walk) { struct mem_size_stats *mss = walk->private; @@ -513,7 +514,9 @@ static int smaps_pte_hole(unsigned long addr, unsigned long end, return 0; } -#endif +#else +#define smaps_pte_hole NULL +#endif /* CONFIG_SHMEM */ static void smaps_pte_entry(pte_t *pte, unsigned long addr, struct mm_walk *walk) @@ -586,7 +589,7 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr, else if (is_zone_device_page(page)) /* pass */; else - VM_BUG_ON_PAGE(1, page); + mss->file_thp += HPAGE_PMD_SIZE; smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd), locked); } #else @@ -729,21 +732,24 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask, } return 0; } +#else +#define smaps_hugetlb_range NULL #endif /* HUGETLB_PAGE */ +static const struct mm_walk_ops smaps_walk_ops = { + .pmd_entry = smaps_pte_range, + .hugetlb_entry = smaps_hugetlb_range, +}; + +static const struct mm_walk_ops smaps_shmem_walk_ops = { + .pmd_entry = smaps_pte_range, + .hugetlb_entry = smaps_hugetlb_range, + .pte_hole = smaps_pte_hole, +}; + static void smap_gather_stats(struct vm_area_struct *vma, struct mem_size_stats *mss) { - struct mm_walk smaps_walk = { - .pmd_entry = smaps_pte_range, -#ifdef CONFIG_HUGETLB_PAGE - .hugetlb_entry = smaps_hugetlb_range, -#endif - .mm = vma->vm_mm, - }; - - smaps_walk.private = mss; - #ifdef CONFIG_SHMEM /* In case of smaps_rollup, reset the value from previous vma */ mss->check_shmem_swap = false; @@ -765,12 +771,13 @@ static void smap_gather_stats(struct vm_area_struct *vma, mss->swap += shmem_swapped; } else { mss->check_shmem_swap = 
true; - smaps_walk.pte_hole = smaps_pte_hole; + walk_page_vma(vma, &smaps_shmem_walk_ops, mss); + return; } } #endif /* mmap_sem is held in m_start */ - walk_page_vma(vma, &smaps_walk); + walk_page_vma(vma, &smaps_walk_ops, mss); } #define SEQ_PUT_DEC(str, val) \ @@ -803,6 +810,7 @@ static void __show_smap(struct seq_file *m, const struct mem_size_stats *mss, SEQ_PUT_DEC(" kB\nLazyFree: ", mss->lazyfree); SEQ_PUT_DEC(" kB\nAnonHugePages: ", mss->anonymous_thp); SEQ_PUT_DEC(" kB\nShmemPmdMapped: ", mss->shmem_thp); + SEQ_PUT_DEC(" kB\nFilePmdMapped: ", mss->file_thp); SEQ_PUT_DEC(" kB\nShared_Hugetlb: ", mss->shared_hugetlb); seq_put_decimal_ull_width(m, " kB\nPrivate_Hugetlb: ", mss->private_hugetlb >> 10, 7); @@ -1118,6 +1126,11 @@ static int clear_refs_test_walk(unsigned long start, unsigned long end, return 0; } +static const struct mm_walk_ops clear_refs_walk_ops = { + .pmd_entry = clear_refs_pte_range, + .test_walk = clear_refs_test_walk, +}; + static ssize_t clear_refs_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { @@ -1151,12 +1164,6 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, struct clear_refs_private cp = { .type = type, }; - struct mm_walk clear_refs_walk = { - .pmd_entry = clear_refs_pte_range, - .test_walk = clear_refs_test_walk, - .mm = mm, - .private = &cp, - }; if (type == CLEAR_REFS_MM_HIWATER_RSS) { if (down_write_killable(&mm->mmap_sem)) { @@ -1217,7 +1224,8 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, 0, NULL, mm, 0, -1UL); mmu_notifier_invalidate_range_start(&range); } - walk_page_range(0, mm->highest_vm_end, &clear_refs_walk); + walk_page_range(mm, 0, mm->highest_vm_end, &clear_refs_walk_ops, + &cp); if (type == CLEAR_REFS_SOFT_DIRTY) mmu_notifier_invalidate_range_end(&range); tlb_finish_mmu(&tlb, 0, -1); @@ -1274,7 +1282,7 @@ static int add_to_pagemap(unsigned long addr, pagemap_entry_t *pme, } static int pagemap_pte_hole(unsigned long start, 
unsigned long end, - struct mm_walk *walk) + __always_unused int depth, struct mm_walk *walk) { struct pagemapread *pm = walk->private; unsigned long addr = start; @@ -1489,8 +1497,16 @@ static int pagemap_hugetlb_range(pte_t *ptep, unsigned long hmask, return err; } +#else +#define pagemap_hugetlb_range NULL #endif /* HUGETLB_PAGE */ +static const struct mm_walk_ops pagemap_ops = { + .pmd_entry = pagemap_pmd_range, + .pte_hole = pagemap_pte_hole, + .hugetlb_entry = pagemap_hugetlb_range, +}; + /* * /proc/pid/pagemap - an array mapping virtual pages to pfns * @@ -1522,7 +1538,6 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, { struct mm_struct *mm = file->private_data; struct pagemapread pm; - struct mm_walk pagemap_walk = {}; unsigned long src; unsigned long svpfn; unsigned long start_vaddr; @@ -1550,14 +1565,6 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, if (!pm.buffer) goto out_mm; - pagemap_walk.pmd_entry = pagemap_pmd_range; - pagemap_walk.pte_hole = pagemap_pte_hole; -#ifdef CONFIG_HUGETLB_PAGE - pagemap_walk.hugetlb_entry = pagemap_hugetlb_range; -#endif - pagemap_walk.mm = mm; - pagemap_walk.private = &pm; - src = *ppos; svpfn = src / PM_ENTRY_BYTES; start_vaddr = svpfn << PAGE_SHIFT; @@ -1586,7 +1593,7 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, ret = down_read_killable(&mm->mmap_sem); if (ret) goto out_free; - ret = walk_page_range(start_vaddr, end, &pagemap_walk); + ret = walk_page_range(mm, start_vaddr, end, &pagemap_ops, &pm); up_read(&mm->mmap_sem); start_vaddr = end; @@ -1798,6 +1805,11 @@ static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask, } #endif +static const struct mm_walk_ops show_numa_ops = { + .hugetlb_entry = gather_hugetlb_stats, + .pmd_entry = gather_pte_stats, +}; + /* * Display pages allocated per node and memory policy via /proc. 
*/ @@ -1809,12 +1821,6 @@ static int show_numa_map(struct seq_file *m, void *v) struct numa_maps *md = &numa_priv->md; struct file *file = vma->vm_file; struct mm_struct *mm = vma->vm_mm; - struct mm_walk walk = { - .hugetlb_entry = gather_hugetlb_stats, - .pmd_entry = gather_pte_stats, - .private = md, - .mm = mm, - }; struct mempolicy *pol; char buffer[64]; int nid; @@ -1848,7 +1854,7 @@ static int show_numa_map(struct seq_file *m, void *v) seq_puts(m, " huge"); /* mmap_sem is held by m_start */ - walk_page_vma(vma, &walk); + walk_page_vma(vma, &show_numa_ops, md); if (!md->pages) goto out; diff --git a/fs/proc/uptime.c b/fs/proc/uptime.c index a4c2791ab70b..5a1b228964fb 100644 --- a/fs/proc/uptime.c +++ b/fs/proc/uptime.c @@ -5,6 +5,7 @@ #include <linux/sched.h> #include <linux/seq_file.h> #include <linux/time.h> +#include <linux/time_namespace.h> #include <linux/kernel_stat.h> static int uptime_proc_show(struct seq_file *m, void *v) @@ -20,6 +21,8 @@ static int uptime_proc_show(struct seq_file *m, void *v) nsec += (__force u64) kcpustat_cpu(i).cpustat[CPUTIME_IDLE]; ktime_get_boottime_ts64(&uptime); + timens_add_boottime(&uptime); + idle.tv_sec = div_u64_rem(nsec, NSEC_PER_SEC, &rem); idle.tv_nsec = rem; seq_printf(m, "%lu.%02lu %lu.%02lu\n", diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 7bcc92add72c..7dc800cce354 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -104,9 +104,9 @@ static int pfn_is_ram(unsigned long pfn) } /* Reads a page from the oldmem device from given offset. 
*/ -static ssize_t read_from_oldmem(char *buf, size_t count, - u64 *ppos, int userbuf, - bool encrypted) +ssize_t read_from_oldmem(char *buf, size_t count, + u64 *ppos, int userbuf, + bool encrypted) { unsigned long pfn, offset; size_t nr_bytes; @@ -170,7 +170,7 @@ void __weak elfcorehdr_free(unsigned long long addr) */ ssize_t __weak elfcorehdr_read(char *buf, size_t count, u64 *ppos) { - return read_from_oldmem(buf, count, ppos, 0, sev_active()); + return read_from_oldmem(buf, count, ppos, 0, false); } /* @@ -667,10 +667,10 @@ static int mmap_vmcore(struct file *file, struct vm_area_struct *vma) } #endif -static const struct file_operations proc_vmcore_operations = { - .read = read_vmcore, - .llseek = default_llseek, - .mmap = mmap_vmcore, +static const struct proc_ops vmcore_proc_ops = { + .proc_read = read_vmcore, + .proc_lseek = default_llseek, + .proc_mmap = mmap_vmcore, }; static struct vmcore* __init get_new_element(void) @@ -1555,7 +1555,7 @@ static int __init vmcore_init(void) elfcorehdr_free(elfcorehdr_addr); elfcorehdr_addr = ELFCORE_ADDR_ERR; - proc_vmcore = proc_create("vmcore", S_IRUSR, NULL, &proc_vmcore_operations); + proc_vmcore = proc_create("vmcore", S_IRUSR, NULL, &vmcore_proc_ops); if (proc_vmcore) proc_vmcore->size = vmcore_size; return 0; |