diff options
Diffstat (limited to 'fs/proc')
-rw-r--r-- | fs/proc/base.c | 71 | ||||
-rw-r--r-- | fs/proc/inode.c | 52 | ||||
-rw-r--r-- | fs/proc/internal.h | 5 | ||||
-rw-r--r-- | fs/proc/root.c | 236 |
4 files changed, 210 insertions, 154 deletions
diff --git a/fs/proc/base.c b/fs/proc/base.c index 5ab1849971b4..ddef482f1334 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -59,6 +59,7 @@ #include <linux/capability.h> #include <linux/file.h> #include <linux/fdtable.h> +#include <linux/generic-radix-tree.h> #include <linux/string.h> #include <linux/seq_file.h> #include <linux/namei.h> @@ -92,7 +93,6 @@ #include <linux/sched/coredump.h> #include <linux/sched/debug.h> #include <linux/sched/stat.h> -#include <linux/flex_array.h> #include <linux/posix-timers.h> #include <trace/events/oom.h> #include "internal.h" @@ -2142,11 +2142,12 @@ proc_map_files_readdir(struct file *file, struct dir_context *ctx) struct task_struct *task; struct mm_struct *mm; unsigned long nr_files, pos, i; - struct flex_array *fa = NULL; - struct map_files_info info; + GENRADIX(struct map_files_info) fa; struct map_files_info *p; int ret; + genradix_init(&fa); + ret = -ENOENT; task = get_proc_task(file_inode(file)); if (!task) @@ -2178,35 +2179,22 @@ proc_map_files_readdir(struct file *file, struct dir_context *ctx) */ for (vma = mm->mmap, pos = 2; vma; vma = vma->vm_next) { - if (vma->vm_file && ++pos > ctx->pos) - nr_files++; - } + if (!vma->vm_file) + continue; + if (++pos <= ctx->pos) + continue; - if (nr_files) { - fa = flex_array_alloc(sizeof(info), nr_files, - GFP_KERNEL); - if (!fa || flex_array_prealloc(fa, 0, nr_files, - GFP_KERNEL)) { + p = genradix_ptr_alloc(&fa, nr_files++, GFP_KERNEL); + if (!p) { ret = -ENOMEM; - if (fa) - flex_array_free(fa); up_read(&mm->mmap_sem); mmput(mm); goto out_put_task; } - for (i = 0, vma = mm->mmap, pos = 2; vma; - vma = vma->vm_next) { - if (!vma->vm_file) - continue; - if (++pos <= ctx->pos) - continue; - info.start = vma->vm_start; - info.end = vma->vm_end; - info.mode = vma->vm_file->f_mode; - if (flex_array_put(fa, i++, &info, GFP_KERNEL)) - BUG(); - } + p->start = vma->vm_start; + p->end = vma->vm_end; + p->mode = vma->vm_file->f_mode; } up_read(&mm->mmap_sem); mmput(mm); @@ -2215,7 +2203,7 @@ proc_map_files_readdir(struct file *file, struct dir_context *ctx) char buf[4 * sizeof(long) + 2]; /* max: %lx-%lx\0 */ unsigned int len; - p = flex_array_get(fa, i); + p = genradix_ptr(&fa, i); len = snprintf(buf, sizeof(buf), "%lx-%lx", p->start, p->end); if (!proc_fill_cache(file, ctx, buf, len, @@ -2225,12 +2213,11 @@ proc_map_files_readdir(struct file *file, struct dir_context *ctx) break; ctx->pos++; } - if (fa) - flex_array_free(fa); out_put_task: put_task_struct(task); out: + genradix_free(&fa); return ret; } @@ -2459,11 +2446,10 @@ static struct dentry *proc_pident_instantiate(struct dentry *dentry, static struct dentry *proc_pident_lookup(struct inode *dir, struct dentry *dentry, - const struct pid_entry *ents, - unsigned int nents) + const struct pid_entry *p, + const struct pid_entry *end) { struct task_struct *task = get_proc_task(dir); - const struct pid_entry *p, *last; struct dentry *res = ERR_PTR(-ENOENT); if (!task) @@ -2473,8 +2459,7 @@ static struct dentry *proc_pident_lookup(struct inode *dir, * Yes, it does not scale. And it should not. Don't add * new entries into /proc/<tgid>/ without very good reasons. */ - last = &ents[nents]; - for (p = ents; p < last; p++) { + for (; p < end; p++) { if (p->len != dentry->d_name.len) continue; if (!memcmp(dentry->d_name.name, p->name, p->len)) { @@ -2610,7 +2595,7 @@ static struct dentry *proc_##LSM##_attr_dir_lookup(struct inode *dir, \ { \ return proc_pident_lookup(dir, dentry, \ LSM##_attr_dir_stuff, \ - ARRAY_SIZE(LSM##_attr_dir_stuff)); \ + LSM##_attr_dir_stuff + ARRAY_SIZE(LSM##_attr_dir_stuff)); \ } \ \ static const struct inode_operations proc_##LSM##_attr_dir_inode_ops = { \ @@ -2655,7 +2640,8 @@ static struct dentry *proc_attr_dir_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { return proc_pident_lookup(dir, dentry, - attr_dir_stuff, ARRAY_SIZE(attr_dir_stuff)); + attr_dir_stuff, + attr_dir_stuff + ARRAY_SIZE(attr_dir_stuff)); } static const struct inode_operations proc_attr_dir_inode_operations = { @@ -3088,10 +3074,20 @@ static const struct file_operations proc_tgid_base_operations = { .llseek = generic_file_llseek, }; +struct pid *tgid_pidfd_to_pid(const struct file *file) +{ + if (!d_is_dir(file->f_path.dentry) || + (file->f_op != &proc_tgid_base_operations)) + return ERR_PTR(-EBADF); + + return proc_pid(file_inode(file)); +} + static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { return proc_pident_lookup(dir, dentry, - tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff)); + tgid_base_stuff, + tgid_base_stuff + ARRAY_SIZE(tgid_base_stuff)); } static const struct inode_operations proc_tgid_base_inode_operations = { @@ -3463,7 +3459,8 @@ static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx) static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { return proc_pident_lookup(dir, dentry, - tid_base_stuff, ARRAY_SIZE(tid_base_stuff)); + tid_base_stuff, + tid_base_stuff + ARRAY_SIZE(tid_base_stuff)); } static const struct file_operations proc_tid_base_operations = { diff --git a/fs/proc/inode.c b/fs/proc/inode.c index da649ccd6804..fc7e38def174 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -24,7 +24,6 @@ #include <linux/seq_file.h> #include <linux/slab.h> #include <linux/mount.h> -#include <linux/magic.h> #include <linux/uaccess.h> @@ -122,13 +121,12 @@ static int proc_show_options(struct seq_file *seq, struct dentry *root) return 0; } -static const struct super_operations proc_sops = { +const struct super_operations proc_sops = { .alloc_inode = proc_alloc_inode, .destroy_inode = proc_destroy_inode, .drop_inode = generic_delete_inode, .evict_inode = proc_evict_inode, .statfs = simple_statfs, - .remount_fs = proc_remount, .show_options = proc_show_options, }; @@ -488,51 +486,3 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de) pde_put(de); return inode; } - -int proc_fill_super(struct super_block *s, void *data, int silent) -{ - struct pid_namespace *ns = get_pid_ns(s->s_fs_info); - struct inode *root_inode; - int ret; - - if (!proc_parse_options(data, ns)) - return -EINVAL; - - /* User space would break if executables or devices appear on proc */ - s->s_iflags |= SB_I_USERNS_VISIBLE | SB_I_NOEXEC | SB_I_NODEV; - s->s_flags |= SB_NODIRATIME | SB_NOSUID | SB_NOEXEC; - s->s_blocksize = 1024; - s->s_blocksize_bits = 10; - s->s_magic = PROC_SUPER_MAGIC; - s->s_op = &proc_sops; - s->s_time_gran = 1; - - /* - * procfs isn't actually a stacking filesystem; however, there is - * too much magic going on inside it to permit stacking things on - * top of it - */ - s->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH; - - /* procfs dentries and inodes don't require IO to create */ - s->s_shrink.seeks = 0; - - pde_get(&proc_root); - root_inode = proc_get_inode(s, &proc_root); - if (!root_inode) { - pr_err("proc_fill_super: get root inode failed\n"); - return -ENOMEM; - } - - s->s_root = d_make_root(root_inode); - if (!s->s_root) { - pr_err("proc_fill_super: allocate dentry failed\n"); - return -ENOMEM; - } - - ret = proc_setup_self(s); - if (ret) { - return ret; - } - return proc_setup_thread_self(s); -} diff --git a/fs/proc/internal.h b/fs/proc/internal.h index ea575375f210..d1671e97f7fe 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -207,13 +207,12 @@ struct pde_opener { struct completion *c; } __randomize_layout; extern const struct inode_operations proc_link_inode_operations; - extern const struct inode_operations proc_pid_link_inode_operations; +extern const struct super_operations proc_sops; void proc_init_kmemcache(void); void set_proc_pid_nlink(void); extern struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *); -extern int proc_fill_super(struct super_block *, void *data, int flags); extern void proc_entry_rundown(struct proc_dir_entry *); /* @@ -271,10 +270,8 @@ static inline void proc_tty_init(void) {} * root.c */ extern struct proc_dir_entry proc_root; -extern int proc_parse_options(char *options, struct pid_namespace *pid); extern void proc_self_init(void); -extern int proc_remount(struct super_block *, int *, char *); /* * task_[no]mmu.c diff --git a/fs/proc/root.c b/fs/proc/root.c index 621e6ec322ca..8b145e7b9661 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -19,86 +19,178 @@ #include <linux/module.h> #include <linux/bitops.h> #include <linux/user_namespace.h> +#include <linux/fs_context.h> #include <linux/mount.h> #include <linux/pid_namespace.h> -#include <linux/parser.h> +#include <linux/fs_parser.h> #include <linux/cred.h> +#include <linux/magic.h> +#include <linux/slab.h> #include "internal.h" -enum { - Opt_gid, Opt_hidepid, Opt_err, +struct proc_fs_context { + struct pid_namespace *pid_ns; + unsigned int mask; + int hidepid; + int gid; }; -static const match_table_t tokens = { - {Opt_hidepid, "hidepid=%u"}, - {Opt_gid, "gid=%u"}, - {Opt_err, NULL}, +enum proc_param { + Opt_gid, + Opt_hidepid, }; -int proc_parse_options(char *options, struct pid_namespace *pid) +static const struct fs_parameter_spec proc_param_specs[] = { + fsparam_u32("gid", Opt_gid), + fsparam_u32("hidepid", Opt_hidepid), + {} +}; + +static const struct fs_parameter_description proc_fs_parameters = { + .name = "proc", + .specs = proc_param_specs, +}; + +static int proc_parse_param(struct fs_context *fc, struct fs_parameter *param) { - char *p; - substring_t args[MAX_OPT_ARGS]; - int option; - - if (!options) - return 1; - - while ((p = strsep(&options, ",")) != NULL) { - int token; - if (!*p) - continue; - - args[0].to = args[0].from = NULL; - token = match_token(p, tokens, args); - switch (token) { - case Opt_gid: - if (match_int(&args[0], &option)) - return 0; - pid->pid_gid = make_kgid(current_user_ns(), option); - break; - case Opt_hidepid: - if (match_int(&args[0], &option)) - return 0; - if (option < HIDEPID_OFF || - option > HIDEPID_INVISIBLE) { - pr_err("proc: hidepid value must be between 0 and 2.\n"); - return 0; - } - pid->hide_pid = option; - break; - default: - pr_err("proc: unrecognized mount option \"%s\" " - "or missing value\n", p); - return 0; - } + struct proc_fs_context *ctx = fc->fs_private; + struct fs_parse_result result; + int opt; + + opt = fs_parse(fc, &proc_fs_parameters, param, &result); + if (opt < 0) + return opt; + + switch (opt) { + case Opt_gid: + ctx->gid = result.uint_32; + break; + + case Opt_hidepid: + ctx->hidepid = result.uint_32; + if (ctx->hidepid < HIDEPID_OFF || + ctx->hidepid > HIDEPID_INVISIBLE) + return invalf(fc, "proc: hidepid value must be between 0 and 2.\n"); + break; + + default: + return -EINVAL; } - return 1; + ctx->mask |= 1 << opt; + return 0; } -int proc_remount(struct super_block *sb, int *flags, char *data) +static void proc_apply_options(struct super_block *s, + struct fs_context *fc, + struct pid_namespace *pid_ns, + struct user_namespace *user_ns) { + struct proc_fs_context *ctx = fc->fs_private; + + if (ctx->mask & (1 << Opt_gid)) + pid_ns->pid_gid = make_kgid(user_ns, ctx->gid); + if (ctx->mask & (1 << Opt_hidepid)) + pid_ns->hide_pid = ctx->hidepid; +} + +static int proc_fill_super(struct super_block *s, struct fs_context *fc) +{ + struct pid_namespace *pid_ns = get_pid_ns(s->s_fs_info); + struct inode *root_inode; + int ret; + + proc_apply_options(s, fc, pid_ns, current_user_ns()); + + /* User space would break if executables or devices appear on proc */ + s->s_iflags |= SB_I_USERNS_VISIBLE | SB_I_NOEXEC | SB_I_NODEV; + s->s_flags |= SB_NODIRATIME | SB_NOSUID | SB_NOEXEC; + s->s_blocksize = 1024; + s->s_blocksize_bits = 10; + s->s_magic = PROC_SUPER_MAGIC; + s->s_op = &proc_sops; + s->s_time_gran = 1; + + /* + * procfs isn't actually a stacking filesystem; however, there is + * too much magic going on inside it to permit stacking things on + * top of it + */ + s->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH; + + /* procfs dentries and inodes don't require IO to create */ + s->s_shrink.seeks = 0; + + pde_get(&proc_root); + root_inode = proc_get_inode(s, &proc_root); + if (!root_inode) { + pr_err("proc_fill_super: get root inode failed\n"); + return -ENOMEM; + } + + s->s_root = d_make_root(root_inode); + if (!s->s_root) { + pr_err("proc_fill_super: allocate dentry failed\n"); + return -ENOMEM; + } + + ret = proc_setup_self(s); + if (ret) { + return ret; + } + return proc_setup_thread_self(s); +} + +static int proc_reconfigure(struct fs_context *fc) +{ + struct super_block *sb = fc->root->d_sb; struct pid_namespace *pid = sb->s_fs_info; sync_filesystem(sb); - return !proc_parse_options(data, pid); + + proc_apply_options(sb, fc, pid, current_user_ns()); + return 0; } -static struct dentry *proc_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int proc_get_tree(struct fs_context *fc) { - struct pid_namespace *ns; + struct proc_fs_context *ctx = fc->fs_private; - if (flags & SB_KERNMOUNT) { - ns = data; - data = NULL; - } else { - ns = task_active_pid_ns(current); - } + put_user_ns(fc->user_ns); + fc->user_ns = get_user_ns(ctx->pid_ns->user_ns); + fc->s_fs_info = ctx->pid_ns; + return vfs_get_super(fc, vfs_get_keyed_super, proc_fill_super); +} - return mount_ns(fs_type, flags, data, ns, ns->user_ns, proc_fill_super); +static void proc_fs_context_free(struct fs_context *fc) +{ + struct proc_fs_context *ctx = fc->fs_private; + + if (ctx->pid_ns) + put_pid_ns(ctx->pid_ns); + kfree(ctx); +} + +static const struct fs_context_operations proc_fs_context_ops = { + .free = proc_fs_context_free, + .parse_param = proc_parse_param, + .get_tree = proc_get_tree, + .reconfigure = proc_reconfigure, +}; + +static int proc_init_fs_context(struct fs_context *fc) +{ + struct proc_fs_context *ctx; + + ctx = kzalloc(sizeof(struct proc_fs_context), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + ctx->pid_ns = get_pid_ns(task_active_pid_ns(current)); + fc->fs_private = ctx; + fc->ops = &proc_fs_context_ops; + return 0; } static void proc_kill_sb(struct super_block *sb) @@ -115,10 +207,11 @@ static void proc_kill_sb(struct super_block *sb) } static struct file_system_type proc_fs_type = { - .name = "proc", - .mount = proc_mount, - .kill_sb = proc_kill_sb, - .fs_flags = FS_USERNS_MOUNT, + .name = "proc", + .init_fs_context = proc_init_fs_context, + .parameters = &proc_fs_parameters, + .kill_sb = proc_kill_sb, + .fs_flags = FS_USERNS_MOUNT, }; void __init proc_root_init(void) @@ -156,7 +249,7 @@ static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentr { if (!proc_pid_lookup(dentry, flags)) return NULL; - + return proc_lookup(dir, dentry, flags); } @@ -209,9 +302,28 @@ struct proc_dir_entry proc_root = { int pid_ns_prepare_proc(struct pid_namespace *ns) { + struct proc_fs_context *ctx; + struct fs_context *fc; struct vfsmount *mnt; - mnt = kern_mount_data(&proc_fs_type, ns); + fc = fs_context_for_mount(&proc_fs_type, SB_KERNMOUNT); + if (IS_ERR(fc)) + return PTR_ERR(fc); + + if (fc->user_ns != ns->user_ns) { + put_user_ns(fc->user_ns); + fc->user_ns = get_user_ns(ns->user_ns); + } + + ctx = fc->fs_private; + if (ctx->pid_ns != ns) { + put_pid_ns(ctx->pid_ns); + get_pid_ns(ns); + ctx->pid_ns = ns; + } + + mnt = fc_mount(fc); + put_fs_context(fc); if (IS_ERR(mnt)) return PTR_ERR(mnt); |