Diffstat (limited to 'fs')
63 files changed, 592 insertions, 334 deletions
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt index 4c09d93d9569..b2f82cf6bf86 100644 --- a/fs/Kconfig.binfmt +++ b/fs/Kconfig.binfmt @@ -170,8 +170,8 @@ config BINFMT_MISC You can do other nice things, too. Read the file <file:Documentation/binfmt_misc.txt> to learn how to use this - feature, <file:Documentation/java.txt> for information about how - to include Java support. and <file:Documentation/mono.txt> for + feature, <file:Documentation/admin-guide/java.rst> for information about how + to include Java support. and <file:Documentation/admin-guide/mono.rst> for information about how to include Mono-based .NET support. To use binfmt_misc, you will need to mount it: diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 2472af2798c7..e6c1bd443806 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -2204,7 +2204,9 @@ static int elf_core_dump(struct coredump_params *cprm) dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE); - vma_filesz = kmalloc_array(segs - 1, sizeof(*vma_filesz), GFP_KERNEL); + if (segs - 1 > ULONG_MAX / sizeof(*vma_filesz)) + goto end_coredump; + vma_filesz = vmalloc((segs - 1) * sizeof(*vma_filesz)); if (!vma_filesz) goto end_coredump; @@ -2311,7 +2313,7 @@ end_coredump: cleanup: free_note_info(&info); kfree(shdr4extnum); - kfree(vma_filesz); + vfree(vma_filesz); kfree(phdr4note); kfree(elf); out: diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 74ed5aae6cea..180f910339f4 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -202,27 +202,31 @@ static struct ratelimit_state printk_limits[] = { void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...) { struct super_block *sb = fs_info->sb; - char lvl[4]; + char lvl[PRINTK_MAX_SINGLE_HEADER_LEN + 1]; struct va_format vaf; va_list args; - const char *type = logtypes[4]; + const char *type = NULL; int kern_level; struct ratelimit_state *ratelimit; va_start(args, fmt); - kern_level = printk_get_level(fmt); - if (kern_level) { + while ((kern_level = printk_get_level(fmt)) != 0) { size_t size = printk_skip_level(fmt) - fmt; - memcpy(lvl, fmt, size); - lvl[size] = '\0'; + + if (kern_level >= '0' && kern_level <= '7') { + memcpy(lvl, fmt, size); + lvl[size] = '\0'; + type = logtypes[kern_level - '0']; + ratelimit = &printk_limits[kern_level - '0']; + } fmt += size; - type = logtypes[kern_level - '0']; - ratelimit = &printk_limits[kern_level - '0']; - } else { + } + + if (!type) { *lvl = '\0'; - /* Default to debug output */ - ratelimit = &printk_limits[7]; + type = logtypes[4]; + ratelimit = &printk_limits[4]; } vaf.fmt = fmt; diff --git a/fs/buffer.c b/fs/buffer.c index b205a629001d..1613656028d6 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -3403,7 +3403,7 @@ void free_buffer_head(struct buffer_head *bh) } EXPORT_SYMBOL(free_buffer_head); -static void buffer_exit_cpu(int cpu) +static int buffer_exit_cpu_dead(unsigned int cpu) { int i; struct bh_lru *b = &per_cpu(bh_lrus, cpu); @@ -3414,14 +3414,7 @@ static void buffer_exit_cpu(int cpu) } this_cpu_add(bh_accounting.nr, per_cpu(bh_accounting, cpu).nr); per_cpu(bh_accounting, cpu).nr = 0; -} - -static int buffer_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu) -{ - if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) - buffer_exit_cpu((unsigned long)hcpu); - return NOTIFY_OK; + return 0; } /** @@ -3471,6 +3464,7 @@ EXPORT_SYMBOL(bh_submit_read); void __init buffer_init(void) { unsigned long nrpages; + int ret; bh_cachep = kmem_cache_create("buffer_head", sizeof(struct buffer_head), 0, @@ -3483,5 +3477,7 @@ void __init 
buffer_init(void) */ nrpages = (nr_free_buffer_pages() * 10) / 100; max_buffer_heads = nrpages * (PAGE_SIZE / sizeof(struct buffer_head)); - hotcpu_notifier(buffer_cpu_notify, 0); + ret = cpuhp_setup_state_nocalls(CPUHP_FS_BUFF_DEAD, "fs/buffer:dead", + NULL, buffer_exit_cpu_dead); + WARN_ON(ret < 0); } diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 78180d151730..a594c7879cc2 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -1261,26 +1261,30 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags) return -ECHILD; op = ceph_snap(dir) == CEPH_SNAPDIR ? - CEPH_MDS_OP_LOOKUPSNAP : CEPH_MDS_OP_LOOKUP; + CEPH_MDS_OP_LOOKUPSNAP : CEPH_MDS_OP_GETATTR; req = ceph_mdsc_create_request(mdsc, op, USE_ANY_MDS); if (!IS_ERR(req)) { req->r_dentry = dget(dentry); - req->r_num_caps = 2; + req->r_num_caps = op == CEPH_MDS_OP_GETATTR ? 1 : 2; mask = CEPH_STAT_CAP_INODE | CEPH_CAP_AUTH_SHARED; if (ceph_security_xattr_wanted(dir)) mask |= CEPH_CAP_XATTR_SHARED; req->r_args.getattr.mask = mask; - req->r_locked_dir = dir; err = ceph_mdsc_do_request(mdsc, NULL, req); - if (err == 0 || err == -ENOENT) { - if (dentry == req->r_dentry) { - valid = !d_unhashed(dentry); - } else { - d_invalidate(req->r_dentry); - err = -EAGAIN; - } + switch (err) { + case 0: + if (d_really_is_positive(dentry) && + d_inode(dentry) == req->r_target_inode) + valid = 1; + break; + case -ENOENT: + if (d_really_is_negative(dentry)) + valid = 1; + /* Fallthrough */ + default: + break; } ceph_mdsc_put_request(req); dout("d_revalidate %p lookup result=%d\n", diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index 8347c90cf483..5eb04129f938 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -808,7 +808,11 @@ calc_seckey(struct cifs_ses *ses) struct crypto_skcipher *tfm_arc4; struct scatterlist sgin, sgout; struct skcipher_request *req; - unsigned char sec_key[CIFS_SESS_KEY_SIZE]; /* a nonce */ + unsigned char *sec_key; + + sec_key = kmalloc(CIFS_SESS_KEY_SIZE, GFP_KERNEL); + if (sec_key == NULL) + return -ENOMEM; get_random_bytes(sec_key, CIFS_SESS_KEY_SIZE); @@ -816,7 +820,7 @@ calc_seckey(struct cifs_ses *ses) if (IS_ERR(tfm_arc4)) { rc = PTR_ERR(tfm_arc4); cifs_dbg(VFS, "could not allocate crypto API arc4\n"); - return rc; + goto out; } rc = crypto_skcipher_setkey(tfm_arc4, ses->auth_key.response, @@ -854,7 +858,8 @@ calc_seckey(struct cifs_ses *ses) out_free_cipher: crypto_free_skcipher(tfm_arc4); - +out: + kfree(sec_key); return rc; } diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 3f3185febc58..e3fed9249a04 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -3427,6 +3427,7 @@ static __u16 ACL_to_cifs_posix(char *parm_data, const char *pACL, __u16 rc = 0; struct cifs_posix_acl *cifs_acl = (struct cifs_posix_acl *)parm_data; struct posix_acl_xattr_header *local_acl = (void *)pACL; + struct posix_acl_xattr_entry *ace = (void *)(local_acl + 1); int count; int i; @@ -3453,8 +3454,7 @@ static __u16 ACL_to_cifs_posix(char *parm_data, const char *pACL, return 0; } for (i = 0; i < count; i++) { - rc = convert_ace_to_cifs_ace(&cifs_acl->ace_array[i], - (struct posix_acl_xattr_entry *)(local_acl + 1)); + rc = convert_ace_to_cifs_ace(&cifs_acl->ace_array[i], &ace[i]); if (rc != 0) { /* ACE not converted */ break; diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index aab5227979e2..4547aeddd12b 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -412,6 +412,9 @@ cifs_reconnect(struct TCP_Server_Info *server) } } while (server->tcpStatus == CifsNeedReconnect); + if (server->tcpStatus 
== CifsNeedNegotiate) + mod_delayed_work(cifsiod_wq, &server->echo, 0); + return rc; } @@ -421,17 +424,25 @@ cifs_echo_request(struct work_struct *work) int rc; struct TCP_Server_Info *server = container_of(work, struct TCP_Server_Info, echo.work); - unsigned long echo_interval = server->echo_interval; + unsigned long echo_interval; + + /* + * If we need to renegotiate, set echo interval to zero to + * immediately call echo service where we can renegotiate. + */ + if (server->tcpStatus == CifsNeedNegotiate) + echo_interval = 0; + else + echo_interval = server->echo_interval; /* - * We cannot send an echo if it is disabled or until the - * NEGOTIATE_PROTOCOL request is done, which is indicated by - * server->ops->need_neg() == true. Also, no need to ping if - * we got a response recently. + * We cannot send an echo if it is disabled. + * Also, no need to ping if we got a response recently. */ if (server->tcpStatus == CifsNeedReconnect || - server->tcpStatus == CifsExiting || server->tcpStatus == CifsNew || + server->tcpStatus == CifsExiting || + server->tcpStatus == CifsNew || (server->ops->can_echo && !server->ops->can_echo(server)) || time_before(jiffies, server->lstrp + echo_interval - HZ)) goto requeue_echo; @@ -442,7 +453,7 @@ cifs_echo_request(struct work_struct *work) server->hostname); requeue_echo: - queue_delayed_work(cifsiod_wq, &server->echo, echo_interval); + queue_delayed_work(cifsiod_wq, &server->echo, server->echo_interval); } static bool diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c index 9a28133ac3b8..9b774f4b50c8 100644 --- a/fs/crypto/fname.c +++ b/fs/crypto/fname.c @@ -39,65 +39,54 @@ static void fname_crypt_complete(struct crypto_async_request *req, int res) static int fname_encrypt(struct inode *inode, const struct qstr *iname, struct fscrypt_str *oname) { - u32 ciphertext_len; struct skcipher_request *req = NULL; DECLARE_FS_COMPLETION_RESULT(ecr); struct fscrypt_info *ci = inode->i_crypt_info; struct crypto_skcipher *tfm = ci->ci_ctfm; int res = 0; char iv[FS_CRYPTO_BLOCK_SIZE]; - struct scatterlist src_sg, dst_sg; + struct scatterlist sg; int padding = 4 << (ci->ci_flags & FS_POLICY_FLAGS_PAD_MASK); - char *workbuf, buf[32], *alloc_buf = NULL; - unsigned lim; + unsigned int lim; + unsigned int cryptlen; lim = inode->i_sb->s_cop->max_namelen(inode); if (iname->len <= 0 || iname->len > lim) return -EIO; - ciphertext_len = max(iname->len, (u32)FS_CRYPTO_BLOCK_SIZE); - ciphertext_len = round_up(ciphertext_len, padding); - ciphertext_len = min(ciphertext_len, lim); + /* + * Copy the filename to the output buffer for encrypting in-place and + * pad it with the needed number of NUL bytes. 
+ */ + cryptlen = max_t(unsigned int, iname->len, FS_CRYPTO_BLOCK_SIZE); + cryptlen = round_up(cryptlen, padding); + cryptlen = min(cryptlen, lim); + memcpy(oname->name, iname->name, iname->len); + memset(oname->name + iname->len, 0, cryptlen - iname->len); - if (ciphertext_len <= sizeof(buf)) { - workbuf = buf; - } else { - alloc_buf = kmalloc(ciphertext_len, GFP_NOFS); - if (!alloc_buf) - return -ENOMEM; - workbuf = alloc_buf; - } + /* Initialize the IV */ + memset(iv, 0, FS_CRYPTO_BLOCK_SIZE); - /* Allocate request */ + /* Set up the encryption request */ req = skcipher_request_alloc(tfm, GFP_NOFS); if (!req) { printk_ratelimited(KERN_ERR - "%s: crypto_request_alloc() failed\n", __func__); - kfree(alloc_buf); + "%s: skcipher_request_alloc() failed\n", __func__); return -ENOMEM; } skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, fname_crypt_complete, &ecr); + sg_init_one(&sg, oname->name, cryptlen); + skcipher_request_set_crypt(req, &sg, &sg, cryptlen, iv); - /* Copy the input */ - memcpy(workbuf, iname->name, iname->len); - if (iname->len < ciphertext_len) - memset(workbuf + iname->len, 0, ciphertext_len - iname->len); - - /* Initialize IV */ - memset(iv, 0, FS_CRYPTO_BLOCK_SIZE); - - /* Create encryption request */ - sg_init_one(&src_sg, workbuf, ciphertext_len); - sg_init_one(&dst_sg, oname->name, ciphertext_len); - skcipher_request_set_crypt(req, &src_sg, &dst_sg, ciphertext_len, iv); + /* Do the encryption */ res = crypto_skcipher_encrypt(req); if (res == -EINPROGRESS || res == -EBUSY) { + /* Request is being completed asynchronously; wait for it */ wait_for_completion(&ecr.completion); res = ecr.res; } - kfree(alloc_buf); skcipher_request_free(req); if (res < 0) { printk_ratelimited(KERN_ERR @@ -105,7 +94,7 @@ static int fname_encrypt(struct inode *inode, return res; } - oname->len = ciphertext_len; + oname->len = cryptlen; return 0; } diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c index 82f0285f5d08..67fb6d8876d0 100644 --- a/fs/crypto/keyinfo.c +++ b/fs/crypto/keyinfo.c @@ -185,7 +185,7 @@ int get_crypt_info(struct inode *inode) struct crypto_skcipher *ctfm; const char *cipher_str; int keysize; - u8 raw_key[FS_MAX_KEY_SIZE]; + u8 *raw_key = NULL; int res; res = fscrypt_initialize(); @@ -238,6 +238,15 @@ retry: if (res) goto out; + /* + * This cannot be a stack buffer because it is passed to the scatterlist + * crypto API as part of key derivation. 
+ */ + res = -ENOMEM; + raw_key = kmalloc(FS_MAX_KEY_SIZE, GFP_NOFS); + if (!raw_key) + goto out; + if (fscrypt_dummy_context_enabled(inode)) { memset(raw_key, 0x42, FS_AES_256_XTS_KEY_SIZE); goto got_key; @@ -276,7 +285,8 @@ got_key: if (res) goto out; - memzero_explicit(raw_key, sizeof(raw_key)); + kzfree(raw_key); + raw_key = NULL; if (cmpxchg(&inode->i_crypt_info, NULL, crypt_info) != NULL) { put_crypt_info(crypt_info); goto retry; @@ -287,7 +297,7 @@ out: if (res == -ENOKEY) res = 0; put_crypt_info(crypt_info); - memzero_explicit(raw_key, sizeof(raw_key)); + kzfree(raw_key); return res; } @@ -342,7 +342,7 @@ static inline void *lock_slot(struct address_space *mapping, void **slot) radix_tree_deref_slot_protected(slot, &mapping->tree_lock); entry |= RADIX_DAX_ENTRY_LOCK; - radix_tree_replace_slot(slot, (void *)entry); + radix_tree_replace_slot(&mapping->page_tree, slot, (void *)entry); return (void *)entry; } @@ -356,7 +356,7 @@ static inline void *unlock_slot(struct address_space *mapping, void **slot) radix_tree_deref_slot_protected(slot, &mapping->tree_lock); entry &= ~(unsigned long)RADIX_DAX_ENTRY_LOCK; - radix_tree_replace_slot(slot, (void *)entry); + radix_tree_replace_slot(&mapping->page_tree, slot, (void *)entry); return (void *)entry; } @@ -643,12 +643,14 @@ static void *dax_insert_mapping_entry(struct address_space *mapping, } mapping->nrexceptional++; } else { + struct radix_tree_node *node; void **slot; void *ret; - ret = __radix_tree_lookup(page_tree, index, NULL, &slot); + ret = __radix_tree_lookup(page_tree, index, &node, &slot); WARN_ON_ONCE(ret != entry); - radix_tree_replace_slot(slot, new_entry); + __radix_tree_replace(page_tree, node, slot, + new_entry, NULL, NULL); } if (vmf->flags & FAULT_FLAG_WRITE) radix_tree_tag_set(page_tree, index, PAGECACHE_TAG_DIRTY); diff --git a/fs/dlm/netlink.c b/fs/dlm/netlink.c index 1e6e227134d7..0643ae44f342 100644 --- a/fs/dlm/netlink.c +++ b/fs/dlm/netlink.c @@ -16,11 +16,7 @@ static uint32_t dlm_nl_seqnum; static uint32_t listener_nlportid; -static struct genl_family family = { - .id = GENL_ID_GENERATE, - .name = DLM_GENL_NAME, - .version = DLM_GENL_VERSION, -}; +static struct genl_family family; static int prepare_data(u8 cmd, struct sk_buff **skbp, size_t size) { @@ -76,9 +72,17 @@ static struct genl_ops dlm_nl_ops[] = { }, }; +static struct genl_family family __ro_after_init = { + .name = DLM_GENL_NAME, + .version = DLM_GENL_VERSION, + .ops = dlm_nl_ops, + .n_ops = ARRAY_SIZE(dlm_nl_ops), + .module = THIS_MODULE, +}; + int __init dlm_netlink_init(void) { - return genl_register_family_with_ops(&family, dlm_nl_ops); + return genl_register_family(&family); } void dlm_netlink_exit(void) diff --git a/fs/exec.c b/fs/exec.c index 4e497b9ee71e..923c57d96899 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1169,8 +1169,10 @@ no_thread_group: /* we have changed execution domain */ tsk->exit_signal = SIGCHLD; +#ifdef CONFIG_POSIX_TIMERS exit_itimers(sig); flush_itimer_signals(); +#endif if (atomic_read(&oldsighand->count) != 1) { struct sighand_struct *newsighand; diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 282a51b07c57..a8a750f59621 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -235,6 +235,7 @@ struct ext4_io_submit { #define EXT4_MAX_BLOCK_SIZE 65536 #define EXT4_MIN_BLOCK_LOG_SIZE 10 #define EXT4_MAX_BLOCK_LOG_SIZE 16 +#define EXT4_MAX_CLUSTER_LOG_SIZE 30 #ifdef __KERNEL__ # define EXT4_BLOCK_SIZE(s) ((s)->s_blocksize) #else diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 20da99da0a34..52b0530c5d65 100644 --- 
a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3565,7 +3565,15 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) if (blocksize < EXT4_MIN_BLOCK_SIZE || blocksize > EXT4_MAX_BLOCK_SIZE) { ext4_msg(sb, KERN_ERR, - "Unsupported filesystem blocksize %d", blocksize); + "Unsupported filesystem blocksize %d (%d log_block_size)", + blocksize, le32_to_cpu(es->s_log_block_size)); + goto failed_mount; + } + if (le32_to_cpu(es->s_log_block_size) > + (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) { + ext4_msg(sb, KERN_ERR, + "Invalid log block size: %u", + le32_to_cpu(es->s_log_block_size)); goto failed_mount; } @@ -3697,6 +3705,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) "block size (%d)", clustersize, blocksize); goto failed_mount; } + if (le32_to_cpu(es->s_log_cluster_size) > + (EXT4_MAX_CLUSTER_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) { + ext4_msg(sb, KERN_ERR, + "Invalid log cluster size: %u", + le32_to_cpu(es->s_log_cluster_size)); + goto failed_mount; + } sbi->s_cluster_bits = le32_to_cpu(es->s_log_cluster_size) - le32_to_cpu(es->s_log_block_size); sbi->s_clusters_per_group = diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 05713a5da083..ef600591d96f 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1769,15 +1769,13 @@ static long wb_writeback(struct bdi_writeback *wb, * become available for writeback. Otherwise * we'll just busyloop. */ - if (!list_empty(&wb->b_more_io)) { - trace_writeback_wait(wb, work); - inode = wb_inode(wb->b_more_io.prev); - spin_lock(&inode->i_lock); - spin_unlock(&wb->list_lock); - /* This function drops i_lock... */ - inode_sleep_on_writeback(inode); - spin_lock(&wb->list_lock); - } + trace_writeback_wait(wb, work); + inode = wb_inode(wb->b_more_io.prev); + spin_lock(&inode->i_lock); + spin_unlock(&wb->list_lock); + /* This function drops i_lock... */ + inode_sleep_on_writeback(inode); + spin_lock(&wb->list_lock); } spin_unlock(&wb->list_lock); blk_finish_plug(&plug); diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 6a4d0e5418a1..096f79997f75 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -286,6 +286,11 @@ const struct dentry_operations fuse_dentry_operations = { .d_release = fuse_dentry_release, }; +const struct dentry_operations fuse_root_dentry_operations = { + .d_init = fuse_dentry_init, + .d_release = fuse_dentry_release, +}; + int fuse_valid_type(int m) { return S_ISREG(m) || S_ISDIR(m) || S_ISLNK(m) || S_ISCHR(m) || @@ -1734,8 +1739,6 @@ static int fuse_setattr(struct dentry *entry, struct iattr *attr) * This should be done on write(), truncate() and chown(). */ if (!fc->handle_killpriv) { - int kill; - /* * ia_mode calculation may have used stale i_mode. * Refresh and recalculate. @@ -1745,12 +1748,11 @@ static int fuse_setattr(struct dentry *entry, struct iattr *attr) return ret; attr->ia_mode = inode->i_mode; - kill = should_remove_suid(entry); - if (kill & ATTR_KILL_SUID) { + if (inode->i_mode & S_ISUID) { attr->ia_valid |= ATTR_MODE; attr->ia_mode &= ~S_ISUID; } - if (kill & ATTR_KILL_SGID) { + if ((inode->i_mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) { attr->ia_valid |= ATTR_MODE; attr->ia_mode &= ~S_ISGID; } diff --git a/fs/fuse/file.c b/fs/fuse/file.c index abc66a6237fd..2401c5dabb2a 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1985,6 +1985,10 @@ static int fuse_write_end(struct file *file, struct address_space *mapping, { struct inode *inode = page->mapping->host; + /* Haven't copied anything? Skip zeroing, size extending, dirtying. 
*/ + if (!copied) + goto unlock; + if (!PageUptodate(page)) { /* Zero any unwritten bytes at the end of the page */ size_t endoff = (pos + copied) & ~PAGE_MASK; @@ -1995,6 +1999,8 @@ static int fuse_write_end(struct file *file, struct address_space *mapping, fuse_write_update_size(inode, pos + copied); set_page_dirty(page); + +unlock: unlock_page(page); put_page(page); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 0dfbb136e59a..91307940c8ac 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -692,6 +692,7 @@ static inline u64 get_node_id(struct inode *inode) extern const struct file_operations fuse_dev_operations; extern const struct dentry_operations fuse_dentry_operations; +extern const struct dentry_operations fuse_root_dentry_operations; /** * Inode to nodeid comparison. diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 17141099f2e7..6fe6a88ecb4a 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -1131,10 +1131,11 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) err = -ENOMEM; root = fuse_get_root_inode(sb, d.rootmode); + sb->s_d_op = &fuse_root_dentry_operations; root_dentry = d_make_root(root); if (!root_dentry) goto err_dev_free; - /* only now - we want root dentry with NULL ->d_op */ + /* Root dentry doesn't have .d_revalidate */ sb->s_d_op = &fuse_dentry_operations; init_req = fuse_request_alloc(0); diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c index 98b3eb7d8eaf..0ec137310320 100644 --- a/fs/isofs/rock.c +++ b/fs/isofs/rock.c @@ -377,9 +377,9 @@ repeat: { int p; for (p = 0; p < rr->u.ER.len_id; p++) - printk("%c", rr->u.ER.data[p]); + printk(KERN_CONT "%c", rr->u.ER.data[p]); } - printk("\n"); + printk(KERN_CONT "\n"); break; case SIG('P', 'X'): inode->i_mode = isonum_733(rr->u.PX.mode); diff --git a/fs/lockd/netns.h b/fs/lockd/netns.h index 5426189406c1..fb8cac88251a 100644 --- a/fs/lockd/netns.h +++ b/fs/lockd/netns.h @@ -15,6 +15,6 @@ struct lockd_net { struct list_head nsm_handles; }; -extern int lockd_net_id; +extern unsigned int lockd_net_id; #endif diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index fc4084ef4736..1c13dd80744f 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -57,7 +57,7 @@ static struct task_struct *nlmsvc_task; static struct svc_rqst *nlmsvc_rqst; unsigned long nlmsvc_timeout; -int lockd_net_id; +unsigned int lockd_net_id; /* * These can be set at insmod time (useful for NFS as root filesystem), diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 532d8e242d4d..484bebc20bca 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -197,7 +197,7 @@ static int nfs_callback_up_net(int minorversion, struct svc_serv *serv, } ret = -EPROTONOSUPPORT; - if (minorversion == 0) + if (!IS_ENABLED(CONFIG_NFS_V4_1) || minorversion == 0) ret = nfs4_callback_up_net(serv, net); else if (xprt->ops->bc_up) ret = xprt->ops->bc_up(serv, net); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index bf4ec5ecc97e..ce42dd00e4ee 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -2015,7 +2015,7 @@ static void nfsiod_stop(void) destroy_workqueue(wq); } -int nfs_net_id; +unsigned int nfs_net_id; EXPORT_SYMBOL_GPL(nfs_net_id); static int nfs_net_init(struct net *net) diff --git a/fs/nfs/netns.h b/fs/nfs/netns.h index fbce0d885d4c..5fbd2bde91ba 100644 --- a/fs/nfs/netns.h +++ b/fs/nfs/netns.h @@ -35,6 +35,6 @@ struct nfs_net { #endif }; -extern int nfs_net_id; +extern unsigned int nfs_net_id; #endif diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 9b3a82abab07..1452177c822d 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ 
-542,6 +542,13 @@ static inline bool nfs4_valid_open_stateid(const struct nfs4_state *state) return test_bit(NFS_STATE_RECOVERY_FAILED, &state->flags) == 0; } +static inline bool nfs4_state_match_open_stateid_other(const struct nfs4_state *state, + const nfs4_stateid *stateid) +{ + return test_bit(NFS_OPEN_STATE, &state->flags) && + nfs4_stateid_match_other(&state->open_stateid, stateid); +} + #else #define nfs4_close_state(a, b) do { } while (0) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 7897826d7c51..241da19b7da4 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1451,7 +1451,6 @@ static void nfs_resync_open_stateid_locked(struct nfs4_state *state) } static void nfs_clear_open_stateid_locked(struct nfs4_state *state, - nfs4_stateid *arg_stateid, nfs4_stateid *stateid, fmode_t fmode) { clear_bit(NFS_O_RDWR_STATE, &state->flags); @@ -1469,10 +1468,9 @@ static void nfs_clear_open_stateid_locked(struct nfs4_state *state, } if (stateid == NULL) return; - /* Handle races with OPEN */ - if (!nfs4_stateid_match_other(arg_stateid, &state->open_stateid) || - (nfs4_stateid_match_other(stateid, &state->open_stateid) && - !nfs4_stateid_is_newer(stateid, &state->open_stateid))) { + /* Handle OPEN+OPEN_DOWNGRADE races */ + if (nfs4_stateid_match_other(stateid, &state->open_stateid) && + !nfs4_stateid_is_newer(stateid, &state->open_stateid)) { nfs_resync_open_stateid_locked(state); return; } @@ -1486,7 +1484,9 @@ static void nfs_clear_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode) { write_seqlock(&state->seqlock); - nfs_clear_open_stateid_locked(state, arg_stateid, stateid, fmode); + /* Ignore, if the CLOSE argment doesn't match the current stateid */ + if (nfs4_state_match_open_stateid_other(state, arg_stateid)) + nfs_clear_open_stateid_locked(state, stateid, fmode); write_sequnlock(&state->seqlock); if (test_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags)) nfs4_schedule_state_manager(state->owner->so_server->nfs_client); @@ -2564,15 +2564,23 @@ static void nfs41_check_delegation_stateid(struct nfs4_state *state) static int nfs41_check_expired_locks(struct nfs4_state *state) { int status, ret = NFS_OK; - struct nfs4_lock_state *lsp; + struct nfs4_lock_state *lsp, *prev = NULL; struct nfs_server *server = NFS_SERVER(state->inode); if (!test_bit(LK_STATE_IN_USE, &state->flags)) goto out; + + spin_lock(&state->state_lock); list_for_each_entry(lsp, &state->lock_states, ls_locks) { if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags)) { struct rpc_cred *cred = lsp->ls_state->owner->so_cred; + atomic_inc(&lsp->ls_count); + spin_unlock(&state->state_lock); + + nfs4_put_lock_state(prev); + prev = lsp; + status = nfs41_test_and_free_expired_stateid(server, &lsp->ls_stateid, cred); @@ -2585,10 +2593,14 @@ static int nfs41_check_expired_locks(struct nfs4_state *state) set_bit(NFS_LOCK_LOST, &lsp->ls_flags); } else if (status != NFS_OK) { ret = status; - break; + nfs4_put_lock_state(prev); + goto out; } + spin_lock(&state->state_lock); } - }; + } + spin_unlock(&state->state_lock); + nfs4_put_lock_state(prev); out: return ret; } @@ -3122,7 +3134,8 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) } else if (is_rdwr) calldata->arg.fmode |= FMODE_READ|FMODE_WRITE; - if (!nfs4_valid_open_stateid(state)) + if (!nfs4_valid_open_stateid(state) || + test_bit(NFS_OPEN_STATE, &state->flags) == 0) call_close = 0; spin_unlock(&state->owner->so_lock); @@ -5569,6 +5582,7 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) switch 
(task->tk_status) { case 0: renew_lease(data->res.server, data->timestamp); + break; case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_DELEG_REVOKED: case -NFS4ERR_EXPIRED: @@ -5579,8 +5593,6 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) case -NFS4ERR_OLD_STATEID: case -NFS4ERR_STALE_STATEID: task->tk_status = 0; - if (data->roc) - pnfs_roc_set_barrier(data->inode, data->roc_barrier); break; default: if (nfs4_async_handle_error(task, data->res.server, @@ -5590,6 +5602,8 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) } } data->rpc_status = task->tk_status; + if (data->roc && data->rpc_status == 0) + pnfs_roc_set_barrier(data->inode, data->roc_barrier); } static void nfs4_delegreturn_release(void *calldata) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 5f4281ec5f72..0959c9661662 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1547,6 +1547,7 @@ restart: ssleep(1); case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_OLD_STATEID: case -NFS4ERR_BAD_STATEID: case -NFS4ERR_RECLAIM_BAD: case -NFS4ERR_RECLAIM_CONFLICT: diff --git a/fs/nfs_common/grace.c b/fs/nfs_common/grace.c index fd8c9a5bcac4..420d3a0ab258 100644 --- a/fs/nfs_common/grace.c +++ b/fs/nfs_common/grace.c @@ -9,7 +9,7 @@ #include <net/netns/generic.h> #include <linux/fs.h> -static int grace_net_id; +static unsigned int grace_net_id; static DEFINE_SPINLOCK(grace_lock); /** diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index ee36efd5aece..3714231a9d0f 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -124,5 +124,5 @@ struct nfsd_net { /* Simple check to find out if a given net was properly initialized */ #define nfsd_netns_ready(nn) ((nn)->sessionid_hashtbl) -extern int nfsd_net_id; +extern unsigned int nfsd_net_id; #endif /* __NFSD_NETNS_H__ */ diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 36b2af931e06..2857e46d5cc5 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1201,7 +1201,7 @@ static int create_proc_exports_entry(void) } #endif -int nfsd_net_id; +unsigned int nfsd_net_id; static __net_init int nfsd_init_net(struct net *net) { diff --git a/fs/nsfs.c b/fs/nsfs.c index 8718af895eab..8c9fb29c6673 100644 --- a/fs/nsfs.c +++ b/fs/nsfs.c @@ -118,7 +118,7 @@ again: return ret; } -static int open_related_ns(struct ns_common *ns, +int open_related_ns(struct ns_common *ns, struct ns_common *(*get_ns)(struct ns_common *ns)) { struct path path = {}; diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index c5c5b9748ea3..9a88984f9f6f 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -1950,8 +1950,7 @@ static void ocfs2_write_end_inline(struct inode *inode, loff_t pos, } int ocfs2_write_end_nolock(struct address_space *mapping, - loff_t pos, unsigned len, unsigned copied, - struct page *page, void *fsdata) + loff_t pos, unsigned len, unsigned copied, void *fsdata) { int i, ret; unsigned from, to, start = pos & (PAGE_SIZE - 1); @@ -2064,7 +2063,7 @@ static int ocfs2_write_end(struct file *file, struct address_space *mapping, int ret; struct inode *inode = mapping->host; - ret = ocfs2_write_end_nolock(mapping, pos, len, copied, page, fsdata); + ret = ocfs2_write_end_nolock(mapping, pos, len, copied, fsdata); up_write(&OCFS2_I(inode)->ip_alloc_sem); ocfs2_inode_unlock(inode, 1); @@ -2241,7 +2240,7 @@ static int ocfs2_dio_get_block(struct inode *inode, sector_t iblock, dwc->dw_zero_count++; } - ret = ocfs2_write_end_nolock(inode->i_mapping, pos, len, len, NULL, wc); + ret = ocfs2_write_end_nolock(inode->i_mapping, pos, len, len, 
wc); BUG_ON(ret != len); ret = 0; unlock: diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h index b1c9f28a57b1..8614ff069d99 100644 --- a/fs/ocfs2/aops.h +++ b/fs/ocfs2/aops.h @@ -44,8 +44,7 @@ int walk_page_buffers( handle_t *handle, struct buffer_head *bh)); int ocfs2_write_end_nolock(struct address_space *mapping, - loff_t pos, unsigned len, unsigned copied, - struct page *page, void *fsdata); + loff_t pos, unsigned len, unsigned copied, void *fsdata); typedef enum { OCFS2_WRITE_BUFFER = 0, diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 636abcbd4650..9158c9825094 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -741,7 +741,7 @@ static inline void o2hb_prepare_block(struct o2hb_region *reg, hb_block = (struct o2hb_disk_heartbeat_block *)slot->ds_raw_block; memset(hb_block, 0, reg->hr_block_bytes); /* TODO: time stuff */ - cputime = CURRENT_TIME.tv_sec; + cputime = ktime_get_real_seconds(); if (!cputime) cputime = 1; diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 3f828a187049..a464c8088170 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -1609,8 +1609,6 @@ way_up_top: __dlm_insert_mle(dlm, mle); response = DLM_MASTER_RESP_NO; } else { - // mlog(0, "mle was found\n"); - set_maybe = 1; spin_lock(&tmpmle->spinlock); if (tmpmle->master == dlm->node_num) { mlog(ML_ERROR, "no lockres, but an mle with this node as master!\n"); @@ -1625,8 +1623,7 @@ way_up_top: response = DLM_MASTER_RESP_NO; } else response = DLM_MASTER_RESP_MAYBE; - if (set_maybe) - set_bit(request->node_idx, tmpmle->maybe_map); + set_bit(request->node_idx, tmpmle->maybe_map); spin_unlock(&tmpmle->spinlock); } spin_unlock(&dlm->master_lock); @@ -1644,12 +1641,6 @@ send_response: * dlm_assert_master_worker() isn't called, we drop it here. 
*/ if (dispatch_assert) { - if (response != DLM_MASTER_RESP_YES) - mlog(ML_ERROR, "invalid response %d\n", response); - if (!res) { - mlog(ML_ERROR, "bad lockres while trying to assert!\n"); - BUG(); - } mlog(0, "%u is the owner of %.*s, cleaning everyone else\n", dlm->node_num, res->lockname.len, res->lockname.name); spin_lock(&res->spinlock); diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index dd5cb8bcefd1..74407c6dd592 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -2966,8 +2966,6 @@ int dlm_finalize_reco_handler(struct o2net_msg *msg, u32 len, void *data, spin_unlock(&dlm->spinlock); dlm_kick_recovery_thread(dlm); break; - default: - BUG(); } mlog(0, "%s: recovery done, reco master was %u, dead now %u, master now %u\n", diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index c56a7679df93..382401d3e88f 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -703,7 +703,7 @@ static int ocfs2_remove_inode(struct inode *inode, goto bail_commit; } - di->i_dtime = cpu_to_le64(CURRENT_TIME.tv_sec); + di->i_dtime = cpu_to_le64(ktime_get_real_seconds()); di->i_flags &= cpu_to_le32(~(OCFS2_VALID_FL | OCFS2_ORPHANED_FL)); ocfs2_journal_dirty(handle, di_bh); diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index a244f14c6b87..d5e5fa7f0743 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -1947,7 +1947,7 @@ static void ocfs2_queue_orphan_scan(struct ocfs2_super *osb) */ seqno++; os->os_count++; - os->os_scantime = CURRENT_TIME; + os->os_scantime = ktime_get_seconds(); unlock: ocfs2_orphan_scan_unlock(osb, seqno); out: @@ -2004,7 +2004,7 @@ void ocfs2_orphan_scan_start(struct ocfs2_super *osb) struct ocfs2_orphan_scan *os; os = &osb->osb_orphan_scan; - os->os_scantime = CURRENT_TIME; + os->os_scantime = ktime_get_seconds(); if (ocfs2_is_hard_readonly(osb) || ocfs2_mount_local(osb)) atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE); else { diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c index 71545ad4628c..429088786e93 100644 --- a/fs/ocfs2/mmap.c +++ b/fs/ocfs2/mmap.c @@ -120,8 +120,7 @@ static int __ocfs2_page_mkwrite(struct file *file, struct buffer_head *di_bh, ret = VM_FAULT_NOPAGE; goto out; } - ret = ocfs2_write_end_nolock(mapping, pos, len, len, locked_page, - fsdata); + ret = ocfs2_write_end_nolock(mapping, pos, len, len, fsdata); BUG_ON(ret != len); ret = VM_FAULT_LOCKED; out: diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 8d887c75765c..3b0a10d9b36f 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -516,6 +516,7 @@ static int __ocfs2_mknod_locked(struct inode *dir, struct ocfs2_extent_list *fel; u16 feat; struct ocfs2_inode_info *oi = OCFS2_I(inode); + struct timespec64 ts; *new_fe_bh = NULL; @@ -564,10 +565,11 @@ static int __ocfs2_mknod_locked(struct inode *dir, fe->i_last_eb_blk = 0; strcpy(fe->i_signature, OCFS2_INODE_SIGNATURE); fe->i_flags |= cpu_to_le32(OCFS2_VALID_FL); + ktime_get_real_ts64(&ts); fe->i_atime = fe->i_ctime = fe->i_mtime = - cpu_to_le64(CURRENT_TIME.tv_sec); + cpu_to_le64(ts.tv_sec); fe->i_mtime_nsec = fe->i_ctime_nsec = fe->i_atime_nsec = - cpu_to_le32(CURRENT_TIME.tv_nsec); + cpu_to_le32(ts.tv_nsec); fe->i_dtime = 0; /* diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index e63af7ddfe68..7e5958b0be6b 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -224,7 +224,7 @@ struct ocfs2_orphan_scan { struct ocfs2_super *os_osb; struct ocfs2_lock_res os_lockres; /* lock to synchronize scans */ struct delayed_work os_orphan_scan_work; - struct timespec os_scantime; /* time this node ran the scan 
*/ + time64_t os_scantime; /* time this node ran the scan */ u32 os_count; /* tracks node specific scans */ u32 os_seqno; /* tracks cluster wide scans */ atomic_t os_state; /* ACTIVE or INACTIVE */ diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 19238512a324..738b4ea8e990 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -478,7 +478,6 @@ again: if (ret) { mlog_errno(ret); ocfs2_unlock_refcount_tree(osb, tree, rw); - ocfs2_refcount_tree_put(tree); goto out; } diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index f56fe39fab04..c894d945b084 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -337,7 +337,7 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len) out += snprintf(buf + out, len - out, "Disabled\n"); else out += snprintf(buf + out, len - out, "%lu seconds ago\n", - (get_seconds() - os->os_scantime.tv_sec)); + (unsigned long)(ktime_get_seconds() - os->os_scantime)); out += snprintf(buf + out, len - out, "%10s => %3s %10s\n", "Slots", "Num", "RecoGen"); diff --git a/fs/orangefs/orangefs-debugfs.c b/fs/orangefs/orangefs-debugfs.c index d484068ca716..38887cc5577f 100644 --- a/fs/orangefs/orangefs-debugfs.c +++ b/fs/orangefs/orangefs-debugfs.c @@ -114,6 +114,7 @@ static const struct seq_operations help_debug_ops = { }; const struct file_operations debug_help_fops = { + .owner = THIS_MODULE, .open = orangefs_debug_help_open, .read = seq_read, .release = seq_release, @@ -121,6 +122,7 @@ const struct file_operations debug_help_fops = { }; static const struct file_operations kernel_debug_fops = { + .owner = THIS_MODULE, .open = orangefs_debug_open, .read = orangefs_debug_read, .write = orangefs_debug_write, diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index edd46a0e951d..0e100856c7b8 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -328,11 +328,11 @@ static struct dentry *ovl_d_real(struct dentry *dentry, if (!real) goto bug; + /* Handle recursion */ + real = d_real(real, inode, open_flags); + if (!inode || inode == d_inode(real)) return real; - - /* Handle recursion */ - return d_real(real, inode, open_flags); bug: WARN(1, "ovl_d_real(%pd4, %s:%lu): real dentry not found\n", dentry, inode ? inode->i_sb->s_id : "NULL", inode ? inode->i_ino : 0); diff --git a/fs/proc/array.c b/fs/proc/array.c index 81818adb8e9e..51a4213afa2e 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -245,7 +245,7 @@ void render_sigset_t(struct seq_file *m, const char *header, if (sigismember(set, i+2)) x |= 2; if (sigismember(set, i+3)) x |= 4; if (sigismember(set, i+4)) x |= 8; - seq_printf(m, "%x", x); + seq_putc(m, hex_asc[x]); } while (i >= 4); seq_putc(m, '\n'); @@ -342,10 +342,11 @@ static inline void task_cap(struct seq_file *m, struct task_struct *p) static inline void task_seccomp(struct seq_file *m, struct task_struct *p) { + seq_put_decimal_ull(m, "NoNewPrivs:\t", task_no_new_privs(p)); #ifdef CONFIG_SECCOMP - seq_put_decimal_ull(m, "Seccomp:\t", p->seccomp.mode); - seq_putc(m, '\n'); + seq_put_decimal_ull(m, "\nSeccomp:\t", p->seccomp.mode); #endif + seq_putc(m, '\n'); } static inline void task_context_switch_counts(struct seq_file *m, diff --git a/fs/proc/base.c b/fs/proc/base.c index ca651ac00660..9b99df4893a4 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -104,9 +104,12 @@ * in /proc for a task before it execs a suid executable. 
*/ +static u8 nlink_tid; +static u8 nlink_tgid; + struct pid_entry { const char *name; - int len; + unsigned int len; umode_t mode; const struct inode_operations *iop; const struct file_operations *fop; @@ -139,13 +142,13 @@ struct pid_entry { * Count the number of hardlinks for the pid_entry table, excluding the . * and .. links. */ -static unsigned int pid_entry_count_dirs(const struct pid_entry *entries, +static unsigned int __init pid_entry_nlink(const struct pid_entry *entries, unsigned int n) { unsigned int i; unsigned int count; - count = 0; + count = 2; for (i = 0; i < n; ++i) { if (S_ISDIR(entries[i].mode)) ++count; @@ -1967,7 +1970,7 @@ out: struct map_files_info { fmode_t mode; - unsigned long len; + unsigned int len; unsigned char name[4*sizeof(long)+2]; /* max: %lx-%lx\0 */ }; @@ -2412,14 +2415,14 @@ static struct dentry *proc_pident_lookup(struct inode *dir, * Yes, it does not scale. And it should not. Don't add * new entries into /proc/<tgid>/ without very good reasons. */ - last = &ents[nents - 1]; - for (p = ents; p <= last; p++) { + last = &ents[nents]; + for (p = ents; p < last; p++) { if (p->len != dentry->d_name.len) continue; if (!memcmp(dentry->d_name.name, p->name, p->len)) break; } - if (p > last) + if (p >= last) goto out; error = proc_pident_instantiate(dir, dentry, task, p); @@ -2444,7 +2447,7 @@ static int proc_pident_readdir(struct file *file, struct dir_context *ctx, if (ctx->pos >= nents + 2) goto out; - for (p = ents + (ctx->pos - 2); p <= ents + nents - 1; p++) { + for (p = ents + (ctx->pos - 2); p < ents + nents; p++) { if (!proc_fill_cache(file, ctx, p->name, p->len, proc_pident_instantiate, task, p)) break; @@ -3068,8 +3071,7 @@ static int proc_pid_instantiate(struct inode *dir, inode->i_fop = &proc_tgid_base_operations; inode->i_flags|=S_IMMUTABLE; - set_nlink(inode, 2 + pid_entry_count_dirs(tgid_base_stuff, - ARRAY_SIZE(tgid_base_stuff))); + set_nlink(inode, nlink_tgid); d_set_d_op(dentry, &pid_dentry_operations); @@ -3361,8 +3363,7 @@ static int proc_task_instantiate(struct inode *dir, inode->i_fop = &proc_tid_base_operations; inode->i_flags|=S_IMMUTABLE; - set_nlink(inode, 2 + pid_entry_count_dirs(tid_base_stuff, - ARRAY_SIZE(tid_base_stuff))); + set_nlink(inode, nlink_tid); d_set_d_op(dentry, &pid_dentry_operations); @@ -3552,3 +3553,9 @@ static const struct file_operations proc_task_operations = { .iterate_shared = proc_task_readdir, .llseek = generic_file_llseek, }; + +void __init set_proc_pid_nlink(void) +{ + nlink_tid = pid_entry_nlink(tid_base_stuff, ARRAY_SIZE(tid_base_stuff)); + nlink_tgid = pid_entry_nlink(tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff)); +} diff --git a/fs/proc/inode.c b/fs/proc/inode.c index e69ebe648a34..783bc19644d1 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -138,6 +138,16 @@ static void unuse_pde(struct proc_dir_entry *pde) /* pde is locked */ static void close_pdeo(struct proc_dir_entry *pde, struct pde_opener *pdeo) { + /* + * close() (proc_reg_release()) can't delete an entry and proceed: + * ->release hook needs to be available at the right moment. + * + * rmmod (remove_proc_entry() et al) can't delete an entry and proceed: + * "struct file" needs to be available at the right moment. + * + * Therefore, first process to enter this function does ->release() and + * signals its completion to the other process which does nothing. 
+ */ if (pdeo->closing) { /* somebody else is doing that, just wait */ DECLARE_COMPLETION_ONSTACK(c); @@ -147,12 +157,13 @@ static void close_pdeo(struct proc_dir_entry *pde, struct pde_opener *pdeo) spin_lock(&pde->pde_unload_lock); } else { struct file *file; - pdeo->closing = 1; + pdeo->closing = true; spin_unlock(&pde->pde_unload_lock); file = pdeo->file; pde->proc_fops->release(file_inode(file), file); spin_lock(&pde->pde_unload_lock); - list_del_init(&pdeo->lh); + /* After ->release. */ + list_del(&pdeo->lh); if (pdeo->c) complete(pdeo->c); kfree(pdeo); @@ -167,6 +178,8 @@ void proc_entry_rundown(struct proc_dir_entry *de) if (atomic_add_return(BIAS, &de->in_use) != BIAS) wait_for_completion(&c); + /* ->pde_openers list can't grow from now on. */ + spin_lock(&de->pde_unload_lock); while (!list_empty(&de->pde_openers)) { struct pde_opener *pdeo; @@ -312,16 +325,17 @@ static int proc_reg_open(struct inode *inode, struct file *file) struct pde_opener *pdeo; /* - * What for, you ask? Well, we can have open, rmmod, remove_proc_entry - * sequence. ->release won't be called because ->proc_fops will be - * cleared. Depending on complexity of ->release, consequences vary. + * Ensure that + * 1) PDE's ->release hook will be called no matter what + * either normally by close()/->release, or forcefully by + * rmmod/remove_proc_entry. + * + * 2) rmmod isn't blocked by opening file in /proc and sitting on + * the descriptor (including "rmmod foo </proc/foo" scenario). * - * We can't wait for mercy when close will be done for real, it's - * deadlockable: rmmod foo </proc/foo . So, we're going to do ->release - * by hand in remove_proc_entry(). For this, save opener's credentials - * for later. + * Save every "struct file" with custom ->release hook. */ - pdeo = kzalloc(sizeof(struct pde_opener), GFP_KERNEL); + pdeo = kmalloc(sizeof(struct pde_opener), GFP_KERNEL); if (!pdeo) return -ENOMEM; @@ -338,7 +352,8 @@ static int proc_reg_open(struct inode *inode, struct file *file) if (rv == 0 && release) { /* To know what to release. */ pdeo->file = file; - /* Strictly for "too late" ->release in proc_reg_release(). 
*/ + pdeo->closing = false; + pdeo->c = NULL; spin_lock(&pde->pde_unload_lock); list_add(&pdeo->lh, &pde->pde_openers); spin_unlock(&pde->pde_unload_lock); diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 5378441ec1b7..bbba5d22aada 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -203,7 +203,7 @@ struct proc_dir_entry *proc_create_mount_point(const char *name); struct pde_opener { struct file *file; struct list_head lh; - int closing; + bool closing; struct completion *c; }; extern const struct inode_operations proc_link_inode_operations; @@ -211,6 +211,7 @@ extern const struct inode_operations proc_link_inode_operations; extern const struct inode_operations proc_pid_link_inode_operations; extern void proc_init_inodecache(void); +void set_proc_pid_nlink(void); extern struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *); extern int proc_fill_super(struct super_block *, void *data, int flags); extern void proc_entry_rundown(struct proc_dir_entry *); diff --git a/fs/proc/root.c b/fs/proc/root.c index 8d3e484055a6..4bd0373576b5 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -122,6 +122,7 @@ void __init proc_root_init(void) int err; proc_init_inodecache(); + set_proc_pid_nlink(); err = register_filesystem(&proc_fs_type); if (err) return; diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 35b92d81692f..958f32545064 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1588,6 +1588,7 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr, } while (pte++, addr += PAGE_SIZE, addr != end); pte_unmap_unlock(orig_pte, ptl); + cond_resched(); return 0; } #ifdef CONFIG_HUGETLB_PAGE diff --git a/fs/pstore/Kconfig b/fs/pstore/Kconfig index be40813eff52..b42e5bd6d8ff 100644 --- a/fs/pstore/Kconfig +++ b/fs/pstore/Kconfig @@ -86,4 +86,4 @@ config PSTORE_RAM Note that for historical reasons, the module will be named "ramoops.ko". - For more information, see Documentation/ramoops.txt. + For more information, see Documentation/admin-guide/ramoops.rst. diff --git a/fs/pstore/ftrace.c b/fs/pstore/ftrace.c index d4887705bb61..899d0ba0bd6c 100644 --- a/fs/pstore/ftrace.c +++ b/fs/pstore/ftrace.c @@ -27,6 +27,9 @@ #include <asm/barrier.h> #include "internal.h" +/* This doesn't need to be atomic: speed is chosen over correctness here. */ +static u64 pstore_ftrace_stamp; + static void notrace pstore_ftrace_call(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, @@ -42,6 +45,7 @@ static void notrace pstore_ftrace_call(unsigned long ip, rec.ip = ip; rec.parent_ip = parent_ip; + pstore_ftrace_write_timestamp(&rec, pstore_ftrace_stamp++); pstore_ftrace_encode_cpu(&rec, raw_smp_processor_id()); psinfo->write_buf(PSTORE_TYPE_FTRACE, 0, NULL, 0, (void *)&rec, 0, sizeof(rec), psinfo); @@ -71,10 +75,13 @@ static ssize_t pstore_ftrace_knob_write(struct file *f, const char __user *buf, if (!on ^ pstore_ftrace_enabled) goto out; - if (on) + if (on) { + ftrace_ops_set_global_filter(&pstore_ftrace_ops); ret = register_ftrace_function(&pstore_ftrace_ops); - else + } else { ret = unregister_ftrace_function(&pstore_ftrace_ops); + } + if (ret) { pr_err("%s: unable to %sregister ftrace ops: %zd\n", __func__, on ? 
"" : "un", ret); diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index 1781dc50762e..57c0646479f5 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c @@ -107,9 +107,11 @@ static int pstore_ftrace_seq_show(struct seq_file *s, void *v) struct pstore_ftrace_seq_data *data = v; struct pstore_ftrace_record *rec = (void *)(ps->data + data->off); - seq_printf(s, "%d %08lx %08lx %pf <- %pF\n", - pstore_ftrace_decode_cpu(rec), rec->ip, rec->parent_ip, - (void *)rec->ip, (void *)rec->parent_ip); + seq_printf(s, "CPU:%d ts:%llu %08lx %08lx %pf <- %pF\n", + pstore_ftrace_decode_cpu(rec), + pstore_ftrace_read_timestamp(rec), + rec->ip, rec->parent_ip, (void *)rec->ip, + (void *)rec->parent_ip); return 0; } @@ -197,11 +199,14 @@ static int pstore_unlink(struct inode *dir, struct dentry *dentry) if (err) return err; - if (p->psi->erase) + if (p->psi->erase) { + mutex_lock(&p->psi->read_mutex); p->psi->erase(p->type, p->id, p->count, d_inode(dentry)->i_ctime, p->psi); - else + mutex_unlock(&p->psi->read_mutex); + } else { return -EPERM; + } return simple_unlink(dir, dentry); } diff --git a/fs/pstore/internal.h b/fs/pstore/internal.h index e38a22b31282..da416e6591c9 100644 --- a/fs/pstore/internal.h +++ b/fs/pstore/internal.h @@ -5,40 +5,6 @@ #include <linux/time.h> #include <linux/pstore.h> -#if NR_CPUS <= 2 && defined(CONFIG_ARM_THUMB) -#define PSTORE_CPU_IN_IP 0x1 -#elif NR_CPUS <= 4 && defined(CONFIG_ARM) -#define PSTORE_CPU_IN_IP 0x3 -#endif - -struct pstore_ftrace_record { - unsigned long ip; - unsigned long parent_ip; -#ifndef PSTORE_CPU_IN_IP - unsigned int cpu; -#endif -}; - -static inline void -pstore_ftrace_encode_cpu(struct pstore_ftrace_record *rec, unsigned int cpu) -{ -#ifndef PSTORE_CPU_IN_IP - rec->cpu = cpu; -#else - rec->ip |= cpu; -#endif -} - -static inline unsigned int -pstore_ftrace_decode_cpu(struct pstore_ftrace_record *rec) -{ -#ifndef PSTORE_CPU_IN_IP - return rec->cpu; -#else - return rec->ip & PSTORE_CPU_IN_IP; -#endif -} - #ifdef CONFIG_PSTORE_FTRACE extern void pstore_register_ftrace(void); extern void pstore_unregister_ftrace(void); diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index 14984d902a99..729677e18e36 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -493,6 +493,7 @@ static void pstore_dump(struct kmsg_dumper *dumper, if (!is_locked) { pr_err("pstore dump routine blocked in %s path, may corrupt error record\n" , in_nmi() ? 
"NMI" : why); + return; } } else { spin_lock_irqsave(&psinfo->buf_lock, flags); @@ -584,8 +585,8 @@ static void pstore_console_write(struct console *con, const char *s, unsigned c) } else { spin_lock_irqsave(&psinfo->buf_lock, flags); } - memcpy(psinfo->buf, s, c); - psinfo->write(PSTORE_TYPE_CONSOLE, 0, &id, 0, 0, 0, c, psinfo); + psinfo->write_buf(PSTORE_TYPE_CONSOLE, 0, &id, 0, + s, 0, c, psinfo); spin_unlock_irqrestore(&psinfo->buf_lock, flags); s += c; c = e - s; diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 6ad831b9d1b8..27c059e1760a 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -85,10 +85,10 @@ MODULE_PARM_DESC(ramoops_ecc, "bytes ECC)"); struct ramoops_context { - struct persistent_ram_zone **przs; - struct persistent_ram_zone *cprz; - struct persistent_ram_zone *fprz; - struct persistent_ram_zone *mprz; + struct persistent_ram_zone **dprzs; /* Oops dump zones */ + struct persistent_ram_zone *cprz; /* Console zone */ + struct persistent_ram_zone **fprzs; /* Ftrace zones */ + struct persistent_ram_zone *mprz; /* PMSG zone */ phys_addr_t phys_addr; unsigned long size; unsigned int memtype; @@ -97,12 +97,14 @@ struct ramoops_context { size_t ftrace_size; size_t pmsg_size; int dump_oops; + u32 flags; struct persistent_ram_ecc_info ecc_info; unsigned int max_dump_cnt; unsigned int dump_write_cnt; /* _read_cnt need clear on ramoops_pstore_open */ unsigned int dump_read_cnt; unsigned int console_read_cnt; + unsigned int max_ftrace_cnt; unsigned int ftrace_read_cnt; unsigned int pmsg_read_cnt; struct pstore_info pstore; @@ -180,16 +182,69 @@ static bool prz_ok(struct persistent_ram_zone *prz) persistent_ram_ecc_string(prz, NULL, 0)); } +static ssize_t ftrace_log_combine(struct persistent_ram_zone *dest, + struct persistent_ram_zone *src) +{ + size_t dest_size, src_size, total, dest_off, src_off; + size_t dest_idx = 0, src_idx = 0, merged_idx = 0; + void *merged_buf; + struct pstore_ftrace_record *drec, *srec, *mrec; + size_t record_size = sizeof(struct pstore_ftrace_record); + + dest_off = dest->old_log_size % record_size; + dest_size = dest->old_log_size - dest_off; + + src_off = src->old_log_size % record_size; + src_size = src->old_log_size - src_off; + + total = dest_size + src_size; + merged_buf = kmalloc(total, GFP_KERNEL); + if (!merged_buf) + return -ENOMEM; + + drec = (struct pstore_ftrace_record *)(dest->old_log + dest_off); + srec = (struct pstore_ftrace_record *)(src->old_log + src_off); + mrec = (struct pstore_ftrace_record *)(merged_buf); + + while (dest_size > 0 && src_size > 0) { + if (pstore_ftrace_read_timestamp(&drec[dest_idx]) < + pstore_ftrace_read_timestamp(&srec[src_idx])) { + mrec[merged_idx++] = drec[dest_idx++]; + dest_size -= record_size; + } else { + mrec[merged_idx++] = srec[src_idx++]; + src_size -= record_size; + } + } + + while (dest_size > 0) { + mrec[merged_idx++] = drec[dest_idx++]; + dest_size -= record_size; + } + + while (src_size > 0) { + mrec[merged_idx++] = srec[src_idx++]; + src_size -= record_size; + } + + kfree(dest->old_log); + dest->old_log = merged_buf; + dest->old_log_size = total; + + return 0; +} + static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type, int *count, struct timespec *time, char **buf, bool *compressed, ssize_t *ecc_notice_size, struct pstore_info *psi) { - ssize_t size; + ssize_t size = 0; struct ramoops_context *cxt = psi->data; struct persistent_ram_zone *prz = NULL; int header_length = 0; + bool free_prz = false; /* Ramoops headers provide time stamps for PSTORE_TYPE_DMESG, but * 
PSTORE_TYPE_CONSOLE and PSTORE_TYPE_FTRACE don't currently have @@ -201,7 +256,7 @@ static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type, /* Find the next valid persistent_ram_zone for DMESG */ while (cxt->dump_read_cnt < cxt->max_dump_cnt && !prz) { - prz = ramoops_get_next_prz(cxt->przs, &cxt->dump_read_cnt, + prz = ramoops_get_next_prz(cxt->dprzs, &cxt->dump_read_cnt, cxt->max_dump_cnt, id, type, PSTORE_TYPE_DMESG, 1); if (!prz_ok(prz)) @@ -219,14 +274,56 @@ static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type, if (!prz_ok(prz)) prz = ramoops_get_next_prz(&cxt->cprz, &cxt->console_read_cnt, 1, id, type, PSTORE_TYPE_CONSOLE, 0); - if (!prz_ok(prz)) - prz = ramoops_get_next_prz(&cxt->fprz, &cxt->ftrace_read_cnt, - 1, id, type, PSTORE_TYPE_FTRACE, 0); + if (!prz_ok(prz)) prz = ramoops_get_next_prz(&cxt->mprz, &cxt->pmsg_read_cnt, 1, id, type, PSTORE_TYPE_PMSG, 0); - if (!prz_ok(prz)) - return 0; + + /* ftrace is last since it may want to dynamically allocate memory. */ + if (!prz_ok(prz)) { + if (!(cxt->flags & RAMOOPS_FLAG_FTRACE_PER_CPU)) { + prz = ramoops_get_next_prz(cxt->fprzs, + &cxt->ftrace_read_cnt, 1, id, type, + PSTORE_TYPE_FTRACE, 0); + } else { + /* + * Build a new dummy record which combines all the + * per-cpu records including metadata and ecc info. + */ + struct persistent_ram_zone *tmp_prz, *prz_next; + + tmp_prz = kzalloc(sizeof(struct persistent_ram_zone), + GFP_KERNEL); + if (!tmp_prz) + return -ENOMEM; + free_prz = true; + + while (cxt->ftrace_read_cnt < cxt->max_ftrace_cnt) { + prz_next = ramoops_get_next_prz(cxt->fprzs, + &cxt->ftrace_read_cnt, + cxt->max_ftrace_cnt, id, + type, PSTORE_TYPE_FTRACE, 0); + + if (!prz_ok(prz_next)) + continue; + + tmp_prz->ecc_info = prz_next->ecc_info; + tmp_prz->corrected_bytes += + prz_next->corrected_bytes; + tmp_prz->bad_blocks += prz_next->bad_blocks; + size = ftrace_log_combine(tmp_prz, prz_next); + if (size) + goto out; + } + *id = 0; + prz = tmp_prz; + } + } + + if (!prz_ok(prz)) { + size = 0; + goto out; + } size = persistent_ram_old_size(prz) - header_length; @@ -234,12 +331,21 @@ static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type, *ecc_notice_size = persistent_ram_ecc_string(prz, NULL, 0); *buf = kmalloc(size + *ecc_notice_size + 1, GFP_KERNEL); - if (*buf == NULL) - return -ENOMEM; + if (*buf == NULL) { + size = -ENOMEM; + goto out; + } memcpy(*buf, (char *)persistent_ram_old(prz) + header_length, size); + persistent_ram_ecc_string(prz, *buf + size, *ecc_notice_size + 1); +out: + if (free_prz) { + kfree(prz->old_log); + kfree(prz); + } + return size; } @@ -283,15 +389,23 @@ static int notrace ramoops_pstore_write_buf(enum pstore_type_id type, persistent_ram_write(cxt->cprz, buf, size); return 0; } else if (type == PSTORE_TYPE_FTRACE) { - if (!cxt->fprz) + int zonenum; + + if (!cxt->fprzs) return -ENOMEM; - persistent_ram_write(cxt->fprz, buf, size); + /* + * Choose zone by if we're using per-cpu buffers. 
+ */ + if (cxt->flags & RAMOOPS_FLAG_FTRACE_PER_CPU) + zonenum = smp_processor_id(); + else + zonenum = 0; + + persistent_ram_write(cxt->fprzs[zonenum], buf, size); return 0; } else if (type == PSTORE_TYPE_PMSG) { - if (!cxt->mprz) - return -ENOMEM; - persistent_ram_write(cxt->mprz, buf, size); - return 0; + pr_warn_ratelimited("PMSG shouldn't call %s\n", __func__); + return -EINVAL; } if (type != PSTORE_TYPE_DMESG) @@ -316,10 +430,10 @@ static int notrace ramoops_pstore_write_buf(enum pstore_type_id type, if (part != 1) return -ENOSPC; - if (!cxt->przs) + if (!cxt->dprzs) return -ENOSPC; - prz = cxt->przs[cxt->dump_write_cnt]; + prz = cxt->dprzs[cxt->dump_write_cnt]; hlen = ramoops_write_kmsg_hdr(prz, compressed); if (size + hlen > prz->buffer_size) @@ -359,13 +473,15 @@ static int ramoops_pstore_erase(enum pstore_type_id type, u64 id, int count, case PSTORE_TYPE_DMESG: if (id >= cxt->max_dump_cnt) return -EINVAL; - prz = cxt->przs[id]; + prz = cxt->dprzs[id]; break; case PSTORE_TYPE_CONSOLE: prz = cxt->cprz; break; case PSTORE_TYPE_FTRACE: - prz = cxt->fprz; + if (id >= cxt->max_ftrace_cnt) + return -EINVAL; + prz = cxt->fprzs[id]; break; case PSTORE_TYPE_PMSG: prz = cxt->mprz; @@ -396,68 +512,113 @@ static void ramoops_free_przs(struct ramoops_context *cxt) { int i; - if (!cxt->przs) - return; + /* Free dump PRZs */ + if (cxt->dprzs) { + for (i = 0; i < cxt->max_dump_cnt; i++) + persistent_ram_free(cxt->dprzs[i]); - for (i = 0; i < cxt->max_dump_cnt; i++) - persistent_ram_free(cxt->przs[i]); + kfree(cxt->dprzs); + cxt->max_dump_cnt = 0; + } - kfree(cxt->przs); - cxt->max_dump_cnt = 0; + /* Free ftrace PRZs */ + if (cxt->fprzs) { + for (i = 0; i < cxt->max_ftrace_cnt; i++) + persistent_ram_free(cxt->fprzs[i]); + kfree(cxt->fprzs); + cxt->max_ftrace_cnt = 0; + } } -static int ramoops_init_przs(struct device *dev, struct ramoops_context *cxt, - phys_addr_t *paddr, size_t dump_mem_sz) +static int ramoops_init_przs(const char *name, + struct device *dev, struct ramoops_context *cxt, + struct persistent_ram_zone ***przs, + phys_addr_t *paddr, size_t mem_sz, + ssize_t record_size, + unsigned int *cnt, u32 sig, u32 flags) { int err = -ENOMEM; int i; + size_t zone_sz; + struct persistent_ram_zone **prz_ar; - if (!cxt->record_size) + /* Allocate nothing for 0 mem_sz or 0 record_size. */ + if (mem_sz == 0 || record_size == 0) { + *cnt = 0; return 0; + } - if (*paddr + dump_mem_sz - cxt->phys_addr > cxt->size) { - dev_err(dev, "no room for dumps\n"); - return -ENOMEM; + /* + * If we have a negative record size, calculate it based on + * mem_sz / *cnt. If we have a positive record size, calculate + * cnt from mem_sz / record_size. 
+ */ + if (record_size < 0) { + if (*cnt == 0) + return 0; + record_size = mem_sz / *cnt; + if (record_size == 0) { + dev_err(dev, "%s record size == 0 (%zu / %u)\n", + name, mem_sz, *cnt); + goto fail; + } + } else { + *cnt = mem_sz / record_size; + if (*cnt == 0) { + dev_err(dev, "%s record count == 0 (%zu / %zu)\n", + name, mem_sz, record_size); + goto fail; + } } - cxt->max_dump_cnt = dump_mem_sz / cxt->record_size; - if (!cxt->max_dump_cnt) - return -ENOMEM; + if (*paddr + mem_sz - cxt->phys_addr > cxt->size) { + dev_err(dev, "no room for %s mem region (0x%zx@0x%llx) in (0x%lx@0x%llx)\n", + name, + mem_sz, (unsigned long long)*paddr, + cxt->size, (unsigned long long)cxt->phys_addr); + goto fail; + } - cxt->przs = kzalloc(sizeof(*cxt->przs) * cxt->max_dump_cnt, - GFP_KERNEL); - if (!cxt->przs) { - dev_err(dev, "failed to initialize a prz array for dumps\n"); - goto fail_mem; + zone_sz = mem_sz / *cnt; + if (!zone_sz) { + dev_err(dev, "%s zone size == 0\n", name); + goto fail; } - for (i = 0; i < cxt->max_dump_cnt; i++) { - cxt->przs[i] = persistent_ram_new(*paddr, cxt->record_size, 0, + prz_ar = kcalloc(*cnt, sizeof(**przs), GFP_KERNEL); + if (!prz_ar) + goto fail; + + for (i = 0; i < *cnt; i++) { + prz_ar[i] = persistent_ram_new(*paddr, zone_sz, sig, &cxt->ecc_info, - cxt->memtype); - if (IS_ERR(cxt->przs[i])) { - err = PTR_ERR(cxt->przs[i]); - dev_err(dev, "failed to request mem region (0x%zx@0x%llx): %d\n", - cxt->record_size, (unsigned long long)*paddr, err); + cxt->memtype, flags); + if (IS_ERR(prz_ar[i])) { + err = PTR_ERR(prz_ar[i]); + dev_err(dev, "failed to request %s mem region (0x%zx@0x%llx): %d\n", + name, record_size, + (unsigned long long)*paddr, err); while (i > 0) { i--; - persistent_ram_free(cxt->przs[i]); + persistent_ram_free(prz_ar[i]); } - goto fail_prz; + kfree(prz_ar); + goto fail; } - *paddr += cxt->record_size; + *paddr += zone_sz; } + *przs = prz_ar; return 0; -fail_prz: - kfree(cxt->przs); -fail_mem: - cxt->max_dump_cnt = 0; + +fail: + *cnt = 0; return err; } -static int ramoops_init_prz(struct device *dev, struct ramoops_context *cxt, +static int ramoops_init_prz(const char *name, + struct device *dev, struct ramoops_context *cxt, struct persistent_ram_zone **prz, phys_addr_t *paddr, size_t sz, u32 sig) { @@ -465,18 +626,19 @@ static int ramoops_init_prz(struct device *dev, struct ramoops_context *cxt, return 0; if (*paddr + sz - cxt->phys_addr > cxt->size) { - dev_err(dev, "no room for mem region (0x%zx@0x%llx) in (0x%lx@0x%llx)\n", - sz, (unsigned long long)*paddr, + dev_err(dev, "no room for %s mem region (0x%zx@0x%llx) in (0x%lx@0x%llx)\n", + name, sz, (unsigned long long)*paddr, cxt->size, (unsigned long long)cxt->phys_addr); return -ENOMEM; } - *prz = persistent_ram_new(*paddr, sz, sig, &cxt->ecc_info, cxt->memtype); + *prz = persistent_ram_new(*paddr, sz, sig, &cxt->ecc_info, + cxt->memtype, 0); if (IS_ERR(*prz)) { int err = PTR_ERR(*prz); - dev_err(dev, "failed to request mem region (0x%zx@0x%llx): %d\n", - sz, (unsigned long long)*paddr, err); + dev_err(dev, "failed to request %s mem region (0x%zx@0x%llx): %d\n", + name, sz, (unsigned long long)*paddr, err); return err; } @@ -543,6 +705,7 @@ static int ramoops_parse_dt(struct platform_device *pdev, parse_size("ftrace-size", pdata->ftrace_size); parse_size("pmsg-size", pdata->pmsg_size); parse_size("ecc-size", pdata->ecc_info.ecc_size); + parse_size("flags", pdata->flags); #undef parse_size @@ -561,6 +724,7 @@ static int ramoops_probe(struct platform_device *pdev) if (dev_of_node(dev) && !pdata) { 
pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL); if (!pdata) { + pr_err("cannot allocate platform data buffer\n"); err = -ENOMEM; goto fail_out; } @@ -570,11 +734,20 @@ static int ramoops_probe(struct platform_device *pdev) goto fail_out; } - /* Only a single ramoops area allowed at a time, so fail extra + /* + * Only a single ramoops area allowed at a time, so fail extra * probes. */ - if (cxt->max_dump_cnt) + if (cxt->max_dump_cnt) { + pr_err("already initialized\n"); goto fail_out; + } + + /* Make sure we didn't get bogus platform data pointer. */ + if (!pdata) { + pr_err("NULL platform data\n"); + goto fail_out; + } if (!pdata->mem_size || (!pdata->record_size && !pdata->console_size && !pdata->ftrace_size && !pdata->pmsg_size)) { @@ -600,27 +773,37 @@ static int ramoops_probe(struct platform_device *pdev) cxt->ftrace_size = pdata->ftrace_size; cxt->pmsg_size = pdata->pmsg_size; cxt->dump_oops = pdata->dump_oops; + cxt->flags = pdata->flags; cxt->ecc_info = pdata->ecc_info; paddr = cxt->phys_addr; dump_mem_sz = cxt->size - cxt->console_size - cxt->ftrace_size - cxt->pmsg_size; - err = ramoops_init_przs(dev, cxt, &paddr, dump_mem_sz); + err = ramoops_init_przs("dump", dev, cxt, &cxt->dprzs, &paddr, + dump_mem_sz, cxt->record_size, + &cxt->max_dump_cnt, 0, 0); if (err) goto fail_out; - err = ramoops_init_prz(dev, cxt, &cxt->cprz, &paddr, + err = ramoops_init_prz("console", dev, cxt, &cxt->cprz, &paddr, cxt->console_size, 0); if (err) goto fail_init_cprz; - err = ramoops_init_prz(dev, cxt, &cxt->fprz, &paddr, cxt->ftrace_size, - LINUX_VERSION_CODE); + cxt->max_ftrace_cnt = (cxt->flags & RAMOOPS_FLAG_FTRACE_PER_CPU) + ? nr_cpu_ids + : 1; + err = ramoops_init_przs("ftrace", dev, cxt, &cxt->fprzs, &paddr, + cxt->ftrace_size, -1, + &cxt->max_ftrace_cnt, LINUX_VERSION_CODE, + (cxt->flags & RAMOOPS_FLAG_FTRACE_PER_CPU) + ? PRZ_FLAG_NO_LOCK : 0); if (err) goto fail_init_fprz; - err = ramoops_init_prz(dev, cxt, &cxt->mprz, &paddr, cxt->pmsg_size, 0); + err = ramoops_init_prz("pmsg", dev, cxt, &cxt->mprz, &paddr, + cxt->pmsg_size, 0); if (err) goto fail_init_mprz; @@ -680,7 +863,6 @@ fail_clear: cxt->pstore.bufsize = 0; persistent_ram_free(cxt->mprz); fail_init_mprz: - persistent_ram_free(cxt->fprz); fail_init_fprz: persistent_ram_free(cxt->cprz); fail_init_cprz: @@ -699,7 +881,6 @@ static int ramoops_remove(struct platform_device *pdev) cxt->pstore.bufsize = 0; persistent_ram_free(cxt->mprz); - persistent_ram_free(cxt->fprz); persistent_ram_free(cxt->cprz); ramoops_free_przs(cxt); @@ -741,6 +922,8 @@ static void ramoops_register_dummy(void) dummy_data->ftrace_size = ramoops_ftrace_size; dummy_data->pmsg_size = ramoops_pmsg_size; dummy_data->dump_oops = dump_oops; + dummy_data->flags = RAMOOPS_FLAG_FTRACE_PER_CPU; + /* * For backwards compatibility ramoops.ecc=1 means 16 bytes ECC * (using 1 byte for ECC isn't much of use anyway). 
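The ram.c rework above folds the old dump-only ramoops_init_przs() and the single ftrace zone into one generic zone allocator: a negative record_size means "split mem_sz evenly across a caller-supplied count" (nr_cpu_ids zones when RAMOOPS_FLAG_FTRACE_PER_CPU is set), while a positive record_size derives the count from the region size. The user-space sketch below only illustrates that sizing arithmetic under those assumptions; size_zones() and its parameters are illustrative names, not kernel API.

/*
 * Standalone sketch (not kernel code) of the sizing rules applied by the
 * generalized ramoops_init_przs() above: record_size < 0 keeps the
 * caller's count and divides the region evenly; record_size > 0 derives
 * the count from the region size.
 */
#include <stdio.h>
#include <sys/types.h>		/* ssize_t */

static int size_zones(size_t mem_sz, ssize_t record_size,
		      unsigned int *cnt, size_t *zone_sz)
{
	if (mem_sz == 0 || record_size == 0) {
		*cnt = 0;
		return 0;		/* nothing to allocate */
	}
	if (record_size < 0) {		/* fixed count, e.g. nr_cpu_ids */
		if (*cnt == 0)
			return 0;
		if (mem_sz / *cnt == 0)
			return -1;	/* region too small for that many zones */
	} else {			/* fixed record size, derive count */
		*cnt = mem_sz / record_size;
		if (*cnt == 0)
			return -1;	/* region smaller than one record */
	}
	*zone_sz = mem_sz / *cnt;
	return 0;
}

int main(void)
{
	unsigned int cnt = 4;		/* pretend nr_cpu_ids == 4 */
	size_t zone_sz = 0;

	/* ftrace-style: 64 KiB split across 4 CPUs -> 4 zones of 16 KiB */
	if (!size_zones(64 * 1024, -1, &cnt, &zone_sz))
		printf("ftrace: %u zones of %zu bytes\n", cnt, zone_sz);

	/* dump-style: 128 KiB with 16 KiB records -> 8 zones */
	cnt = 0;
	if (!size_zones(128 * 1024, 16 * 1024, &cnt, &zone_sz))
		printf("dump:   %u zones of %zu bytes\n", cnt, zone_sz);
	return 0;
}

Built stand-alone, the two calls print 4 zones of 16384 bytes and 8 zones of 16384 bytes, which is the split the probe path above would request for comparable per-CPU ftrace and dump regions.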
diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c index 3975deec02f8..a857338b7dab 100644 --- a/fs/pstore/ram_core.c +++ b/fs/pstore/ram_core.c @@ -48,16 +48,15 @@ static inline size_t buffer_start(struct persistent_ram_zone *prz) return atomic_read(&prz->buffer->start); } -static DEFINE_RAW_SPINLOCK(buffer_lock); - /* increase and wrap the start pointer, returning the old value */ static size_t buffer_start_add(struct persistent_ram_zone *prz, size_t a) { int old; int new; - unsigned long flags; + unsigned long flags = 0; - raw_spin_lock_irqsave(&buffer_lock, flags); + if (!(prz->flags & PRZ_FLAG_NO_LOCK)) + raw_spin_lock_irqsave(&prz->buffer_lock, flags); old = atomic_read(&prz->buffer->start); new = old + a; @@ -65,7 +64,8 @@ static size_t buffer_start_add(struct persistent_ram_zone *prz, size_t a) new -= prz->buffer_size; atomic_set(&prz->buffer->start, new); - raw_spin_unlock_irqrestore(&buffer_lock, flags); + if (!(prz->flags & PRZ_FLAG_NO_LOCK)) + raw_spin_unlock_irqrestore(&prz->buffer_lock, flags); return old; } @@ -75,9 +75,10 @@ static void buffer_size_add(struct persistent_ram_zone *prz, size_t a) { size_t old; size_t new; - unsigned long flags; + unsigned long flags = 0; - raw_spin_lock_irqsave(&buffer_lock, flags); + if (!(prz->flags & PRZ_FLAG_NO_LOCK)) + raw_spin_lock_irqsave(&prz->buffer_lock, flags); old = atomic_read(&prz->buffer->size); if (old == prz->buffer_size) @@ -89,7 +90,8 @@ static void buffer_size_add(struct persistent_ram_zone *prz, size_t a) atomic_set(&prz->buffer->size, new); exit: - raw_spin_unlock_irqrestore(&buffer_lock, flags); + if (!(prz->flags & PRZ_FLAG_NO_LOCK)) + raw_spin_unlock_irqrestore(&prz->buffer_lock, flags); } static void notrace persistent_ram_encode_rs8(struct persistent_ram_zone *prz, @@ -465,7 +467,8 @@ static int persistent_ram_buffer_map(phys_addr_t start, phys_addr_t size, } static int persistent_ram_post_init(struct persistent_ram_zone *prz, u32 sig, - struct persistent_ram_ecc_info *ecc_info) + struct persistent_ram_ecc_info *ecc_info, + unsigned long flags) { int ret; @@ -493,6 +496,8 @@ static int persistent_ram_post_init(struct persistent_ram_zone *prz, u32 sig, prz->buffer->sig = sig; persistent_ram_zap(prz); + prz->buffer_lock = __RAW_SPIN_LOCK_UNLOCKED(buffer_lock); + prz->flags = flags; return 0; } @@ -517,7 +522,7 @@ void persistent_ram_free(struct persistent_ram_zone *prz) struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size, u32 sig, struct persistent_ram_ecc_info *ecc_info, - unsigned int memtype) + unsigned int memtype, u32 flags) { struct persistent_ram_zone *prz; int ret = -ENOMEM; @@ -532,7 +537,7 @@ struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size, if (ret) goto err; - ret = persistent_ram_post_init(prz, sig, ecc_info); + ret = persistent_ram_post_init(prz, sig, ecc_info, flags); if (ret) goto err; diff --git a/fs/quota/netlink.c b/fs/quota/netlink.c index 8b252673d454..e99b1a72d9a7 100644 --- a/fs/quota/netlink.c +++ b/fs/quota/netlink.c @@ -12,14 +12,8 @@ static const struct genl_multicast_group quota_mcgrps[] = { }; /* Netlink family structure for quota */ -static struct genl_family quota_genl_family = { - /* - * Needed due to multicast group ID abuse - old code assumed - * the family ID was also a valid multicast group ID (which - * isn't true) and userspace might thus rely on it. Assign a - * static ID for this group to make dealing with that easier. 
- */ - .id = GENL_ID_VFS_DQUOT, +static struct genl_family quota_genl_family __ro_after_init = { + .module = THIS_MODULE, .hdrsize = 0, .name = "VFS_DQUOT", .version = 1, diff --git a/fs/splice.c b/fs/splice.c index dcaf185a5731..5a7750bd2eea 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -408,7 +408,8 @@ static ssize_t default_file_splice_read(struct file *in, loff_t *ppos, if (res <= 0) return -ENOMEM; - nr_pages = res / PAGE_SIZE; + BUG_ON(dummy); + nr_pages = DIV_ROUND_UP(res, PAGE_SIZE); vec = __vec; if (nr_pages > PIPE_DEF_BUFFERS) { diff --git a/fs/xattr.c b/fs/xattr.c index 3368659c471e..2d13b4e62fae 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -170,7 +170,7 @@ int __vfs_setxattr_noperm(struct dentry *dentry, const char *name, const void *value, size_t size, int flags) { struct inode *inode = dentry->d_inode; - int error = -EOPNOTSUPP; + int error = -EAGAIN; int issec = !strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN); @@ -183,15 +183,21 @@ int __vfs_setxattr_noperm(struct dentry *dentry, const char *name, security_inode_post_setxattr(dentry, name, value, size, flags); } - } else if (issec) { - const char *suffix = name + XATTR_SECURITY_PREFIX_LEN; - + } else { if (unlikely(is_bad_inode(inode))) return -EIO; - error = security_inode_setsecurity(inode, suffix, value, - size, flags); - if (!error) - fsnotify_xattr(dentry); + } + if (error == -EAGAIN) { + error = -EOPNOTSUPP; + + if (issec) { + const char *suffix = name + XATTR_SECURITY_PREFIX_LEN; + + error = security_inode_setsecurity(inode, suffix, value, + size, flags); + if (!error) + fsnotify_xattr(dentry); + } } return error; |
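The fs/xattr.c hunk above restructures __vfs_setxattr_noperm() so the security.* fallback runs from a single trailing block: the handler path leaves -EAGAIN as an internal "not handled" sentinel, and only afterwards is that sentinel converted to -EOPNOTSUPP or routed to security_inode_setsecurity(). Below is a minimal user-space sketch of that control-flow pattern only; fake_inode, primary_setxattr() and security_fallback() are purely illustrative stand-ins, not VFS interfaces.

/*
 * Sketch of the sentinel-then-fallback pattern used by the patched
 * __vfs_setxattr_noperm(): -EAGAIN marks "nobody handled this yet",
 * and a single trailing block resolves it.
 */
#include <errno.h>
#include <stdio.h>
#include <string.h>

#define XATTR_SECURITY_PREFIX	"security."

/* Pretend inode: has_handler mimics a filesystem setxattr handler being present. */
struct fake_inode {
	int has_handler;
};

static int primary_setxattr(struct fake_inode *inode, const char *name)
{
	if (!inode->has_handler)
		return -EAGAIN;		/* sentinel: not handled yet */
	printf("filesystem handler stored %s\n", name);
	return 0;
}

static int security_fallback(const char *suffix)
{
	printf("LSM stored security suffix %s\n", suffix);
	return 0;
}

static int set_xattr(struct fake_inode *inode, const char *name)
{
	int error = primary_setxattr(inode, name);

	if (error == -EAGAIN) {
		error = -EOPNOTSUPP;
		if (!strncmp(name, XATTR_SECURITY_PREFIX,
			     strlen(XATTR_SECURITY_PREFIX)))
			error = security_fallback(name +
					strlen(XATTR_SECURITY_PREFIX));
	}
	return error;
}

int main(void)
{
	struct fake_inode no_handler = { .has_handler = 0 };

	/* Falls through to the security fallback. */
	printf("ret=%d\n", set_xattr(&no_handler, "security.selinux"));
	/* No handler and not security.* -> -EOPNOTSUPP. */
	printf("ret=%d\n", set_xattr(&no_handler, "user.comment"));
	return 0;
}

Run as-is it prints ret=0 for security.selinux (the fallback handled it) and a negative EOPNOTSUPP value (typically -95 on Linux) for user.comment, mirroring the two error paths of the patched function.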