Diffstat (limited to 'fs')

 fs/afs/addr_list.c    | 101
 fs/afs/cell.c         |  17
 fs/afs/dynroot.c      |   2
 fs/afs/internal.h     |  12
 fs/afs/main.c         |   2
 fs/afs/proc.c         |   7
 fs/afs/rxrpc.c        |   2
 fs/cachefiles/namei.c |   2
 fs/dax.c              |  13
 fs/fat/fatent.c       |   1
 fs/fscache/cookie.c   |  31
 fs/fscache/internal.h |   1
 fs/fscache/main.c     |   4
 fs/gfs2/bmap.c        |   2
 fs/ocfs2/dlmglue.c    |   2
 fs/ubifs/super.c      |   4
 fs/xfs/xfs_reflink.c  | 200

 17 files changed, 271 insertions(+), 132 deletions(-)
diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c
index 025a9a5e1c32..55a756c60746 100644
--- a/fs/afs/addr_list.c
+++ b/fs/afs/addr_list.c
@@ -17,11 +17,6 @@
 #include "internal.h"
 #include "afs_fs.h"
 
-//#define AFS_MAX_ADDRESSES
-//    ((unsigned int)((PAGE_SIZE - sizeof(struct afs_addr_list)) /
-//            sizeof(struct sockaddr_rxrpc)))
-#define AFS_MAX_ADDRESSES ((unsigned int)(sizeof(unsigned long) * 8))
-
 /*
  * Release an address list.
  */
@@ -43,11 +38,15 @@ struct afs_addr_list *afs_alloc_addrlist(unsigned int nr,
 
     _enter("%u,%u,%u", nr, service, port);
 
+    if (nr > AFS_MAX_ADDRESSES)
+        nr = AFS_MAX_ADDRESSES;
+
     alist = kzalloc(struct_size(alist, addrs, nr), GFP_KERNEL);
     if (!alist)
         return NULL;
 
     refcount_set(&alist->usage, 1);
+    alist->max_addrs = nr;
 
     for (i = 0; i < nr; i++) {
         struct sockaddr_rxrpc *srx = &alist->addrs[i];
@@ -109,8 +108,6 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len,
     } while (p < end);
 
     _debug("%u/%u addresses", nr, AFS_MAX_ADDRESSES);
-    if (nr > AFS_MAX_ADDRESSES)
-        nr = AFS_MAX_ADDRESSES;
 
     alist = afs_alloc_addrlist(nr, service, port);
     if (!alist)
@@ -119,8 +116,10 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len,
     /* Extract the addresses */
     p = text;
     do {
-        struct sockaddr_rxrpc *srx = &alist->addrs[alist->nr_addrs];
         const char *q, *stop;
+        unsigned int xport = port;
+        __be32 x[4];
+        int family;
 
         if (*p == delim) {
             p++;
@@ -136,19 +135,12 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len,
             break;
         }
 
-        if (in4_pton(p, q - p,
-                 (u8 *)&srx->transport.sin6.sin6_addr.s6_addr32[3],
-                 -1, &stop)) {
-            srx->transport.sin6.sin6_addr.s6_addr32[0] = 0;
-            srx->transport.sin6.sin6_addr.s6_addr32[1] = 0;
-            srx->transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff);
-        } else if (in6_pton(p, q - p,
-                    srx->transport.sin6.sin6_addr.s6_addr,
-                    -1, &stop)) {
-            /* Nothing to do */
-        } else {
+        if (in4_pton(p, q - p, (u8 *)&x[0], -1, &stop))
+            family = AF_INET;
+        else if (in6_pton(p, q - p, (u8 *)x, -1, &stop))
+            family = AF_INET6;
+        else
             goto bad_address;
-        }
 
         if (stop != q)
             goto bad_address;
@@ -160,7 +152,7 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len,
         if (p < end) {
             if (*p == '+') {
                 /* Port number specification "+1234" */
-                unsigned int xport = 0;
+                xport = 0;
                 p++;
                 if (p >= end || !isdigit(*p))
                     goto bad_address;
@@ -171,7 +163,6 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len,
                     goto bad_address;
                     p++;
                 } while (p < end && isdigit(*p));
-                srx->transport.sin6.sin6_port = htons(xport);
             } else if (*p == delim) {
                 p++;
             } else {
@@ -179,8 +170,12 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len,
             }
         }
 
-        alist->nr_addrs++;
-    } while (p < end && alist->nr_addrs < AFS_MAX_ADDRESSES);
+        if (family == AF_INET)
+            afs_merge_fs_addr4(alist, x[0], xport);
+        else
+            afs_merge_fs_addr6(alist, x, xport);
+
+    } while (p < end);
 
     _leave(" = [nr %u]", alist->nr_addrs);
     return alist;
@@ -237,19 +232,23 @@ struct afs_addr_list *afs_dns_query(struct afs_cell *cell, time64_t *_expiry)
  */
 void afs_merge_fs_addr4(struct afs_addr_list *alist, __be32 xdr, u16 port)
 {
-    struct sockaddr_in6 *a;
-    __be16 xport = htons(port);
+    struct sockaddr_rxrpc *srx;
+    u32 addr = ntohl(xdr);
     int i;
 
+    if (alist->nr_addrs >= alist->max_addrs)
+        return;
+
     for (i = 0; i < alist->nr_ipv4; i++) {
-        a = &alist->addrs[i].transport.sin6;
-        if (xdr == a->sin6_addr.s6_addr32[3] &&
-            xport == a->sin6_port)
+        struct sockaddr_in *a = &alist->addrs[i].transport.sin;
+        u32 a_addr = ntohl(a->sin_addr.s_addr);
+        u16 a_port = ntohs(a->sin_port);
+
+        if (addr == a_addr && port == a_port)
             return;
-        if (xdr == a->sin6_addr.s6_addr32[3] &&
-            (u16 __force)xport < (u16 __force)a->sin6_port)
+        if (addr == a_addr && port < a_port)
             break;
-        if ((u32 __force)xdr < (u32 __force)a->sin6_addr.s6_addr32[3])
+        if (addr < a_addr)
             break;
     }
@@ -258,12 +257,11 @@ void afs_merge_fs_addr4(struct afs_addr_list *alist, __be32 xdr, u16 port)
             alist->addrs + i,
             sizeof(alist->addrs[0]) * (alist->nr_addrs - i));
 
-    a = &alist->addrs[i].transport.sin6;
-    a->sin6_port = xport;
-    a->sin6_addr.s6_addr32[0] = 0;
-    a->sin6_addr.s6_addr32[1] = 0;
-    a->sin6_addr.s6_addr32[2] = htonl(0xffff);
-    a->sin6_addr.s6_addr32[3] = xdr;
+    srx = &alist->addrs[i];
+    srx->transport_len = sizeof(srx->transport.sin);
+    srx->transport.sin.sin_family = AF_INET;
+    srx->transport.sin.sin_port = htons(port);
+    srx->transport.sin.sin_addr.s_addr = xdr;
     alist->nr_ipv4++;
     alist->nr_addrs++;
 }
@@ -273,18 +271,20 @@ void afs_merge_fs_addr4(struct afs_addr_list *alist, __be32 xdr, u16 port)
  */
 void afs_merge_fs_addr6(struct afs_addr_list *alist, __be32 *xdr, u16 port)
 {
-    struct sockaddr_in6 *a;
-    __be16 xport = htons(port);
+    struct sockaddr_rxrpc *srx;
     int i, diff;
 
+    if (alist->nr_addrs >= alist->max_addrs)
+        return;
+
     for (i = alist->nr_ipv4; i < alist->nr_addrs; i++) {
-        a = &alist->addrs[i].transport.sin6;
+        struct sockaddr_in6 *a = &alist->addrs[i].transport.sin6;
+        u16 a_port = ntohs(a->sin6_port);
+
         diff = memcmp(xdr, &a->sin6_addr, 16);
-        if (diff == 0 &&
-            xport == a->sin6_port)
+        if (diff == 0 && port == a_port)
             return;
-        if (diff == 0 &&
-            (u16 __force)xport < (u16 __force)a->sin6_port)
+        if (diff == 0 && port < a_port)
             break;
         if (diff < 0)
             break;
@@ -295,12 +295,11 @@ void afs_merge_fs_addr6(struct afs_addr_list *alist, __be32 *xdr, u16 port)
             alist->addrs + i,
             sizeof(alist->addrs[0]) * (alist->nr_addrs - i));
 
-    a = &alist->addrs[i].transport.sin6;
-    a->sin6_port = xport;
-    a->sin6_addr.s6_addr32[0] = xdr[0];
-    a->sin6_addr.s6_addr32[1] = xdr[1];
-    a->sin6_addr.s6_addr32[2] = xdr[2];
-    a->sin6_addr.s6_addr32[3] = xdr[3];
+    srx = &alist->addrs[i];
+    srx->transport_len = sizeof(srx->transport.sin6);
+    srx->transport.sin6.sin6_family = AF_INET6;
+    srx->transport.sin6.sin6_port = htons(port);
+    memcpy(&srx->transport.sin6.sin6_addr, xdr, 16);
     alist->nr_addrs++;
 }
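The addr_list rework above replaces after-the-fact truncation with capped, sorted insertion: afs_merge_fs_addr4() converts the address and port to host byte order, scans the IPv4 span of the array for either a duplicate or the first entry that sorts after the new one, then memmove()s the tail up to make room. As a rough illustration only — the names and types below are invented for the sketch, this is not kernel code — the same pattern in plain userspace C:

    #include <stdio.h>
    #include <string.h>

    #define MAX_ADDRS 64

    struct addr_list {
        unsigned int nr;
        unsigned int max;
        struct { unsigned int addr; unsigned short port; } addrs[MAX_ADDRS];
    };

    static void merge_addr(struct addr_list *al, unsigned int addr,
                   unsigned short port)
    {
        unsigned int i;

        if (al->nr >= al->max)
            return;            /* list full: silently drop */

        for (i = 0; i < al->nr; i++) {
            if (addr == al->addrs[i].addr && port == al->addrs[i].port)
                return;        /* duplicate: nothing to do */
            if (addr < al->addrs[i].addr ||
                (addr == al->addrs[i].addr && port < al->addrs[i].port))
                break;        /* insertion point found */
        }

        /* Shuffle the tail up one slot and drop the new entry in. */
        memmove(al->addrs + i + 1, al->addrs + i,
            sizeof(al->addrs[0]) * (al->nr - i));
        al->addrs[i].addr = addr;
        al->addrs[i].port = port;
        al->nr++;
    }

    int main(void)
    {
        struct addr_list al = { .nr = 0, .max = MAX_ADDRS };
        unsigned int i;

        merge_addr(&al, 0x0a000002, 7001);
        merge_addr(&al, 0x0a000001, 7001);
        merge_addr(&al, 0x0a000001, 7001);    /* duplicate, ignored */

        for (i = 0; i < al.nr; i++)
            printf("%08x+%u\n", al.addrs[i].addr,
                   (unsigned int)al.addrs[i].port);
        return 0;
    }

Keeping the array sorted makes duplicate suppression a by-product of the insertion scan, so no separate dedup pass is needed; comparing in host byte order is what gives a consistent ordering across entries.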
diff --git a/fs/afs/cell.c b/fs/afs/cell.c
index f3d0bef16d78..6127f0fcd62c 100644
--- a/fs/afs/cell.c
+++ b/fs/afs/cell.c
@@ -514,6 +514,8 @@ static int afs_alloc_anon_key(struct afs_cell *cell)
  */
 static int afs_activate_cell(struct afs_net *net, struct afs_cell *cell)
 {
+    struct hlist_node **p;
+    struct afs_cell *pcell;
     int ret;
 
     if (!cell->anonymous_key) {
@@ -534,7 +536,18 @@ static int afs_activate_cell(struct afs_net *net, struct afs_cell *cell)
         return ret;
 
     mutex_lock(&net->proc_cells_lock);
-    list_add_tail(&cell->proc_link, &net->proc_cells);
+    for (p = &net->proc_cells.first; *p; p = &(*p)->next) {
+        pcell = hlist_entry(*p, struct afs_cell, proc_link);
+        if (strcmp(cell->name, pcell->name) < 0)
+            break;
+    }
+
+    cell->proc_link.pprev = p;
+    cell->proc_link.next = *p;
+    rcu_assign_pointer(*p, &cell->proc_link);
+    if (cell->proc_link.next)
+        cell->proc_link.next->pprev = &cell->proc_link.next;
+
     afs_dynroot_mkdir(net, cell);
     mutex_unlock(&net->proc_cells_lock);
     return 0;
@@ -550,7 +563,7 @@ static void afs_deactivate_cell(struct afs_net *net, struct afs_cell *cell)
     afs_proc_cell_remove(cell);
 
     mutex_lock(&net->proc_cells_lock);
-    list_del_init(&cell->proc_link);
+    hlist_del_rcu(&cell->proc_link);
     afs_dynroot_rmdir(net, cell);
     mutex_unlock(&net->proc_cells_lock);
diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c
index 1cde710a8013..f29c6dade7f6 100644
--- a/fs/afs/dynroot.c
+++ b/fs/afs/dynroot.c
@@ -265,7 +265,7 @@ int afs_dynroot_populate(struct super_block *sb)
         return -ERESTARTSYS;
 
     net->dynroot_sb = sb;
-    list_for_each_entry(cell, &net->proc_cells, proc_link) {
+    hlist_for_each_entry(cell, &net->proc_cells, proc_link) {
         ret = afs_dynroot_mkdir(net, cell);
         if (ret < 0)
             goto error;
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 871a228d7f37..72de1f157d20 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -73,12 +73,14 @@ struct afs_addr_list {
     struct rcu_head    rcu;        /* Must be first */
     refcount_t    usage;
     u32        version;    /* Version */
-    unsigned short    nr_addrs;
-    unsigned short    index;        /* Address currently in use */
-    unsigned short    nr_ipv4;    /* Number of IPv4 addresses */
+    unsigned char    max_addrs;
+    unsigned char    nr_addrs;
+    unsigned char    index;        /* Address currently in use */
+    unsigned char    nr_ipv4;    /* Number of IPv4 addresses */
     unsigned long    probed;        /* Mask of servers that have been probed */
     unsigned long    yfs;        /* Mask of servers that are YFS */
     struct sockaddr_rxrpc    addrs[];
+#define AFS_MAX_ADDRESSES ((unsigned int)(sizeof(unsigned long) * 8))
 };
 
 /*
@@ -242,7 +244,7 @@ struct afs_net {
     seqlock_t    cells_lock;
 
     struct mutex    proc_cells_lock;
-    struct list_head proc_cells;
+    struct hlist_head proc_cells;
 
     /* Known servers.  Theoretically each fileserver can only be in one
      * cell, but in practice, people create aliases and subsets and there's
@@ -320,7 +322,7 @@ struct afs_cell {
     struct afs_net    *net;
     struct key    *anonymous_key;    /* anonymous user key for this cell */
     struct work_struct manager;    /* Manager for init/deinit/dns */
-    struct list_head proc_link;    /* /proc cell list link */
+    struct hlist_node proc_link;    /* /proc cell list link */
 #ifdef CONFIG_AFS_FSCACHE
     struct fscache_cookie    *cache;    /* caching cookie */
 #endif
diff --git a/fs/afs/main.c b/fs/afs/main.c
index e84fe822a960..107427688edd 100644
--- a/fs/afs/main.c
+++ b/fs/afs/main.c
@@ -87,7 +87,7 @@ static int __net_init afs_net_init(struct net *net_ns)
     timer_setup(&net->cells_timer, afs_cells_timer, 0);
 
     mutex_init(&net->proc_cells_lock);
-    INIT_LIST_HEAD(&net->proc_cells);
+    INIT_HLIST_HEAD(&net->proc_cells);
 
     seqlock_init(&net->fs_lock);
     net->fs_servers = RB_ROOT;
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index 476dcbb79713..9101f62707af 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -33,9 +33,8 @@ static inline struct afs_net *afs_seq2net_single(struct seq_file *m)
 static int afs_proc_cells_show(struct seq_file *m, void *v)
 {
     struct afs_cell *cell = list_entry(v, struct afs_cell, proc_link);
-    struct afs_net *net = afs_seq2net(m);
 
-    if (v == &net->proc_cells) {
+    if (v == SEQ_START_TOKEN) {
         /* display header on line 1 */
         seq_puts(m, "USE NAME\n");
         return 0;
@@ -50,12 +49,12 @@ static void *afs_proc_cells_start(struct seq_file *m, loff_t *_pos)
     __acquires(rcu)
 {
     rcu_read_lock();
-    return seq_list_start_head(&afs_seq2net(m)->proc_cells, *_pos);
+    return seq_hlist_start_head_rcu(&afs_seq2net(m)->proc_cells, *_pos);
 }
 
 static void *afs_proc_cells_next(struct seq_file *m, void *v, loff_t *pos)
 {
-    return seq_list_next(v, &afs_seq2net(m)->proc_cells, pos);
+    return seq_hlist_next_rcu(v, &afs_seq2net(m)->proc_cells, pos);
 }
 
 static void afs_proc_cells_stop(struct seq_file *m, void *v)
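afs_activate_cell() now open-codes a sorted insert into an RCU-traversed hlist: it walks the chain of next pointers until it finds the first cell whose name sorts after the new one, then splices the node in, publishing it with rcu_assign_pointer() so that lockless /proc readers see a consistent list. A minimal userspace sketch of the same pointer surgery — without the RCU barriers that matter in the kernel, and with all names invented for the sketch:

    #include <stdio.h>
    #include <string.h>

    struct hnode {
        struct hnode *next;
        struct hnode **pprev;    /* points at whatever points at us */
        const char *name;
    };

    /* Insert @n into the list headed by @first, keeping names sorted. */
    static void insert_sorted(struct hnode **first, struct hnode *n)
    {
        struct hnode **p;

        for (p = first; *p; p = &(*p)->next) {
            if (strcmp(n->name, (*p)->name) < 0)
                break;
        }

        n->pprev = p;
        n->next = *p;
        *p = n;            /* the kernel publishes with rcu_assign_pointer() */
        if (n->next)
            n->next->pprev = &n->next;
    }

    int main(void)
    {
        struct hnode *head = NULL;
        struct hnode a = { .name = "grand.central.org" };
        struct hnode b = { .name = "example.com" };
        struct hnode *n;

        insert_sorted(&head, &a);
        insert_sorted(&head, &b);

        for (n = head; n; n = n->next)
            printf("%s\n", n->name);
        return 0;
    }

The double-pointer walk is what lets a single loop handle insertion at the head, the middle, and the tail without special cases.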
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 35f2ae30f31f..77a83790a31f 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -690,8 +690,6 @@ static void afs_process_async_call(struct work_struct *work)
     }
 
     if (call->state == AFS_CALL_COMPLETE) {
-        call->reply[0] = NULL;
-
         /* We have two refs to release - one from the alloc and one
          * queued with the work item - and we can't just deallocate the
          * call because the work item may be queued again.
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index af2b17b21b94..95983c744164 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -343,7 +343,7 @@ try_again:
     trap = lock_rename(cache->graveyard, dir);
 
     /* do some checks before getting the grave dentry */
-    if (rep->d_parent != dir) {
+    if (rep->d_parent != dir || IS_DEADDIR(d_inode(rep))) {
         /* the entry was probably culled when we dropped the parent dir
          * lock */
         unlock_rename(cache->graveyard, dir);
diff --git a/fs/dax.c b/fs/dax.c
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -666,6 +666,8 @@ struct page *dax_layout_busy_page(struct address_space *mapping)
     while (index < end && pagevec_lookup_entries(&pvec, mapping, index,
                 min(end - index, (pgoff_t)PAGEVEC_SIZE),
                 indices)) {
+        pgoff_t nr_pages = 1;
+
         for (i = 0; i < pagevec_count(&pvec); i++) {
             struct page *pvec_ent = pvec.pages[i];
             void *entry;
@@ -680,8 +682,15 @@ struct page *dax_layout_busy_page(struct address_space *mapping)
             xa_lock_irq(&mapping->i_pages);
             entry = get_unlocked_mapping_entry(mapping, index, NULL);
-            if (entry)
+            if (entry) {
                 page = dax_busy_page(entry);
+                /*
+                 * Account for multi-order entries at
+                 * the end of the pagevec.
+                 */
+                if (i + 1 >= pagevec_count(&pvec))
+                    nr_pages = 1UL << dax_radix_order(entry);
+            }
             put_unlocked_mapping_entry(mapping, index, entry);
             xa_unlock_irq(&mapping->i_pages);
             if (page)
@@ -696,7 +705,7 @@ struct page *dax_layout_busy_page(struct address_space *mapping)
          */
         pagevec_remove_exceptionals(&pvec);
         pagevec_release(&pvec);
-        index++;
+        index += nr_pages;
 
         if (page)
             break;
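The dax_layout_busy_page() change accounts for multi-order entries: a PMD-sized entry occupies one mapping slot but covers 2^order pages, so resuming the next lookup at index + 1 would land inside the range the last entry already covered. A toy model of just the index accounting — the data and names are invented, this is not the kernel API:

    #include <stdio.h>

    /* Each scanned entry covers 1UL << order pages. */
    struct entry {
        unsigned int order;
    };

    int main(void)
    {
        /* A 4K (order-0) entry, then a 2M (order-9) entry. */
        struct entry entries[] = { { 0 }, { 9 } };
        unsigned long index = 0, end = 1024;
        unsigned int i;

        for (i = 0; i < 2 && index < end; i++) {
            unsigned long nr_pages = 1UL << entries[i].order;

            printf("scan entry at %lu covering %lu pages\n",
                   index, nr_pages);
            index += nr_pages;    /* the fix: was index++ */
        }
        printf("next lookup resumes at index %lu\n", index);
        return 0;
    }

Advancing by the entry's true span keeps each batch of lookups disjoint from the last, which is the invariant the original index++ silently broke for huge-page entries.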
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c
index defc2168de91..f58c0cacc531 100644
--- a/fs/fat/fatent.c
+++ b/fs/fat/fatent.c
@@ -682,6 +682,7 @@ int fat_count_free_clusters(struct super_block *sb)
             if (ops->ent_get(&fatent) == FAT_ENT_FREE)
                 free++;
         } while (fat_ent_next(sbi, &fatent));
+        cond_resched();
     }
     sbi->free_clusters = free;
     sbi->free_clus_valid = 1;
diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c
index 83bfe04456b6..c550512ce335 100644
--- a/fs/fscache/cookie.c
+++ b/fs/fscache/cookie.c
@@ -70,20 +70,7 @@ void fscache_free_cookie(struct fscache_cookie *cookie)
 }
 
 /*
- * initialise an cookie jar slab element prior to any use
- */
-void fscache_cookie_init_once(void *_cookie)
-{
-    struct fscache_cookie *cookie = _cookie;
-
-    memset(cookie, 0, sizeof(*cookie));
-    spin_lock_init(&cookie->lock);
-    spin_lock_init(&cookie->stores_lock);
-    INIT_HLIST_HEAD(&cookie->backing_objects);
-}
-
-/*
- * Set the index key in a cookie.  The cookie struct has space for a 12-byte
+ * Set the index key in a cookie.  The cookie struct has space for a 16-byte
  * key plus length and hash, but if that's not big enough, it's instead a
  * pointer to a buffer containing 3 bytes of hash, 1 byte of length and then
  * the key data.
@@ -93,20 +80,18 @@ static int fscache_set_key(struct fscache_cookie *cookie,
 {
     unsigned long long h;
     u32 *buf;
+    int bufs;
     int i;
 
-    cookie->key_len = index_key_len;
+    bufs = DIV_ROUND_UP(index_key_len, sizeof(*buf));
 
     if (index_key_len > sizeof(cookie->inline_key)) {
-        buf = kzalloc(index_key_len, GFP_KERNEL);
+        buf = kcalloc(bufs, sizeof(*buf), GFP_KERNEL);
         if (!buf)
             return -ENOMEM;
         cookie->key = buf;
     } else {
         buf = (u32 *)cookie->inline_key;
-        buf[0] = 0;
-        buf[1] = 0;
-        buf[2] = 0;
     }
 
     memcpy(buf, index_key, index_key_len);
@@ -116,7 +101,8 @@ static int fscache_set_key(struct fscache_cookie *cookie,
      */
     h = (unsigned long)cookie->parent;
     h += index_key_len + cookie->type;
-    for (i = 0; i < (index_key_len + sizeof(u32) - 1) / sizeof(u32); i++)
+
+    for (i = 0; i < bufs; i++)
         h += buf[i];
 
     cookie->key_hash = h ^ (h >> 32);
@@ -161,7 +147,7 @@ struct fscache_cookie *fscache_alloc_cookie(
     struct fscache_cookie *cookie;
 
     /* allocate and initialise a cookie */
-    cookie = kmem_cache_alloc(fscache_cookie_jar, GFP_KERNEL);
+    cookie = kmem_cache_zalloc(fscache_cookie_jar, GFP_KERNEL);
     if (!cookie)
         return NULL;
 
@@ -192,6 +178,9 @@ struct fscache_cookie *fscache_alloc_cookie(
     cookie->netfs_data    = netfs_data;
     cookie->flags        = (1 << FSCACHE_COOKIE_NO_DATA_YET);
     cookie->type        = def->type;
+    spin_lock_init(&cookie->lock);
+    spin_lock_init(&cookie->stores_lock);
+    INIT_HLIST_HEAD(&cookie->backing_objects);
 
     /* radix tree insertion won't use the preallocation pool unless it's
      * told it may not wait */
diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h
index f83328a7f048..d6209022e965 100644
--- a/fs/fscache/internal.h
+++ b/fs/fscache/internal.h
@@ -51,7 +51,6 @@ extern struct fscache_cache *fscache_select_cache_for_object(
 extern struct kmem_cache *fscache_cookie_jar;
 
 extern void fscache_free_cookie(struct fscache_cookie *);
-extern void fscache_cookie_init_once(void *);
 extern struct fscache_cookie *fscache_alloc_cookie(struct fscache_cookie *,
                            const struct fscache_cookie_def *,
                            const void *, size_t,
diff --git a/fs/fscache/main.c b/fs/fscache/main.c
index 7dce110bf17d..30ad89db1efc 100644
--- a/fs/fscache/main.c
+++ b/fs/fscache/main.c
@@ -143,9 +143,7 @@ static int __init fscache_init(void)
 
     fscache_cookie_jar = kmem_cache_create("fscache_cookie_jar",
                            sizeof(struct fscache_cookie),
-                           0,
-                           0,
-                           fscache_cookie_init_once);
+                           0, 0, NULL);
     if (!fscache_cookie_jar) {
         pr_notice("Failed to allocate a cookie jar\n");
         ret = -ENOMEM;
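The fscache_set_key() fix addresses an out-of-bounds read: the hash loop consumes the key buffer a whole u32 at a time, so a key whose length is not a multiple of 4 must be copied into a zero-padded, word-rounded buffer first; otherwise the final iteration reads past the allocation. A userspace sketch of the corrected pattern — the hash mixing is simplified and the names are invented for the sketch:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    static unsigned int hash_key(const void *key, size_t len)
    {
        /* Round up to whole words, like DIV_ROUND_UP() in the kernel. */
        size_t bufs = (len + sizeof(unsigned int) - 1) / sizeof(unsigned int);
        unsigned int *buf;
        unsigned long long h = len;
        size_t i;

        buf = calloc(bufs, sizeof(*buf));    /* zeroed and word-rounded */
        if (!buf)
            return 0;
        memcpy(buf, key, len);

        /* Safe: every word read lies inside the padded allocation. */
        for (i = 0; i < bufs; i++)
            h += buf[i];
        free(buf);
        return (unsigned int)(h ^ (h >> 32));
    }

    int main(void)
    {
        printf("%08x\n", hash_key("an odd-length key", 17));
        return 0;
    }

Zero-padding also keeps the hash deterministic: without it, the garbage tail bytes of the last word would change the hash from run to run for the same key.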
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 03128ed1f34e..84544a4f012d 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -1057,7 +1057,7 @@ static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos,
         }
     }
     release_metapath(&mp);
-    if (gfs2_is_jdata(ip))
+    if (!gfs2_is_stuffed(ip) && gfs2_is_jdata(ip))
         iomap->page_done = gfs2_iomap_journaled_page_done;
     return 0;
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 8e712b614e6e..933aac5da193 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -96,7 +96,9 @@ struct ocfs2_unblock_ctl {
 };
 
 /* Lockdep class keys */
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
 static struct lock_class_key lockdep_keys[OCFS2_NUM_LOCK_TYPES];
+#endif
 
 static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres,
                     int new_level);
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index bf000c8aeffb..fec62e9dfbe6 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -2337,8 +2337,8 @@ late_initcall(ubifs_init);
 
 static void __exit ubifs_exit(void)
 {
-    WARN_ON(list_empty(&ubifs_infos));
-    WARN_ON(atomic_long_read(&ubifs_clean_zn_cnt) == 0);
+    WARN_ON(!list_empty(&ubifs_infos));
+    WARN_ON(atomic_long_read(&ubifs_clean_zn_cnt) != 0);
 
     dbg_debugfs_exit();
     ubifs_compressors_exit();
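The ubifs_exit() change inverts the sense of two assertions: WARN_ON(cond) complains when cond is true, so "nothing must be left mounted at module exit" has to be written WARN_ON(!list_empty(...)), not WARN_ON(list_empty(...)). A trivial illustration with a userspace stand-in for the kernel macro:

    #include <stdio.h>

    #define WARN_ON(cond) \
        do { if (cond) fprintf(stderr, "WARNING: %s\n", #cond); } while (0)

    int main(void)
    {
        int mounted_filesystems = 0;    /* module exit: none should remain */

        WARN_ON(mounted_filesystems != 0);    /* fixed check: stays quiet */
        WARN_ON(mounted_filesystems == 0);    /* old, inverted check: fires */
        return 0;
    }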
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 5289e22cb081..42ea7bab9144 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -1220,35 +1220,92 @@ retry:
     return 0;
 }
 
+/* Unlock both inodes after they've been prepped for a range clone. */
+STATIC void
+xfs_reflink_remap_unlock(
+    struct file        *file_in,
+    struct file        *file_out)
+{
+    struct inode        *inode_in = file_inode(file_in);
+    struct xfs_inode    *src = XFS_I(inode_in);
+    struct inode        *inode_out = file_inode(file_out);
+    struct xfs_inode    *dest = XFS_I(inode_out);
+    bool            same_inode = (inode_in == inode_out);
+
+    xfs_iunlock(dest, XFS_MMAPLOCK_EXCL);
+    if (!same_inode)
+        xfs_iunlock(src, XFS_MMAPLOCK_SHARED);
+    inode_unlock(inode_out);
+    if (!same_inode)
+        inode_unlock_shared(inode_in);
+}
+
 /*
- * Link a range of blocks from one file to another.
+ * If we're reflinking to a point past the destination file's EOF, we must
+ * zero any speculative post-EOF preallocations that sit between the old EOF
+ * and the destination file offset.
  */
-int
-xfs_reflink_remap_range(
+static int
+xfs_reflink_zero_posteof(
+    struct xfs_inode    *ip,
+    loff_t            pos)
+{
+    loff_t            isize = i_size_read(VFS_I(ip));
+
+    if (pos <= isize)
+        return 0;
+
+    trace_xfs_zero_eof(ip, isize, pos - isize);
+    return iomap_zero_range(VFS_I(ip), isize, pos - isize, NULL,
+            &xfs_iomap_ops);
+}
+
+/*
+ * Prepare two files for range cloning.  Upon a successful return both inodes
+ * will have the iolock and mmaplock held, the page cache of the out file
+ * will be truncated, and any leases on the out file will have been broken.
+ * This function borrows heavily from xfs_file_aio_write_checks.
+ *
+ * The VFS allows partial EOF blocks to "match" for dedupe even though it
+ * hasn't checked that the bytes beyond EOF physically match.  Hence we
+ * cannot use the EOF block in the source dedupe range because it's not a
+ * complete block match, and so it can introduce a corruption into the file
+ * that has its block replaced.
+ *
+ * In similar fashion, the VFS file cloning also allows partial EOF blocks to
+ * be "block aligned" for the purposes of cloning entire files.  However, if
+ * the source file range includes the EOF block and it lands within the
+ * existing EOF of the destination file, then we can expose stale data from
+ * beyond the source file EOF in the destination file.
+ *
+ * XFS doesn't support partial block sharing, so in both cases we have to
+ * check these cases ourselves.  For dedupe, we can simply round the length
+ * to dedupe down to the previous whole block and ignore the partial EOF
+ * block.  While this means we can't dedupe the last block of a file, this
+ * is an acceptable tradeoff for simplicity of implementation.
+ *
+ * For cloning, we want to share the partial EOF block if it is also the new
+ * EOF block of the destination file.  If the partial EOF block lies inside
+ * the existing destination EOF, then we have to abort the clone to avoid
+ * exposing stale data in the destination file.  Hence we reject these clone
+ * attempts with -EINVAL in this case.
+ */
+STATIC int
+xfs_reflink_remap_prep(
     struct file        *file_in,
     loff_t            pos_in,
     struct file        *file_out,
     loff_t            pos_out,
-    u64            len,
+    u64            *len,
     bool            is_dedupe)
 {
     struct inode        *inode_in = file_inode(file_in);
     struct xfs_inode    *src = XFS_I(inode_in);
     struct inode        *inode_out = file_inode(file_out);
     struct xfs_inode    *dest = XFS_I(inode_out);
-    struct xfs_mount    *mp = src->i_mount;
     bool            same_inode = (inode_in == inode_out);
-    xfs_fileoff_t        sfsbno, dfsbno;
-    xfs_filblks_t        fsblen;
-    xfs_extlen_t        cowextsize;
+    u64            blkmask = i_blocksize(inode_in) - 1;
     ssize_t            ret;
 
-    if (!xfs_sb_version_hasreflink(&mp->m_sb))
-        return -EOPNOTSUPP;
-
-    if (XFS_FORCED_SHUTDOWN(mp))
-        return -EIO;
-
     /* Lock both files against IO */
     ret = xfs_iolock_two_inodes_and_break_layout(inode_in, inode_out);
     if (ret)
@@ -1270,33 +1327,115 @@ xfs_reflink_remap_range(
         goto out_unlock;
 
     ret = vfs_clone_file_prep_inodes(inode_in, pos_in, inode_out, pos_out,
-            &len, is_dedupe);
+            len, is_dedupe);
     if (ret <= 0)
         goto out_unlock;
 
+    /*
+     * If the dedupe data matches, chop off the partial EOF block
+     * from the source file so we don't try to dedupe the partial
+     * EOF block.
+     */
+    if (is_dedupe) {
+        *len &= ~blkmask;
+    } else if (*len & blkmask) {
+        /*
+         * The user is attempting to share a partial EOF block;
+         * if it's inside the destination EOF then reject it.
+         */
+        if (pos_out + *len < i_size_read(inode_out)) {
+            ret = -EINVAL;
+            goto out_unlock;
+        }
+    }
+
     /* Attach dquots to dest inode before changing block map */
     ret = xfs_qm_dqattach(dest);
     if (ret)
         goto out_unlock;
 
-    trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out);
-
     /*
-     * Clear out post-eof preallocations because we don't have page cache
-     * backing the delayed allocations and they'll never get freed on
-     * their own.
+     * Zero existing post-eof speculative preallocations in the destination
+     * file.
      */
-    if (xfs_can_free_eofblocks(dest, true)) {
-        ret = xfs_free_eofblocks(dest);
-        if (ret)
-            goto out_unlock;
-    }
+    ret = xfs_reflink_zero_posteof(dest, pos_out);
+    if (ret)
+        goto out_unlock;
 
     /* Set flags and remap blocks. */
     ret = xfs_reflink_set_inode_flag(src, dest);
     if (ret)
         goto out_unlock;
 
+    /* Zap any page cache for the destination file's range. */
+    truncate_inode_pages_range(&inode_out->i_data, pos_out,
+                   PAGE_ALIGN(pos_out + *len) - 1);
+
+    /* If we're altering the file contents... */
+    if (!is_dedupe) {
+        /*
+         * ...update the timestamps (which will grab the ilock again
+         * from xfs_fs_dirty_inode, so we have to call it before we
+         * take the ilock).
+         */
+        if (!(file_out->f_mode & FMODE_NOCMTIME)) {
+            ret = file_update_time(file_out);
+            if (ret)
+                goto out_unlock;
+        }
+
+        /*
+         * ...clear the security bits if the process is not being run
+         * by root.  This keeps people from modifying setuid and setgid
+         * binaries.
+         */
+        ret = file_remove_privs(file_out);
+        if (ret)
+            goto out_unlock;
+    }
+
+    return 1;
+out_unlock:
+    xfs_reflink_remap_unlock(file_in, file_out);
+    return ret;
+}
+
+/*
+ * Link a range of blocks from one file to another.
+ */
+int
+xfs_reflink_remap_range(
+    struct file        *file_in,
+    loff_t            pos_in,
+    struct file        *file_out,
+    loff_t            pos_out,
+    u64            len,
+    bool            is_dedupe)
+{
+    struct inode        *inode_in = file_inode(file_in);
+    struct xfs_inode    *src = XFS_I(inode_in);
+    struct inode        *inode_out = file_inode(file_out);
+    struct xfs_inode    *dest = XFS_I(inode_out);
+    struct xfs_mount    *mp = src->i_mount;
+    xfs_fileoff_t        sfsbno, dfsbno;
+    xfs_filblks_t        fsblen;
+    xfs_extlen_t        cowextsize;
+    ssize_t            ret;
+
+    if (!xfs_sb_version_hasreflink(&mp->m_sb))
+        return -EOPNOTSUPP;
+
+    if (XFS_FORCED_SHUTDOWN(mp))
+        return -EIO;
+
+    /* Prepare and then clone file data. */
+    ret = xfs_reflink_remap_prep(file_in, pos_in, file_out, pos_out,
+                     &len, is_dedupe);
+    if (ret <= 0)
+        return ret;
+
+    trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out);
+
     dfsbno = XFS_B_TO_FSBT(mp, pos_out);
     sfsbno = XFS_B_TO_FSBT(mp, pos_in);
     fsblen = XFS_B_TO_FSB(mp, len);
@@ -1305,10 +1444,6 @@
     if (ret)
         goto out_unlock;
 
-    /* Zap any page cache for the destination file's range. */
-    truncate_inode_pages_range(&inode_out->i_data, pos_out,
-            PAGE_ALIGN(pos_out + len) - 1);
-
     /*
      * Carry the cowextsize hint from src to dest if we're sharing the
      * entire source file to the entire destination file, the source file
@@ -1325,12 +1460,7 @@
             is_dedupe);
 
 out_unlock:
-    xfs_iunlock(dest, XFS_MMAPLOCK_EXCL);
-    if (!same_inode)
-        xfs_iunlock(src, XFS_MMAPLOCK_SHARED);
-    inode_unlock(inode_out);
-    if (!same_inode)
-        inode_unlock_shared(inode_in);
+    xfs_reflink_remap_unlock(file_in, file_out);
     if (ret)
         trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_);
     return ret;
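The comment block above xfs_reflink_remap_prep() carries the whole argument: dedupe silently rounds the length down to drop a trailing partial EOF block, while a clone may share a partial EOF block only if it becomes the destination's new EOF; a partial block landing inside the existing destination EOF is rejected. A userspace sketch of just that arithmetic — the block size, file sizes, and function name are invented for the example:

    #include <stdio.h>

    #define BLKSIZE 4096ULL

    /* Returns the adjusted length, or -1 for a clone that must be rejected. */
    static long long adjust_remap_len(unsigned long long pos_out,
                      unsigned long long len,
                      unsigned long long dest_isize,
                      int is_dedupe)
    {
        unsigned long long blkmask = BLKSIZE - 1;

        if (is_dedupe)
            return len & ~blkmask;    /* drop the partial EOF block */

        if (len & blkmask) {
            /*
             * Sharing a partial EOF block is only safe if it becomes
             * the destination's new EOF; inside the destination EOF
             * it would expose stale data, so reject the request.
             */
            if (pos_out + len < dest_isize)
                return -1;    /* -EINVAL in the kernel */
        }
        return len;
    }

    int main(void)
    {
        printf("dedupe:          %lld\n",
               adjust_remap_len(0, 10000, 1 << 20, 1));
        printf("clone at EOF:    %lld\n",
               adjust_remap_len(1 << 20, 10000, (1 << 20) + 10000, 0));
        printf("clone inside EOF: %lld\n",
               adjust_remap_len(0, 10000, 1 << 20, 0));
        return 0;
    }

Splitting the work into a prep step that applies this policy under the locks and a separate remap step is what lets the shutdown and feature checks move to the top of xfs_reflink_remap_range() while the unlock path collapses into the shared xfs_reflink_remap_unlock() helper.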