summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/backref.c10
-rw-r--r--fs/btrfs/ctree.h3
-rw-r--r--fs/btrfs/dev-replace.c2
-rw-r--r--fs/btrfs/disk-io.c129
-rw-r--r--fs/btrfs/extent-tree.c37
-rw-r--r--fs/btrfs/extent_map.c2
-rw-r--r--fs/btrfs/extent_map.h10
-rw-r--r--fs/btrfs/file.c6
-rw-r--r--fs/btrfs/inode-map.c9
-rw-r--r--fs/btrfs/inode-map.h1
-rw-r--r--fs/btrfs/inode.c24
-rw-r--r--fs/btrfs/ioctl.c4
-rw-r--r--fs/btrfs/raid56.c100
-rw-r--r--fs/btrfs/scrub.c4
-rw-r--r--fs/btrfs/super.c29
-rw-r--r--fs/btrfs/volumes.c107
-rw-r--r--fs/dlm/user.c2
-rw-r--r--fs/ext4/crypto.c6
-rw-r--r--fs/ext4/crypto_key.c4
-rw-r--r--fs/ext4/ext4.h99
-rw-r--r--fs/ext4/extents.c153
-rw-r--r--fs/ext4/file.c82
-rw-r--r--fs/ext4/ialloc.c7
-rw-r--r--fs/ext4/inline.c10
-rw-r--r--fs/ext4/inode.c268
-rw-r--r--fs/ext4/ioctl.c376
-rw-r--r--fs/ext4/namei.c34
-rw-r--r--fs/ext4/super.c97
-rw-r--r--fs/ext4/truncate.h2
-rw-r--r--fs/filesystems.c6
-rw-r--r--fs/ocfs2/dlmglue.c6
-rw-r--r--fs/pipe.c47
-rw-r--r--fs/proc/task_mmu.c12
-rw-r--r--fs/reiserfs/super.c2
-rw-r--r--fs/xfs/libxfs/xfs_format.h11
-rw-r--r--fs/xfs/libxfs/xfs_fs.h38
-rw-r--r--fs/xfs/xfs_buf.c10
-rw-r--r--fs/xfs/xfs_inode.c60
-rw-r--r--fs/xfs/xfs_ioctl.c92
-rw-r--r--fs/xfs/xfs_iops.c4
-rw-r--r--fs/xfs/xfs_trans_ail.c1
41 files changed, 1309 insertions, 597 deletions
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 08405a3da6b1..b90cd3776f8e 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -560,13 +560,13 @@ static int __add_missing_keys(struct btrfs_fs_info *fs_info,
*/
static void __merge_refs(struct list_head *head, int mode)
{
- struct __prelim_ref *ref1;
+ struct __prelim_ref *pos1;
- list_for_each_entry(ref1, head, list) {
- struct __prelim_ref *ref2 = ref1, *tmp;
+ list_for_each_entry(pos1, head, list) {
+ struct __prelim_ref *pos2 = pos1, *tmp;
- list_for_each_entry_safe_continue(ref2, tmp, head, list) {
- struct __prelim_ref *xchg;
+ list_for_each_entry_safe_continue(pos2, tmp, head, list) {
+ struct __prelim_ref *xchg, *ref1 = pos1, *ref2 = pos2;
struct extent_inode_elem *eie;
if (!ref_for_same_block(ref1, ref2))
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 97ad9bbeb35d..bfe4a337fb4d 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1614,7 +1614,7 @@ struct btrfs_fs_info {
spinlock_t delayed_iput_lock;
struct list_head delayed_iputs;
- struct rw_semaphore delayed_iput_sem;
+ struct mutex cleaner_delayed_iput_mutex;
/* this protects tree_mod_seq_list */
spinlock_t tree_mod_seq_lock;
@@ -3641,6 +3641,7 @@ int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans,
int __get_raid_index(u64 flags);
int btrfs_start_write_no_snapshoting(struct btrfs_root *root);
void btrfs_end_write_no_snapshoting(struct btrfs_root *root);
+void btrfs_wait_for_snapshot_creation(struct btrfs_root *root);
void check_system_chunk(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
const u64 type);
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 1e668fb7dd4c..cbb7dbfb3fff 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -614,7 +614,7 @@ static void btrfs_dev_replace_update_device_in_mapping_tree(
em = lookup_extent_mapping(em_tree, start, (u64)-1);
if (!em)
break;
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
for (i = 0; i < map->num_stripes; i++)
if (srcdev == map->stripes[i].dev)
map->stripes[i].dev = tgtdev;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index e99ccd6ffb2c..dd08e29f5117 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -55,6 +55,12 @@
#include <asm/cpufeature.h>
#endif
+#define BTRFS_SUPER_FLAG_SUPP (BTRFS_HEADER_FLAG_WRITTEN |\
+ BTRFS_HEADER_FLAG_RELOC |\
+ BTRFS_SUPER_FLAG_ERROR |\
+ BTRFS_SUPER_FLAG_SEEDING |\
+ BTRFS_SUPER_FLAG_METADUMP)
+
static const struct extent_io_ops btree_extent_io_ops;
static void end_workqueue_fn(struct btrfs_work *work);
static void free_fs_root(struct btrfs_root *root);
@@ -1583,8 +1589,23 @@ int btrfs_init_fs_root(struct btrfs_root *root)
ret = get_anon_bdev(&root->anon_dev);
if (ret)
goto free_writers;
+
+ mutex_lock(&root->objectid_mutex);
+ ret = btrfs_find_highest_objectid(root,
+ &root->highest_objectid);
+ if (ret) {
+ mutex_unlock(&root->objectid_mutex);
+ goto free_root_dev;
+ }
+
+ ASSERT(root->highest_objectid <= BTRFS_LAST_FREE_OBJECTID);
+
+ mutex_unlock(&root->objectid_mutex);
+
return 0;
+free_root_dev:
+ free_anon_bdev(root->anon_dev);
free_writers:
btrfs_free_subvolume_writers(root->subv_writers);
fail:
@@ -1786,7 +1807,10 @@ static int cleaner_kthread(void *arg)
goto sleep;
}
+ mutex_lock(&root->fs_info->cleaner_delayed_iput_mutex);
btrfs_run_delayed_iputs(root);
+ mutex_unlock(&root->fs_info->cleaner_delayed_iput_mutex);
+
again = btrfs_clean_one_deleted_snapshot(root);
mutex_unlock(&root->fs_info->cleaner_mutex);
@@ -2556,8 +2580,8 @@ int open_ctree(struct super_block *sb,
mutex_init(&fs_info->delete_unused_bgs_mutex);
mutex_init(&fs_info->reloc_mutex);
mutex_init(&fs_info->delalloc_root_mutex);
+ mutex_init(&fs_info->cleaner_delayed_iput_mutex);
seqlock_init(&fs_info->profiles_lock);
- init_rwsem(&fs_info->delayed_iput_sem);
INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
INIT_LIST_HEAD(&fs_info->space_info);
@@ -2742,26 +2766,6 @@ int open_ctree(struct super_block *sb,
goto fail_alloc;
}
- /*
- * Leafsize and nodesize were always equal, this is only a sanity check.
- */
- if (le32_to_cpu(disk_super->__unused_leafsize) !=
- btrfs_super_nodesize(disk_super)) {
- printk(KERN_ERR "BTRFS: couldn't mount because metadata "
- "blocksizes don't match. node %d leaf %d\n",
- btrfs_super_nodesize(disk_super),
- le32_to_cpu(disk_super->__unused_leafsize));
- err = -EINVAL;
- goto fail_alloc;
- }
- if (btrfs_super_nodesize(disk_super) > BTRFS_MAX_METADATA_BLOCKSIZE) {
- printk(KERN_ERR "BTRFS: couldn't mount because metadata "
- "blocksize (%d) was too large\n",
- btrfs_super_nodesize(disk_super));
- err = -EINVAL;
- goto fail_alloc;
- }
-
features = btrfs_super_incompat_flags(disk_super);
features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF;
if (tree_root->fs_info->compress_type == BTRFS_COMPRESS_LZO)
@@ -2833,17 +2837,6 @@ int open_ctree(struct super_block *sb,
sb->s_blocksize = sectorsize;
sb->s_blocksize_bits = blksize_bits(sectorsize);
- if (btrfs_super_magic(disk_super) != BTRFS_MAGIC) {
- printk(KERN_ERR "BTRFS: valid FS not found on %s\n", sb->s_id);
- goto fail_sb_buffer;
- }
-
- if (sectorsize != PAGE_SIZE) {
- printk(KERN_ERR "BTRFS: incompatible sector size (%lu) "
- "found on %s\n", (unsigned long)sectorsize, sb->s_id);
- goto fail_sb_buffer;
- }
-
mutex_lock(&fs_info->chunk_mutex);
ret = btrfs_read_sys_array(tree_root);
mutex_unlock(&fs_info->chunk_mutex);
@@ -2915,6 +2908,18 @@ retry_root_backup:
tree_root->commit_root = btrfs_root_node(tree_root);
btrfs_set_root_refs(&tree_root->root_item, 1);
+ mutex_lock(&tree_root->objectid_mutex);
+ ret = btrfs_find_highest_objectid(tree_root,
+ &tree_root->highest_objectid);
+ if (ret) {
+ mutex_unlock(&tree_root->objectid_mutex);
+ goto recovery_tree_root;
+ }
+
+ ASSERT(tree_root->highest_objectid <= BTRFS_LAST_FREE_OBJECTID);
+
+ mutex_unlock(&tree_root->objectid_mutex);
+
ret = btrfs_read_roots(fs_info, tree_root);
if (ret)
goto recovery_tree_root;
@@ -4018,8 +4023,17 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
int read_only)
{
struct btrfs_super_block *sb = fs_info->super_copy;
+ u64 nodesize = btrfs_super_nodesize(sb);
+ u64 sectorsize = btrfs_super_sectorsize(sb);
int ret = 0;
+ if (btrfs_super_magic(sb) != BTRFS_MAGIC) {
+ printk(KERN_ERR "BTRFS: no valid FS found\n");
+ ret = -EINVAL;
+ }
+ if (btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP)
+ printk(KERN_WARNING "BTRFS: unrecognized super flag: %llu\n",
+ btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP);
if (btrfs_super_root_level(sb) >= BTRFS_MAX_LEVEL) {
printk(KERN_ERR "BTRFS: tree_root level too big: %d >= %d\n",
btrfs_super_root_level(sb), BTRFS_MAX_LEVEL);
@@ -4037,31 +4051,46 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
}
/*
- * The common minimum, we don't know if we can trust the nodesize/sectorsize
- * items yet, they'll be verified later. Issue just a warning.
+ * Check sectorsize and nodesize first, other check will need it.
+ * Check all possible sectorsize(4K, 8K, 16K, 32K, 64K) here.
*/
- if (!IS_ALIGNED(btrfs_super_root(sb), 4096))
+ if (!is_power_of_2(sectorsize) || sectorsize < 4096 ||
+ sectorsize > BTRFS_MAX_METADATA_BLOCKSIZE) {
+ printk(KERN_ERR "BTRFS: invalid sectorsize %llu\n", sectorsize);
+ ret = -EINVAL;
+ }
+ /* Only PAGE SIZE is supported yet */
+ if (sectorsize != PAGE_CACHE_SIZE) {
+ printk(KERN_ERR "BTRFS: sectorsize %llu not supported yet, only support %lu\n",
+ sectorsize, PAGE_CACHE_SIZE);
+ ret = -EINVAL;
+ }
+ if (!is_power_of_2(nodesize) || nodesize < sectorsize ||
+ nodesize > BTRFS_MAX_METADATA_BLOCKSIZE) {
+ printk(KERN_ERR "BTRFS: invalid nodesize %llu\n", nodesize);
+ ret = -EINVAL;
+ }
+ if (nodesize != le32_to_cpu(sb->__unused_leafsize)) {
+ printk(KERN_ERR "BTRFS: invalid leafsize %u, should be %llu\n",
+ le32_to_cpu(sb->__unused_leafsize),
+ nodesize);
+ ret = -EINVAL;
+ }
+
+ /* Root alignment check */
+ if (!IS_ALIGNED(btrfs_super_root(sb), sectorsize)) {
printk(KERN_WARNING "BTRFS: tree_root block unaligned: %llu\n",
btrfs_super_root(sb));
- if (!IS_ALIGNED(btrfs_super_chunk_root(sb), 4096))
+ ret = -EINVAL;
+ }
+ if (!IS_ALIGNED(btrfs_super_chunk_root(sb), sectorsize)) {
printk(KERN_WARNING "BTRFS: chunk_root block unaligned: %llu\n",
btrfs_super_chunk_root(sb));
- if (!IS_ALIGNED(btrfs_super_log_root(sb), 4096))
- printk(KERN_WARNING "BTRFS: log_root block unaligned: %llu\n",
- btrfs_super_log_root(sb));
-
- /*
- * Check the lower bound, the alignment and other constraints are
- * checked later.
- */
- if (btrfs_super_nodesize(sb) < 4096) {
- printk(KERN_ERR "BTRFS: nodesize too small: %u < 4096\n",
- btrfs_super_nodesize(sb));
ret = -EINVAL;
}
- if (btrfs_super_sectorsize(sb) < 4096) {
- printk(KERN_ERR "BTRFS: sectorsize too small: %u < 4096\n",
- btrfs_super_sectorsize(sb));
+ if (!IS_ALIGNED(btrfs_super_log_root(sb), sectorsize)) {
+ printk(KERN_WARNING "BTRFS: log_root block unaligned: %llu\n",
+ btrfs_super_log_root(sb));
ret = -EINVAL;
}
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 60cc1399c64f..e2287c7c10be 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4139,8 +4139,10 @@ commit_trans:
!atomic_read(&root->fs_info->open_ioctl_trans)) {
need_commit--;
- if (need_commit > 0)
+ if (need_commit > 0) {
+ btrfs_start_delalloc_roots(fs_info, 0, -1);
btrfs_wait_ordered_roots(fs_info, -1);
+ }
trans = btrfs_join_transaction(root);
if (IS_ERR(trans))
@@ -4153,11 +4155,12 @@ commit_trans:
if (ret)
return ret;
/*
- * make sure that all running delayed iput are
- * done
+ * The cleaner kthread might still be doing iput
+ * operations. Wait for it to finish so that
+ * more space is released.
*/
- down_write(&root->fs_info->delayed_iput_sem);
- up_write(&root->fs_info->delayed_iput_sem);
+ mutex_lock(&root->fs_info->cleaner_delayed_iput_mutex);
+ mutex_unlock(&root->fs_info->cleaner_delayed_iput_mutex);
goto again;
} else {
btrfs_end_transaction(trans, root);
@@ -10399,7 +10402,7 @@ btrfs_start_trans_remove_block_group(struct btrfs_fs_info *fs_info,
* more device items and remove one chunk item), but this is done at
* btrfs_remove_chunk() through a call to check_system_chunk().
*/
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
num_items = 3 + map->num_stripes;
free_extent_map(em);
@@ -10586,7 +10589,7 @@ int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
disk_super = fs_info->super_copy;
if (!btrfs_super_root(disk_super))
- return 1;
+ return -EINVAL;
features = btrfs_super_incompat_flags(disk_super);
if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
@@ -10816,3 +10819,23 @@ int btrfs_start_write_no_snapshoting(struct btrfs_root *root)
}
return 1;
}
+
+static int wait_snapshoting_atomic_t(atomic_t *a)
+{
+ schedule();
+ return 0;
+}
+
+void btrfs_wait_for_snapshot_creation(struct btrfs_root *root)
+{
+ while (true) {
+ int ret;
+
+ ret = btrfs_start_write_no_snapshoting(root);
+ if (ret)
+ break;
+ wait_on_atomic_t(&root->will_be_snapshoted,
+ wait_snapshoting_atomic_t,
+ TASK_UNINTERRUPTIBLE);
+ }
+}
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 6a98bddd8f33..84fb56d5c018 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -76,7 +76,7 @@ void free_extent_map(struct extent_map *em)
WARN_ON(extent_map_in_tree(em));
WARN_ON(!list_empty(&em->list));
if (test_bit(EXTENT_FLAG_FS_MAPPING, &em->flags))
- kfree(em->bdev);
+ kfree(em->map_lookup);
kmem_cache_free(extent_map_cache, em);
}
}
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
index b2991fd8583e..eb8b8fae036b 100644
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -32,7 +32,15 @@ struct extent_map {
u64 block_len;
u64 generation;
unsigned long flags;
- struct block_device *bdev;
+ union {
+ struct block_device *bdev;
+
+ /*
+ * used for chunk mappings
+ * flags & EXTENT_FLAG_FS_MAPPING must be set
+ */
+ struct map_lookup *map_lookup;
+ };
atomic_t refs;
unsigned int compress_type;
struct list_head list;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 83d7859d7619..9f5cc1e8e126 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -406,8 +406,7 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
/* simple helper to fault in pages and copy. This should go away
* and be replaced with calls into generic code.
*/
-static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
- size_t write_bytes,
+static noinline int btrfs_copy_from_user(loff_t pos, size_t write_bytes,
struct page **prepared_pages,
struct iov_iter *i)
{
@@ -1588,8 +1587,7 @@ again:
ret = 0;
}
- copied = btrfs_copy_from_user(pos, num_pages,
- write_bytes, pages, i);
+ copied = btrfs_copy_from_user(pos, write_bytes, pages, i);
/*
* if we have trouble faulting in the pages, fall
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index 8b57c17b3fb3..e50316c4af15 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -515,7 +515,7 @@ out:
return ret;
}
-static int btrfs_find_highest_objectid(struct btrfs_root *root, u64 *objectid)
+int btrfs_find_highest_objectid(struct btrfs_root *root, u64 *objectid)
{
struct btrfs_path *path;
int ret;
@@ -555,13 +555,6 @@ int btrfs_find_free_objectid(struct btrfs_root *root, u64 *objectid)
int ret;
mutex_lock(&root->objectid_mutex);
- if (unlikely(root->highest_objectid < BTRFS_FIRST_FREE_OBJECTID)) {
- ret = btrfs_find_highest_objectid(root,
- &root->highest_objectid);
- if (ret)
- goto out;
- }
-
if (unlikely(root->highest_objectid >= BTRFS_LAST_FREE_OBJECTID)) {
ret = -ENOSPC;
goto out;
diff --git a/fs/btrfs/inode-map.h b/fs/btrfs/inode-map.h
index ddb347bfee23..c8e864b2d530 100644
--- a/fs/btrfs/inode-map.h
+++ b/fs/btrfs/inode-map.h
@@ -9,5 +9,6 @@ int btrfs_save_ino_cache(struct btrfs_root *root,
struct btrfs_trans_handle *trans);
int btrfs_find_free_objectid(struct btrfs_root *root, u64 *objectid);
+int btrfs_find_highest_objectid(struct btrfs_root *root, u64 *objectid);
#endif
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 247830107686..1b79dc9b12e4 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3134,7 +3134,6 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root)
{
struct btrfs_fs_info *fs_info = root->fs_info;
- down_read(&fs_info->delayed_iput_sem);
spin_lock(&fs_info->delayed_iput_lock);
while (!list_empty(&fs_info->delayed_iputs)) {
struct btrfs_inode *inode;
@@ -3153,7 +3152,6 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root)
spin_lock(&fs_info->delayed_iput_lock);
}
spin_unlock(&fs_info->delayed_iput_lock);
- up_read(&root->fs_info->delayed_iput_sem);
}
/*
@@ -4874,26 +4872,6 @@ next:
return err;
}
-static int wait_snapshoting_atomic_t(atomic_t *a)
-{
- schedule();
- return 0;
-}
-
-static void wait_for_snapshot_creation(struct btrfs_root *root)
-{
- while (true) {
- int ret;
-
- ret = btrfs_start_write_no_snapshoting(root);
- if (ret)
- break;
- wait_on_atomic_t(&root->will_be_snapshoted,
- wait_snapshoting_atomic_t,
- TASK_UNINTERRUPTIBLE);
- }
-}
-
static int btrfs_setsize(struct inode *inode, struct iattr *attr)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -4925,7 +4903,7 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
* truncation, it must capture all writes that happened before
* this truncation.
*/
- wait_for_snapshot_creation(root);
+ btrfs_wait_for_snapshot_creation(root);
ret = btrfs_cont_expand(inode, oldsize, newsize);
if (ret) {
btrfs_end_write_no_snapshoting(root);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 2a47a3148ec8..9028737ee9b5 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -568,6 +568,10 @@ static noinline int create_subvol(struct inode *dir,
goto fail;
}
+ mutex_lock(&new_root->objectid_mutex);
+ new_root->highest_objectid = new_dirid;
+ mutex_unlock(&new_root->objectid_mutex);
+
/*
* insert the directory item
*/
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 6d707545f775..55161369fab1 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -609,13 +609,28 @@ static int rbio_can_merge(struct btrfs_raid_bio *last,
return 1;
}
+static int rbio_stripe_page_index(struct btrfs_raid_bio *rbio, int stripe,
+ int index)
+{
+ return stripe * rbio->stripe_npages + index;
+}
+
+/*
+ * these are just the pages from the rbio array, not from anything
+ * the FS sent down to us
+ */
+static struct page *rbio_stripe_page(struct btrfs_raid_bio *rbio, int stripe,
+ int index)
+{
+ return rbio->stripe_pages[rbio_stripe_page_index(rbio, stripe, index)];
+}
+
/*
* helper to index into the pstripe
*/
static struct page *rbio_pstripe_page(struct btrfs_raid_bio *rbio, int index)
{
- index += (rbio->nr_data * rbio->stripe_len) >> PAGE_CACHE_SHIFT;
- return rbio->stripe_pages[index];
+ return rbio_stripe_page(rbio, rbio->nr_data, index);
}
/*
@@ -626,10 +641,7 @@ static struct page *rbio_qstripe_page(struct btrfs_raid_bio *rbio, int index)
{
if (rbio->nr_data + 1 == rbio->real_stripes)
return NULL;
-
- index += ((rbio->nr_data + 1) * rbio->stripe_len) >>
- PAGE_CACHE_SHIFT;
- return rbio->stripe_pages[index];
+ return rbio_stripe_page(rbio, rbio->nr_data + 1, index);
}
/*
@@ -889,6 +901,7 @@ static void raid_write_end_io(struct bio *bio)
{
struct btrfs_raid_bio *rbio = bio->bi_private;
int err = bio->bi_error;
+ int max_errors;
if (err)
fail_bio_stripe(rbio, bio);
@@ -901,7 +914,9 @@ static void raid_write_end_io(struct bio *bio)
err = 0;
/* OK, we have read all the stripes we need to. */
- if (atomic_read(&rbio->error) > rbio->bbio->max_errors)
+ max_errors = (rbio->operation == BTRFS_RBIO_PARITY_SCRUB) ?
+ 0 : rbio->bbio->max_errors;
+ if (atomic_read(&rbio->error) > max_errors)
err = -EIO;
rbio_orig_end_io(rbio, err);
@@ -947,8 +962,7 @@ static struct page *page_in_rbio(struct btrfs_raid_bio *rbio,
*/
static unsigned long rbio_nr_pages(unsigned long stripe_len, int nr_stripes)
{
- unsigned long nr = stripe_len * nr_stripes;
- return DIV_ROUND_UP(nr, PAGE_CACHE_SIZE);
+ return DIV_ROUND_UP(stripe_len, PAGE_CACHE_SIZE) * nr_stripes;
}
/*
@@ -966,8 +980,8 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_root *root,
void *p;
rbio = kzalloc(sizeof(*rbio) + num_pages * sizeof(struct page *) * 2 +
- DIV_ROUND_UP(stripe_npages, BITS_PER_LONG / 8),
- GFP_NOFS);
+ DIV_ROUND_UP(stripe_npages, BITS_PER_LONG) *
+ sizeof(long), GFP_NOFS);
if (!rbio)
return ERR_PTR(-ENOMEM);
@@ -1021,18 +1035,17 @@ static int alloc_rbio_pages(struct btrfs_raid_bio *rbio)
if (!page)
return -ENOMEM;
rbio->stripe_pages[i] = page;
- ClearPageUptodate(page);
}
return 0;
}
-/* allocate pages for just the p/q stripes */
+/* only allocate pages for p/q stripes */
static int alloc_rbio_parity_pages(struct btrfs_raid_bio *rbio)
{
int i;
struct page *page;
- i = (rbio->nr_data * rbio->stripe_len) >> PAGE_CACHE_SHIFT;
+ i = rbio_stripe_page_index(rbio, rbio->nr_data, 0);
for (; i < rbio->nr_pages; i++) {
if (rbio->stripe_pages[i])
@@ -1121,18 +1134,6 @@ static void validate_rbio_for_rmw(struct btrfs_raid_bio *rbio)
}
/*
- * these are just the pages from the rbio array, not from anything
- * the FS sent down to us
- */
-static struct page *rbio_stripe_page(struct btrfs_raid_bio *rbio, int stripe, int page)
-{
- int index;
- index = stripe * (rbio->stripe_len >> PAGE_CACHE_SHIFT);
- index += page;
- return rbio->stripe_pages[index];
-}
-
-/*
* helper function to walk our bio list and populate the bio_pages array with
* the result. This seems expensive, but it is faster than constantly
* searching through the bio list as we setup the IO in finish_rmw or stripe
@@ -1175,7 +1176,6 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
{
struct btrfs_bio *bbio = rbio->bbio;
void *pointers[rbio->real_stripes];
- int stripe_len = rbio->stripe_len;
int nr_data = rbio->nr_data;
int stripe;
int pagenr;
@@ -1183,7 +1183,6 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
int q_stripe = -1;
struct bio_list bio_list;
struct bio *bio;
- int pages_per_stripe = stripe_len >> PAGE_CACHE_SHIFT;
int ret;
bio_list_init(&bio_list);
@@ -1226,7 +1225,7 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
else
clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
- for (pagenr = 0; pagenr < pages_per_stripe; pagenr++) {
+ for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
struct page *p;
/* first collect one page from each data stripe */
for (stripe = 0; stripe < nr_data; stripe++) {
@@ -1268,7 +1267,7 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
* everything else.
*/
for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
- for (pagenr = 0; pagenr < pages_per_stripe; pagenr++) {
+ for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
struct page *page;
if (stripe < rbio->nr_data) {
page = page_in_rbio(rbio, stripe, pagenr, 1);
@@ -1292,7 +1291,7 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
if (!bbio->tgtdev_map[stripe])
continue;
- for (pagenr = 0; pagenr < pages_per_stripe; pagenr++) {
+ for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
struct page *page;
if (stripe < rbio->nr_data) {
page = page_in_rbio(rbio, stripe, pagenr, 1);
@@ -1506,7 +1505,6 @@ static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio)
int bios_to_read = 0;
struct bio_list bio_list;
int ret;
- int nr_pages = DIV_ROUND_UP(rbio->stripe_len, PAGE_CACHE_SIZE);
int pagenr;
int stripe;
struct bio *bio;
@@ -1525,7 +1523,7 @@ static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio)
* stripe
*/
for (stripe = 0; stripe < rbio->nr_data; stripe++) {
- for (pagenr = 0; pagenr < nr_pages; pagenr++) {
+ for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
struct page *page;
/*
* we want to find all the pages missing from
@@ -1801,7 +1799,6 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
int pagenr, stripe;
void **pointers;
int faila = -1, failb = -1;
- int nr_pages = DIV_ROUND_UP(rbio->stripe_len, PAGE_CACHE_SIZE);
struct page *page;
int err;
int i;
@@ -1824,7 +1821,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
index_rbio_pages(rbio);
- for (pagenr = 0; pagenr < nr_pages; pagenr++) {
+ for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
/*
* Now we just use bitmap to mark the horizontal stripes in
* which we have data when doing parity scrub.
@@ -1935,7 +1932,7 @@ pstripe:
* other endio functions will fiddle the uptodate bits
*/
if (rbio->operation == BTRFS_RBIO_WRITE) {
- for (i = 0; i < nr_pages; i++) {
+ for (i = 0; i < rbio->stripe_npages; i++) {
if (faila != -1) {
page = rbio_stripe_page(rbio, faila, i);
SetPageUptodate(page);
@@ -2031,7 +2028,6 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
int bios_to_read = 0;
struct bio_list bio_list;
int ret;
- int nr_pages = DIV_ROUND_UP(rbio->stripe_len, PAGE_CACHE_SIZE);
int pagenr;
int stripe;
struct bio *bio;
@@ -2055,7 +2051,7 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
continue;
}
- for (pagenr = 0; pagenr < nr_pages; pagenr++) {
+ for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
struct page *p;
/*
@@ -2279,37 +2275,11 @@ static int alloc_rbio_essential_pages(struct btrfs_raid_bio *rbio)
if (!page)
return -ENOMEM;
rbio->stripe_pages[index] = page;
- ClearPageUptodate(page);
}
}
return 0;
}
-/*
- * end io function used by finish_rmw. When we finally
- * get here, we've written a full stripe
- */
-static void raid_write_parity_end_io(struct bio *bio)
-{
- struct btrfs_raid_bio *rbio = bio->bi_private;
- int err = bio->bi_error;
-
- if (bio->bi_error)
- fail_bio_stripe(rbio, bio);
-
- bio_put(bio);
-
- if (!atomic_dec_and_test(&rbio->stripes_pending))
- return;
-
- err = 0;
-
- if (atomic_read(&rbio->error))
- err = -EIO;
-
- rbio_orig_end_io(rbio, err);
-}
-
static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
int need_check)
{
@@ -2462,7 +2432,7 @@ submit_write:
break;
bio->bi_private = rbio;
- bio->bi_end_io = raid_write_parity_end_io;
+ bio->bi_end_io = raid_write_end_io;
submit_bio(WRITE, bio);
}
return;
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 0c981ebe2acb..b1a68530e911 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -2813,7 +2813,7 @@ out:
static inline int scrub_calc_parity_bitmap_len(int nsectors)
{
- return DIV_ROUND_UP(nsectors, BITS_PER_LONG) * (BITS_PER_LONG / 8);
+ return DIV_ROUND_UP(nsectors, BITS_PER_LONG) * sizeof(long);
}
static void scrub_parity_get(struct scrub_parity *sparity)
@@ -3458,7 +3458,7 @@ static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx,
return ret;
}
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
if (em->start != chunk_offset)
goto out;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 9b9eab6d048e..d41e09fe8e38 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -383,6 +383,9 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
int ret = 0;
char *compress_type;
bool compress_force = false;
+ enum btrfs_compression_type saved_compress_type;
+ bool saved_compress_force;
+ int no_compress = 0;
cache_gen = btrfs_super_cache_generation(root->fs_info->super_copy);
if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE))
@@ -462,6 +465,10 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
/* Fallthrough */
case Opt_compress:
case Opt_compress_type:
+ saved_compress_type = btrfs_test_opt(root, COMPRESS) ?
+ info->compress_type : BTRFS_COMPRESS_NONE;
+ saved_compress_force =
+ btrfs_test_opt(root, FORCE_COMPRESS);
if (token == Opt_compress ||
token == Opt_compress_force ||
strcmp(args[0].from, "zlib") == 0) {
@@ -470,6 +477,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
btrfs_set_opt(info->mount_opt, COMPRESS);
btrfs_clear_opt(info->mount_opt, NODATACOW);
btrfs_clear_opt(info->mount_opt, NODATASUM);
+ no_compress = 0;
} else if (strcmp(args[0].from, "lzo") == 0) {
compress_type = "lzo";
info->compress_type = BTRFS_COMPRESS_LZO;
@@ -477,25 +485,21 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
btrfs_clear_opt(info->mount_opt, NODATACOW);
btrfs_clear_opt(info->mount_opt, NODATASUM);
btrfs_set_fs_incompat(info, COMPRESS_LZO);
+ no_compress = 0;
} else if (strncmp(args[0].from, "no", 2) == 0) {
compress_type = "no";
btrfs_clear_opt(info->mount_opt, COMPRESS);
btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS);
compress_force = false;
+ no_compress++;
} else {
ret = -EINVAL;
goto out;
}
if (compress_force) {
- btrfs_set_and_info(root, FORCE_COMPRESS,
- "force %s compression",
- compress_type);
+ btrfs_set_opt(info->mount_opt, FORCE_COMPRESS);
} else {
- if (!btrfs_test_opt(root, COMPRESS))
- btrfs_info(root->fs_info,
- "btrfs: use %s compression",
- compress_type);
/*
* If we remount from compress-force=xxx to
* compress=xxx, we need clear FORCE_COMPRESS
@@ -504,6 +508,17 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
*/
btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS);
}
+ if ((btrfs_test_opt(root, COMPRESS) &&
+ (info->compress_type != saved_compress_type ||
+ compress_force != saved_compress_force)) ||
+ (!btrfs_test_opt(root, COMPRESS) &&
+ no_compress == 1)) {
+ btrfs_info(root->fs_info,
+ "%s %s compression",
+ (compress_force) ? "force" : "use",
+ compress_type);
+ }
+ compress_force = false;
break;
case Opt_ssd:
btrfs_set_and_info(root, SSD,
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index c32abbca9d77..366b335946fa 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -108,7 +108,7 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
},
};
-const u64 const btrfs_raid_group[BTRFS_NR_RAID_TYPES] = {
+const u64 btrfs_raid_group[BTRFS_NR_RAID_TYPES] = {
[BTRFS_RAID_RAID10] = BTRFS_BLOCK_GROUP_RAID10,
[BTRFS_RAID_RAID1] = BTRFS_BLOCK_GROUP_RAID1,
[BTRFS_RAID_DUP] = BTRFS_BLOCK_GROUP_DUP,
@@ -233,6 +233,7 @@ static struct btrfs_device *__alloc_device(void)
spin_lock_init(&dev->reada_lock);
atomic_set(&dev->reada_in_flight, 0);
atomic_set(&dev->dev_stats_ccnt, 0);
+ btrfs_device_data_ordered_init(dev);
INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
@@ -1183,7 +1184,7 @@ again:
struct map_lookup *map;
int i;
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
for (i = 0; i < map->num_stripes; i++) {
u64 end;
@@ -2755,7 +2756,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
free_extent_map(em);
return -EINVAL;
}
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
lock_chunks(root->fs_info->chunk_root);
check_system_chunk(trans, extent_root, map->type);
unlock_chunks(root->fs_info->chunk_root);
@@ -3751,7 +3752,7 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
if (btrfs_get_num_tolerated_disk_barrier_failures(bctl->meta.target) <
btrfs_get_num_tolerated_disk_barrier_failures(bctl->data.target)) {
btrfs_warn(fs_info,
- "metatdata profile 0x%llx has lower redundancy than data profile 0x%llx",
+ "metadata profile 0x%llx has lower redundancy than data profile 0x%llx",
bctl->meta.target, bctl->data.target);
}
@@ -4718,7 +4719,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
goto error;
}
set_bit(EXTENT_FLAG_FS_MAPPING, &em->flags);
- em->bdev = (struct block_device *)map;
+ em->map_lookup = map;
em->start = start;
em->len = num_bytes;
em->block_start = 0;
@@ -4813,7 +4814,7 @@ int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
return -EINVAL;
}
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
item_size = btrfs_chunk_item_size(map->num_stripes);
stripe_size = em->orig_block_len;
@@ -4968,7 +4969,7 @@ int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset)
if (!em)
return 1;
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
for (i = 0; i < map->num_stripes; i++) {
if (map->stripes[i].dev->missing) {
miss_ndevs++;
@@ -5048,7 +5049,7 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
return 1;
}
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1))
ret = map->num_stripes;
else if (map->type & BTRFS_BLOCK_GROUP_RAID10)
@@ -5084,7 +5085,7 @@ unsigned long btrfs_full_stripe_len(struct btrfs_root *root,
BUG_ON(!em);
BUG_ON(em->start > logical || em->start + em->len < logical);
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
len = map->stripe_len * nr_data_stripes(map);
free_extent_map(em);
@@ -5105,7 +5106,7 @@ int btrfs_is_parity_mirror(struct btrfs_mapping_tree *map_tree,
BUG_ON(!em);
BUG_ON(em->start > logical || em->start + em->len < logical);
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
ret = 1;
free_extent_map(em);
@@ -5264,7 +5265,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
return -EINVAL;
}
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
offset = logical - em->start;
stripe_len = map->stripe_len;
@@ -5378,35 +5379,33 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
* target drive.
*/
for (i = 0; i < tmp_num_stripes; i++) {
- if (tmp_bbio->stripes[i].dev->devid == srcdev_devid) {
- /*
- * In case of DUP, in order to keep it
- * simple, only add the mirror with the
- * lowest physical address
- */
- if (found &&
- physical_of_found <=
- tmp_bbio->stripes[i].physical)
- continue;
- index_srcdev = i;
- found = 1;
- physical_of_found =
- tmp_bbio->stripes[i].physical;
- }
+ if (tmp_bbio->stripes[i].dev->devid != srcdev_devid)
+ continue;
+
+ /*
+ * In case of DUP, in order to keep it simple, only add
+ * the mirror with the lowest physical address
+ */
+ if (found &&
+ physical_of_found <= tmp_bbio->stripes[i].physical)
+ continue;
+
+ index_srcdev = i;
+ found = 1;
+ physical_of_found = tmp_bbio->stripes[i].physical;
}
- if (found) {
- mirror_num = index_srcdev + 1;
- patch_the_first_stripe_for_dev_replace = 1;
- physical_to_patch_in_first_stripe = physical_of_found;
- } else {
+ btrfs_put_bbio(tmp_bbio);
+
+ if (!found) {
WARN_ON(1);
ret = -EIO;
- btrfs_put_bbio(tmp_bbio);
goto out;
}
- btrfs_put_bbio(tmp_bbio);
+ mirror_num = index_srcdev + 1;
+ patch_the_first_stripe_for_dev_replace = 1;
+ physical_to_patch_in_first_stripe = physical_of_found;
} else if (mirror_num > map->num_stripes) {
mirror_num = 0;
}
@@ -5806,7 +5805,7 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
free_extent_map(em);
return -EIO;
}
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
length = em->len;
rmap_len = map->stripe_len;
@@ -6069,7 +6068,8 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
bbio->fs_info = root->fs_info;
atomic_set(&bbio->stripes_pending, bbio->num_stripes);
- if (bbio->raid_map) {
+ if ((bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) &&
+ ((rw & WRITE) || (mirror_num > 1))) {
/* In this case, map_length has been set to the length of
a single stripe; not the whole write */
if (rw & WRITE) {
@@ -6210,6 +6210,7 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
struct extent_map *em;
u64 logical;
u64 length;
+ u64 stripe_len;
u64 devid;
u8 uuid[BTRFS_UUID_SIZE];
int num_stripes;
@@ -6218,6 +6219,37 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
logical = key->offset;
length = btrfs_chunk_length(leaf, chunk);
+ stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
+ num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
+ /* Validation check */
+ if (!num_stripes) {
+ btrfs_err(root->fs_info, "invalid chunk num_stripes: %u",
+ num_stripes);
+ return -EIO;
+ }
+ if (!IS_ALIGNED(logical, root->sectorsize)) {
+ btrfs_err(root->fs_info,
+ "invalid chunk logical %llu", logical);
+ return -EIO;
+ }
+ if (!length || !IS_ALIGNED(length, root->sectorsize)) {
+ btrfs_err(root->fs_info,
+ "invalid chunk length %llu", length);
+ return -EIO;
+ }
+ if (!is_power_of_2(stripe_len)) {
+ btrfs_err(root->fs_info, "invalid chunk stripe length: %llu",
+ stripe_len);
+ return -EIO;
+ }
+ if (~(BTRFS_BLOCK_GROUP_TYPE_MASK | BTRFS_BLOCK_GROUP_PROFILE_MASK) &
+ btrfs_chunk_type(leaf, chunk)) {
+ btrfs_err(root->fs_info, "unrecognized chunk type: %llu",
+ ~(BTRFS_BLOCK_GROUP_TYPE_MASK |
+ BTRFS_BLOCK_GROUP_PROFILE_MASK) &
+ btrfs_chunk_type(leaf, chunk));
+ return -EIO;
+ }
read_lock(&map_tree->map_tree.lock);
em = lookup_extent_mapping(&map_tree->map_tree, logical, 1);
@@ -6234,7 +6266,6 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
em = alloc_extent_map();
if (!em)
return -ENOMEM;
- num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
if (!map) {
free_extent_map(em);
@@ -6242,7 +6273,7 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
}
set_bit(EXTENT_FLAG_FS_MAPPING, &em->flags);
- em->bdev = (struct block_device *)map;
+ em->map_lookup = map;
em->start = logical;
em->len = length;
em->orig_start = 0;
@@ -6944,7 +6975,7 @@ void btrfs_update_commit_device_bytes_used(struct btrfs_root *root,
/* In order to kick the device replace finish process */
lock_chunks(root);
list_for_each_entry(em, &transaction->pending_chunks, list) {
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
for (i = 0; i < map->num_stripes; i++) {
dev = map->stripes[i].dev;
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index 1925d6d222b8..58c2f4a21b7f 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -516,7 +516,7 @@ static ssize_t device_write(struct file *file, const char __user *buf,
return -EINVAL;
kbuf = memdup_user_nul(buf, count);
- if (!IS_ERR(kbuf))
+ if (IS_ERR(kbuf))
return PTR_ERR(kbuf);
if (check_version(kbuf)) {
diff --git a/fs/ext4/crypto.c b/fs/ext4/crypto.c
index 1a0835073663..c8021208a7eb 100644
--- a/fs/ext4/crypto.c
+++ b/fs/ext4/crypto.c
@@ -384,14 +384,12 @@ int ext4_decrypt(struct page *page)
EXT4_DECRYPT, page->index, page, page);
}
-int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex)
+int ext4_encrypted_zeroout(struct inode *inode, ext4_lblk_t lblk,
+ ext4_fsblk_t pblk, ext4_lblk_t len)
{
struct ext4_crypto_ctx *ctx;
struct page *ciphertext_page = NULL;
struct bio *bio;
- ext4_lblk_t lblk = le32_to_cpu(ex->ee_block);
- ext4_fsblk_t pblk = ext4_ext_pblock(ex);
- unsigned int len = ext4_ext_get_actual_len(ex);
int ret, err = 0;
#if 0
diff --git a/fs/ext4/crypto_key.c b/fs/ext4/crypto_key.c
index c5882b36e558..9a16d1e75a49 100644
--- a/fs/ext4/crypto_key.c
+++ b/fs/ext4/crypto_key.c
@@ -213,9 +213,11 @@ retry:
res = -ENOKEY;
goto out;
}
+ down_read(&keyring_key->sem);
ukp = user_key_payload(keyring_key);
if (ukp->datalen != sizeof(struct ext4_encryption_key)) {
res = -EINVAL;
+ up_read(&keyring_key->sem);
goto out;
}
master_key = (struct ext4_encryption_key *)ukp->data;
@@ -226,10 +228,12 @@ retry:
"ext4: key size incorrect: %d\n",
master_key->size);
res = -ENOKEY;
+ up_read(&keyring_key->sem);
goto out;
}
res = ext4_derive_key_aes(ctx.nonce, master_key->raw,
raw_key);
+ up_read(&keyring_key->sem);
if (res)
goto out;
got_key:
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index cc7ca4e87144..1c127213363a 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -378,14 +378,22 @@ struct flex_groups {
#define EXT4_PROJINHERIT_FL 0x20000000 /* Create with parents projid */
#define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */
-#define EXT4_FL_USER_VISIBLE 0x004BDFFF /* User visible flags */
-#define EXT4_FL_USER_MODIFIABLE 0x004380FF /* User modifiable flags */
+#define EXT4_FL_USER_VISIBLE 0x304BDFFF /* User visible flags */
+#define EXT4_FL_USER_MODIFIABLE 0x204380FF /* User modifiable flags */
+
+#define EXT4_FL_XFLAG_VISIBLE (EXT4_SYNC_FL | \
+ EXT4_IMMUTABLE_FL | \
+ EXT4_APPEND_FL | \
+ EXT4_NODUMP_FL | \
+ EXT4_NOATIME_FL | \
+ EXT4_PROJINHERIT_FL)
/* Flags that should be inherited by new inodes from their parent. */
#define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\
EXT4_SYNC_FL | EXT4_NODUMP_FL | EXT4_NOATIME_FL |\
EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\
- EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL)
+ EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL |\
+ EXT4_PROJINHERIT_FL)
/* Flags that are appropriate for regular files (all but dir-specific ones). */
#define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL))
@@ -555,10 +563,12 @@ enum {
#define EXT4_GET_BLOCKS_NO_NORMALIZE 0x0040
/* Request will not result in inode size update (user for fallocate) */
#define EXT4_GET_BLOCKS_KEEP_SIZE 0x0080
- /* Do not take i_data_sem locking in ext4_map_blocks */
-#define EXT4_GET_BLOCKS_NO_LOCK 0x0100
/* Convert written extents to unwritten */
-#define EXT4_GET_BLOCKS_CONVERT_UNWRITTEN 0x0200
+#define EXT4_GET_BLOCKS_CONVERT_UNWRITTEN 0x0100
+ /* Write zeros to newly created written extents */
+#define EXT4_GET_BLOCKS_ZERO 0x0200
+#define EXT4_GET_BLOCKS_CREATE_ZERO (EXT4_GET_BLOCKS_CREATE |\
+ EXT4_GET_BLOCKS_ZERO)
/*
* The bit position of these flags must not overlap with any of the
@@ -616,6 +626,46 @@ enum {
#define EXT4_IOC_GET_ENCRYPTION_PWSALT _IOW('f', 20, __u8[16])
#define EXT4_IOC_GET_ENCRYPTION_POLICY _IOW('f', 21, struct ext4_encryption_policy)
+#ifndef FS_IOC_FSGETXATTR
+/* Until the uapi changes get merged for project quota... */
+
+#define FS_IOC_FSGETXATTR _IOR('X', 31, struct fsxattr)
+#define FS_IOC_FSSETXATTR _IOW('X', 32, struct fsxattr)
+
+/*
+ * Structure for FS_IOC_FSGETXATTR and FS_IOC_FSSETXATTR.
+ */
+struct fsxattr {
+ __u32 fsx_xflags; /* xflags field value (get/set) */
+ __u32 fsx_extsize; /* extsize field value (get/set)*/
+ __u32 fsx_nextents; /* nextents field value (get) */
+ __u32 fsx_projid; /* project identifier (get/set) */
+ unsigned char fsx_pad[12];
+};
+
+/*
+ * Flags for the fsx_xflags field
+ */
+#define FS_XFLAG_REALTIME 0x00000001 /* data in realtime volume */
+#define FS_XFLAG_PREALLOC 0x00000002 /* preallocated file extents */
+#define FS_XFLAG_IMMUTABLE 0x00000008 /* file cannot be modified */
+#define FS_XFLAG_APPEND 0x00000010 /* all writes append */
+#define FS_XFLAG_SYNC 0x00000020 /* all writes synchronous */
+#define FS_XFLAG_NOATIME 0x00000040 /* do not update access time */
+#define FS_XFLAG_NODUMP 0x00000080 /* do not include in backups */
+#define FS_XFLAG_RTINHERIT 0x00000100 /* create with rt bit set */
+#define FS_XFLAG_PROJINHERIT 0x00000200 /* create with parents projid */
+#define FS_XFLAG_NOSYMLINKS 0x00000400 /* disallow symlink creation */
+#define FS_XFLAG_EXTSIZE 0x00000800 /* extent size allocator hint */
+#define FS_XFLAG_EXTSZINHERIT 0x00001000 /* inherit inode extent size */
+#define FS_XFLAG_NODEFRAG 0x00002000 /* do not defragment */
+#define FS_XFLAG_FILESTREAM 0x00004000 /* use filestream allocator */
+#define FS_XFLAG_HASATTR 0x80000000 /* no DIFLAG for this */
+#endif /* !defined(FS_IOC_FSGETXATTR) */
+
+#define EXT4_IOC_FSGETXATTR FS_IOC_FSGETXATTR
+#define EXT4_IOC_FSSETXATTR FS_IOC_FSSETXATTR
+
#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
/*
* ioctl commands in 32 bit emulation
@@ -910,6 +960,15 @@ struct ext4_inode_info {
* by other means, so we have i_data_sem.
*/
struct rw_semaphore i_data_sem;
+ /*
+ * i_mmap_sem is for serializing page faults with truncate / punch hole
+ * operations. We have to make sure that new page cannot be faulted in
+ * a section of the inode that is being punched. We cannot easily use
+ * i_data_sem for this since we need protection for the whole punch
+ * operation and i_data_sem ranks below transaction start so we have
+ * to occasionally drop it.
+ */
+ struct rw_semaphore i_mmap_sem;
struct inode vfs_inode;
struct jbd2_inode *jinode;
@@ -993,6 +1052,7 @@ struct ext4_inode_info {
/* Encryption params */
struct ext4_crypt_info *i_crypt_info;
#endif
+ kprojid_t i_projid;
};
/*
@@ -1248,7 +1308,7 @@ struct ext4_super_block {
#endif
/* Number of quota types we support */
-#define EXT4_MAXQUOTAS 2
+#define EXT4_MAXQUOTAS 3
/*
* fourth extended-fs super-block data in memory
@@ -1754,7 +1814,8 @@ EXT4_FEATURE_INCOMPAT_FUNCS(encrypt, ENCRYPT)
EXT4_FEATURE_RO_COMPAT_HUGE_FILE |\
EXT4_FEATURE_RO_COMPAT_BIGALLOC |\
EXT4_FEATURE_RO_COMPAT_METADATA_CSUM|\
- EXT4_FEATURE_RO_COMPAT_QUOTA)
+ EXT4_FEATURE_RO_COMPAT_QUOTA |\
+ EXT4_FEATURE_RO_COMPAT_PROJECT)
#define EXTN_FEATURE_FUNCS(ver) \
static inline bool ext4_has_unknown_ext##ver##_compat_features(struct super_block *sb) \
@@ -1796,6 +1857,11 @@ static inline bool ext4_has_incompat_features(struct super_block *sb)
#define EXT4_DEF_RESUID 0
#define EXT4_DEF_RESGID 0
+/*
+ * Default project ID
+ */
+#define EXT4_DEF_PROJID 0
+
#define EXT4_DEF_INODE_READAHEAD_BLKS 32
/*
@@ -2234,7 +2300,8 @@ void ext4_restore_control_page(struct page *data_page);
struct page *ext4_encrypt(struct inode *inode,
struct page *plaintext_page);
int ext4_decrypt(struct page *page);
-int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex);
+int ext4_encrypted_zeroout(struct inode *inode, ext4_lblk_t lblk,
+ ext4_fsblk_t pblk, ext4_lblk_t len);
#ifdef CONFIG_EXT4_FS_ENCRYPTION
int ext4_init_crypto(void);
@@ -2440,8 +2507,8 @@ struct buffer_head *ext4_getblk(handle_t *, struct inode *, ext4_lblk_t, int);
struct buffer_head *ext4_bread(handle_t *, struct inode *, ext4_lblk_t, int);
int ext4_get_block_write(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create);
-int ext4_get_block_dax(struct inode *inode, sector_t iblock,
- struct buffer_head *bh_result, int create);
+int ext4_dax_mmap_get_block(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int create);
int ext4_get_block(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create);
int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
@@ -2484,9 +2551,13 @@ extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode,
loff_t lstart, loff_t lend);
extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
+extern int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
extern qsize_t *ext4_get_reserved_space(struct inode *inode);
+extern int ext4_get_projid(struct inode *inode, kprojid_t *projid);
extern void ext4_da_update_reserve_space(struct inode *inode,
int used, int quota_claim);
+extern int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk,
+ ext4_fsblk_t pblk, ext4_lblk_t len);
/* indirect.c */
extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
@@ -2848,6 +2919,9 @@ static inline int ext4_update_inode_size(struct inode *inode, loff_t newsize)
return changed;
}
+int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset,
+ loff_t len);
+
struct ext4_group_info {
unsigned long bb_state;
struct rb_root bb_free_root;
@@ -2986,8 +3060,7 @@ extern int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos,
struct page *page);
extern int ext4_try_add_inline_entry(handle_t *handle,
struct ext4_filename *fname,
- struct dentry *dentry,
- struct inode *inode);
+ struct inode *dir, struct inode *inode);
extern int ext4_try_create_inline_dir(handle_t *handle,
struct inode *parent,
struct inode *inode);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 551353b1b17a..b52fea3b7219 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3119,19 +3119,11 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
{
ext4_fsblk_t ee_pblock;
unsigned int ee_len;
- int ret;
ee_len = ext4_ext_get_actual_len(ex);
ee_pblock = ext4_ext_pblock(ex);
-
- if (ext4_encrypted_inode(inode))
- return ext4_encrypted_zeroout(inode, ex);
-
- ret = sb_issue_zeroout(inode->i_sb, ee_pblock, ee_len, GFP_NOFS);
- if (ret > 0)
- ret = 0;
-
- return ret;
+ return ext4_issue_zeroout(inode, le32_to_cpu(ex->ee_block), ee_pblock,
+ ee_len);
}
/*
@@ -4052,6 +4044,14 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode,
}
/* IO end_io complete, convert the filled extent to written */
if (flags & EXT4_GET_BLOCKS_CONVERT) {
+ if (flags & EXT4_GET_BLOCKS_ZERO) {
+ if (allocated > map->m_len)
+ allocated = map->m_len;
+ err = ext4_issue_zeroout(inode, map->m_lblk, newblock,
+ allocated);
+ if (err < 0)
+ goto out2;
+ }
ret = ext4_convert_unwritten_extents_endio(handle, inode, map,
ppath);
if (ret >= 0) {
@@ -4685,10 +4685,6 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
if (len <= EXT_UNWRITTEN_MAX_LEN)
flags |= EXT4_GET_BLOCKS_NO_NORMALIZE;
- /* Wait all existing dio workers, newcomers will block on i_mutex */
- ext4_inode_block_unlocked_dio(inode);
- inode_dio_wait(inode);
-
/*
* credits to insert 1 extent into extent tree
*/
@@ -4752,8 +4748,6 @@ retry:
goto retry;
}
- ext4_inode_resume_unlocked_dio(inode);
-
return ret > 0 ? ret2 : ret;
}
@@ -4770,7 +4764,6 @@ static long ext4_zero_range(struct file *file, loff_t offset,
int partial_begin, partial_end;
loff_t start, end;
ext4_lblk_t lblk;
- struct address_space *mapping = inode->i_mapping;
unsigned int blkbits = inode->i_blkbits;
trace_ext4_zero_range(inode, offset, len, mode);
@@ -4786,17 +4779,6 @@ static long ext4_zero_range(struct file *file, loff_t offset,
}
/*
- * Write out all dirty pages to avoid race conditions
- * Then release them.
- */
- if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
- ret = filemap_write_and_wait_range(mapping, offset,
- offset + len - 1);
- if (ret)
- return ret;
- }
-
- /*
* Round up offset. This is not fallocate, we neet to zero out
* blocks, so convert interior block aligned part of the range to
* unwritten and possibly manually zero out unaligned parts of the
@@ -4839,6 +4821,10 @@ static long ext4_zero_range(struct file *file, loff_t offset,
if (mode & FALLOC_FL_KEEP_SIZE)
flags |= EXT4_GET_BLOCKS_KEEP_SIZE;
+ /* Wait all existing dio workers, newcomers will block on i_mutex */
+ ext4_inode_block_unlocked_dio(inode);
+ inode_dio_wait(inode);
+
/* Preallocate the range including the unaligned edges */
if (partial_begin || partial_end) {
ret = ext4_alloc_file_blocks(file,
@@ -4847,7 +4833,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
round_down(offset, 1 << blkbits)) >> blkbits,
new_size, flags, mode);
if (ret)
- goto out_mutex;
+ goto out_dio;
}
@@ -4856,16 +4842,23 @@ static long ext4_zero_range(struct file *file, loff_t offset,
flags |= (EXT4_GET_BLOCKS_CONVERT_UNWRITTEN |
EXT4_EX_NOCACHE);
- /* Now release the pages and zero block aligned part of pages*/
+ /*
+ * Prevent page faults from reinstantiating pages we have
+ * released from page cache.
+ */
+ down_write(&EXT4_I(inode)->i_mmap_sem);
+ ret = ext4_update_disksize_before_punch(inode, offset, len);
+ if (ret) {
+ up_write(&EXT4_I(inode)->i_mmap_sem);
+ goto out_dio;
+ }
+ /* Now release the pages and zero block aligned part of pages */
truncate_pagecache_range(inode, start, end - 1);
inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
- /* Wait all existing dio workers, newcomers will block on i_mutex */
- ext4_inode_block_unlocked_dio(inode);
- inode_dio_wait(inode);
-
ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
flags, mode);
+ up_write(&EXT4_I(inode)->i_mmap_sem);
if (ret)
goto out_dio;
}
@@ -4998,8 +4991,13 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
goto out;
}
+ /* Wait all existing dio workers, newcomers will block on i_mutex */
+ ext4_inode_block_unlocked_dio(inode);
+ inode_dio_wait(inode);
+
ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
flags, mode);
+ ext4_inode_resume_unlocked_dio(inode);
if (ret)
goto out;
@@ -5494,21 +5492,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
return ret;
}
- /*
- * Need to round down offset to be aligned with page size boundary
- * for page size > block size.
- */
- ioffset = round_down(offset, PAGE_SIZE);
-
- /* Write out all dirty pages */
- ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
- LLONG_MAX);
- if (ret)
- return ret;
-
- /* Take mutex lock */
mutex_lock(&inode->i_mutex);
-
/*
* There is no need to overlap collapse range with EOF, in which case
* it is effectively a truncate operation
@@ -5524,17 +5508,43 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
goto out_mutex;
}
- truncate_pagecache(inode, ioffset);
-
/* Wait for existing dio to complete */
ext4_inode_block_unlocked_dio(inode);
inode_dio_wait(inode);
+ /*
+ * Prevent page faults from reinstantiating pages we have released from
+ * page cache.
+ */
+ down_write(&EXT4_I(inode)->i_mmap_sem);
+ /*
+ * Need to round down offset to be aligned with page size boundary
+ * for page size > block size.
+ */
+ ioffset = round_down(offset, PAGE_SIZE);
+ /*
+ * Write tail of the last page before removed range since it will get
+ * removed from the page cache below.
+ */
+ ret = filemap_write_and_wait_range(inode->i_mapping, ioffset, offset);
+ if (ret)
+ goto out_mmap;
+ /*
+ * Write data that will be shifted to preserve them when discarding
+ * page cache below. We are also protected from pages becoming dirty
+ * by i_mmap_sem.
+ */
+ ret = filemap_write_and_wait_range(inode->i_mapping, offset + len,
+ LLONG_MAX);
+ if (ret)
+ goto out_mmap;
+ truncate_pagecache(inode, ioffset);
+
credits = ext4_writepage_trans_blocks(inode);
handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
- goto out_dio;
+ goto out_mmap;
}
down_write(&EXT4_I(inode)->i_data_sem);
@@ -5573,7 +5583,8 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
out_stop:
ext4_journal_stop(handle);
-out_dio:
+out_mmap:
+ up_write(&EXT4_I(inode)->i_mmap_sem);
ext4_inode_resume_unlocked_dio(inode);
out_mutex:
mutex_unlock(&inode->i_mutex);
@@ -5627,21 +5638,7 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
return ret;
}
- /*
- * Need to round down to align start offset to page size boundary
- * for page size > block size.
- */
- ioffset = round_down(offset, PAGE_SIZE);
-
- /* Write out all dirty pages */
- ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
- LLONG_MAX);
- if (ret)
- return ret;
-
- /* Take mutex lock */
mutex_lock(&inode->i_mutex);
-
/* Currently just for extent based files */
if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
ret = -EOPNOTSUPP;
@@ -5660,17 +5657,32 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
goto out_mutex;
}
- truncate_pagecache(inode, ioffset);
-
/* Wait for existing dio to complete */
ext4_inode_block_unlocked_dio(inode);
inode_dio_wait(inode);
+ /*
+ * Prevent page faults from reinstantiating pages we have released from
+ * page cache.
+ */
+ down_write(&EXT4_I(inode)->i_mmap_sem);
+ /*
+ * Need to round down to align start offset to page size boundary
+ * for page size > block size.
+ */
+ ioffset = round_down(offset, PAGE_SIZE);
+ /* Write out all dirty pages */
+ ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
+ LLONG_MAX);
+ if (ret)
+ goto out_mmap;
+ truncate_pagecache(inode, ioffset);
+
credits = ext4_writepage_trans_blocks(inode);
handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
- goto out_dio;
+ goto out_mmap;
}
/* Expand file to avoid data loss if there is error while shifting */
@@ -5741,7 +5753,8 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
out_stop:
ext4_journal_stop(handle);
-out_dio:
+out_mmap:
+ up_write(&EXT4_I(inode)->i_mmap_sem);
ext4_inode_resume_unlocked_dio(inode);
out_mutex:
mutex_unlock(&inode->i_mutex);
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 113837e7ba98..749b222e6498 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -193,43 +193,35 @@ out:
}
#ifdef CONFIG_FS_DAX
-static void ext4_end_io_unwritten(struct buffer_head *bh, int uptodate)
-{
- struct inode *inode = bh->b_assoc_map->host;
- /* XXX: breaks on 32-bit > 16TB. Is that even supported? */
- loff_t offset = (loff_t)(uintptr_t)bh->b_private << inode->i_blkbits;
- int err;
- if (!uptodate)
- return;
- WARN_ON(!buffer_unwritten(bh));
- err = ext4_convert_unwritten_extents(NULL, inode, offset, bh->b_size);
-}
-
static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
int result;
handle_t *handle = NULL;
- struct super_block *sb = file_inode(vma->vm_file)->i_sb;
+ struct inode *inode = file_inode(vma->vm_file);
+ struct super_block *sb = inode->i_sb;
bool write = vmf->flags & FAULT_FLAG_WRITE;
if (write) {
sb_start_pagefault(sb);
file_update_time(vma->vm_file);
+ down_read(&EXT4_I(inode)->i_mmap_sem);
handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
EXT4_DATA_TRANS_BLOCKS(sb));
- }
+ } else
+ down_read(&EXT4_I(inode)->i_mmap_sem);
if (IS_ERR(handle))
result = VM_FAULT_SIGBUS;
else
- result = __dax_fault(vma, vmf, ext4_get_block_dax,
- ext4_end_io_unwritten);
+ result = __dax_fault(vma, vmf, ext4_dax_mmap_get_block, NULL);
if (write) {
if (!IS_ERR(handle))
ext4_journal_stop(handle);
+ up_read(&EXT4_I(inode)->i_mmap_sem);
sb_end_pagefault(sb);
- }
+ } else
+ up_read(&EXT4_I(inode)->i_mmap_sem);
return result;
}
@@ -246,44 +238,86 @@ static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
if (write) {
sb_start_pagefault(sb);
file_update_time(vma->vm_file);
+ down_read(&EXT4_I(inode)->i_mmap_sem);
handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
ext4_chunk_trans_blocks(inode,
PMD_SIZE / PAGE_SIZE));
- }
+ } else
+ down_read(&EXT4_I(inode)->i_mmap_sem);
if (IS_ERR(handle))
result = VM_FAULT_SIGBUS;
else
result = __dax_pmd_fault(vma, addr, pmd, flags,
- ext4_get_block_dax, ext4_end_io_unwritten);
+ ext4_dax_mmap_get_block, NULL);
if (write) {
if (!IS_ERR(handle))
ext4_journal_stop(handle);
+ up_read(&EXT4_I(inode)->i_mmap_sem);
sb_end_pagefault(sb);
- }
+ } else
+ up_read(&EXT4_I(inode)->i_mmap_sem);
return result;
}
static int ext4_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
- return dax_mkwrite(vma, vmf, ext4_get_block_dax,
- ext4_end_io_unwritten);
+ int err;
+ struct inode *inode = file_inode(vma->vm_file);
+
+ sb_start_pagefault(inode->i_sb);
+ file_update_time(vma->vm_file);
+ down_read(&EXT4_I(inode)->i_mmap_sem);
+ err = __dax_mkwrite(vma, vmf, ext4_dax_mmap_get_block, NULL);
+ up_read(&EXT4_I(inode)->i_mmap_sem);
+ sb_end_pagefault(inode->i_sb);
+
+ return err;
+}
+
+/*
+ * Handle write fault for VM_MIXEDMAP mappings. Similarly to ext4_dax_mkwrite()
+ * handler we check for races agaist truncate. Note that since we cycle through
+ * i_mmap_sem, we are sure that also any hole punching that began before we
+ * were called is finished by now and so if it included part of the file we
+ * are working on, our pte will get unmapped and the check for pte_same() in
+ * wp_pfn_shared() fails. Thus fault gets retried and things work out as
+ * desired.
+ */
+static int ext4_dax_pfn_mkwrite(struct vm_area_struct *vma,
+ struct vm_fault *vmf)
+{
+ struct inode *inode = file_inode(vma->vm_file);
+ struct super_block *sb = inode->i_sb;
+ int ret = VM_FAULT_NOPAGE;
+ loff_t size;
+
+ sb_start_pagefault(sb);
+ file_update_time(vma->vm_file);
+ down_read(&EXT4_I(inode)->i_mmap_sem);
+ size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ if (vmf->pgoff >= size)
+ ret = VM_FAULT_SIGBUS;
+ up_read(&EXT4_I(inode)->i_mmap_sem);
+ sb_end_pagefault(sb);
+
+ return ret;
}
static const struct vm_operations_struct ext4_dax_vm_ops = {
.fault = ext4_dax_fault,
.pmd_fault = ext4_dax_pmd_fault,
.page_mkwrite = ext4_dax_mkwrite,
- .pfn_mkwrite = dax_pfn_mkwrite,
+ .pfn_mkwrite = ext4_dax_pfn_mkwrite,
};
#else
#define ext4_dax_vm_ops ext4_file_vm_ops
#endif
static const struct vm_operations_struct ext4_file_vm_ops = {
- .fault = filemap_fault,
+ .fault = ext4_filemap_fault,
.map_pages = filemap_map_pages,
.page_mkwrite = ext4_page_mkwrite,
};
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 1b8024d26f65..3fcfd50a2e8a 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -799,6 +799,13 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
inode->i_gid = dir->i_gid;
} else
inode_init_owner(inode, dir, mode);
+
+ if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_PROJECT) &&
+ ext4_test_inode_flag(dir, EXT4_INODE_PROJINHERIT))
+ ei->i_projid = EXT4_I(dir)->i_projid;
+ else
+ ei->i_projid = make_kprojid(&init_user_ns, EXT4_DEF_PROJID);
+
err = dquot_initialize(inode);
if (err)
goto out;
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index d884989cc83d..dfe3b9bafc0d 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -995,12 +995,11 @@ void ext4_show_inline_dir(struct inode *dir, struct buffer_head *bh,
*/
static int ext4_add_dirent_to_inline(handle_t *handle,
struct ext4_filename *fname,
- struct dentry *dentry,
+ struct inode *dir,
struct inode *inode,
struct ext4_iloc *iloc,
void *inline_start, int inline_size)
{
- struct inode *dir = d_inode(dentry->d_parent);
int err;
struct ext4_dir_entry_2 *de;
@@ -1245,12 +1244,11 @@ out:
* the new created block.
*/
int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname,
- struct dentry *dentry, struct inode *inode)
+ struct inode *dir, struct inode *inode)
{
int ret, inline_size;
void *inline_start;
struct ext4_iloc iloc;
- struct inode *dir = d_inode(dentry->d_parent);
ret = ext4_get_inode_loc(dir, &iloc);
if (ret)
@@ -1264,7 +1262,7 @@ int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname,
EXT4_INLINE_DOTDOT_SIZE;
inline_size = EXT4_MIN_INLINE_DATA_SIZE - EXT4_INLINE_DOTDOT_SIZE;
- ret = ext4_add_dirent_to_inline(handle, fname, dentry, inode, &iloc,
+ ret = ext4_add_dirent_to_inline(handle, fname, dir, inode, &iloc,
inline_start, inline_size);
if (ret != -ENOSPC)
goto out;
@@ -1285,7 +1283,7 @@ int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname,
if (inline_size) {
inline_start = ext4_get_inline_xattr_pos(dir, &iloc);
- ret = ext4_add_dirent_to_inline(handle, fname, dentry,
+ ret = ext4_add_dirent_to_inline(handle, fname, dir,
inode, &iloc, inline_start,
inline_size);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index b3bd912df6bf..d964195ea0e2 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -383,6 +383,21 @@ static int __check_block_validity(struct inode *inode, const char *func,
return 0;
}
+int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk, ext4_fsblk_t pblk,
+ ext4_lblk_t len)
+{
+ int ret;
+
+ if (ext4_encrypted_inode(inode))
+ return ext4_encrypted_zeroout(inode, lblk, pblk, len);
+
+ ret = sb_issue_zeroout(inode->i_sb, pblk, len, GFP_NOFS);
+ if (ret > 0)
+ ret = 0;
+
+ return ret;
+}
+
#define check_block_validity(inode, map) \
__check_block_validity((inode), __func__, __LINE__, (map))
@@ -403,8 +418,7 @@ static void ext4_map_blocks_es_recheck(handle_t *handle,
* out taking i_data_sem. So at the time the unwritten extent
* could be converted.
*/
- if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
- down_read(&EXT4_I(inode)->i_data_sem);
+ down_read(&EXT4_I(inode)->i_data_sem);
if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
retval = ext4_ext_map_blocks(handle, inode, map, flags &
EXT4_GET_BLOCKS_KEEP_SIZE);
@@ -412,8 +426,7 @@ static void ext4_map_blocks_es_recheck(handle_t *handle,
retval = ext4_ind_map_blocks(handle, inode, map, flags &
EXT4_GET_BLOCKS_KEEP_SIZE);
}
- if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
- up_read((&EXT4_I(inode)->i_data_sem));
+ up_read((&EXT4_I(inode)->i_data_sem));
/*
* We don't check m_len because extent will be collpased in status
@@ -509,8 +522,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
* Try to see if we can get the block without requesting a new
* file system block.
*/
- if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
- down_read(&EXT4_I(inode)->i_data_sem);
+ down_read(&EXT4_I(inode)->i_data_sem);
if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
retval = ext4_ext_map_blocks(handle, inode, map, flags &
EXT4_GET_BLOCKS_KEEP_SIZE);
@@ -541,8 +553,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
if (ret < 0)
retval = ret;
}
- if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
- up_read((&EXT4_I(inode)->i_data_sem));
+ up_read((&EXT4_I(inode)->i_data_sem));
found:
if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
@@ -626,13 +637,29 @@ found:
}
/*
+ * We have to zeroout blocks before inserting them into extent
+ * status tree. Otherwise someone could look them up there and
+ * use them before they are really zeroed.
+ */
+ if (flags & EXT4_GET_BLOCKS_ZERO &&
+ map->m_flags & EXT4_MAP_MAPPED &&
+ map->m_flags & EXT4_MAP_NEW) {
+ ret = ext4_issue_zeroout(inode, map->m_lblk,
+ map->m_pblk, map->m_len);
+ if (ret) {
+ retval = ret;
+ goto out_sem;
+ }
+ }
+
+ /*
* If the extent has been zeroed out, we don't need to update
* extent status tree.
*/
if ((flags & EXT4_GET_BLOCKS_PRE_IO) &&
ext4_es_lookup_extent(inode, map->m_lblk, &es)) {
if (ext4_es_is_written(&es))
- goto has_zeroout;
+ goto out_sem;
}
status = map->m_flags & EXT4_MAP_UNWRITTEN ?
EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
@@ -643,11 +670,13 @@ found:
status |= EXTENT_STATUS_DELAYED;
ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
map->m_pblk, status);
- if (ret < 0)
+ if (ret < 0) {
retval = ret;
+ goto out_sem;
+ }
}
-has_zeroout:
+out_sem:
up_write((&EXT4_I(inode)->i_data_sem));
if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
ret = check_block_validity(inode, map);
@@ -674,7 +703,7 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock,
map.m_lblk = iblock;
map.m_len = bh->b_size >> inode->i_blkbits;
- if (flags && !(flags & EXT4_GET_BLOCKS_NO_LOCK) && !handle) {
+ if (flags && !handle) {
/* Direct IO write... */
if (map.m_len > DIO_MAX_BLOCKS)
map.m_len = DIO_MAX_BLOCKS;
@@ -694,16 +723,6 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock,
map_bh(bh, inode->i_sb, map.m_pblk);
bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags;
- if (IS_DAX(inode) && buffer_unwritten(bh)) {
- /*
- * dgc: I suspect unwritten conversion on ext4+DAX is
- * fundamentally broken here when there are concurrent
- * read/write in progress on this inode.
- */
- WARN_ON_ONCE(io_end);
- bh->b_assoc_map = inode->i_mapping;
- bh->b_private = (void *)(unsigned long)iblock;
- }
if (io_end && io_end->flag & EXT4_IO_END_UNWRITTEN)
set_buffer_defer_completion(bh);
bh->b_size = inode->i_sb->s_blocksize * map.m_len;
@@ -879,9 +898,6 @@ int do_journal_get_write_access(handle_t *handle,
return ret;
}
-static int ext4_get_block_write_nolock(struct inode *inode, sector_t iblock,
- struct buffer_head *bh_result, int create);
-
#ifdef CONFIG_EXT4_FS_ENCRYPTION
static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len,
get_block_t *get_block)
@@ -3054,25 +3070,96 @@ int ext4_get_block_write(struct inode *inode, sector_t iblock,
EXT4_GET_BLOCKS_IO_CREATE_EXT);
}
-static int ext4_get_block_write_nolock(struct inode *inode, sector_t iblock,
+static int ext4_get_block_overwrite(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create)
{
- ext4_debug("ext4_get_block_write_nolock: inode %lu, create flag %d\n",
+ int ret;
+
+ ext4_debug("ext4_get_block_overwrite: inode %lu, create flag %d\n",
inode->i_ino, create);
- return _ext4_get_block(inode, iblock, bh_result,
- EXT4_GET_BLOCKS_NO_LOCK);
+ ret = _ext4_get_block(inode, iblock, bh_result, 0);
+ /*
+ * Blocks should have been preallocated! ext4_file_write_iter() checks
+ * that.
+ */
+ WARN_ON_ONCE(!buffer_mapped(bh_result));
+
+ return ret;
}
-int ext4_get_block_dax(struct inode *inode, sector_t iblock,
- struct buffer_head *bh_result, int create)
+#ifdef CONFIG_FS_DAX
+int ext4_dax_mmap_get_block(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int create)
{
- int flags = EXT4_GET_BLOCKS_PRE_IO | EXT4_GET_BLOCKS_UNWRIT_EXT;
- if (create)
- flags |= EXT4_GET_BLOCKS_CREATE;
- ext4_debug("ext4_get_block_dax: inode %lu, create flag %d\n",
+ int ret, err;
+ int credits;
+ struct ext4_map_blocks map;
+ handle_t *handle = NULL;
+ int flags = 0;
+
+ ext4_debug("ext4_dax_mmap_get_block: inode %lu, create flag %d\n",
inode->i_ino, create);
- return _ext4_get_block(inode, iblock, bh_result, flags);
+ map.m_lblk = iblock;
+ map.m_len = bh_result->b_size >> inode->i_blkbits;
+ credits = ext4_chunk_trans_blocks(inode, map.m_len);
+ if (create) {
+ flags |= EXT4_GET_BLOCKS_PRE_IO | EXT4_GET_BLOCKS_CREATE_ZERO;
+ handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, credits);
+ if (IS_ERR(handle)) {
+ ret = PTR_ERR(handle);
+ return ret;
+ }
+ }
+
+ ret = ext4_map_blocks(handle, inode, &map, flags);
+ if (create) {
+ err = ext4_journal_stop(handle);
+ if (ret >= 0 && err < 0)
+ ret = err;
+ }
+ if (ret <= 0)
+ goto out;
+ if (map.m_flags & EXT4_MAP_UNWRITTEN) {
+ int err2;
+
+ /*
+ * We are protected by i_mmap_sem so we know block cannot go
+ * away from under us even though we dropped i_data_sem.
+ * Convert extent to written and write zeros there.
+ *
+ * Note: We may get here even when create == 0.
+ */
+ handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, credits);
+ if (IS_ERR(handle)) {
+ ret = PTR_ERR(handle);
+ goto out;
+ }
+
+ err = ext4_map_blocks(handle, inode, &map,
+ EXT4_GET_BLOCKS_CONVERT | EXT4_GET_BLOCKS_CREATE_ZERO);
+ if (err < 0)
+ ret = err;
+ err2 = ext4_journal_stop(handle);
+ if (err2 < 0 && ret > 0)
+ ret = err2;
+ }
+out:
+ WARN_ON_ONCE(ret == 0 && create);
+ if (ret > 0) {
+ map_bh(bh_result, inode->i_sb, map.m_pblk);
+ bh_result->b_state = (bh_result->b_state & ~EXT4_MAP_FLAGS) |
+ map.m_flags;
+ /*
+ * At least for now we have to clear BH_New so that DAX code
+ * doesn't attempt to zero blocks again in a racy way.
+ */
+ bh_result->b_state &= ~(1 << BH_New);
+ bh_result->b_size = map.m_len << inode->i_blkbits;
+ ret = 0;
+ }
+ return ret;
}
+#endif
static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
ssize_t size, void *private)
@@ -3143,10 +3230,8 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
/* If we do a overwrite dio, i_mutex locking can be released */
overwrite = *((int *)iocb->private);
- if (overwrite) {
- down_read(&EXT4_I(inode)->i_data_sem);
+ if (overwrite)
mutex_unlock(&inode->i_mutex);
- }
/*
* We could direct write to holes and fallocate.
@@ -3189,7 +3274,7 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
}
if (overwrite) {
- get_block_func = ext4_get_block_write_nolock;
+ get_block_func = ext4_get_block_overwrite;
} else {
get_block_func = ext4_get_block_write;
dio_flags = DIO_LOCKING;
@@ -3245,10 +3330,8 @@ retake_lock:
if (iov_iter_rw(iter) == WRITE)
inode_dio_end(inode);
/* take i_mutex locking again if we do a ovewrite dio */
- if (overwrite) {
- up_read(&EXT4_I(inode)->i_data_sem);
+ if (overwrite)
mutex_lock(&inode->i_mutex);
- }
return ret;
}
@@ -3559,6 +3642,35 @@ int ext4_can_truncate(struct inode *inode)
}
/*
+ * We have to make sure i_disksize gets properly updated before we truncate
+ * page cache due to hole punching or zero range. Otherwise i_disksize update
+ * can get lost as it may have been postponed to submission of writeback but
+ * that will never happen after we truncate page cache.
+ */
+int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset,
+ loff_t len)
+{
+ handle_t *handle;
+ loff_t size = i_size_read(inode);
+
+ WARN_ON(!mutex_is_locked(&inode->i_mutex));
+ if (offset > size || offset + len < size)
+ return 0;
+
+ if (EXT4_I(inode)->i_disksize >= size)
+ return 0;
+
+ handle = ext4_journal_start(inode, EXT4_HT_MISC, 1);
+ if (IS_ERR(handle))
+ return PTR_ERR(handle);
+ ext4_update_i_disksize(inode, size);
+ ext4_mark_inode_dirty(handle, inode);
+ ext4_journal_stop(handle);
+
+ return 0;
+}
+
+/*
* ext4_punch_hole: punches a hole in a file by releaseing the blocks
* associated with the given offset and length
*
@@ -3623,17 +3735,26 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
}
+ /* Wait all existing dio workers, newcomers will block on i_mutex */
+ ext4_inode_block_unlocked_dio(inode);
+ inode_dio_wait(inode);
+
+ /*
+ * Prevent page faults from reinstantiating pages we have released from
+ * page cache.
+ */
+ down_write(&EXT4_I(inode)->i_mmap_sem);
first_block_offset = round_up(offset, sb->s_blocksize);
last_block_offset = round_down((offset + length), sb->s_blocksize) - 1;
/* Now release the pages and zero block aligned part of pages*/
- if (last_block_offset > first_block_offset)
+ if (last_block_offset > first_block_offset) {
+ ret = ext4_update_disksize_before_punch(inode, offset, length);
+ if (ret)
+ goto out_dio;
truncate_pagecache_range(inode, first_block_offset,
last_block_offset);
-
- /* Wait all existing dio workers, newcomers will block on i_mutex */
- ext4_inode_block_unlocked_dio(inode);
- inode_dio_wait(inode);
+ }
if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
credits = ext4_writepage_trans_blocks(inode);
@@ -3680,16 +3801,12 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
if (IS_SYNC(inode))
ext4_handle_sync(handle);
- /* Now release the pages again to reduce race window */
- if (last_block_offset > first_block_offset)
- truncate_pagecache_range(inode, first_block_offset,
- last_block_offset);
-
inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
ext4_mark_inode_dirty(handle, inode);
out_stop:
ext4_journal_stop(handle);
out_dio:
+ up_write(&EXT4_I(inode)->i_mmap_sem);
ext4_inode_resume_unlocked_dio(inode);
out_mutex:
mutex_unlock(&inode->i_mutex);
@@ -4076,6 +4193,14 @@ static inline void ext4_iget_extra_inode(struct inode *inode,
EXT4_I(inode)->i_inline_off = 0;
}
+int ext4_get_projid(struct inode *inode, kprojid_t *projid)
+{
+ if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, EXT4_FEATURE_RO_COMPAT_PROJECT))
+ return -EOPNOTSUPP;
+ *projid = EXT4_I(inode)->i_projid;
+ return 0;
+}
+
struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
{
struct ext4_iloc iloc;
@@ -4087,6 +4212,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
int block;
uid_t i_uid;
gid_t i_gid;
+ projid_t i_projid;
inode = iget_locked(sb, ino);
if (!inode)
@@ -4136,12 +4262,20 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
inode->i_mode = le16_to_cpu(raw_inode->i_mode);
i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
+ if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_PROJECT) &&
+ EXT4_INODE_SIZE(sb) > EXT4_GOOD_OLD_INODE_SIZE &&
+ EXT4_FITS_IN_INODE(raw_inode, ei, i_projid))
+ i_projid = (projid_t)le32_to_cpu(raw_inode->i_projid);
+ else
+ i_projid = EXT4_DEF_PROJID;
+
if (!(test_opt(inode->i_sb, NO_UID32))) {
i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
}
i_uid_write(inode, i_uid);
i_gid_write(inode, i_gid);
+ ei->i_projid = make_kprojid(&init_user_ns, i_projid);
set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */
@@ -4440,6 +4574,7 @@ static int ext4_do_update_inode(handle_t *handle,
int need_datasync = 0, set_large_file = 0;
uid_t i_uid;
gid_t i_gid;
+ projid_t i_projid;
spin_lock(&ei->i_raw_lock);
@@ -4452,6 +4587,7 @@ static int ext4_do_update_inode(handle_t *handle,
raw_inode->i_mode = cpu_to_le16(inode->i_mode);
i_uid = i_uid_read(inode);
i_gid = i_gid_read(inode);
+ i_projid = from_kprojid(&init_user_ns, ei->i_projid);
if (!(test_opt(inode->i_sb, NO_UID32))) {
raw_inode->i_uid_low = cpu_to_le16(low_16_bits(i_uid));
raw_inode->i_gid_low = cpu_to_le16(low_16_bits(i_gid));
@@ -4529,6 +4665,15 @@ static int ext4_do_update_inode(handle_t *handle,
cpu_to_le16(ei->i_extra_isize);
}
}
+
+ BUG_ON(!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
+ EXT4_FEATURE_RO_COMPAT_PROJECT) &&
+ i_projid != EXT4_DEF_PROJID);
+
+ if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE &&
+ EXT4_FITS_IN_INODE(raw_inode, ei, i_projid))
+ raw_inode->i_projid = cpu_to_le32(i_projid);
+
ext4_inode_csum_set(inode, raw_inode, ei);
spin_unlock(&ei->i_raw_lock);
if (inode->i_sb->s_flags & MS_LAZYTIME)
@@ -4824,6 +4969,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
} else
ext4_wait_for_tail_page_commit(inode);
}
+ down_write(&EXT4_I(inode)->i_mmap_sem);
/*
* Truncate pagecache after we've waited for commit
* in data=journal mode to make pages freeable.
@@ -4831,6 +4977,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
truncate_pagecache(inode, inode->i_size);
if (shrink)
ext4_truncate(inode);
+ up_write(&EXT4_I(inode)->i_mmap_sem);
}
if (!rc) {
@@ -5279,6 +5426,8 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
sb_start_pagefault(inode->i_sb);
file_update_time(vma->vm_file);
+
+ down_read(&EXT4_I(inode)->i_mmap_sem);
/* Delalloc case is easy... */
if (test_opt(inode->i_sb, DELALLOC) &&
!ext4_should_journal_data(inode) &&
@@ -5348,6 +5497,19 @@ retry_alloc:
out_ret:
ret = block_page_mkwrite_return(ret);
out:
+ up_read(&EXT4_I(inode)->i_mmap_sem);
sb_end_pagefault(inode->i_sb);
return ret;
}
+
+int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ struct inode *inode = file_inode(vma->vm_file);
+ int err;
+
+ down_read(&EXT4_I(inode)->i_mmap_sem);
+ err = filemap_fault(vma, vmf);
+ up_read(&EXT4_I(inode)->i_mmap_sem);
+
+ return err;
+}
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 5e872fd40e5e..2b0cb84255eb 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -14,6 +14,7 @@
#include <linux/mount.h>
#include <linux/file.h>
#include <linux/random.h>
+#include <linux/quotaops.h>
#include <asm/uaccess.h>
#include "ext4_jbd2.h"
#include "ext4.h"
@@ -202,6 +203,238 @@ static int uuid_is_zero(__u8 u[16])
return 1;
}
+static int ext4_ioctl_setflags(struct inode *inode,
+ unsigned int flags)
+{
+ struct ext4_inode_info *ei = EXT4_I(inode);
+ handle_t *handle = NULL;
+ int err = EPERM, migrate = 0;
+ struct ext4_iloc iloc;
+ unsigned int oldflags, mask, i;
+ unsigned int jflag;
+
+ /* Is it quota file? Do not allow user to mess with it */
+ if (IS_NOQUOTA(inode))
+ goto flags_out;
+
+ oldflags = ei->i_flags;
+
+ /* The JOURNAL_DATA flag is modifiable only by root */
+ jflag = flags & EXT4_JOURNAL_DATA_FL;
+
+ /*
+ * The IMMUTABLE and APPEND_ONLY flags can only be changed by
+ * the relevant capability.
+ *
+ * This test looks nicer. Thanks to Pauline Middelink
+ */
+ if ((flags ^ oldflags) & (EXT4_APPEND_FL | EXT4_IMMUTABLE_FL)) {
+ if (!capable(CAP_LINUX_IMMUTABLE))
+ goto flags_out;
+ }
+
+ /*
+ * The JOURNAL_DATA flag can only be changed by
+ * the relevant capability.
+ */
+ if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) {
+ if (!capable(CAP_SYS_RESOURCE))
+ goto flags_out;
+ }
+ if ((flags ^ oldflags) & EXT4_EXTENTS_FL)
+ migrate = 1;
+
+ if (flags & EXT4_EOFBLOCKS_FL) {
+ /* we don't support adding EOFBLOCKS flag */
+ if (!(oldflags & EXT4_EOFBLOCKS_FL)) {
+ err = -EOPNOTSUPP;
+ goto flags_out;
+ }
+ } else if (oldflags & EXT4_EOFBLOCKS_FL)
+ ext4_truncate(inode);
+
+ handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
+ if (IS_ERR(handle)) {
+ err = PTR_ERR(handle);
+ goto flags_out;
+ }
+ if (IS_SYNC(inode))
+ ext4_handle_sync(handle);
+ err = ext4_reserve_inode_write(handle, inode, &iloc);
+ if (err)
+ goto flags_err;
+
+ for (i = 0, mask = 1; i < 32; i++, mask <<= 1) {
+ if (!(mask & EXT4_FL_USER_MODIFIABLE))
+ continue;
+ if (mask & flags)
+ ext4_set_inode_flag(inode, i);
+ else
+ ext4_clear_inode_flag(inode, i);
+ }
+
+ ext4_set_inode_flags(inode);
+ inode->i_ctime = ext4_current_time(inode);
+
+ err = ext4_mark_iloc_dirty(handle, inode, &iloc);
+flags_err:
+ ext4_journal_stop(handle);
+ if (err)
+ goto flags_out;
+
+ if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL))
+ err = ext4_change_inode_journal_flag(inode, jflag);
+ if (err)
+ goto flags_out;
+ if (migrate) {
+ if (flags & EXT4_EXTENTS_FL)
+ err = ext4_ext_migrate(inode);
+ else
+ err = ext4_ind_migrate(inode);
+ }
+
+flags_out:
+ return err;
+}
+
+#ifdef CONFIG_QUOTA
+static int ext4_ioctl_setproject(struct file *filp, __u32 projid)
+{
+ struct inode *inode = file_inode(filp);
+ struct super_block *sb = inode->i_sb;
+ struct ext4_inode_info *ei = EXT4_I(inode);
+ int err, rc;
+ handle_t *handle;
+ kprojid_t kprojid;
+ struct ext4_iloc iloc;
+ struct ext4_inode *raw_inode;
+
+ if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
+ EXT4_FEATURE_RO_COMPAT_PROJECT)) {
+ if (projid != EXT4_DEF_PROJID)
+ return -EOPNOTSUPP;
+ else
+ return 0;
+ }
+
+ if (EXT4_INODE_SIZE(sb) <= EXT4_GOOD_OLD_INODE_SIZE)
+ return -EOPNOTSUPP;
+
+ kprojid = make_kprojid(&init_user_ns, (projid_t)projid);
+
+ if (projid_eq(kprojid, EXT4_I(inode)->i_projid))
+ return 0;
+
+ err = mnt_want_write_file(filp);
+ if (err)
+ return err;
+
+ err = -EPERM;
+ mutex_lock(&inode->i_mutex);
+ /* Is it quota file? Do not allow user to mess with it */
+ if (IS_NOQUOTA(inode))
+ goto out_unlock;
+
+ err = ext4_get_inode_loc(inode, &iloc);
+ if (err)
+ goto out_unlock;
+
+ raw_inode = ext4_raw_inode(&iloc);
+ if (!EXT4_FITS_IN_INODE(raw_inode, ei, i_projid)) {
+ err = -EOVERFLOW;
+ brelse(iloc.bh);
+ goto out_unlock;
+ }
+ brelse(iloc.bh);
+
+ dquot_initialize(inode);
+
+ handle = ext4_journal_start(inode, EXT4_HT_QUOTA,
+ EXT4_QUOTA_INIT_BLOCKS(sb) +
+ EXT4_QUOTA_DEL_BLOCKS(sb) + 3);
+ if (IS_ERR(handle)) {
+ err = PTR_ERR(handle);
+ goto out_unlock;
+ }
+
+ err = ext4_reserve_inode_write(handle, inode, &iloc);
+ if (err)
+ goto out_stop;
+
+ if (sb_has_quota_limits_enabled(sb, PRJQUOTA)) {
+ struct dquot *transfer_to[MAXQUOTAS] = { };
+
+ transfer_to[PRJQUOTA] = dqget(sb, make_kqid_projid(kprojid));
+ if (transfer_to[PRJQUOTA]) {
+ err = __dquot_transfer(inode, transfer_to);
+ dqput(transfer_to[PRJQUOTA]);
+ if (err)
+ goto out_dirty;
+ }
+ }
+ EXT4_I(inode)->i_projid = kprojid;
+ inode->i_ctime = ext4_current_time(inode);
+out_dirty:
+ rc = ext4_mark_iloc_dirty(handle, inode, &iloc);
+ if (!err)
+ err = rc;
+out_stop:
+ ext4_journal_stop(handle);
+out_unlock:
+ mutex_unlock(&inode->i_mutex);
+ mnt_drop_write_file(filp);
+ return err;
+}
+#else
+static int ext4_ioctl_setproject(struct file *filp, __u32 projid)
+{
+ if (projid != EXT4_DEF_PROJID)
+ return -EOPNOTSUPP;
+ return 0;
+}
+#endif
+
+/* Transfer internal flags to xflags */
+static inline __u32 ext4_iflags_to_xflags(unsigned long iflags)
+{
+ __u32 xflags = 0;
+
+ if (iflags & EXT4_SYNC_FL)
+ xflags |= FS_XFLAG_SYNC;
+ if (iflags & EXT4_IMMUTABLE_FL)
+ xflags |= FS_XFLAG_IMMUTABLE;
+ if (iflags & EXT4_APPEND_FL)
+ xflags |= FS_XFLAG_APPEND;
+ if (iflags & EXT4_NODUMP_FL)
+ xflags |= FS_XFLAG_NODUMP;
+ if (iflags & EXT4_NOATIME_FL)
+ xflags |= FS_XFLAG_NOATIME;
+ if (iflags & EXT4_PROJINHERIT_FL)
+ xflags |= FS_XFLAG_PROJINHERIT;
+ return xflags;
+}
+
+/* Transfer xflags flags to internal */
+static inline unsigned long ext4_xflags_to_iflags(__u32 xflags)
+{
+ unsigned long iflags = 0;
+
+ if (xflags & FS_XFLAG_SYNC)
+ iflags |= EXT4_SYNC_FL;
+ if (xflags & FS_XFLAG_IMMUTABLE)
+ iflags |= EXT4_IMMUTABLE_FL;
+ if (xflags & FS_XFLAG_APPEND)
+ iflags |= EXT4_APPEND_FL;
+ if (xflags & FS_XFLAG_NODUMP)
+ iflags |= EXT4_NODUMP_FL;
+ if (xflags & FS_XFLAG_NOATIME)
+ iflags |= EXT4_NOATIME_FL;
+ if (xflags & FS_XFLAG_PROJINHERIT)
+ iflags |= EXT4_PROJINHERIT_FL;
+
+ return iflags;
+}
+
long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
struct inode *inode = file_inode(filp);
@@ -217,11 +450,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
flags = ei->i_flags & EXT4_FL_USER_VISIBLE;
return put_user(flags, (int __user *) arg);
case EXT4_IOC_SETFLAGS: {
- handle_t *handle = NULL;
- int err, migrate = 0;
- struct ext4_iloc iloc;
- unsigned int oldflags, mask, i;
- unsigned int jflag;
+ int err;
if (!inode_owner_or_capable(inode))
return -EACCES;
@@ -235,89 +464,8 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
flags = ext4_mask_flags(inode->i_mode, flags);
- err = -EPERM;
mutex_lock(&inode->i_mutex);
- /* Is it quota file? Do not allow user to mess with it */
- if (IS_NOQUOTA(inode))
- goto flags_out;
-
- oldflags = ei->i_flags;
-
- /* The JOURNAL_DATA flag is modifiable only by root */
- jflag = flags & EXT4_JOURNAL_DATA_FL;
-
- /*
- * The IMMUTABLE and APPEND_ONLY flags can only be changed by
- * the relevant capability.
- *
- * This test looks nicer. Thanks to Pauline Middelink
- */
- if ((flags ^ oldflags) & (EXT4_APPEND_FL | EXT4_IMMUTABLE_FL)) {
- if (!capable(CAP_LINUX_IMMUTABLE))
- goto flags_out;
- }
-
- /*
- * The JOURNAL_DATA flag can only be changed by
- * the relevant capability.
- */
- if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) {
- if (!capable(CAP_SYS_RESOURCE))
- goto flags_out;
- }
- if ((flags ^ oldflags) & EXT4_EXTENTS_FL)
- migrate = 1;
-
- if (flags & EXT4_EOFBLOCKS_FL) {
- /* we don't support adding EOFBLOCKS flag */
- if (!(oldflags & EXT4_EOFBLOCKS_FL)) {
- err = -EOPNOTSUPP;
- goto flags_out;
- }
- } else if (oldflags & EXT4_EOFBLOCKS_FL)
- ext4_truncate(inode);
-
- handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
- if (IS_ERR(handle)) {
- err = PTR_ERR(handle);
- goto flags_out;
- }
- if (IS_SYNC(inode))
- ext4_handle_sync(handle);
- err = ext4_reserve_inode_write(handle, inode, &iloc);
- if (err)
- goto flags_err;
-
- for (i = 0, mask = 1; i < 32; i++, mask <<= 1) {
- if (!(mask & EXT4_FL_USER_MODIFIABLE))
- continue;
- if (mask & flags)
- ext4_set_inode_flag(inode, i);
- else
- ext4_clear_inode_flag(inode, i);
- }
-
- ext4_set_inode_flags(inode);
- inode->i_ctime = ext4_current_time(inode);
-
- err = ext4_mark_iloc_dirty(handle, inode, &iloc);
-flags_err:
- ext4_journal_stop(handle);
- if (err)
- goto flags_out;
-
- if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL))
- err = ext4_change_inode_journal_flag(inode, jflag);
- if (err)
- goto flags_out;
- if (migrate) {
- if (flags & EXT4_EXTENTS_FL)
- err = ext4_ext_migrate(inode);
- else
- err = ext4_ind_migrate(inode);
- }
-
-flags_out:
+ err = ext4_ioctl_setflags(inode, flags);
mutex_unlock(&inode->i_mutex);
mnt_drop_write_file(filp);
return err;
@@ -689,6 +837,60 @@ encryption_policy_out:
return -EOPNOTSUPP;
#endif
}
+ case EXT4_IOC_FSGETXATTR:
+ {
+ struct fsxattr fa;
+
+ memset(&fa, 0, sizeof(struct fsxattr));
+ ext4_get_inode_flags(ei);
+ fa.fsx_xflags = ext4_iflags_to_xflags(ei->i_flags & EXT4_FL_USER_VISIBLE);
+
+ if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
+ EXT4_FEATURE_RO_COMPAT_PROJECT)) {
+ fa.fsx_projid = (__u32)from_kprojid(&init_user_ns,
+ EXT4_I(inode)->i_projid);
+ }
+
+ if (copy_to_user((struct fsxattr __user *)arg,
+ &fa, sizeof(fa)))
+ return -EFAULT;
+ return 0;
+ }
+ case EXT4_IOC_FSSETXATTR:
+ {
+ struct fsxattr fa;
+ int err;
+
+ if (copy_from_user(&fa, (struct fsxattr __user *)arg,
+ sizeof(fa)))
+ return -EFAULT;
+
+ /* Make sure caller has proper permission */
+ if (!inode_owner_or_capable(inode))
+ return -EACCES;
+
+ err = mnt_want_write_file(filp);
+ if (err)
+ return err;
+
+ flags = ext4_xflags_to_iflags(fa.fsx_xflags);
+ flags = ext4_mask_flags(inode->i_mode, flags);
+
+ mutex_lock(&inode->i_mutex);
+ flags = (ei->i_flags & ~EXT4_FL_XFLAG_VISIBLE) |
+ (flags & EXT4_FL_XFLAG_VISIBLE);
+ err = ext4_ioctl_setflags(inode, flags);
+ mutex_unlock(&inode->i_mutex);
+ mnt_drop_write_file(filp);
+ if (err)
+ return err;
+
+ err = ext4_ioctl_setproject(filp, fa.fsx_projid);
+ if (err)
+ return err;
+
+ return 0;
+ }
default:
return -ENOTTY;
}
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index f27e0c2598c5..854f75de4599 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -273,7 +273,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
struct ext4_filename *fname,
struct ext4_dir_entry_2 **res_dir);
static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
- struct dentry *dentry, struct inode *inode);
+ struct inode *dir, struct inode *inode);
/* checksumming functions */
void initialize_dirent_tail(struct ext4_dir_entry_tail *t,
@@ -1928,10 +1928,9 @@ static int add_dirent_to_buf(handle_t *handle, struct ext4_filename *fname,
* directory, and adds the dentry to the indexed directory.
*/
static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
- struct dentry *dentry,
+ struct inode *dir,
struct inode *inode, struct buffer_head *bh)
{
- struct inode *dir = d_inode(dentry->d_parent);
struct buffer_head *bh2;
struct dx_root *root;
struct dx_frame frames[2], *frame;
@@ -2086,8 +2085,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
return retval;
if (ext4_has_inline_data(dir)) {
- retval = ext4_try_add_inline_entry(handle, &fname,
- dentry, inode);
+ retval = ext4_try_add_inline_entry(handle, &fname, dir, inode);
if (retval < 0)
goto out;
if (retval == 1) {
@@ -2097,7 +2095,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
}
if (is_dx(dir)) {
- retval = ext4_dx_add_entry(handle, &fname, dentry, inode);
+ retval = ext4_dx_add_entry(handle, &fname, dir, inode);
if (!retval || (retval != ERR_BAD_DX_DIR))
goto out;
ext4_clear_inode_flag(dir, EXT4_INODE_INDEX);
@@ -2119,7 +2117,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
if (blocks == 1 && !dx_fallback &&
ext4_has_feature_dir_index(sb)) {
- retval = make_indexed_dir(handle, &fname, dentry,
+ retval = make_indexed_dir(handle, &fname, dir,
inode, bh);
bh = NULL; /* make_indexed_dir releases bh */
goto out;
@@ -2154,12 +2152,11 @@ out:
* Returns 0 for success, or a negative error value
*/
static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
- struct dentry *dentry, struct inode *inode)
+ struct inode *dir, struct inode *inode)
{
struct dx_frame frames[2], *frame;
struct dx_entry *entries, *at;
struct buffer_head *bh;
- struct inode *dir = d_inode(dentry->d_parent);
struct super_block *sb = dir->i_sb;
struct ext4_dir_entry_2 *de;
int err;
@@ -3212,6 +3209,12 @@ static int ext4_link(struct dentry *old_dentry,
if (ext4_encrypted_inode(dir) &&
!ext4_is_child_context_consistent_with_parent(dir, inode))
return -EPERM;
+
+ if ((ext4_test_inode_flag(dir, EXT4_INODE_PROJINHERIT)) &&
+ (!projid_eq(EXT4_I(dir)->i_projid,
+ EXT4_I(old_dentry->d_inode)->i_projid)))
+ return -EXDEV;
+
err = dquot_initialize(dir);
if (err)
return err;
@@ -3492,6 +3495,11 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
int credits;
u8 old_file_type;
+ if ((ext4_test_inode_flag(new_dir, EXT4_INODE_PROJINHERIT)) &&
+ (!projid_eq(EXT4_I(new_dir)->i_projid,
+ EXT4_I(old_dentry->d_inode)->i_projid)))
+ return -EXDEV;
+
retval = dquot_initialize(old.dir);
if (retval)
return retval;
@@ -3701,6 +3709,14 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
new.inode)))
return -EPERM;
+ if ((ext4_test_inode_flag(new_dir, EXT4_INODE_PROJINHERIT) &&
+ !projid_eq(EXT4_I(new_dir)->i_projid,
+ EXT4_I(old_dentry->d_inode)->i_projid)) ||
+ (ext4_test_inode_flag(old_dir, EXT4_INODE_PROJINHERIT) &&
+ !projid_eq(EXT4_I(old_dir)->i_projid,
+ EXT4_I(new_dentry->d_inode)->i_projid)))
+ return -EXDEV;
+
retval = dquot_initialize(old.dir);
if (retval)
return retval;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index f1b56ff01208..00c98fab6333 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -80,6 +80,36 @@ static void ext4_destroy_lazyinit_thread(void);
static void ext4_unregister_li_request(struct super_block *sb);
static void ext4_clear_request_list(void);
+/*
+ * Lock ordering
+ *
+ * Note the difference between i_mmap_sem (EXT4_I(inode)->i_mmap_sem) and
+ * i_mmap_rwsem (inode->i_mmap_rwsem)!
+ *
+ * page fault path:
+ * mmap_sem -> sb_start_pagefault -> i_mmap_sem (r) -> transaction start ->
+ * page lock -> i_data_sem (rw)
+ *
+ * buffered write path:
+ * sb_start_write -> i_mutex -> mmap_sem
+ * sb_start_write -> i_mutex -> transaction start -> page lock ->
+ * i_data_sem (rw)
+ *
+ * truncate:
+ * sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (w) -> i_mmap_sem (w) ->
+ * i_mmap_rwsem (w) -> page lock
+ * sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (w) -> i_mmap_sem (w) ->
+ * transaction start -> i_data_sem (rw)
+ *
+ * direct IO:
+ * sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (r) -> mmap_sem
+ * sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (r) ->
+ * transaction start -> i_data_sem (rw)
+ *
+ * writepages:
+ * transaction start -> page lock(s) -> i_data_sem (rw)
+ */
+
#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT2)
static struct file_system_type ext2_fs_type = {
.owner = THIS_MODULE,
@@ -958,6 +988,7 @@ static void init_once(void *foo)
INIT_LIST_HEAD(&ei->i_orphan);
init_rwsem(&ei->xattr_sem);
init_rwsem(&ei->i_data_sem);
+ init_rwsem(&ei->i_mmap_sem);
inode_init_once(&ei->vfs_inode);
}
@@ -1066,8 +1097,8 @@ static int bdev_try_to_free_page(struct super_block *sb, struct page *page,
}
#ifdef CONFIG_QUOTA
-#define QTYPE2NAME(t) ((t) == USRQUOTA ? "user" : "group")
-#define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA))
+static char *quotatypes[] = INITQFNAMES;
+#define QTYPE2NAME(t) (quotatypes[t])
static int ext4_write_dquot(struct dquot *dquot);
static int ext4_acquire_dquot(struct dquot *dquot);
@@ -1100,6 +1131,7 @@ static const struct dquot_operations ext4_quota_operations = {
.write_info = ext4_write_info,
.alloc_dquot = dquot_alloc,
.destroy_dquot = dquot_destroy,
+ .get_projid = ext4_get_projid,
};
static const struct quotactl_ops ext4_qctl_operations = {
@@ -2526,6 +2558,12 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly)
"without CONFIG_QUOTA");
return 0;
}
+ if (ext4_has_feature_project(sb) && !readonly) {
+ ext4_msg(sb, KERN_ERR,
+ "Filesystem with project quota feature cannot be mounted RDWR "
+ "without CONFIG_QUOTA");
+ return 0;
+ }
#endif /* CONFIG_QUOTA */
return 1;
}
@@ -3654,7 +3692,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
sb->s_qcop = &dquot_quotactl_sysfile_ops;
else
sb->s_qcop = &ext4_qctl_operations;
- sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP;
+ sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
#endif
memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
@@ -4790,6 +4828,48 @@ restore_opts:
return err;
}
+#ifdef CONFIG_QUOTA
+static int ext4_statfs_project(struct super_block *sb,
+ kprojid_t projid, struct kstatfs *buf)
+{
+ struct kqid qid;
+ struct dquot *dquot;
+ u64 limit;
+ u64 curblock;
+
+ qid = make_kqid_projid(projid);
+ dquot = dqget(sb, qid);
+ if (IS_ERR(dquot))
+ return PTR_ERR(dquot);
+ spin_lock(&dq_data_lock);
+
+ limit = (dquot->dq_dqb.dqb_bsoftlimit ?
+ dquot->dq_dqb.dqb_bsoftlimit :
+ dquot->dq_dqb.dqb_bhardlimit) >> sb->s_blocksize_bits;
+ if (limit && buf->f_blocks > limit) {
+ curblock = dquot->dq_dqb.dqb_curspace >> sb->s_blocksize_bits;
+ buf->f_blocks = limit;
+ buf->f_bfree = buf->f_bavail =
+ (buf->f_blocks > curblock) ?
+ (buf->f_blocks - curblock) : 0;
+ }
+
+ limit = dquot->dq_dqb.dqb_isoftlimit ?
+ dquot->dq_dqb.dqb_isoftlimit :
+ dquot->dq_dqb.dqb_ihardlimit;
+ if (limit && buf->f_files > limit) {
+ buf->f_files = limit;
+ buf->f_ffree =
+ (buf->f_files > dquot->dq_dqb.dqb_curinodes) ?
+ (buf->f_files - dquot->dq_dqb.dqb_curinodes) : 0;
+ }
+
+ spin_unlock(&dq_data_lock);
+ dqput(dquot);
+ return 0;
+}
+#endif
+
static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
{
struct super_block *sb = dentry->d_sb;
@@ -4822,6 +4902,11 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
+#ifdef CONFIG_QUOTA
+ if (ext4_test_inode_flag(dentry->d_inode, EXT4_INODE_PROJINHERIT) &&
+ sb_has_quota_limits_enabled(sb, PRJQUOTA))
+ ext4_statfs_project(sb, EXT4_I(dentry->d_inode)->i_projid, buf);
+#endif
return 0;
}
@@ -4986,7 +5071,8 @@ static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
struct inode *qf_inode;
unsigned long qf_inums[EXT4_MAXQUOTAS] = {
le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
- le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum)
+ le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum),
+ le32_to_cpu(EXT4_SB(sb)->s_es->s_prj_quota_inum)
};
BUG_ON(!ext4_has_feature_quota(sb));
@@ -5014,7 +5100,8 @@ static int ext4_enable_quotas(struct super_block *sb)
int type, err = 0;
unsigned long qf_inums[EXT4_MAXQUOTAS] = {
le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
- le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum)
+ le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum),
+ le32_to_cpu(EXT4_SB(sb)->s_es->s_prj_quota_inum)
};
sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
diff --git a/fs/ext4/truncate.h b/fs/ext4/truncate.h
index 011ba6670d99..c70d06a383e2 100644
--- a/fs/ext4/truncate.h
+++ b/fs/ext4/truncate.h
@@ -10,8 +10,10 @@
*/
static inline void ext4_truncate_failed_write(struct inode *inode)
{
+ down_write(&EXT4_I(inode)->i_mmap_sem);
truncate_inode_pages(inode->i_mapping, inode->i_size);
ext4_truncate(inode);
+ up_write(&EXT4_I(inode)->i_mmap_sem);
}
/*
diff --git a/fs/filesystems.c b/fs/filesystems.c
index 5797d45a78cb..c5618db110be 100644
--- a/fs/filesystems.c
+++ b/fs/filesystems.c
@@ -46,9 +46,9 @@ void put_filesystem(struct file_system_type *fs)
static struct file_system_type **find_filesystem(const char *name, unsigned len)
{
struct file_system_type **p;
- for (p=&file_systems; *p; p=&(*p)->next)
- if (strlen((*p)->name) == len &&
- strncmp((*p)->name, name, len) == 0)
+ for (p = &file_systems; *p; p = &(*p)->next)
+ if (strncmp((*p)->name, name, len) == 0 &&
+ !(*p)->name[len])
break;
return p;
}
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index f92612e4b9d6..474e57f834e6 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -1390,6 +1390,7 @@ static int __ocfs2_cluster_lock(struct ocfs2_super *osb,
unsigned int gen;
int noqueue_attempted = 0;
int dlm_locked = 0;
+ int kick_dc = 0;
if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED)) {
mlog_errno(-EINVAL);
@@ -1524,7 +1525,12 @@ update_holders:
unlock:
lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
+ /* ocfs2_unblock_lock reques on seeing OCFS2_LOCK_UPCONVERT_FINISHING */
+ kick_dc = (lockres->l_flags & OCFS2_LOCK_BLOCKED);
+
spin_unlock_irqrestore(&lockres->l_lock, flags);
+ if (kick_dc)
+ ocfs2_wake_downconvert_thread(osb);
out:
/*
* This is helping work around a lock inversion between the page lock
diff --git a/fs/pipe.c b/fs/pipe.c
index 42cf8ddf0e55..ab8dad3ccb6a 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -38,6 +38,12 @@ unsigned int pipe_max_size = 1048576;
*/
unsigned int pipe_min_size = PAGE_SIZE;
+/* Maximum allocatable pages per user. Hard limit is unset by default, soft
+ * matches default values.
+ */
+unsigned long pipe_user_pages_hard;
+unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR;
+
/*
* We use a start+len construction, which provides full use of the
* allocated memory.
@@ -583,20 +589,49 @@ pipe_fasync(int fd, struct file *filp, int on)
return retval;
}
+static void account_pipe_buffers(struct pipe_inode_info *pipe,
+ unsigned long old, unsigned long new)
+{
+ atomic_long_add(new - old, &pipe->user->pipe_bufs);
+}
+
+static bool too_many_pipe_buffers_soft(struct user_struct *user)
+{
+ return pipe_user_pages_soft &&
+ atomic_long_read(&user->pipe_bufs) >= pipe_user_pages_soft;
+}
+
+static bool too_many_pipe_buffers_hard(struct user_struct *user)
+{
+ return pipe_user_pages_hard &&
+ atomic_long_read(&user->pipe_bufs) >= pipe_user_pages_hard;
+}
+
struct pipe_inode_info *alloc_pipe_info(void)
{
struct pipe_inode_info *pipe;
pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
if (pipe) {
- pipe->bufs = kzalloc(sizeof(struct pipe_buffer) * PIPE_DEF_BUFFERS, GFP_KERNEL);
+ unsigned long pipe_bufs = PIPE_DEF_BUFFERS;
+ struct user_struct *user = get_current_user();
+
+ if (!too_many_pipe_buffers_hard(user)) {
+ if (too_many_pipe_buffers_soft(user))
+ pipe_bufs = 1;
+ pipe->bufs = kzalloc(sizeof(struct pipe_buffer) * pipe_bufs, GFP_KERNEL);
+ }
+
if (pipe->bufs) {
init_waitqueue_head(&pipe->wait);
pipe->r_counter = pipe->w_counter = 1;
- pipe->buffers = PIPE_DEF_BUFFERS;
+ pipe->buffers = pipe_bufs;
+ pipe->user = user;
+ account_pipe_buffers(pipe, 0, pipe_bufs);
mutex_init(&pipe->mutex);
return pipe;
}
+ free_uid(user);
kfree(pipe);
}
@@ -607,6 +642,8 @@ void free_pipe_info(struct pipe_inode_info *pipe)
{
int i;
+ account_pipe_buffers(pipe, pipe->buffers, 0);
+ free_uid(pipe->user);
for (i = 0; i < pipe->buffers; i++) {
struct pipe_buffer *buf = pipe->bufs + i;
if (buf->ops)
@@ -998,6 +1035,7 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long nr_pages)
memcpy(bufs + head, pipe->bufs, tail * sizeof(struct pipe_buffer));
}
+ account_pipe_buffers(pipe, pipe->buffers, nr_pages);
pipe->curbuf = 0;
kfree(pipe->bufs);
pipe->bufs = bufs;
@@ -1069,6 +1107,11 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
if (!capable(CAP_SYS_RESOURCE) && size > pipe_max_size) {
ret = -EPERM;
goto out;
+ } else if ((too_many_pipe_buffers_hard(pipe->user) ||
+ too_many_pipe_buffers_soft(pipe->user)) &&
+ !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN)) {
+ ret = -EPERM;
+ goto out;
}
ret = pipe_set_size(pipe, nr_pages);
break;
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 71ffc91060f6..85d16c67c33e 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -602,7 +602,8 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
pte_t *pte;
spinlock_t *ptl;
- if (pmd_trans_huge_lock(pmd, vma, &ptl)) {
+ ptl = pmd_trans_huge_lock(pmd, vma);
+ if (ptl) {
smaps_pmd_entry(pmd, addr, walk);
spin_unlock(ptl);
return 0;
@@ -913,7 +914,8 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
spinlock_t *ptl;
struct page *page;
- if (pmd_trans_huge_lock(pmd, vma, &ptl)) {
+ ptl = pmd_trans_huge_lock(pmd, vma);
+ if (ptl) {
if (cp->type == CLEAR_REFS_SOFT_DIRTY) {
clear_soft_dirty_pmd(vma, addr, pmd);
goto out;
@@ -1187,7 +1189,8 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
int err = 0;
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- if (pmd_trans_huge_lock(pmdp, vma, &ptl)) {
+ ptl = pmd_trans_huge_lock(pmdp, vma);
+ if (ptl) {
u64 flags = 0, frame = 0;
pmd_t pmd = *pmdp;
@@ -1519,7 +1522,8 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
pte_t *orig_pte;
pte_t *pte;
- if (pmd_trans_huge_lock(pmd, vma, &ptl)) {
+ ptl = pmd_trans_huge_lock(pmd, vma);
+ if (ptl) {
pte_t huge_pte = *(pte_t *)pmd;
struct page *page;
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 05db7473bcb5..c0306ec8ed7b 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -288,7 +288,7 @@ static int finish_unfinished(struct super_block *s)
pathrelse(&path);
inode = reiserfs_iget(s, &obj_key);
- if (!inode) {
+ if (IS_ERR_OR_NULL(inode)) {
/*
* the unlink almost completed, it just did not
* manage to remove "save" link and release objectid
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index e2536bb1c760..dc97eb21af07 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -984,8 +984,6 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
/*
* Values for di_flags
- * There should be a one-to-one correspondence between these flags and the
- * XFS_XFLAG_s.
*/
#define XFS_DIFLAG_REALTIME_BIT 0 /* file's blocks come from rt area */
#define XFS_DIFLAG_PREALLOC_BIT 1 /* file space has been preallocated */
@@ -1026,6 +1024,15 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
XFS_DIFLAG_EXTSZINHERIT | XFS_DIFLAG_NODEFRAG | XFS_DIFLAG_FILESTREAM)
/*
+ * Values for di_flags2 These start by being exposed to userspace in the upper
+ * 16 bits of the XFS_XFLAG_s range.
+ */
+#define XFS_DIFLAG2_DAX_BIT 0 /* use DAX for this inode */
+#define XFS_DIFLAG2_DAX (1 << XFS_DIFLAG2_DAX_BIT)
+
+#define XFS_DIFLAG2_ANY (XFS_DIFLAG2_DAX)
+
+/*
* Inode number format:
* low inopblog bits - offset in block
* next agblklog bits - block number in ag
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index b2b73a998d42..fffe3d01bd9f 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -36,40 +36,6 @@ struct dioattr {
#endif
/*
- * Structure for XFS_IOC_FSGETXATTR[A] and XFS_IOC_FSSETXATTR.
- */
-#ifndef HAVE_FSXATTR
-struct fsxattr {
- __u32 fsx_xflags; /* xflags field value (get/set) */
- __u32 fsx_extsize; /* extsize field value (get/set)*/
- __u32 fsx_nextents; /* nextents field value (get) */
- __u32 fsx_projid; /* project identifier (get/set) */
- unsigned char fsx_pad[12];
-};
-#endif
-
-/*
- * Flags for the bs_xflags/fsx_xflags field
- * There should be a one-to-one correspondence between these flags and the
- * XFS_DIFLAG_s.
- */
-#define XFS_XFLAG_REALTIME 0x00000001 /* data in realtime volume */
-#define XFS_XFLAG_PREALLOC 0x00000002 /* preallocated file extents */
-#define XFS_XFLAG_IMMUTABLE 0x00000008 /* file cannot be modified */
-#define XFS_XFLAG_APPEND 0x00000010 /* all writes append */
-#define XFS_XFLAG_SYNC 0x00000020 /* all writes synchronous */
-#define XFS_XFLAG_NOATIME 0x00000040 /* do not update access time */
-#define XFS_XFLAG_NODUMP 0x00000080 /* do not include in backups */
-#define XFS_XFLAG_RTINHERIT 0x00000100 /* create with rt bit set */
-#define XFS_XFLAG_PROJINHERIT 0x00000200 /* create with parents projid */
-#define XFS_XFLAG_NOSYMLINKS 0x00000400 /* disallow symlink creation */
-#define XFS_XFLAG_EXTSIZE 0x00000800 /* extent size allocator hint */
-#define XFS_XFLAG_EXTSZINHERIT 0x00001000 /* inherit inode extent size */
-#define XFS_XFLAG_NODEFRAG 0x00002000 /* do not defragment */
-#define XFS_XFLAG_FILESTREAM 0x00004000 /* use filestream allocator */
-#define XFS_XFLAG_HASATTR 0x80000000 /* no DIFLAG for this */
-
-/*
* Structure for XFS_IOC_GETBMAP.
* On input, fill in bmv_offset and bmv_length of the first structure
* to indicate the area of interest in the file, and bmv_entries with
@@ -514,8 +480,8 @@ typedef struct xfs_swapext
#define XFS_IOC_ALLOCSP _IOW ('X', 10, struct xfs_flock64)
#define XFS_IOC_FREESP _IOW ('X', 11, struct xfs_flock64)
#define XFS_IOC_DIOINFO _IOR ('X', 30, struct dioattr)
-#define XFS_IOC_FSGETXATTR _IOR ('X', 31, struct fsxattr)
-#define XFS_IOC_FSSETXATTR _IOW ('X', 32, struct fsxattr)
+#define XFS_IOC_FSGETXATTR FS_IOC_FSGETXATTR
+#define XFS_IOC_FSSETXATTR FS_IOC_FSSETXATTR
#define XFS_IOC_ALLOCSP64 _IOW ('X', 36, struct xfs_flock64)
#define XFS_IOC_FREESP64 _IOW ('X', 37, struct xfs_flock64)
#define XFS_IOC_GETBMAP _IOWR('X', 38, struct getbmap)
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index daed4bfb85b2..435c7de42e5f 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1527,6 +1527,16 @@ xfs_wait_buftarg(
LIST_HEAD(dispose);
int loop = 0;
+ /*
+ * We need to flush the buffer workqueue to ensure that all IO
+ * completion processing is 100% done. Just waiting on buffer locks is
+ * not sufficient for async IO as the reference count held over IO is
+ * not released until after the buffer lock is dropped. Hence we need to
+ * ensure here that all reference counts have been dropped before we
+ * start walking the LRU list.
+ */
+ drain_workqueue(btp->bt_mount->m_buf_workqueue);
+
/* loop until there is nothing left on the lru list. */
while (list_lru_count(&btp->bt_lru)) {
list_lru_walk(&btp->bt_lru, xfs_buftarg_wait_rele,
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index ae3758a90ed6..ceba1a83cacc 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -610,60 +610,69 @@ __xfs_iflock(
STATIC uint
_xfs_dic2xflags(
- __uint16_t di_flags)
+ __uint16_t di_flags,
+ uint64_t di_flags2,
+ bool has_attr)
{
uint flags = 0;
if (di_flags & XFS_DIFLAG_ANY) {
if (di_flags & XFS_DIFLAG_REALTIME)
- flags |= XFS_XFLAG_REALTIME;
+ flags |= FS_XFLAG_REALTIME;
if (di_flags & XFS_DIFLAG_PREALLOC)
- flags |= XFS_XFLAG_PREALLOC;
+ flags |= FS_XFLAG_PREALLOC;
if (di_flags & XFS_DIFLAG_IMMUTABLE)
- flags |= XFS_XFLAG_IMMUTABLE;
+ flags |= FS_XFLAG_IMMUTABLE;
if (di_flags & XFS_DIFLAG_APPEND)
- flags |= XFS_XFLAG_APPEND;
+ flags |= FS_XFLAG_APPEND;
if (di_flags & XFS_DIFLAG_SYNC)
- flags |= XFS_XFLAG_SYNC;
+ flags |= FS_XFLAG_SYNC;
if (di_flags & XFS_DIFLAG_NOATIME)
- flags |= XFS_XFLAG_NOATIME;
+ flags |= FS_XFLAG_NOATIME;
if (di_flags & XFS_DIFLAG_NODUMP)
- flags |= XFS_XFLAG_NODUMP;
+ flags |= FS_XFLAG_NODUMP;
if (di_flags & XFS_DIFLAG_RTINHERIT)
- flags |= XFS_XFLAG_RTINHERIT;
+ flags |= FS_XFLAG_RTINHERIT;
if (di_flags & XFS_DIFLAG_PROJINHERIT)
- flags |= XFS_XFLAG_PROJINHERIT;
+ flags |= FS_XFLAG_PROJINHERIT;
if (di_flags & XFS_DIFLAG_NOSYMLINKS)
- flags |= XFS_XFLAG_NOSYMLINKS;
+ flags |= FS_XFLAG_NOSYMLINKS;
if (di_flags & XFS_DIFLAG_EXTSIZE)
- flags |= XFS_XFLAG_EXTSIZE;
+ flags |= FS_XFLAG_EXTSIZE;
if (di_flags & XFS_DIFLAG_EXTSZINHERIT)
- flags |= XFS_XFLAG_EXTSZINHERIT;
+ flags |= FS_XFLAG_EXTSZINHERIT;
if (di_flags & XFS_DIFLAG_NODEFRAG)
- flags |= XFS_XFLAG_NODEFRAG;
+ flags |= FS_XFLAG_NODEFRAG;
if (di_flags & XFS_DIFLAG_FILESTREAM)
- flags |= XFS_XFLAG_FILESTREAM;
+ flags |= FS_XFLAG_FILESTREAM;
}
+ if (di_flags2 & XFS_DIFLAG2_ANY) {
+ if (di_flags2 & XFS_DIFLAG2_DAX)
+ flags |= FS_XFLAG_DAX;
+ }
+
+ if (has_attr)
+ flags |= FS_XFLAG_HASATTR;
+
return flags;
}
uint
xfs_ip2xflags(
- xfs_inode_t *ip)
+ struct xfs_inode *ip)
{
- xfs_icdinode_t *dic = &ip->i_d;
+ struct xfs_icdinode *dic = &ip->i_d;
- return _xfs_dic2xflags(dic->di_flags) |
- (XFS_IFORK_Q(ip) ? XFS_XFLAG_HASATTR : 0);
+ return _xfs_dic2xflags(dic->di_flags, dic->di_flags2, XFS_IFORK_Q(ip));
}
uint
xfs_dic2xflags(
- xfs_dinode_t *dip)
+ struct xfs_dinode *dip)
{
- return _xfs_dic2xflags(be16_to_cpu(dip->di_flags)) |
- (XFS_DFORK_Q(dip) ? XFS_XFLAG_HASATTR : 0);
+ return _xfs_dic2xflags(be16_to_cpu(dip->di_flags),
+ be64_to_cpu(dip->di_flags2), XFS_DFORK_Q(dip));
}
/*
@@ -862,7 +871,8 @@ xfs_ialloc(
case S_IFREG:
case S_IFDIR:
if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) {
- uint di_flags = 0;
+ uint64_t di_flags2 = 0;
+ uint di_flags = 0;
if (S_ISDIR(mode)) {
if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT)
@@ -898,7 +908,11 @@ xfs_ialloc(
di_flags |= XFS_DIFLAG_NODEFRAG;
if (pip->i_d.di_flags & XFS_DIFLAG_FILESTREAM)
di_flags |= XFS_DIFLAG_FILESTREAM;
+ if (pip->i_d.di_flags2 & XFS_DIFLAG2_DAX)
+ di_flags2 |= XFS_DIFLAG2_DAX;
+
ip->i_d.di_flags |= di_flags;
+ ip->i_d.di_flags2 |= di_flags2;
}
/* FALLTHROUGH */
case S_IFLNK:
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index d42738deec6d..478d04e07f95 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -859,25 +859,25 @@ xfs_merge_ioc_xflags(
unsigned int xflags = start;
if (flags & FS_IMMUTABLE_FL)
- xflags |= XFS_XFLAG_IMMUTABLE;
+ xflags |= FS_XFLAG_IMMUTABLE;
else
- xflags &= ~XFS_XFLAG_IMMUTABLE;
+ xflags &= ~FS_XFLAG_IMMUTABLE;
if (flags & FS_APPEND_FL)
- xflags |= XFS_XFLAG_APPEND;
+ xflags |= FS_XFLAG_APPEND;
else
- xflags &= ~XFS_XFLAG_APPEND;
+ xflags &= ~FS_XFLAG_APPEND;
if (flags & FS_SYNC_FL)
- xflags |= XFS_XFLAG_SYNC;
+ xflags |= FS_XFLAG_SYNC;
else
- xflags &= ~XFS_XFLAG_SYNC;
+ xflags &= ~FS_XFLAG_SYNC;
if (flags & FS_NOATIME_FL)
- xflags |= XFS_XFLAG_NOATIME;
+ xflags |= FS_XFLAG_NOATIME;
else
- xflags &= ~XFS_XFLAG_NOATIME;
+ xflags &= ~FS_XFLAG_NOATIME;
if (flags & FS_NODUMP_FL)
- xflags |= XFS_XFLAG_NODUMP;
+ xflags |= FS_XFLAG_NODUMP;
else
- xflags &= ~XFS_XFLAG_NODUMP;
+ xflags &= ~FS_XFLAG_NODUMP;
return xflags;
}
@@ -945,40 +945,51 @@ xfs_set_diflags(
unsigned int xflags)
{
unsigned int di_flags;
+ uint64_t di_flags2;
/* can't set PREALLOC this way, just preserve it */
di_flags = (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC);
- if (xflags & XFS_XFLAG_IMMUTABLE)
+ if (xflags & FS_XFLAG_IMMUTABLE)
di_flags |= XFS_DIFLAG_IMMUTABLE;
- if (xflags & XFS_XFLAG_APPEND)
+ if (xflags & FS_XFLAG_APPEND)
di_flags |= XFS_DIFLAG_APPEND;
- if (xflags & XFS_XFLAG_SYNC)
+ if (xflags & FS_XFLAG_SYNC)
di_flags |= XFS_DIFLAG_SYNC;
- if (xflags & XFS_XFLAG_NOATIME)
+ if (xflags & FS_XFLAG_NOATIME)
di_flags |= XFS_DIFLAG_NOATIME;
- if (xflags & XFS_XFLAG_NODUMP)
+ if (xflags & FS_XFLAG_NODUMP)
di_flags |= XFS_DIFLAG_NODUMP;
- if (xflags & XFS_XFLAG_NODEFRAG)
+ if (xflags & FS_XFLAG_NODEFRAG)
di_flags |= XFS_DIFLAG_NODEFRAG;
- if (xflags & XFS_XFLAG_FILESTREAM)
+ if (xflags & FS_XFLAG_FILESTREAM)
di_flags |= XFS_DIFLAG_FILESTREAM;
if (S_ISDIR(ip->i_d.di_mode)) {
- if (xflags & XFS_XFLAG_RTINHERIT)
+ if (xflags & FS_XFLAG_RTINHERIT)
di_flags |= XFS_DIFLAG_RTINHERIT;
- if (xflags & XFS_XFLAG_NOSYMLINKS)
+ if (xflags & FS_XFLAG_NOSYMLINKS)
di_flags |= XFS_DIFLAG_NOSYMLINKS;
- if (xflags & XFS_XFLAG_EXTSZINHERIT)
+ if (xflags & FS_XFLAG_EXTSZINHERIT)
di_flags |= XFS_DIFLAG_EXTSZINHERIT;
- if (xflags & XFS_XFLAG_PROJINHERIT)
+ if (xflags & FS_XFLAG_PROJINHERIT)
di_flags |= XFS_DIFLAG_PROJINHERIT;
} else if (S_ISREG(ip->i_d.di_mode)) {
- if (xflags & XFS_XFLAG_REALTIME)
+ if (xflags & FS_XFLAG_REALTIME)
di_flags |= XFS_DIFLAG_REALTIME;
- if (xflags & XFS_XFLAG_EXTSIZE)
+ if (xflags & FS_XFLAG_EXTSIZE)
di_flags |= XFS_DIFLAG_EXTSIZE;
}
-
ip->i_d.di_flags = di_flags;
+
+ /* diflags2 only valid for v3 inodes. */
+ if (ip->i_d.di_version < 3)
+ return;
+
+ di_flags2 = 0;
+ if (xflags & FS_XFLAG_DAX)
+ di_flags2 |= XFS_DIFLAG2_DAX;
+
+ ip->i_d.di_flags2 = di_flags2;
+
}
STATIC void
@@ -988,22 +999,27 @@ xfs_diflags_to_linux(
struct inode *inode = VFS_I(ip);
unsigned int xflags = xfs_ip2xflags(ip);
- if (xflags & XFS_XFLAG_IMMUTABLE)
+ if (xflags & FS_XFLAG_IMMUTABLE)
inode->i_flags |= S_IMMUTABLE;
else
inode->i_flags &= ~S_IMMUTABLE;
- if (xflags & XFS_XFLAG_APPEND)
+ if (xflags & FS_XFLAG_APPEND)
inode->i_flags |= S_APPEND;
else
inode->i_flags &= ~S_APPEND;
- if (xflags & XFS_XFLAG_SYNC)
+ if (xflags & FS_XFLAG_SYNC)
inode->i_flags |= S_SYNC;
else
inode->i_flags &= ~S_SYNC;
- if (xflags & XFS_XFLAG_NOATIME)
+ if (xflags & FS_XFLAG_NOATIME)
inode->i_flags |= S_NOATIME;
else
inode->i_flags &= ~S_NOATIME;
+ if (xflags & FS_XFLAG_DAX)
+ inode->i_flags |= S_DAX;
+ else
+ inode->i_flags &= ~S_DAX;
+
}
static int
@@ -1016,11 +1032,11 @@ xfs_ioctl_setattr_xflags(
/* Can't change realtime flag if any extents are allocated. */
if ((ip->i_d.di_nextents || ip->i_delayed_blks) &&
- XFS_IS_REALTIME_INODE(ip) != (fa->fsx_xflags & XFS_XFLAG_REALTIME))
+ XFS_IS_REALTIME_INODE(ip) != (fa->fsx_xflags & FS_XFLAG_REALTIME))
return -EINVAL;
/* If realtime flag is set then must have realtime device */
- if (fa->fsx_xflags & XFS_XFLAG_REALTIME) {
+ if (fa->fsx_xflags & FS_XFLAG_REALTIME) {
if (mp->m_sb.sb_rblocks == 0 || mp->m_sb.sb_rextsize == 0 ||
(ip->i_d.di_extsize % mp->m_sb.sb_rextsize))
return -EINVAL;
@@ -1031,7 +1047,7 @@ xfs_ioctl_setattr_xflags(
* we have appropriate permission.
*/
if (((ip->i_d.di_flags & (XFS_DIFLAG_IMMUTABLE | XFS_DIFLAG_APPEND)) ||
- (fa->fsx_xflags & (XFS_XFLAG_IMMUTABLE | XFS_XFLAG_APPEND))) &&
+ (fa->fsx_xflags & (FS_XFLAG_IMMUTABLE | FS_XFLAG_APPEND))) &&
!capable(CAP_LINUX_IMMUTABLE))
return -EPERM;
@@ -1095,8 +1111,8 @@ out_cancel:
* extent size hint validation is somewhat cumbersome. Rules are:
*
* 1. extent size hint is only valid for directories and regular files
- * 2. XFS_XFLAG_EXTSIZE is only valid for regular files
- * 3. XFS_XFLAG_EXTSZINHERIT is only valid for directories.
+ * 2. FS_XFLAG_EXTSIZE is only valid for regular files
+ * 3. FS_XFLAG_EXTSZINHERIT is only valid for directories.
* 4. can only be changed on regular files if no extents are allocated
* 5. can be changed on directories at any time
* 6. extsize hint of 0 turns off hints, clears inode flags.
@@ -1112,10 +1128,10 @@ xfs_ioctl_setattr_check_extsize(
{
struct xfs_mount *mp = ip->i_mount;
- if ((fa->fsx_xflags & XFS_XFLAG_EXTSIZE) && !S_ISREG(ip->i_d.di_mode))
+ if ((fa->fsx_xflags & FS_XFLAG_EXTSIZE) && !S_ISREG(ip->i_d.di_mode))
return -EINVAL;
- if ((fa->fsx_xflags & XFS_XFLAG_EXTSZINHERIT) &&
+ if ((fa->fsx_xflags & FS_XFLAG_EXTSZINHERIT) &&
!S_ISDIR(ip->i_d.di_mode))
return -EINVAL;
@@ -1132,7 +1148,7 @@ xfs_ioctl_setattr_check_extsize(
return -EINVAL;
if (XFS_IS_REALTIME_INODE(ip) ||
- (fa->fsx_xflags & XFS_XFLAG_REALTIME)) {
+ (fa->fsx_xflags & FS_XFLAG_REALTIME)) {
size = mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog;
} else {
size = mp->m_sb.sb_blocksize;
@@ -1143,7 +1159,7 @@ xfs_ioctl_setattr_check_extsize(
if (fa->fsx_extsize % size)
return -EINVAL;
} else
- fa->fsx_xflags &= ~(XFS_XFLAG_EXTSIZE | XFS_XFLAG_EXTSZINHERIT);
+ fa->fsx_xflags &= ~(FS_XFLAG_EXTSIZE | FS_XFLAG_EXTSZINHERIT);
return 0;
}
@@ -1168,7 +1184,7 @@ xfs_ioctl_setattr_check_projid(
if (xfs_get_projid(ip) != fa->fsx_projid)
return -EINVAL;
- if ((fa->fsx_xflags & XFS_XFLAG_PROJINHERIT) !=
+ if ((fa->fsx_xflags & FS_XFLAG_PROJINHERIT) !=
(ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT))
return -EINVAL;
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 06eafafe636e..76b71a1c6c32 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -1205,8 +1205,8 @@ xfs_diflags_to_iflags(
inode->i_flags |= S_SYNC;
if (flags & XFS_DIFLAG_NOATIME)
inode->i_flags |= S_NOATIME;
- /* XXX: Also needs an on-disk per inode flag! */
- if (ip->i_mount->m_flags & XFS_MOUNT_DAX)
+ if (ip->i_mount->m_flags & XFS_MOUNT_DAX ||
+ ip->i_d.di_flags2 & XFS_DIFLAG2_DAX)
inode->i_flags |= S_DAX;
}
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index aa67339b9537..4f18fd92ca13 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -497,7 +497,6 @@ xfsaild(
long tout = 0; /* milliseconds */
current->flags |= PF_MEMALLOC;
- set_freezable();
while (!kthread_should_stop()) {
if (tout && tout <= 20)
OpenPOWER on IntegriCloud