diff options
Diffstat (limited to 'fs')
-rw-r--r-- | fs/block_dev.c | 86 | ||||
-rw-r--r-- | fs/btrfs/backref.c | 2 | ||||
-rw-r--r-- | fs/btrfs/send.c | 77 | ||||
-rw-r--r-- | fs/btrfs/transaction.c | 32 | ||||
-rw-r--r-- | fs/btrfs/transaction.h | 3 | ||||
-rw-r--r-- | fs/io_uring.c | 3 |
6 files changed, 109 insertions, 94 deletions
diff --git a/fs/block_dev.c b/fs/block_dev.c index c2a85b587922..a6f7c892cb4a 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -439,6 +439,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) ret = -EAGAIN; goto error; } + ret = dio->size; if (polled) WRITE_ONCE(iocb->ki_cookie, qc); @@ -465,7 +466,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) ret = -EAGAIN; goto error; } - ret += bio->bi_iter.bi_size; + ret = dio->size; bio = bio_alloc(gfp, nr_pages); if (!bio) { @@ -1181,8 +1182,7 @@ static struct gendisk *bdev_get_gendisk(struct block_device *bdev, int *partno) * Pointer to the block device containing @bdev on success, ERR_PTR() * value on failure. */ -static struct block_device *bd_start_claiming(struct block_device *bdev, - void *holder) +struct block_device *bd_start_claiming(struct block_device *bdev, void *holder) { struct gendisk *disk; struct block_device *whole; @@ -1229,6 +1229,62 @@ static struct block_device *bd_start_claiming(struct block_device *bdev, return ERR_PTR(err); } } +EXPORT_SYMBOL(bd_start_claiming); + +static void bd_clear_claiming(struct block_device *whole, void *holder) +{ + lockdep_assert_held(&bdev_lock); + /* tell others that we're done */ + BUG_ON(whole->bd_claiming != holder); + whole->bd_claiming = NULL; + wake_up_bit(&whole->bd_claiming, 0); +} + +/** + * bd_finish_claiming - finish claiming of a block device + * @bdev: block device of interest + * @whole: whole block device (returned from bd_start_claiming()) + * @holder: holder that has claimed @bdev + * + * Finish exclusive open of a block device. Mark the device as exlusively + * open by the holder and wake up all waiters for exclusive open to finish. + */ +void bd_finish_claiming(struct block_device *bdev, struct block_device *whole, + void *holder) +{ + spin_lock(&bdev_lock); + BUG_ON(!bd_may_claim(bdev, whole, holder)); + /* + * Note that for a whole device bd_holders will be incremented twice, + * and bd_holder will be set to bd_may_claim before being set to holder + */ + whole->bd_holders++; + whole->bd_holder = bd_may_claim; + bdev->bd_holders++; + bdev->bd_holder = holder; + bd_clear_claiming(whole, holder); + spin_unlock(&bdev_lock); +} +EXPORT_SYMBOL(bd_finish_claiming); + +/** + * bd_abort_claiming - abort claiming of a block device + * @bdev: block device of interest + * @whole: whole block device (returned from bd_start_claiming()) + * @holder: holder that has claimed @bdev + * + * Abort claiming of a block device when the exclusive open failed. This can be + * also used when exclusive open is not actually desired and we just needed + * to block other exclusive openers for a while. + */ +void bd_abort_claiming(struct block_device *bdev, struct block_device *whole, + void *holder) +{ + spin_lock(&bdev_lock); + bd_clear_claiming(whole, holder); + spin_unlock(&bdev_lock); +} +EXPORT_SYMBOL(bd_abort_claiming); #ifdef CONFIG_SYSFS struct bd_holder_disk { @@ -1698,29 +1754,7 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder) /* finish claiming */ mutex_lock(&bdev->bd_mutex); - spin_lock(&bdev_lock); - - if (!res) { - BUG_ON(!bd_may_claim(bdev, whole, holder)); - /* - * Note that for a whole device bd_holders - * will be incremented twice, and bd_holder - * will be set to bd_may_claim before being - * set to holder - */ - whole->bd_holders++; - whole->bd_holder = bd_may_claim; - bdev->bd_holders++; - bdev->bd_holder = holder; - } - - /* tell others that we're done */ - BUG_ON(whole->bd_claiming != holder); - whole->bd_claiming = NULL; - wake_up_bit(&whole->bd_claiming, 0); - - spin_unlock(&bdev_lock); - + bd_finish_claiming(bdev, whole, holder); /* * Block event polling for write claims if requested. Any * write holder makes the write_holder state stick until diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 89116afda7a2..e5d85311d5d5 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -1483,7 +1483,7 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr, ulist_init(roots); ulist_init(tmp); - trans = btrfs_attach_transaction(root); + trans = btrfs_join_transaction_nostart(root); if (IS_ERR(trans)) { if (PTR_ERR(trans) != -ENOENT && PTR_ERR(trans) != -EROFS) { ret = PTR_ERR(trans); diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 69b59bf75882..c3c0c064c25d 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -6322,68 +6322,21 @@ static int changed_extent(struct send_ctx *sctx, { int ret = 0; - if (sctx->cur_ino != sctx->cmp_key->objectid) { - - if (result == BTRFS_COMPARE_TREE_CHANGED) { - struct extent_buffer *leaf_l; - struct extent_buffer *leaf_r; - struct btrfs_file_extent_item *ei_l; - struct btrfs_file_extent_item *ei_r; - - leaf_l = sctx->left_path->nodes[0]; - leaf_r = sctx->right_path->nodes[0]; - ei_l = btrfs_item_ptr(leaf_l, - sctx->left_path->slots[0], - struct btrfs_file_extent_item); - ei_r = btrfs_item_ptr(leaf_r, - sctx->right_path->slots[0], - struct btrfs_file_extent_item); - - /* - * We may have found an extent item that has changed - * only its disk_bytenr field and the corresponding - * inode item was not updated. This case happens due to - * very specific timings during relocation when a leaf - * that contains file extent items is COWed while - * relocation is ongoing and its in the stage where it - * updates data pointers. So when this happens we can - * safely ignore it since we know it's the same extent, - * but just at different logical and physical locations - * (when an extent is fully replaced with a new one, we - * know the generation number must have changed too, - * since snapshot creation implies committing the current - * transaction, and the inode item must have been updated - * as well). - * This replacement of the disk_bytenr happens at - * relocation.c:replace_file_extents() through - * relocation.c:btrfs_reloc_cow_block(). - */ - if (btrfs_file_extent_generation(leaf_l, ei_l) == - btrfs_file_extent_generation(leaf_r, ei_r) && - btrfs_file_extent_ram_bytes(leaf_l, ei_l) == - btrfs_file_extent_ram_bytes(leaf_r, ei_r) && - btrfs_file_extent_compression(leaf_l, ei_l) == - btrfs_file_extent_compression(leaf_r, ei_r) && - btrfs_file_extent_encryption(leaf_l, ei_l) == - btrfs_file_extent_encryption(leaf_r, ei_r) && - btrfs_file_extent_other_encoding(leaf_l, ei_l) == - btrfs_file_extent_other_encoding(leaf_r, ei_r) && - btrfs_file_extent_type(leaf_l, ei_l) == - btrfs_file_extent_type(leaf_r, ei_r) && - btrfs_file_extent_disk_bytenr(leaf_l, ei_l) != - btrfs_file_extent_disk_bytenr(leaf_r, ei_r) && - btrfs_file_extent_disk_num_bytes(leaf_l, ei_l) == - btrfs_file_extent_disk_num_bytes(leaf_r, ei_r) && - btrfs_file_extent_offset(leaf_l, ei_l) == - btrfs_file_extent_offset(leaf_r, ei_r) && - btrfs_file_extent_num_bytes(leaf_l, ei_l) == - btrfs_file_extent_num_bytes(leaf_r, ei_r)) - return 0; - } - - inconsistent_snapshot_error(sctx, result, "extent"); - return -EIO; - } + /* + * We have found an extent item that changed without the inode item + * having changed. This can happen either after relocation (where the + * disk_bytenr of an extent item is replaced at + * relocation.c:replace_file_extents()) or after deduplication into a + * file in both the parent and send snapshots (where an extent item can + * get modified or replaced with a new one). Note that deduplication + * updates the inode item, but it only changes the iversion (sequence + * field in the inode item) of the inode, so if a file is deduplicated + * the same amount of times in both the parent and send snapshots, its + * iversion becames the same in both snapshots, whence the inode item is + * the same on both snapshots. + */ + if (sctx->cur_ino != sctx->cmp_key->objectid) + return 0; if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) { if (result != BTRFS_COMPARE_TREE_DELETED) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 3b8ae1a8f02d..e3adb714c04b 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -28,15 +28,18 @@ static const unsigned int btrfs_blocked_trans_types[TRANS_STATE_MAX] = { [TRANS_STATE_COMMIT_START] = (__TRANS_START | __TRANS_ATTACH), [TRANS_STATE_COMMIT_DOING] = (__TRANS_START | __TRANS_ATTACH | - __TRANS_JOIN), + __TRANS_JOIN | + __TRANS_JOIN_NOSTART), [TRANS_STATE_UNBLOCKED] = (__TRANS_START | __TRANS_ATTACH | __TRANS_JOIN | - __TRANS_JOIN_NOLOCK), + __TRANS_JOIN_NOLOCK | + __TRANS_JOIN_NOSTART), [TRANS_STATE_COMPLETED] = (__TRANS_START | __TRANS_ATTACH | __TRANS_JOIN | - __TRANS_JOIN_NOLOCK), + __TRANS_JOIN_NOLOCK | + __TRANS_JOIN_NOSTART), }; void btrfs_put_transaction(struct btrfs_transaction *transaction) @@ -543,7 +546,8 @@ again: ret = join_transaction(fs_info, type); if (ret == -EBUSY) { wait_current_trans(fs_info); - if (unlikely(type == TRANS_ATTACH)) + if (unlikely(type == TRANS_ATTACH || + type == TRANS_JOIN_NOSTART)) ret = -ENOENT; } } while (ret == -EBUSY); @@ -660,6 +664,16 @@ struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root } /* + * Similar to regular join but it never starts a transaction when none is + * running or after waiting for the current one to finish. + */ +struct btrfs_trans_handle *btrfs_join_transaction_nostart(struct btrfs_root *root) +{ + return start_transaction(root, 0, TRANS_JOIN_NOSTART, + BTRFS_RESERVE_NO_FLUSH, true); +} + +/* * btrfs_attach_transaction() - catch the running transaction * * It is used when we want to commit the current the transaction, but @@ -2037,6 +2051,16 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) } } else { spin_unlock(&fs_info->trans_lock); + /* + * The previous transaction was aborted and was already removed + * from the list of transactions at fs_info->trans_list. So we + * abort to prevent writing a new superblock that reflects a + * corrupt state (pointing to trees with unwritten nodes/leafs). + */ + if (test_bit(BTRFS_FS_STATE_TRANS_ABORTED, &fs_info->fs_state)) { + ret = -EROFS; + goto cleanup_transaction; + } } extwriter_counter_dec(cur_trans, trans->type); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 527ea94b57d9..2c5a6f6e5bb0 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -94,11 +94,13 @@ struct btrfs_transaction { #define __TRANS_JOIN (1U << 11) #define __TRANS_JOIN_NOLOCK (1U << 12) #define __TRANS_DUMMY (1U << 13) +#define __TRANS_JOIN_NOSTART (1U << 14) #define TRANS_START (__TRANS_START | __TRANS_FREEZABLE) #define TRANS_ATTACH (__TRANS_ATTACH) #define TRANS_JOIN (__TRANS_JOIN | __TRANS_FREEZABLE) #define TRANS_JOIN_NOLOCK (__TRANS_JOIN_NOLOCK) +#define TRANS_JOIN_NOSTART (__TRANS_JOIN_NOSTART) #define TRANS_EXTWRITERS (__TRANS_START | __TRANS_ATTACH) @@ -183,6 +185,7 @@ struct btrfs_trans_handle *btrfs_start_transaction_fallback_global_rsv( int min_factor); struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root); struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root); +struct btrfs_trans_handle *btrfs_join_transaction_nostart(struct btrfs_root *root); struct btrfs_trans_handle *btrfs_attach_transaction(struct btrfs_root *root); struct btrfs_trans_handle *btrfs_attach_transaction_barrier( struct btrfs_root *root); diff --git a/fs/io_uring.c b/fs/io_uring.c index 012bc0efb9d3..d542f1cf4428 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1838,6 +1838,7 @@ restart: do { struct sqe_submit *s = &req->submit; const struct io_uring_sqe *sqe = s->sqe; + unsigned int flags = req->flags; /* Ensure we clear previously set non-block flag */ req->rw.ki_flags &= ~IOCB_NOWAIT; @@ -1883,7 +1884,7 @@ restart: kfree(sqe); /* req from defer and link list needn't decrease async cnt */ - if (req->flags & (REQ_F_IO_DRAINED | REQ_F_LINK_DONE)) + if (flags & (REQ_F_IO_DRAINED | REQ_F_LINK_DONE)) goto out; if (!async_list) |